4.2.2. Receptor Binding Pocket Descriptor

The script gen_kuvek_bp_descr.py generates and outputs a binding pocket shape/electrostatics descriptor for a given receptor structure according to the procedure devised by Kuvek et al.

Synopsis

python gen_kuvek_bp_descr.py [-h] -i <file> -o <file> -c <float> <float> <float> [-r <float>] [-x <float> <float>] [-y <float> <float>] [-z <float> <float>] [-s <res-id> [<res-id> …]] [-n <int>] [-t] [-p] [-q]

Mandatory options

-i <file>

Receptor structure input file (.mol2, .pdb, .mmtf, .cif, .mmcif)

-o <file>

Descriptor output file

-c <float> <float> <float>

Other options

-h, --help

Show help message and exit

-n <integer>

Number of intersection test vectors (default: 492)

-r <float>

Probe sphere radius (default: 20.0)

-s <res-id> [<res-id> …]

Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. The individual components are optional and the whole string is interpreted as a regular expression that gets matched against the residue id of each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, _300$ -> remove all residues with sequ. number 300

-x <float> <float>

Allowed test vector x coordinate range. If a test vector is outside of this range then the associated descr. element will not be output (default: [-1.0, 1.0])

-y <float> <float>

Allowed test vector y coordinate range. If a test vector is outside of this range then the associated descr. element will not be output (default: [-1.0, 1.0])

-z <float> <float>

Allowed test vector z coordinate range. If a test vector is outside of this range then the associated descr. element will not be output (default: [-1.0, 1.0])

-t

Output test vector x, y and z coordinates for each descriptor element (default: false)

-p

Output test vector atom intersection point x, y and z coordinates for each descriptor element (default: false)

-q

Disable progress output (default: false)

Code

  1import sys
  2import argparse
  3import re
  4
  5import CDPL.Chem as Chem
  6import CDPL.Biomol as Biomol
  7import CDPL.ForceField as ForceField
  8import CDPL.Descr as Descr
  9import CDPL.Math as Math
 10
 11
 12# reads and preprocesses the specified receptor structure
 13def readAndPrepareReceptorStructure(args: argparse.Namespace) -> Chem.Molecule:
 14    sup_fmts = [ Chem.DataFormat.MOL2,
 15                 Biomol.DataFormat.PDB,
 16                 Biomol.DataFormat.MMTF,
 17                 Biomol.DataFormat.MMCIF ]
 18
 19    # create reader for receptor structure (format specified by file extension)
 20    reader = Chem.MoleculeReader(args.receptor_file) 
 21                           
 22    if reader.getDataFormat() not in sup_fmts:   # check if the format is supported by this script
 23        sys.exit('Error: receptor input file format not supported')
 24    
 25    rec_mol = Chem.BasicMolecule()    # create an instance of the default implementation of the
 26                                      # Chem.Molecule interface that will store the receptor struct.
 27    try:
 28        if not reader.read(rec_mol):  # read receptor structure
 29            sys.exit('Error: reading receptor structure failed')
 30
 31    except Exception as e:
 32        sys.exit(f'Error: reading receptor structure failed:\n{str(e)}')            
 33
 34    # preprocess the receptor structure (removal of residues and
 35    # calculation of properties required by the pharm. generation procedure)
 36    try:
 37        rem_atoms = False
 38
 39        # delete atoms belonging to residues that should be stripped
 40        if args.strip_res_list:            
 41            atoms_to_rem = Chem.Fragment() # will store the atoms to delete
 42            res_to_strip_ptns = { re.compile(tlc, re.IGNORECASE) for tlc in args.strip_res_list }
 43
 44            # identify and note atoms belonging to the stripped residues
 45            for atom in rec_mol.atoms:
 46                # compose identifier of the residue the atom belongs to
 47                atom_res_id = f'{Biomol.getChainID(atom).upper()}_{Biomol.getResidueCode(atom).upper()}_{Biomol.getResidueSequenceNumber(atom)}'
 48
 49                # check if atom belongs to an excluded residue
 50                for res_ptn in res_to_strip_ptns:
 51                    if res_ptn.search(atom_res_id):
 52                        atoms_to_rem.addAtom(atom)
 53                        break
 54
 55            if atoms_to_rem.numAtoms > 0:
 56                rec_mol -= atoms_to_rem    # delete atoms from the receptor structure
 57                rem_atoms = True
 58
 59                if not args.quiet:
 60                    print(f'! Removed {atoms_to_rem.numAtoms} atom(s) from the receptor structure')
 61
 62        # calculate required receptor structure properties
 63        # (force recalculation if already present and atoms were removed)
 64        Chem.perceiveSSSR(rec_mol, rem_atoms)
 65        Chem.setRingFlags(rec_mol, rem_atoms)
 66        Chem.calcImplicitHydrogenCounts(rec_mol, rem_atoms)
 67        Chem.perceiveHybridizationStates(rec_mol, rem_atoms)
 68        Chem.setAromaticityFlags(rec_mol, rem_atoms)
 69
 70        if Chem.makeHydrogenComplete(rec_mol):                    # make implicit hydrogens (if any) explicit
 71            Chem.calcHydrogen3DCoordinates(rec_mol)               # calculate 3D coordinates for the added expl. hydrogens
 72            Biomol.setHydrogenResidueSequenceInfo(rec_mol, False) # set residue information for the added expl. hydrogens
 73
 74        # calc. MMFF94 partial charges
 75        ForceField.perceiveMMFF94AromaticRings(rec_mol, False)        # perceive aromatic rings according to the MMFF94 aroamticity model and store data as Chem.MolecularGraph property
 76        ForceField.assignMMFF94AtomTypes(rec_mol, False, False)       # perceive MMFF94 atom types (tolerant mode) set corresponding property for all atoms
 77        ForceField.assignMMFF94BondTypeIndices(rec_mol, False, False) # perceive MMFF94 bond types (tolerant mode) set corresponding property for all bonds
 78        ForceField.calcMMFF94AtomCharges(rec_mol, False, False)       # calculate MMFF94 atom charges (tolerant mode) set corresponding property for all atoms
 79
 80    except Exception as e:
 81        sys.exit(f'Error: processing of receptor structure failed:\n{str(e)}')            
 82
 83    return rec_mol
 84   
 85def parseArgs() -> argparse.Namespace:
 86    parser = argparse.ArgumentParser(description='Generates a binding pocket shape/electrostatics descriptor for a given receptor '\
 87                                    'structure according to the procedure devised by Kuvek et al.')
 88
 89    parser.add_argument('-i',
 90                        dest='receptor_file',
 91                        required=True,
 92                        metavar='<file>',
 93                        help='Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)')
 94    parser.add_argument('-o',
 95                        dest='out_file',
 96                        required=True,
 97                        metavar='<file>',
 98                        help='Descriptor output file')
 99    parser.add_argument('-s',
100                        dest='strip_res_list',
101                        required=False,
102                        metavar='<res-id>',
103                        nargs='+',
104                        help='Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). '\
105                        'Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. '\
106                        'The individual components are optional and the whole string is interpreted '\
107                        'as a regular expression that gets matched against the residue id of '\
108                        'each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, '\
109                        '_300$ -> remove all residues with sequ. number 300')
110    parser.add_argument('-c',
111                        dest='sphere_ctr',
112                        required=True,
113                        metavar='<float>',
114                        nargs=3,
115                        type=float,
116                        help='Whitespace separated list of probe sphere center x, y and z coordinates')
117    parser.add_argument('-r',
118                        dest='sphere_rad',
119                        required=False,
120                        metavar='<float>',
121                        type=float,
122                        default=20.0,
123                        help='Probe sphere radius (default: 20.0)')
124    parser.add_argument('-n',
125                        dest='num_test_vecs',
126                        required=False,
127                        metavar='<integer>',
128                        type=int,
129                        default=492,
130                        help='Number of intersection test vectors (default: 492)')
131    parser.add_argument('-t',
132                        dest='write_test_vecs',
133                        required=False,
134                        action='store_true',
135                        default=False,
136                        help='Output test vector x, y and z coordinates for each descriptor element (default: false)')
137    parser.add_argument('-p',
138                        dest='write_inters_pts',
139                        required=False,
140                        action='store_true',
141                        default=False,
142                        help='Output test vector atom intersection point x, y and z coordinates for each descriptor element (default: false)')
143    parser.add_argument('-x',
144                        dest='test_vec_x_range',
145                        required=False,
146                        metavar='<float>',
147                        nargs=2,
148                        type=float,
149                        help='Allowed test vector x coordinate range. If a test vector is outside of '\
150                        'this range then the associated descr. element will not be output (default: [-1.0, 1.0])')
151    parser.add_argument('-y',
152                        dest='test_vec_y_range',
153                        required=False,
154                        metavar='<float>',
155                        nargs=2,
156                        type=float,
157                        help='Allowed test vector y coordinate range. If a test vector is outside of '\
158                        'this range then the associated descr. element will not be output (default: [-1.0, 1.0])')
159    parser.add_argument('-z',
160                        dest='test_vec_z_range',
161                        required=False,
162                        metavar='<float>',
163                        nargs=2,
164                        type=float,
165                        help='Allowed test vector z coordinate range. If a test vector is outside of '\
166                        'this range then the associated descr. element will not be output (default: [-1.0, 1.0])')
167    parser.add_argument('-q',
168                        dest='quiet',
169                        required=False,
170                        action='store_true',
171                        default=False,
172                        help='Disable progress output (default: false)')
173
174    return parser.parse_args()
175
176def main() -> None:
177    args = parseArgs()
178
179    # read and preprocess the receptor structure
180    rec_mol = readAndPrepareReceptorStructure(args)
181
182    # create and initialize an instance of the class Descr.KuvekPocketDescriptorCalculator which
183    # will perform the descriptor generation
184    descr_calc = Descr.KuvekPocketDescriptorCalculator()
185
186    descr_calc.sphereRadius = args.sphere_rad       # apply the -r argument
187    descr_calc.numTestVectors = args.num_test_vecs  # apply the -n argument
188
189    # modify the atom charge function to retrieve MMFF94 partial charges
190    descr_calc.atomChargeFunction = ForceField.getMMFF94Charge
191
192    # vector that will store the binding pocket descriptor values
193    descr = Math.DVector()
194
195    try:
196        # calculate descriptor
197        descr_calc.calculate(args.sphere_ctr, rec_mol, descr)
198
199        # output descriptor vector elements
200        with open(args.out_file, 'w') as out_file:
201            for i in range(descr_calc.numTestVectors):
202                tv = descr_calc.getTestVector(i)
203
204                if args.test_vec_x_range and (tv(0) < args.test_vec_x_range[0] or tv(0) > args.test_vec_x_range[1]):
205                    continue
206
207                if args.test_vec_y_range and (tv(1) < args.test_vec_y_range[0] or tv(1) > args.test_vec_y_range[1]):
208                    continue
209
210                if args.test_vec_z_range and (tv(2) < args.test_vec_z_range[0] or tv(2) > args.test_vec_z_range[1]):
211                    continue
212                
213                out_file.write(f'{descr(i * 2):.4f} {descr(i * 2 + 1):.4f}')
214                
215                if args.write_test_vecs:
216                    out_file.write(f' {tv(0):.4f} {tv(1):.4f} {tv(2):.4f}')
217
218                if args.write_inters_pts:
219                    ip = tv * descr(i * 2)
220                    out_file.write(f' {ip(0):.4f} {ip(1):.4f} {ip(2):.4f}')
221                    
222                out_file.write('\n')
223         
224    except Exception as e:    # handle exception raised in case of severe errors
225        sys.exit(f'Error: descriptor calculation failed:\n{str(e)}')
226   
227    if not args.quiet:
228        print('Done!')
229
230    sys.exit(0)
231        
232if __name__ == '__main__':
233    main()

Download source file