4.2.2. Receptor Binding Pocket Descriptor
The script gen_kuvek_bp_descr.py generates and outputs a binding pocket shape/electrostatics descriptor for a given receptor structure according to the procedure devised by Kuvek et al.
Synopsis
python gen_kuvek_bp_descr.py [-h] -i <file> -o <file> -c <float> <float> <float> [-r <float>] [-x <float> <float>] [-y <float> <float>] [-z <float> <float>] [-s <res-id> [<res-id> …]] [-n <int>] [-t] [-p] [-q]
Mandatory options
- -i <file>
Receptor structure input file (.mol2, .pdb, .mmtf, .cif, .mmcif)
- -o <file>
Descriptor output file
-c <float> <float> <float>
Other options
- -h, --help
Show help message and exit
- -n <integer>
Number of intersection test vectors (default: 492)
- -r <float>
Probe sphere radius (default: 20.0)
-s <res-id> [<res-id> …]
Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. The individual components are optional and the whole string is interpreted as a regular expression that gets matched against the residue id of each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, _300$ -> remove all residues with sequ. number 300
-x <float> <float>
Allowed test vector x coordinate range. If a test vector is outside of this range then the associated descr. element will not be output (default: [-1.0, 1.0])
-y <float> <float>
Allowed test vector y coordinate range. If a test vector is outside of this range then the associated descr. element will not be output (default: [-1.0, 1.0])
-z <float> <float>
Allowed test vector z coordinate range. If a test vector is outside of this range then the associated descr. element will not be output (default: [-1.0, 1.0])
- -t
Output test vector x, y and z coordinates for each descriptor element (default: false)
- -p
Output test vector atom intersection point x, y and z coordinates for each descriptor element (default: false)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3import re
4
5import CDPL.Chem as Chem
6import CDPL.Biomol as Biomol
7import CDPL.ForceField as ForceField
8import CDPL.Descr as Descr
9import CDPL.Math as Math
10
11
12# reads and preprocesses the specified receptor structure
13def readAndPrepareReceptorStructure(args: argparse.Namespace) -> Chem.Molecule:
14 sup_fmts = [ Chem.DataFormat.MOL2,
15 Biomol.DataFormat.PDB,
16 Biomol.DataFormat.MMTF,
17 Biomol.DataFormat.MMCIF ]
18
19 # create reader for receptor structure (format specified by file extension)
20 reader = Chem.MoleculeReader(args.receptor_file)
21
22 if reader.getDataFormat() not in sup_fmts: # check if the format is supported by this script
23 sys.exit('Error: receptor input file format not supported')
24
25 rec_mol = Chem.BasicMolecule() # create an instance of the default implementation of the
26 # Chem.Molecule interface that will store the receptor struct.
27 try:
28 if not reader.read(rec_mol): # read receptor structure
29 sys.exit('Error: reading receptor structure failed')
30
31 except Exception as e:
32 sys.exit(f'Error: reading receptor structure failed:\n{str(e)}')
33
34 # preprocess the receptor structure (removal of residues and
35 # calculation of properties required by the pharm. generation procedure)
36 try:
37 rem_atoms = False
38
39 # delete atoms belonging to residues that should be stripped
40 if args.strip_res_list:
41 atoms_to_rem = Chem.Fragment() # will store the atoms to delete
42 res_to_strip_ptns = { re.compile(tlc, re.IGNORECASE) for tlc in args.strip_res_list }
43
44 # identify and note atoms belonging to the stripped residues
45 for atom in rec_mol.atoms:
46 # compose identifier of the residue the atom belongs to
47 atom_res_id = f'{Biomol.getChainID(atom).upper()}_{Biomol.getResidueCode(atom).upper()}_{Biomol.getResidueSequenceNumber(atom)}'
48
49 # check if atom belongs to an excluded residue
50 for res_ptn in res_to_strip_ptns:
51 if res_ptn.search(atom_res_id):
52 atoms_to_rem.addAtom(atom)
53 break
54
55 if atoms_to_rem.numAtoms > 0:
56 rec_mol -= atoms_to_rem # delete atoms from the receptor structure
57 rem_atoms = True
58
59 if not args.quiet:
60 print(f'! Removed {atoms_to_rem.numAtoms} atom(s) from the receptor structure')
61
62 # calculate required receptor structure properties
63 # (force recalculation if already present and atoms were removed)
64 Chem.perceiveSSSR(rec_mol, rem_atoms)
65 Chem.setRingFlags(rec_mol, rem_atoms)
66 Chem.calcImplicitHydrogenCounts(rec_mol, rem_atoms)
67 Chem.perceiveHybridizationStates(rec_mol, rem_atoms)
68 Chem.setAromaticityFlags(rec_mol, rem_atoms)
69
70 if Chem.makeHydrogenComplete(rec_mol): # make implicit hydrogens (if any) explicit
71 Chem.calcHydrogen3DCoordinates(rec_mol) # calculate 3D coordinates for the added expl. hydrogens
72 Biomol.setHydrogenResidueSequenceInfo(rec_mol, False) # set residue information for the added expl. hydrogens
73
74 # calc. MMFF94 partial charges
75 ForceField.perceiveMMFF94AromaticRings(rec_mol, False) # perceive aromatic rings according to the MMFF94 aroamticity model and store data as Chem.MolecularGraph property
76 ForceField.assignMMFF94AtomTypes(rec_mol, False, False) # perceive MMFF94 atom types (tolerant mode) set corresponding property for all atoms
77 ForceField.assignMMFF94BondTypeIndices(rec_mol, False, False) # perceive MMFF94 bond types (tolerant mode) set corresponding property for all bonds
78 ForceField.calcMMFF94AtomCharges(rec_mol, False, False) # calculate MMFF94 atom charges (tolerant mode) set corresponding property for all atoms
79
80 except Exception as e:
81 sys.exit(f'Error: processing of receptor structure failed:\n{str(e)}')
82
83 return rec_mol
84
85def parseArgs() -> argparse.Namespace:
86 parser = argparse.ArgumentParser(description='Generates a binding pocket shape/electrostatics descriptor for a given receptor '\
87 'structure according to the procedure devised by Kuvek et al.')
88
89 parser.add_argument('-i',
90 dest='receptor_file',
91 required=True,
92 metavar='<file>',
93 help='Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)')
94 parser.add_argument('-o',
95 dest='out_file',
96 required=True,
97 metavar='<file>',
98 help='Descriptor output file')
99 parser.add_argument('-s',
100 dest='strip_res_list',
101 required=False,
102 metavar='<res-id>',
103 nargs='+',
104 help='Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). '\
105 'Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. '\
106 'The individual components are optional and the whole string is interpreted '\
107 'as a regular expression that gets matched against the residue id of '\
108 'each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, '\
109 '_300$ -> remove all residues with sequ. number 300')
110 parser.add_argument('-c',
111 dest='sphere_ctr',
112 required=True,
113 metavar='<float>',
114 nargs=3,
115 type=float,
116 help='Whitespace separated list of probe sphere center x, y and z coordinates')
117 parser.add_argument('-r',
118 dest='sphere_rad',
119 required=False,
120 metavar='<float>',
121 type=float,
122 default=20.0,
123 help='Probe sphere radius (default: 20.0)')
124 parser.add_argument('-n',
125 dest='num_test_vecs',
126 required=False,
127 metavar='<integer>',
128 type=int,
129 default=492,
130 help='Number of intersection test vectors (default: 492)')
131 parser.add_argument('-t',
132 dest='write_test_vecs',
133 required=False,
134 action='store_true',
135 default=False,
136 help='Output test vector x, y and z coordinates for each descriptor element (default: false)')
137 parser.add_argument('-p',
138 dest='write_inters_pts',
139 required=False,
140 action='store_true',
141 default=False,
142 help='Output test vector atom intersection point x, y and z coordinates for each descriptor element (default: false)')
143 parser.add_argument('-x',
144 dest='test_vec_x_range',
145 required=False,
146 metavar='<float>',
147 nargs=2,
148 type=float,
149 help='Allowed test vector x coordinate range. If a test vector is outside of '\
150 'this range then the associated descr. element will not be output (default: [-1.0, 1.0])')
151 parser.add_argument('-y',
152 dest='test_vec_y_range',
153 required=False,
154 metavar='<float>',
155 nargs=2,
156 type=float,
157 help='Allowed test vector y coordinate range. If a test vector is outside of '\
158 'this range then the associated descr. element will not be output (default: [-1.0, 1.0])')
159 parser.add_argument('-z',
160 dest='test_vec_z_range',
161 required=False,
162 metavar='<float>',
163 nargs=2,
164 type=float,
165 help='Allowed test vector z coordinate range. If a test vector is outside of '\
166 'this range then the associated descr. element will not be output (default: [-1.0, 1.0])')
167 parser.add_argument('-q',
168 dest='quiet',
169 required=False,
170 action='store_true',
171 default=False,
172 help='Disable progress output (default: false)')
173
174 return parser.parse_args()
175
176def main() -> None:
177 args = parseArgs()
178
179 # read and preprocess the receptor structure
180 rec_mol = readAndPrepareReceptorStructure(args)
181
182 # create and initialize an instance of the class Descr.KuvekPocketDescriptorCalculator which
183 # will perform the descriptor generation
184 descr_calc = Descr.KuvekPocketDescriptorCalculator()
185
186 descr_calc.sphereRadius = args.sphere_rad # apply the -r argument
187 descr_calc.numTestVectors = args.num_test_vecs # apply the -n argument
188
189 # modify the atom charge function to retrieve MMFF94 partial charges
190 descr_calc.atomChargeFunction = ForceField.getMMFF94Charge
191
192 # vector that will store the binding pocket descriptor values
193 descr = Math.DVector()
194
195 try:
196 # calculate descriptor
197 descr_calc.calculate(args.sphere_ctr, rec_mol, descr)
198
199 # output descriptor vector elements
200 with open(args.out_file, 'w') as out_file:
201 for i in range(descr_calc.numTestVectors):
202 tv = descr_calc.getTestVector(i)
203
204 if args.test_vec_x_range and (tv(0) < args.test_vec_x_range[0] or tv(0) > args.test_vec_x_range[1]):
205 continue
206
207 if args.test_vec_y_range and (tv(1) < args.test_vec_y_range[0] or tv(1) > args.test_vec_y_range[1]):
208 continue
209
210 if args.test_vec_z_range and (tv(2) < args.test_vec_z_range[0] or tv(2) > args.test_vec_z_range[1]):
211 continue
212
213 out_file.write(f'{descr(i * 2):.4f} {descr(i * 2 + 1):.4f}')
214
215 if args.write_test_vecs:
216 out_file.write(f' {tv(0):.4f} {tv(1):.4f} {tv(2):.4f}')
217
218 if args.write_inters_pts:
219 ip = tv * descr(i * 2)
220 out_file.write(f' {ip(0):.4f} {ip(1):.4f} {ip(2):.4f}')
221
222 out_file.write('\n')
223
224 except Exception as e: # handle exception raised in case of severe errors
225 sys.exit(f'Error: descriptor calculation failed:\n{str(e)}')
226
227 if not args.quiet:
228 print('Done!')
229
230 sys.exit(0)
231
232if __name__ == '__main__':
233 main()