5.3. Torsion Driving
The script tor_drive.py generates a conformer ensemble for each molecule read from the specified input file by performing torsion driving on the provided 3D structure and writes the resulting ensembles to the desired output file.
Synopsis
python tor_drive.py [-h] -i <file> -o <file> [-f <SMARTS>] [-e <float>] [-n <int>] [-a] [-r] [-q]
Mandatory options
- -i <file>
Molecule input file
- -o <file>
Conformer ensemble output file
Other options
- -h, --help
Show help message and exit
- -f <SMARTS>
SMARTS pattern describing substructures that shall be kept fixed during torsion driving
- -e <float>
Output conformer energy window (default: 20.0)
- -n <int>
Max. output ensemble size (default: 100; if <= 0 -> no limit)
- -a
Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)
- -r
Consider single bonds to terminal hetero atoms (= N, O, or S) as rotatable (default: false)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.ConfGen as ConfGen
6import CDPL.Util as Util
7import CDPL.Math as Math
8
9
10# generates a conformer ensemble by performing torsion driving on the argument molecule
11# using the provided ConfGen.TorsionDriver instance
12def performTorsionDriving(mol: Chem.Molecule, tor_driver: ConfGen.TorsionDriver, sub_srch: Chem.SubstructureSearch, args: argparse.Namespace) -> (int, int):
13 # check if a fixed substructure pattern has been specified and if there are matches
14 if sub_srch != None and sub_srch.findMappings(mol):
15 if not args.quiet:
16 print(' -> Found %s fixed substructure pattern match(es)' % str(sub_srch.numMappings))
17
18 fixed_substruct = Chem.Fragment() # will store atoms and bonds of the matched substructure(s)
19
20 # extract all bonds (and associated atoms) of the substructure(s) matched by the pattern
21 for mapping in sub_srch:
22 for bond in mapping.bondMapping.getValues():
23 fixed_substruct.addBond(bond)
24
25 # bond mask for the torsion driver specifying rotatable bonds
26 rot_bonds = Util.BitSet(mol.numBonds)
27
28 # in case of a fixed substructure, rotatable bonds have to be identified manually
29 for bond in mol.bonds:
30 if fixed_substruct.containsBond(bond): # if bond is part of the fixed substruct. -> ignore
31 continue
32
33 if ConfGen.isRotatableBond(bond, mol, args.rot_term_het_grps):
34 rot_bonds.set(bond.getIndex())
35
36 status = tor_driver.setup(mol, rot_bonds) # setup torsion driver
37
38 else: # without a fixed substructure, let the torsion driver identify rotatable bonds
39 if sub_srch != None and not args.quiet:
40 print(' -> No fixed substructure pattern matches found!')
41
42 fixed_substruct = None
43 status = tor_driver.setup(mol) # setup torsion driver
44
45 # check if the torsion driver setup was successful
46 if status != ConfGen.ReturnCode.SUCCESS:
47 return (status, 0)
48
49 input_conf = Math.Vector3DArray()
50
51 # get input conformation
52 Chem.get3DCoordinates(mol, input_conf, Chem.Atom3DCoordinatesFunctor())
53
54 # use input conformation for tor. driving
55 tor_driver.addInputCoordinates(input_conf)
56
57 # perform torsion driving
58 status = tor_driver.generateConformers()
59
60 # check if torsion driving was successful
61 if status != ConfGen.ReturnCode.SUCCESS:
62 return (status, 0)
63
64 num_confs = tor_driver.numConformers # get num gen. conformers
65
66 if args.max_confs > 0:
67 num_confs = min(args.max_confs, num_confs) # clamp to spec. max. num output confs
68
69 output_confs = []
70
71 # fetch generated confs. up to max. count
72 for i in range(num_confs):
73 output_confs.append(tor_driver.getConformer(i))
74
75 # if desired, align output conformers
76 if args.align_confs:
77 if fixed_substruct != None:
78 # align on fixed substructure
79 alignConformers(fixed_substruct, input_conf, output_confs)
80 else:
81 # align on whole molecule
82 alignConformers(mol, input_conf, output_confs)
83
84 # erase existing conformers (if any)
85 Chem.clearConformations(mol)
86
87 # transfer output confs. one-by-one to molecule
88 for conf in output_confs:
89 Chem.addConformation(mol, conf)
90
91 # return status code and the number of generated conformers
92 return (status, num_confs)
93
94# aligns a set of conformers on the heavy atoms of a given reference structure (fixed substructure
95# or input molecule)
96def alignConformers(ref_struct: Chem.AtomContainer, ref_conf: Math.Vector3DArray, confs: []) -> None:
97 # first, try to use only the heavy atoms of the reference structure for alignment
98 ref_atom_inds = [atom.index for atom in ref_struct.atoms if Chem.getType(atom) != Chem.AtomType.H]
99 num_ref_atoms = len(ref_atom_inds)
100
101 # if num. heavy atoms < 3 (3 atoms are required for a defined orientation) use all atoms
102 if num_ref_atoms < 3:
103 ref_atom_inds = [atom.index for atom in ref_struct.atoms]
104 num_ref_atoms = len(ref_atom_inds)
105
106 if num_ref_atoms < 1: # in this case, an alignment is not possible
107 return
108
109 # create instance of the class implementing Kabsch's alignment algorithm
110 kabsch_algo = Math.DKabschAlgorithm()
111
112 ref_coords = Math.DMatrix(3, num_ref_atoms) # matrix storing the reference 3D coordinates
113 algnd_coords = Math.DMatrix(3, num_ref_atoms) # matrix storing the 3D coordinates to align
114
115 # initialize matrix holding the reference coordinates (stored as columns)
116 for i in range(num_ref_atoms):
117 coords = ref_conf[ref_atom_inds[i]]
118 ref_coords[0, i] = coords[0]
119 ref_coords[1, i] = coords[1]
120 ref_coords[2, i] = coords[2]
121
122 for algnd_conf in confs: # for each conformer to align
123 # fetch coordinates of the atoms on which to align from the current conformer
124 for j in range(num_ref_atoms):
125 coords = algnd_conf[ref_atom_inds[j]]
126 algnd_coords[0, j] = coords[0]
127 algnd_coords[1, j] = coords[1]
128 algnd_coords[2, j] = coords[2]
129
130 # calculate alignment transformation and, if calc. was successful (should always be the case),
131 # apply the transformation to all atom positions assoc. with the current conformation
132 if kabsch_algo.align(algnd_coords, ref_coords):
133 Math.transform(algnd_conf, Math.Matrix4D(kabsch_algo.transform))
134
135def parseArgs() -> argparse.Namespace:
136 parser = argparse.ArgumentParser(description='Generates conformer ensembles for the given input molecules by performing torsion driving on the provided 3D structure.')
137
138 parser.add_argument('-i',
139 dest='in_file',
140 required=True,
141 metavar='<file>',
142 help='Molecule input file')
143 parser.add_argument('-o',
144 dest='out_file',
145 required=True,
146 metavar='<file>',
147 help='Conformer ensemble output file')
148 parser.add_argument('-f',
149 dest='fix_ptn',
150 required=False,
151 metavar='<SMARTS>',
152 help='SMARTS pattern describing substructures that shall be kept fixed during torsion driving')
153 parser.add_argument('-e',
154 dest='e_window',
155 required=False,
156 metavar='<float>',
157 type=float,
158 default=20.0,
159 help='Output conformer energy window (default: 20.0)')
160 parser.add_argument('-n',
161 dest='max_confs',
162 required=False,
163 metavar='<int>',
164 type=int,
165 default=0,
166 help='Max. output ensemble size (default: 100; if <= 0 -> no limit)')
167 parser.add_argument('-a',
168 dest='align_confs',
169 required=False,
170 action='store_true',
171 default=False,
172 help='Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)')
173 parser.add_argument('-r',
174 dest='rot_term_het_grps',
175 required=False,
176 action='store_true',
177 default=False,
178 help='Consider single bonds to terminal hetero atoms (= N, O, or S) as rotatable (default: false)')
179 parser.add_argument('-q',
180 dest='quiet',
181 required=False,
182 action='store_true',
183 default=False,
184 help='Disable progress output (default: false)')
185
186 return parser.parse_args()
187
188def main() -> None:
189 args = parseArgs()
190
191 # create reader for input molecules (format specified by file extension)
192 reader = Chem.MoleculeReader(args.in_file)
193
194 # create writer for the generated conformer ensembles (format specified by file extension)
195 writer = Chem.MolecularGraphWriter(args.out_file)
196
197 # parse the SMARTS pattern describing fixed substructures, if specified
198 if args.fix_ptn:
199 try:
200 sub_srch_ptn = Chem.parseSMARTS(args.fix_ptn)
201 except Exception as e:
202 sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))
203
204 # create and initialize an instance of the class Chem.SubstructureSearch that
205 # implements the substructure searching algorithm
206 sub_srch = Chem.SubstructureSearch(sub_srch_ptn)
207 sub_srch.uniqueMappings = True # report only mappings that differ in matched atoms/bonds
208 else:
209 sub_srch = None
210
211 # create and initialize an instance of the class ConfGen.TorsionDriver which
212 # will perform conformer generation by enumerating energetically favorable
213 # torsion angle combinations
214 tor_driver = ConfGen.TorsionDriver()
215
216 tor_driver.settings.energyWindow = args.e_window # apply the -e argument
217 tor_driver.settings.sampleHetAtomHydrogens = args.rot_term_het_grps # apply the -r argument
218 tor_driver.energyOrdered = True # order generated confs. by energy
219 tor_driver.strictForceFieldParam = False # be tolerant
220
221 # dictionary mapping status codes to human readable strings
222 status_to_str = { ConfGen.ReturnCode.UNINITIALIZED : 'uninitialized',
223 ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED : 'force field setup failed' }
224
225 # create an instance of the default implementation of the Chem.Molecule interface
226 mol = Chem.BasicMolecule()
227 i = 1
228
229 # read and process molecules one after the other until the end of input has been reached
230 try:
231 while reader.read(mol):
232 # compose a simple molecule identifier
233 mol_id = Chem.getName(mol).strip()
234
235 if mol_id == '':
236 mol_id = '#' + str(i) # fallback if name is empty
237 else:
238 mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
239
240 if not args.quiet:
241 print('- Generating conformers for molecule %s...' % mol_id)
242
243 try:
244 # prepare the molecule for torsion driving
245 ConfGen.prepareForConformerGeneration(mol)
246
247 # check if all atoms have 3D coordinates, print an error message if not
248 if not Chem.hasCoordinates(mol, 3):
249 if args.quiet:
250 print('Error: torsion driving on molecule %s failed: no atom 3D coordinates provided' % mol_id)
251 else:
252 print(' -> Torsion driving failed: no atom 3D coordinates provided')
253 continue
254
255 # generate conformer ensemble for read molecule
256 status, num_confs = performTorsionDriving(mol, tor_driver, sub_srch, args)
257
258 # check for severe error reported by status code
259 if status != ConfGen.ReturnCode.SUCCESS:
260 if args.quiet:
261 print('Error: torsion driving on molecule %s failed: %s' % (mol_id, status_to_str[status]))
262 else:
263 print(' -> Torsion driving failed: %s' % status_to_str[status])
264
265 elif not args.quiet: # arrives here only if no severe error occurred
266 print(' -> Generated %s conformer(s)' % str(num_confs))
267
268 # output generated ensemble (if available)
269 if num_confs > 0:
270 if not writer.write(mol):
271 sys.exit('Error: output of generated conformers for molecule %s failed' % mol_id)
272
273 except Exception as e:
274 sys.exit('Error: torsion driving or output of generated conformers for molecule %s failed: %s' % (mol_id, str(e)))
275
276 i += 1
277
278 except Exception as e: # handle exception raised in case of severe read errors
279 sys.exit('Error: reading molecule failed: ' + str(e))
280
281 writer.close()
282 sys.exit(0)
283
284if __name__ == '__main__':
285 main()