5.3. Torsion Driving
The script tor_drive.py generates a conformer ensemble for each molecule read from the specified input file by performing torsion driving on the provided 3D structure and writes the resulting ensembles to the desired output file.
Synopsis
python tor_drive.py [-h] -i <file> -o <file> [-f <SMARTS>] [-e <float>] [-n <int>] [-a] [-r] [-q]
Mandatory options
- -i <file>
Molecule input file
- -o <file>
Conformer ensemble output file
Other options
- -h, --help
Show help message and exit
- -f <SMARTS>
SMARTS pattern describing substructures that shall be kept fixed during torsion driving
- -e <float>
Output conformer energy window (default: 20.0)
- -n <int>
Max. output ensemble size (default: 100; if <= 0 -> no limit)
- -a
Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)
- -r
Consider single bonds to terminal hetero atoms (= N, O, or S) as rotatable (default: false)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.ConfGen as ConfGen
6import CDPL.Util as Util
7import CDPL.Math as Math
8
9
10# generates a conformer ensemble by performing torsion driving on the argument molecule
11# using the provided ConfGen.TorsionDriver instance
12def performTorsionDriving(mol: Chem.Molecule, tor_driver: ConfGen.TorsionDriver, sub_srch: Chem.SubstructureSearch, args: argparse.Namespace) -> (int, int):
13 # check if a fixed substructure pattern has been specified and if there are matches
14 if sub_srch != None and sub_srch.findMappings(mol):
15 if not args.quiet:
16 print(' -> Found %s fixed substructure pattern match(es)' % str(sub_srch.numMappings))
17
18 fixed_substruct = Chem.Fragment() # will store atoms and bonds of the matched substructure(s)
19
20 # extract all bonds (and associated atoms) of the substructure(s) matched by the pattern
21 for mapping in sub_srch:
22 for bond in mapping.bondMapping.getValues():
23 fixed_substruct.addBond(bond)
24
25 # bond mask for the torsion driver specifying rotatable bonds
26 rot_bonds = Util.BitSet(mol.numBonds)
27
28 # in case of a fixed substructure, rotatable bonds have to be identified manually
29 for bond in mol.bonds:
30 if fixed_substruct.containsBond(bond): # if bond is part of the fixed substruct. -> ignore
31 continue
32
33 if ConfGen.isRotatableBond(bond, mol, args.rot_term_het_grps):
34 rot_bonds.set(bond.getIndex())
35
36 status = tor_driver.setup(mol, rot_bonds) # setup torsion driver
37
38 else: # without a fixed substructure, let the torsion driver identify rotatable bonds
39 if sub_srch != None and not args.quiet:
40 print(' -> No fixed substructure pattern matches found!')
41
42 fixed_substruct = None
43 status = tor_driver.setup(mol) # setup torsion driver
44
45 # check if the torsion driver setup was successful
46 if status != ConfGen.ReturnCode.SUCCESS:
47 return (status, 0)
48
49 input_conf = Math.Vector3DArray()
50
51 # get input conformation
52 Chem.get3DCoordinates(mol, input_conf, Chem.Atom3DCoordinatesFunctor())
53
54 # use input conformation for tor. driving
55 tor_driver.addInputCoordinates(input_conf)
56
57 # perform torsion driving
58 status = tor_driver.generateConformers()
59
60 # check if torsion driving was successful
61 if status != ConfGen.ReturnCode.SUCCESS:
62 return (status, 0)
63
64 num_confs = tor_driver.numConformers # get num gen. conformers
65
66 if args.max_confs > 0:
67 num_confs = min(args.max_confs, num_confs) # clamp to spec. max. num output confs
68
69 output_confs = []
70
71 # fetch generated confs. up to max. count
72 for i in range(num_confs):
73 output_confs.append(tor_driver.getConformer(i))
74
75 # if desired, align output conformers
76 if args.align_confs:
77 if fixed_substruct != None:
78 # align on fixed substructure
79 alignConformers(fixed_substruct, input_conf, output_confs)
80 else:
81 # align on whole molecule
82 alignConformers(mol, input_conf, output_confs)
83
84 # erase existing conformers (if any)
85 Chem.clearConformations(mol)
86
87 # transfer output confs. one-by-one to molecule
88 for conf in output_confs:
89 Chem.addConformation(mol, conf)
90
91 # return status code and the number of generated conformers
92 return (status, num_confs)
93
94# aligns a set of conformers on the heavy atoms of a given reference structure (fixed substructure
95# or input molecule)
96def alignConformers(ref_struct: Chem.AtomContainer, ref_conf: Math.Vector3DArray, confs: []) -> None:
97 # first, try to use only the heavy atoms of the reference structure for alignment
98 ref_atom_inds = [atom.index for atom in ref_struct.atoms if Chem.getType(atom) != Chem.AtomType.H]
99 num_ref_atoms = len(ref_atom_inds)
100
101 # if num. heavy atoms < 3 (3 atoms are required for a defined orientation) use all atoms
102 if num_ref_atoms < 3:
103 ref_atom_inds = [atom.index for atom in ref_struct.atoms]
104 num_ref_atoms = len(ref_atom_inds)
105
106 if num_ref_atoms < 1: # in this case, an alignment is not possible
107 return
108
109 # create instance of the class implementing Kabsch's alignment algorithm
110 kabsch_algo = Math.DKabschAlgorithm()
111
112 ref_coords = Math.DMatrix(3, num_ref_atoms) # matrix storing the reference 3D coordinates
113 algnd_coords = Math.DMatrix(3, num_ref_atoms) # matrix storing the 3D coordinates to align
114
115 # initialize matrix holding the reference coordinates (stored as columns)
116 for i in range(num_ref_atoms):
117 coords = ref_conf[ref_atom_inds[i]]
118 ref_coords[0, i] = coords[0]
119 ref_coords[1, i] = coords[1]
120 ref_coords[2, i] = coords[2]
121
122 for algnd_conf in confs: # for each conformer to align
123 # fetch coordinates of the atoms on which to align from the current conformer
124 for j in range(num_ref_atoms):
125 coords = algnd_conf[ref_atom_inds[j]]
126 algnd_coords[0, j] = coords[0]
127 algnd_coords[1, j] = coords[1]
128 algnd_coords[2, j] = coords[2]
129
130 # calculate alignment transformation and, if calc. was successful (should always be the case),
131 # apply the transformation to all atom positions assoc. with the current conformation
132 if kabsch_algo.align(algnd_coords, ref_coords):
133 Math.transform(algnd_conf, Math.Matrix4D(kabsch_algo.transform))
134
135def main() -> None:
136 args = parseArgs()
137
138 # create reader for input molecules (format specified by file extension)
139 reader = Chem.MoleculeReader(args.in_file)
140
141 # create writer for the generated conformer ensembles (format specified by file extension)
142 writer = Chem.MolecularGraphWriter(args.out_file)
143
144 # parse the SMARTS pattern describing fixed substructures, if specified
145 if args.fix_ptn:
146 try:
147 sub_srch_ptn = Chem.parseSMARTS(args.fix_ptn)
148 except Exception as e:
149 sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))
150
151 # create and initialize an instance of the class Chem.SubstructureSearch that
152 # implements the substructure searching algorithm
153 sub_srch = Chem.SubstructureSearch(sub_srch_ptn)
154 sub_srch.uniqueMappings = True # report only mappings that differ in matched atoms/bonds
155 else:
156 sub_srch = None
157
158 # create and initialize an instance of the class ConfGen.TorsionDriver which
159 # will perform conformer generation by enumerating energetically favorable
160 # torsion angle combinations
161 tor_driver = ConfGen.TorsionDriver()
162
163 tor_driver.settings.energyWindow = args.e_window # apply the -e argument
164 tor_driver.settings.sampleHetAtomHydrogens = args.rot_term_het_grps # apply the -r argument
165 tor_driver.energyOrdered = True # order generated confs. by energy
166 tor_driver.strictForceFieldParam = False # be tolerant
167
168 # dictionary mapping status codes to human readable strings
169 status_to_str = { ConfGen.ReturnCode.UNINITIALIZED : 'uninitialized',
170 ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED : 'force field setup failed' }
171
172 # create an instance of the default implementation of the Chem.Molecule interface
173 mol = Chem.BasicMolecule()
174 i = 1
175
176 # read and process molecules one after the other until the end of input has been reached
177 try:
178 while reader.read(mol):
179 # compose a simple molecule identifier
180 mol_id = Chem.getName(mol).strip()
181
182 if mol_id == '':
183 mol_id = '#' + str(i) # fallback if name is empty
184 else:
185 mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
186
187 if not args.quiet:
188 print('- Generating conformers for molecule %s...' % mol_id)
189
190 try:
191 # prepare the molecule for torsion driving
192 ConfGen.prepareForConformerGeneration(mol)
193
194 # check if all atoms have 3D coordinates, print an error message if not
195 if not Chem.hasCoordinates(mol, 3):
196 if args.quiet:
197 print('Error: torsion driving on molecule %s failed: no atom 3D coordinates provided' % mol_id)
198 else:
199 print(' -> Torsion driving failed: no atom 3D coordinates provided')
200 continue
201
202 # generate conformer ensemble for read molecule
203 status, num_confs = performTorsionDriving(mol, tor_driver, sub_srch, args)
204
205 # check for severe error reported by status code
206 if status != ConfGen.ReturnCode.SUCCESS:
207 if args.quiet:
208 print('Error: torsion driving on molecule %s failed: %s' % (mol_id, status_to_str[status]))
209 else:
210 print(' -> Torsion driving failed: %s' % status_to_str[status])
211
212 elif not args.quiet: # arrives here only if no severe error occurred
213 print(' -> Generated %s conformer(s)' % str(num_confs))
214
215 # output generated ensemble (if available)
216 if num_confs > 0:
217 if not writer.write(mol):
218 sys.exit('Error: output of generated conformers for molecule %s failed' % mol_id)
219
220 except Exception as e:
221 sys.exit('Error: torsion driving or output of generated conformers for molecule %s failed: %s' % (mol_id, str(e)))
222
223 i += 1
224
225 except Exception as e: # handle exception raised in case of severe read errors
226 sys.exit('Error: reading molecule failed: ' + str(e))
227
228 writer.close()
229 sys.exit(0)
230
231def parseArgs() -> argparse.Namespace:
232 parser = argparse.ArgumentParser(description='Generates conformer ensembles for the given input molecules by performing torsion driving on the provided 3D structure.')
233
234 parser.add_argument('-i',
235 dest='in_file',
236 required=True,
237 metavar='<file>',
238 help='Molecule input file')
239 parser.add_argument('-o',
240 dest='out_file',
241 required=True,
242 metavar='<file>',
243 help='Conformer ensemble output file')
244 parser.add_argument('-f',
245 dest='fix_ptn',
246 required=False,
247 metavar='<SMARTS>',
248 help='SMARTS pattern describing substructures that shall be kept fixed during torsion driving')
249 parser.add_argument('-e',
250 dest='e_window',
251 required=False,
252 metavar='<float>',
253 type=float,
254 default=20.0,
255 help='Output conformer energy window (default: 20.0)')
256 parser.add_argument('-n',
257 dest='max_confs',
258 required=False,
259 metavar='<int>',
260 type=int,
261 default=0,
262 help='Max. output ensemble size (default: 100; if <= 0 -> no limit)')
263 parser.add_argument('-a',
264 dest='align_confs',
265 required=False,
266 action='store_true',
267 default=False,
268 help='Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)')
269 parser.add_argument('-r',
270 dest='rot_term_het_grps',
271 required=False,
272 action='store_true',
273 default=False,
274 help='Consider single bonds to terminal hetero atoms (= N, O, or S) as rotatable (default: false)')
275 parser.add_argument('-q',
276 dest='quiet',
277 required=False,
278 action='store_true',
279 default=False,
280 help='Disable progress output (default: false)')
281
282 return parser.parse_args()
283
284if __name__ == '__main__':
285 main()