5.3. Torsion Driving

The script tor_drive.py generates a conformer ensemble for each molecule read from the specified input file by performing torsion driving on the provided 3D structure and writes the resulting ensembles to the desired output file.

Synopsis

python tor_drive.py [-h] -i <file> -o <file> [-f <SMARTS>] [-e <float>] [-n <int>] [-a] [-r] [-q]

Mandatory options

-i <file>

Molecule input file

-o <file>

Conformer ensemble output file

Other options

-h, --help

Show help message and exit

-f <SMARTS>

SMARTS pattern describing substructures that shall be kept fixed during torsion driving

-e <float>

Output conformer energy window (default: 20.0)

-n <int>

Max. output ensemble size (default: 100; if <= 0 -> no limit)

-a

Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)

-r

Consider single bonds to terminal hetero atoms (= N, O, or S) as rotatable (default: false)

-q

Disable progress output (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.ConfGen as ConfGen
  6import CDPL.Util as Util
  7import CDPL.Math as Math
  8
  9
 10# generates a conformer ensemble by performing torsion driving on the argument molecule
 11# using the provided ConfGen.TorsionDriver instance
 12def performTorsionDriving(mol: Chem.Molecule, tor_driver: ConfGen.TorsionDriver, sub_srch: Chem.SubstructureSearch, args: argparse.Namespace) -> (int, int):
 13    # check if a fixed substructure pattern has been specified and if there are matches
 14    if sub_srch != None and sub_srch.findMappings(mol): 
 15        if not args.quiet:
 16            print(' -> Found %s fixed substructure pattern match(es)' % str(sub_srch.numMappings))
 17            
 18        fixed_substruct = Chem.Fragment()  # will store atoms and bonds of the matched substructure(s)
 19
 20        # extract all bonds (and associated atoms) of the substructure(s) matched by the pattern
 21        for mapping in sub_srch:
 22            for bond in mapping.bondMapping.getValues():
 23                fixed_substruct.addBond(bond)
 24
 25        # bond mask for the torsion driver specifying rotatable bonds
 26        rot_bonds = Util.BitSet(mol.numBonds)
 27
 28        # in case of a fixed substructure, rotatable bonds have to be identified manually
 29        for bond in mol.bonds:
 30            if fixed_substruct.containsBond(bond): # if bond is part of the fixed substruct. -> ignore
 31                continue
 32            
 33            if ConfGen.isRotatableBond(bond, mol, args.rot_term_het_grps):
 34                rot_bonds.set(bond.getIndex())
 35
 36        status = tor_driver.setup(mol, rot_bonds)  # setup torsion driver
 37
 38    else: # without a fixed substructure, let the torsion driver identify rotatable bonds
 39        if sub_srch != None and not args.quiet:
 40            print(' -> No fixed substructure pattern matches found!')
 41
 42        fixed_substruct = None
 43        status = tor_driver.setup(mol)             # setup torsion driver
 44
 45    # check if the torsion driver setup was successful
 46    if status != ConfGen.ReturnCode.SUCCESS: 
 47        return (status, 0) 
 48
 49    input_conf = Math.Vector3DArray()
 50
 51    # get input conformation
 52    Chem.get3DCoordinates(mol, input_conf, Chem.Atom3DCoordinatesFunctor()) 
 53
 54    # use input conformation for tor. driving
 55    tor_driver.addInputCoordinates(input_conf) 
 56
 57    # perform torsion driving
 58    status = tor_driver.generateConformers() 
 59
 60    # check if torsion driving was successful
 61    if status != ConfGen.ReturnCode.SUCCESS: 
 62         return (status, 0)
 63     
 64    num_confs = tor_driver.numConformers  # get num gen. conformers
 65    
 66    if args.max_confs > 0:              
 67        num_confs = min(args.max_confs, num_confs) # clamp to spec. max. num output confs
 68
 69    output_confs = []
 70
 71    # fetch generated confs. up to max. count
 72    for i in range(num_confs): 
 73        output_confs.append(tor_driver.getConformer(i)) 
 74
 75    # if desired, align output conformers
 76    if args.align_confs:
 77        if fixed_substruct != None:
 78            # align on fixed substructure
 79            alignConformers(fixed_substruct, input_conf, output_confs)
 80        else:
 81            # align on whole molecule
 82            alignConformers(mol, input_conf, output_confs)
 83            
 84    # erase existing conformers (if any)
 85    Chem.clearConformations(mol)
 86
 87    # transfer output confs. one-by-one to molecule
 88    for conf in output_confs:  
 89        Chem.addConformation(mol, conf)
 90    
 91    # return status code and the number of generated conformers
 92    return (status, num_confs)
 93
 94# aligns a set of conformers on the heavy atoms of a given reference structure (fixed substructure
 95# or input molecule)
 96def alignConformers(ref_struct: Chem.AtomContainer, ref_conf: Math.Vector3DArray, confs: []) -> None:
 97    # first, try to use only the heavy atoms of the reference structure for alignment
 98    ref_atom_inds = [atom.index for atom in ref_struct.atoms if Chem.getType(atom) != Chem.AtomType.H] 
 99    num_ref_atoms = len(ref_atom_inds)
100
101    # if num. heavy atoms < 3 (3 atoms are required for a defined orientation) use all atoms
102    if num_ref_atoms < 3: 
103        ref_atom_inds = [atom.index for atom in ref_struct.atoms] 
104        num_ref_atoms = len(ref_atom_inds)
105
106    if num_ref_atoms < 1: # in this case, an alignment is not possible
107        return
108        
109    # create instance of the class implementing Kabsch's alignment algorithm
110    kabsch_algo = Math.DKabschAlgorithm()  
111
112    ref_coords = Math.DMatrix(3, num_ref_atoms)   # matrix storing the reference 3D coordinates
113    algnd_coords = Math.DMatrix(3, num_ref_atoms) # matrix storing the 3D coordinates to align
114
115    # initialize matrix holding the reference coordinates (stored as columns)
116    for i in range(num_ref_atoms):
117        coords = ref_conf[ref_atom_inds[i]]
118        ref_coords[0, i] = coords[0]
119        ref_coords[1, i] = coords[1]
120        ref_coords[2, i] = coords[2]
121            
122    for algnd_conf in confs:   # for each conformer to align
123        # fetch coordinates of the atoms on which to align from the current conformer
124        for j in range(num_ref_atoms):
125            coords = algnd_conf[ref_atom_inds[j]]
126            algnd_coords[0, j] = coords[0]
127            algnd_coords[1, j] = coords[1]
128            algnd_coords[2, j] = coords[2]
129
130        # calculate alignment transformation and, if calc. was successful (should always be the case),
131        # apply the transformation to all atom positions assoc. with the current conformation
132        if kabsch_algo.align(algnd_coords, ref_coords):
133            Math.transform(algnd_conf, Math.Matrix4D(kabsch_algo.transform))
134        
135def parseArgs() -> argparse.Namespace:
136    parser = argparse.ArgumentParser(description='Generates conformer ensembles for the given input molecules by performing torsion driving on the provided 3D structure.')
137
138    parser.add_argument('-i',
139                        dest='in_file',
140                        required=True,
141                        metavar='<file>',
142                        help='Molecule input file')
143    parser.add_argument('-o',
144                        dest='out_file',
145                        required=True,
146                        metavar='<file>',
147                        help='Conformer ensemble output file')
148    parser.add_argument('-f',
149                        dest='fix_ptn',
150                        required=False,
151                        metavar='<SMARTS>',
152                        help='SMARTS pattern describing substructures that shall be kept fixed during torsion driving')
153    parser.add_argument('-e',
154                        dest='e_window',
155                        required=False,
156                        metavar='<float>',
157                        type=float,
158                        default=20.0,
159                        help='Output conformer energy window (default: 20.0)')
160    parser.add_argument('-n',
161                        dest='max_confs',
162                        required=False,
163                        metavar='<int>',
164                        type=int,
165                        default=0,
166                        help='Max. output ensemble size (default: 100; if <= 0 -> no limit)')
167    parser.add_argument('-a',
168                        dest='align_confs',
169                        required=False,
170                        action='store_true',
171                        default=False,
172                        help='Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)')
173    parser.add_argument('-r',
174                        dest='rot_term_het_grps',
175                        required=False,
176                        action='store_true',
177                        default=False,
178                        help='Consider single bonds to terminal hetero atoms (= N, O, or S) as rotatable (default: false)')
179    parser.add_argument('-q',
180                        dest='quiet',
181                        required=False,
182                        action='store_true',
183                        default=False,
184                        help='Disable progress output (default: false)')
185    
186    return parser.parse_args()
187            
188def main() -> None:
189    args = parseArgs()
190    
191    # create reader for input molecules (format specified by file extension)
192    reader = Chem.MoleculeReader(args.in_file) 
193
194    # create writer for the generated conformer ensembles (format specified by file extension)
195    writer = Chem.MolecularGraphWriter(args.out_file) 
196
197    # parse the SMARTS pattern describing fixed substructures, if specified
198    if args.fix_ptn:
199        try:
200            sub_srch_ptn = Chem.parseSMARTS(args.fix_ptn)
201        except Exception as e:
202            sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))
203
204        # create and initialize an instance of the class Chem.SubstructureSearch that
205        # implements the substructure searching algorithm
206        sub_srch = Chem.SubstructureSearch(sub_srch_ptn)
207        sub_srch.uniqueMappings = True # report only mappings that differ in matched atoms/bonds
208    else:
209        sub_srch = None
210        
211    # create and initialize an instance of the class ConfGen.TorsionDriver which
212    # will perform conformer generation by enumerating energetically favorable
213    # torsion angle combinations
214    tor_driver = ConfGen.TorsionDriver()
215
216    tor_driver.settings.energyWindow = args.e_window                    # apply the -e argument
217    tor_driver.settings.sampleHetAtomHydrogens = args.rot_term_het_grps # apply the -r argument
218    tor_driver.energyOrdered = True                                     # order generated confs. by energy
219    tor_driver.strictForceFieldParam = False                            # be tolerant
220    
221    # dictionary mapping status codes to human readable strings
222    status_to_str = { ConfGen.ReturnCode.UNINITIALIZED                  : 'uninitialized',
223                      ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED        : 'force field setup failed' }
224    
225    # create an instance of the default implementation of the Chem.Molecule interface
226    mol = Chem.BasicMolecule()
227    i = 1
228    
229    # read and process molecules one after the other until the end of input has been reached
230    try:
231        while reader.read(mol):
232            # compose a simple molecule identifier
233            mol_id = Chem.getName(mol).strip() 
234
235            if mol_id == '':
236                mol_id = '#' + str(i) # fallback if name is empty
237            else:
238                mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
239       
240            if not args.quiet:
241                print('- Generating conformers for molecule %s...' % mol_id)
242
243            try:
244                # prepare the molecule for torsion driving
245                ConfGen.prepareForConformerGeneration(mol) 
246
247                # check if all atoms have 3D coordinates, print an error message if not
248                if not Chem.hasCoordinates(mol, 3):
249                    if args.quiet:
250                        print('Error: torsion driving on molecule %s failed: no atom 3D coordinates provided' % mol_id)
251                    else:
252                        print(' -> Torsion driving failed: no atom 3D coordinates provided')
253                    continue
254                
255                # generate conformer ensemble for read molecule
256                status, num_confs = performTorsionDriving(mol, tor_driver, sub_srch, args) 
257
258                # check for severe error reported by status code
259                if status != ConfGen.ReturnCode.SUCCESS:
260                    if args.quiet:
261                        print('Error: torsion driving on molecule %s failed: %s' % (mol_id, status_to_str[status]))
262                    else:
263                        print(' -> Torsion driving failed: %s' % status_to_str[status])
264
265                elif not args.quiet:  # arrives here only if no severe error occurred
266                    print(' -> Generated %s conformer(s)' % str(num_confs))
267                        
268                # output generated ensemble (if available)
269                if num_confs > 0:
270                    if not writer.write(mol):   
271                        sys.exit('Error: output of generated conformers for molecule %s failed' % mol_id)
272                        
273            except Exception as e:
274                sys.exit('Error: torsion driving or output of generated conformers for molecule %s failed: %s' % (mol_id, str(e)))
275
276            i += 1
277                
278    except Exception as e: # handle exception raised in case of severe read errors
279        sys.exit('Error: reading molecule failed: ' + str(e))
280
281    writer.close()
282    sys.exit(0)
283
284if __name__ == '__main__':
285    main()

Download source file