5.2. Conformer Ensembles

The script gen_confs.py generates a conformer ensemble [5] for each molecule read from the specified input file and writes the resulting ensembles to the desired output file.

Synopsis

python gen_confs.py [-h] -i <file> -o <file> [-e <float>] [-r <float>] [-t <int>] [-n <int>] [-q]

Mandatory options

-i <file>

Molecule input file

-o <file>

Conformer ensemble output file

Other options

-h, --help

Show help message and exit

-e <float>

Output conformer energy window (default: 20.0)

-r <float>

Output conformer RMSD threshold (default: 0.5)

-t <int>

Max. allowed molecule processing time (default: 3600 sec)

-n <int>

Max. output ensemble size (default: 100)

-f <SMARTS>

SMARTS pattern describing a substructure that shall be kept fixed during conformer generation

-a

Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)

-q

Disable progress output (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.ConfGen as ConfGen
  6
  7
  8# generates a conformer ensemble of the argument molecule using
  9# the provided initialized ConfGen.ConformerGenerator instance
 10def genConfEnsemble(mol: Chem.Molecule, conf_gen: ConfGen.ConformerGenerator, sub_srch: Chem.SubstructureSearch, align_confs: bool, quiet: bool) -> (int, int):
 11    # prepare the molecule for conformer generation
 12    ConfGen.prepareForConformerGeneration(mol)
 13
 14    # check if a fixed substructure pattern has been specified and if there are matches
 15    if sub_srch != None and sub_srch.findMappings(mol): 
 16        if not quiet:
 17            print(' -> Found fixed substructure pattern match')
 18            
 19        fixed_substruct = Chem.Fragment()  # will store atoms and bonds of the matched substructure(s)
 20
 21        # extract all bonds (and associated atoms) of the first substructure matched by the pattern
 22        for bond in sub_srch.getMapping(0).bondMapping.getValues():
 23            fixed_substruct.addBond(bond)
 24
 25        # generate the conformer ensemble keeping the spec. substructure fixed
 26        status = conf_gen.generate(mol, fixed_substruct)
 27
 28    else:
 29        if sub_srch != None and not quiet:
 30            print(' -> No fixed substructure pattern matches found!')
 31
 32        fixed_substruct = None
 33
 34        # generate the conformer ensemble
 35        status = conf_gen.generate(mol)       
 36    
 37    num_confs = conf_gen.getNumConformers()
 38    
 39    # if successful, store the generated conformer ensemble as
 40    # per atom 3D coordinates arrays (= the way conformers are represented in CDPKit)
 41    # and perform conformer alignment (if requested)
 42    if status == ConfGen.ReturnCode.SUCCESS or status == ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
 43        conf_gen.setConformers(mol)
 44
 45        # if enabled, perform an alignment of the generated conformers
 46        # using the coordinates of the first one as reference
 47        if align_confs:
 48            if fixed_substruct != None:           # align on fixed substructure atoms
 49                Chem.alignConformations(mol, fixed_substruct)
 50            else:
 51                Chem.alignConformations(mol, mol) # align on all molecule atoms
 52    else:
 53        num_confs = 0
 54        
 55    # return status code and the number of generated conformers
 56    return (status, num_confs)
 57        
 58def parseArgs() -> argparse.Namespace:
 59    parser = argparse.ArgumentParser(description='Generates conformer ensembles for the given input molecules.')
 60
 61    parser.add_argument('-i',
 62                        dest='in_file',
 63                        required=True,
 64                        metavar='<file>',
 65                        help='Molecule input file')
 66    parser.add_argument('-o',
 67                        dest='out_file',
 68                        required=True,
 69                        metavar='<file>',
 70                        help='Conformer ensemble output file')
 71    parser.add_argument('-e',
 72                        dest='e_window',
 73                        required=False,
 74                        metavar='<float>',
 75                        type=float,
 76                        default=20.0,
 77                        help='Output conformer energy window (default: 20.0)')
 78    parser.add_argument('-r',
 79                        dest='min_rmsd',
 80                        required=False,
 81                        metavar='<float>',
 82                        type=float,
 83                        default=0.5,
 84                        help='Output conformer RMSD threshold (default: 0.5)')
 85    parser.add_argument('-t',
 86                        dest='max_time',
 87                        required=False,
 88                        metavar='<int>',
 89                        type=int,
 90                        default=3600,
 91                        help='Max. allowed molecule processing time (default: 3600 sec)')
 92    parser.add_argument('-n',
 93                        dest='max_confs',
 94                        required=False,
 95                        metavar='<int>',
 96                        type=int,
 97                        default=100,
 98                        help='Max. output ensemble size (default: 100)')
 99    parser.add_argument('-f',
100                        dest='fix_ptn',
101                        required=False,
102                        metavar='<SMARTS>',
103                        help='SMARTS pattern describing a substructure that shall be kept fixed during conformer generation')
104    parser.add_argument('-a',
105                        dest='align_confs',
106                        required=False,
107                        action='store_true',
108                        default=False,
109                        help='Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)')
110    parser.add_argument('-q',
111                        dest='quiet',
112                        required=False,
113                        action='store_true',
114                        default=False,
115                        help='Disable progress output (default: false)')
116    
117    return parser.parse_args()
118
119def main() -> None:
120    args = parseArgs()
121    
122    # create reader for input molecules (format specified by file extension)
123    reader = Chem.MoleculeReader(args.in_file) 
124
125    # create writer for the generated conformer ensembles (format specified by file extension)
126    writer = Chem.MolecularGraphWriter(args.out_file) 
127
128    # parse the SMARTS pattern describing fixed substructures, if specified
129    if args.fix_ptn:
130        try:
131            sub_srch_ptn = Chem.parseSMARTS(args.fix_ptn)
132        except Exception as e:
133            sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))
134
135        # create and initialize an instance of the class Chem.SubstructureSearch that
136        # implements the substructure searching algorithm
137        sub_srch = Chem.SubstructureSearch(sub_srch_ptn)
138        sub_srch.maxNumMappings = 1  # find only only one match
139    else:
140        sub_srch = None
141
142    # create and initialize an instance of the class ConfGen.ConformerGenerator which
143    # will perform the actual conformer ensemble generation work
144    conf_gen = ConfGen.ConformerGenerator()
145
146    conf_gen.settings.timeout = args.max_time * 1000          # apply the -t argument
147    conf_gen.settings.minRMSD = args.min_rmsd                 # apply the -r argument
148    conf_gen.settings.energyWindow = args.e_window            # apply the -e argument
149    conf_gen.settings.maxNumOutputConformers = args.max_confs # apply the -n argument
150
151    # dictionary mapping status codes to human readable strings
152    status_to_str = { ConfGen.ReturnCode.UNINITIALIZED                  : 'uninitialized',
153                      ConfGen.ReturnCode.TIMEOUT                        : 'max. processing time exceeded',
154                      ConfGen.ReturnCode.ABORTED                        : 'aborted',
155                      ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED        : 'force field setup failed',
156                      ConfGen.ReturnCode.FORCEFIELD_MINIMIZATION_FAILED : 'force field structure refinement failed',
157                      ConfGen.ReturnCode.FRAGMENT_LIBRARY_NOT_SET       : 'fragment library not available',
158                      ConfGen.ReturnCode.FRAGMENT_CONF_GEN_FAILED       : 'fragment conformer generation failed',
159                      ConfGen.ReturnCode.FRAGMENT_CONF_GEN_TIMEOUT      : 'fragment conformer generation timeout',
160                      ConfGen.ReturnCode.FRAGMENT_ALREADY_PROCESSED     : 'fragment already processed',
161                      ConfGen.ReturnCode.TORSION_DRIVING_FAILED         : 'torsion driving failed',
162                      ConfGen.ReturnCode.CONF_GEN_FAILED                : 'conformer generation failed',
163                      ConfGen.ReturnCode.NO_FIXED_SUBSTRUCT_COORDS      : 'fixed substructure atoms do not provide 3D coordinates' }
164    
165    # create an instance of the default implementation of the Chem.Molecule interface
166    mol = Chem.BasicMolecule()
167    i = 1
168    
169    # read and process molecules one after the other until the end of input has been reached
170    try:
171        while reader.read(mol):
172            # compose a simple molecule identifier
173            mol_id = Chem.getName(mol).strip() 
174
175            if mol_id == '':
176                mol_id = '#' + str(i) # fallback if name is empty
177            else:
178                mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
179
180            if not args.quiet:
181                print('- Generating conformers for molecule %s...' % mol_id)
182
183            try:
184                # generate conformer ensemble for read molecule
185                status, num_confs = genConfEnsemble(mol, conf_gen, sub_srch, args.align_confs, args.quiet) 
186
187                # check for severe error reported by status code
188                if status != ConfGen.ReturnCode.SUCCESS and status != ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
189                    if args.quiet:
190                        print('Error: conformer ensemble generation for molecule %s failed: %s' % (mol_id, status_to_str[status]))
191                    else:
192                        print(' -> Conformer ensemble generation failed: %s' % status_to_str[status])
193
194                elif not args.quiet:  # arrives here only if no severe error occurred
195                    if status == ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
196                        print(' -> Generated %s conformers (warning: too much top. symmetry - output ensemble may contain duplicates)' % str(num_confs))
197                    else:
198                        print(' -> Generated %s conformer(s)' % str(num_confs))
199                        
200                # output generated ensemble (if available)
201                if num_confs > 0:
202                    if not writer.write(mol):   
203                        sys.exit('Error: output of conformer ensemble for molecule %s failed' % mol_id)
204                        
205            except Exception as e:
206                sys.exit('Error: conformer ensemble generation or output for molecule %s failed: %s' % (mol_id, str(e)))
207
208            i += 1
209                
210    except Exception as e: # handle exception raised in case of severe read errors
211        sys.exit('Error: reading molecule failed: ' + str(e))
212
213    writer.close()
214    sys.exit(0)
215
216if __name__ == '__main__':
217    main()

Download source file