5.2. Conformer Ensembles
The script gen_confs.py generates a conformer ensemble [5] for each molecule read from the specified input file and writes the resulting ensembles to the desired output file.
Synopsis
python gen_confs.py [-h] -i <file> -o <file> [-e <float>] [-r <float>] [-t <int>] [-n <int>] [-q]
Mandatory options
- -i <file>
Molecule input file
- -o <file>
Conformer ensemble output file
Other options
- -h, --help
Show help message and exit
- -e <float>
Output conformer energy window (default: 20.0)
- -r <float>
Output conformer RMSD threshold (default: 0.5)
- -t <int>
Max. allowed molecule processing time (default: 3600 sec)
- -n <int>
Max. output ensemble size (default: 100)
- -f <SMARTS>
SMARTS pattern describing a substructure that shall be kept fixed during conformer generation
- -a
Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.ConfGen as ConfGen
6
7
8# generates a conformer ensemble of the argument molecule using
9# the provided initialized ConfGen.ConformerGenerator instance
10def genConfEnsemble(mol: Chem.Molecule, conf_gen: ConfGen.ConformerGenerator, sub_srch: Chem.SubstructureSearch, align_confs: bool, quiet: bool) -> (int, int):
11 # prepare the molecule for conformer generation
12 ConfGen.prepareForConformerGeneration(mol)
13
14 # check if a fixed substructure pattern has been specified and if there are matches
15 if sub_srch != None and sub_srch.findMappings(mol):
16 if not quiet:
17 print(' -> Found fixed substructure pattern match')
18
19 fixed_substruct = Chem.Fragment() # will store atoms and bonds of the matched substructure(s)
20
21 # extract all bonds (and associated atoms) of the first substructure matched by the pattern
22 for bond in sub_srch.getMapping(0).bondMapping.getValues():
23 fixed_substruct.addBond(bond)
24
25 # generate the conformer ensemble keeping the spec. substructure fixed
26 status = conf_gen.generate(mol, fixed_substruct)
27
28 else:
29 if sub_srch != None and not quiet:
30 print(' -> No fixed substructure pattern matches found!')
31
32 fixed_substruct = None
33
34 # generate the conformer ensemble
35 status = conf_gen.generate(mol)
36
37 num_confs = conf_gen.getNumConformers()
38
39 # if successful, store the generated conformer ensemble as
40 # per atom 3D coordinates arrays (= the way conformers are represented in CDPKit)
41 # and perform conformer alignment (if requested)
42 if status == ConfGen.ReturnCode.SUCCESS or status == ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
43 conf_gen.setConformers(mol)
44
45 # if enabled, perform an alignment of the generated conformers
46 # using the coordinates of the first one as reference
47 if align_confs:
48 if fixed_substruct != None: # align on fixed substructure atoms
49 Chem.alignConformations(mol, fixed_substruct)
50 else:
51 Chem.alignConformations(mol, mol) # align on all molecule atoms
52 else:
53 num_confs = 0
54
55 # return status code and the number of generated conformers
56 return (status, num_confs)
57
58def parseArgs() -> argparse.Namespace:
59 parser = argparse.ArgumentParser(description='Generates conformer ensembles for the given input molecules.')
60
61 parser.add_argument('-i',
62 dest='in_file',
63 required=True,
64 metavar='<file>',
65 help='Molecule input file')
66 parser.add_argument('-o',
67 dest='out_file',
68 required=True,
69 metavar='<file>',
70 help='Conformer ensemble output file')
71 parser.add_argument('-e',
72 dest='e_window',
73 required=False,
74 metavar='<float>',
75 type=float,
76 default=20.0,
77 help='Output conformer energy window (default: 20.0)')
78 parser.add_argument('-r',
79 dest='min_rmsd',
80 required=False,
81 metavar='<float>',
82 type=float,
83 default=0.5,
84 help='Output conformer RMSD threshold (default: 0.5)')
85 parser.add_argument('-t',
86 dest='max_time',
87 required=False,
88 metavar='<int>',
89 type=int,
90 default=3600,
91 help='Max. allowed molecule processing time (default: 3600 sec)')
92 parser.add_argument('-n',
93 dest='max_confs',
94 required=False,
95 metavar='<int>',
96 type=int,
97 default=100,
98 help='Max. output ensemble size (default: 100)')
99 parser.add_argument('-f',
100 dest='fix_ptn',
101 required=False,
102 metavar='<SMARTS>',
103 help='SMARTS pattern describing a substructure that shall be kept fixed during conformer generation')
104 parser.add_argument('-a',
105 dest='align_confs',
106 required=False,
107 action='store_true',
108 default=False,
109 help='Align generated conformers on the fixed part of the input structure (if specified) or on the whole structure (default: false)')
110 parser.add_argument('-q',
111 dest='quiet',
112 required=False,
113 action='store_true',
114 default=False,
115 help='Disable progress output (default: false)')
116
117 return parser.parse_args()
118
119def main() -> None:
120 args = parseArgs()
121
122 # create reader for input molecules (format specified by file extension)
123 reader = Chem.MoleculeReader(args.in_file)
124
125 # create writer for the generated conformer ensembles (format specified by file extension)
126 writer = Chem.MolecularGraphWriter(args.out_file)
127
128 # parse the SMARTS pattern describing fixed substructures, if specified
129 if args.fix_ptn:
130 try:
131 sub_srch_ptn = Chem.parseSMARTS(args.fix_ptn)
132 except Exception as e:
133 sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))
134
135 # create and initialize an instance of the class Chem.SubstructureSearch that
136 # implements the substructure searching algorithm
137 sub_srch = Chem.SubstructureSearch(sub_srch_ptn)
138 sub_srch.maxNumMappings = 1 # find only only one match
139 else:
140 sub_srch = None
141
142 # create and initialize an instance of the class ConfGen.ConformerGenerator which
143 # will perform the actual conformer ensemble generation work
144 conf_gen = ConfGen.ConformerGenerator()
145
146 conf_gen.settings.timeout = args.max_time * 1000 # apply the -t argument
147 conf_gen.settings.minRMSD = args.min_rmsd # apply the -r argument
148 conf_gen.settings.energyWindow = args.e_window # apply the -e argument
149 conf_gen.settings.maxNumOutputConformers = args.max_confs # apply the -n argument
150
151 # dictionary mapping status codes to human readable strings
152 status_to_str = { ConfGen.ReturnCode.UNINITIALIZED : 'uninitialized',
153 ConfGen.ReturnCode.TIMEOUT : 'max. processing time exceeded',
154 ConfGen.ReturnCode.ABORTED : 'aborted',
155 ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED : 'force field setup failed',
156 ConfGen.ReturnCode.FORCEFIELD_MINIMIZATION_FAILED : 'force field structure refinement failed',
157 ConfGen.ReturnCode.FRAGMENT_LIBRARY_NOT_SET : 'fragment library not available',
158 ConfGen.ReturnCode.FRAGMENT_CONF_GEN_FAILED : 'fragment conformer generation failed',
159 ConfGen.ReturnCode.FRAGMENT_CONF_GEN_TIMEOUT : 'fragment conformer generation timeout',
160 ConfGen.ReturnCode.FRAGMENT_ALREADY_PROCESSED : 'fragment already processed',
161 ConfGen.ReturnCode.TORSION_DRIVING_FAILED : 'torsion driving failed',
162 ConfGen.ReturnCode.CONF_GEN_FAILED : 'conformer generation failed',
163 ConfGen.ReturnCode.NO_FIXED_SUBSTRUCT_COORDS : 'fixed substructure atoms do not provide 3D coordinates' }
164
165 # create an instance of the default implementation of the Chem.Molecule interface
166 mol = Chem.BasicMolecule()
167 i = 1
168
169 # read and process molecules one after the other until the end of input has been reached
170 try:
171 while reader.read(mol):
172 # compose a simple molecule identifier
173 mol_id = Chem.getName(mol).strip()
174
175 if mol_id == '':
176 mol_id = '#' + str(i) # fallback if name is empty
177 else:
178 mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
179
180 if not args.quiet:
181 print('- Generating conformers for molecule %s...' % mol_id)
182
183 try:
184 # generate conformer ensemble for read molecule
185 status, num_confs = genConfEnsemble(mol, conf_gen, sub_srch, args.align_confs, args.quiet)
186
187 # check for severe error reported by status code
188 if status != ConfGen.ReturnCode.SUCCESS and status != ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
189 if args.quiet:
190 print('Error: conformer ensemble generation for molecule %s failed: %s' % (mol_id, status_to_str[status]))
191 else:
192 print(' -> Conformer ensemble generation failed: %s' % status_to_str[status])
193
194 elif not args.quiet: # arrives here only if no severe error occurred
195 if status == ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
196 print(' -> Generated %s conformers (warning: too much top. symmetry - output ensemble may contain duplicates)' % str(num_confs))
197 else:
198 print(' -> Generated %s conformer(s)' % str(num_confs))
199
200 # output generated ensemble (if available)
201 if num_confs > 0:
202 if not writer.write(mol):
203 sys.exit('Error: output of conformer ensemble for molecule %s failed' % mol_id)
204
205 except Exception as e:
206 sys.exit('Error: conformer ensemble generation or output for molecule %s failed: %s' % (mol_id, str(e)))
207
208 i += 1
209
210 except Exception as e: # handle exception raised in case of severe read errors
211 sys.exit('Error: reading molecule failed: ' + str(e))
212
213 writer.close()
214 sys.exit(0)
215
216if __name__ == '__main__':
217 main()