5.2. Conformer Ensembles
The script gen_confs.py generates a conformer ensemble [5] for each molecule read from the specified input file and writes the resulting ensembles to the desired output file.
Synopsis
python gen_confs.py [-h] -i <file> -o <file> [-e <float>] [-r <float>] [-t <int>] [-n <int>] [-q]
Mandatory options
- -i <file>
Molecule input file
- -o <file>
Conformer ensemble output file
Other options
- -h, --help
Show help message and exit
- -e <float>
Output conformer energy window (default: 20.0)
- -r <float>
Output conformer RMSD threshold (default: 0.5)
- -t <int>
Max. allowed molecule processing time (default: 3600 sec)
- -n <int>
Max. output ensemble size (default: 100)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.ConfGen as ConfGen
6
7
8# generates a conformer ensemble of the argument molecule using
9# the provided initialized ConfGen.ConformerGenerator instance
10def genConfEnsemble(mol: Chem.Molecule, conf_gen: ConfGen.ConformerGenerator) -> (int, int):
11 # prepare the molecule for conformer generation
12 ConfGen.prepareForConformerGeneration(mol)
13
14 # generate the conformer ensemble
15 status = conf_gen.generate(mol)
16 num_confs = conf_gen.getNumConformers()
17
18 # if successful, store the generated conformer ensemble as
19 # per atom 3D coordinates arrays (= the way conformers are represented in CDPKit)
20 if status == ConfGen.ReturnCode.SUCCESS or status == ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
21 conf_gen.setConformers(mol)
22 else:
23 num_confs = 0
24
25 # return status code and the number of generated conformers
26 return (status, num_confs)
27
28def parseArgs() -> argparse.Namespace:
29 parser = argparse.ArgumentParser(description='Generates conformer ensembles for the given input molecules.')
30
31 parser.add_argument('-i',
32 dest='in_file',
33 required=True,
34 metavar='<file>',
35 help='Molecule input file')
36 parser.add_argument('-o',
37 dest='out_file',
38 required=True,
39 metavar='<file>',
40 help='Conformer ensemble output file')
41 parser.add_argument('-e',
42 dest='e_window',
43 required=False,
44 metavar='<float>',
45 type=float,
46 default=20.0,
47 help='Output conformer energy window (default: 20.0)')
48 parser.add_argument('-r',
49 dest='min_rmsd',
50 required=False,
51 metavar='<float>',
52 type=float,
53 default=0.5,
54 help='Output conformer RMSD threshold (default: 0.5)')
55 parser.add_argument('-t',
56 dest='max_time',
57 required=False,
58 metavar='<int>',
59 type=int,
60 default=3600,
61 help='Max. allowed molecule processing time (default: 3600 sec)')
62 parser.add_argument('-n',
63 dest='max_confs',
64 required=False,
65 metavar='<int>',
66 type=int,
67 default=100,
68 help='Max. output ensemble size (default: 100)')
69 parser.add_argument('-q',
70 dest='quiet',
71 required=False,
72 action='store_true',
73 default=False,
74 help='Disable progress output (default: false)')
75
76 return parser.parse_args()
77
78def main() -> None:
79 args = parseArgs()
80
81 # create reader for input molecules (format specified by file extension)
82 reader = Chem.MoleculeReader(args.in_file)
83
84 # create writer for the generated conformer ensembles (format specified by file extension)
85 writer = Chem.MolecularGraphWriter(args.out_file)
86
87 # create and initialize an instance of the class ConfGen.ConformerGenerator which
88 # will perform the actual conformer ensemble generation work
89 conf_gen = ConfGen.ConformerGenerator()
90
91 conf_gen.settings.timeout = args.max_time * 1000 # apply the -t argument
92 conf_gen.settings.minRMSD = args.min_rmsd # apply the -r argument
93 conf_gen.settings.energyWindow = args.e_window # apply the -e argument
94 conf_gen.settings.maxNumOutputConformers = args.max_confs # apply the -n argument
95
96 # dictionary mapping status codes to human readable strings
97 status_to_str = { ConfGen.ReturnCode.UNINITIALIZED : 'uninitialized',
98 ConfGen.ReturnCode.TIMEOUT : 'max. processing time exceeded',
99 ConfGen.ReturnCode.ABORTED : 'aborted',
100 ConfGen.ReturnCode.FORCEFIELD_SETUP_FAILED : 'force field setup failed',
101 ConfGen.ReturnCode.FORCEFIELD_MINIMIZATION_FAILED : 'force field structure refinement failed',
102 ConfGen.ReturnCode.FRAGMENT_LIBRARY_NOT_SET : 'fragment library not available',
103 ConfGen.ReturnCode.FRAGMENT_CONF_GEN_FAILED : 'fragment conformer generation failed',
104 ConfGen.ReturnCode.FRAGMENT_CONF_GEN_TIMEOUT : 'fragment conformer generation timeout',
105 ConfGen.ReturnCode.FRAGMENT_ALREADY_PROCESSED : 'fragment already processed',
106 ConfGen.ReturnCode.TORSION_DRIVING_FAILED : 'torsion driving failed',
107 ConfGen.ReturnCode.CONF_GEN_FAILED : 'conformer generation failed' }
108
109 # create an instance of the default implementation of the Chem.Molecule interface
110 mol = Chem.BasicMolecule()
111 i = 1
112
113 # read and process molecules one after the other until the end of input has been reached
114 try:
115 while reader.read(mol):
116 # compose a simple molecule identifier
117 mol_id = Chem.getName(mol).strip()
118
119 if mol_id == '':
120 mol_id = '#' + str(i) # fallback if name is empty
121 else:
122 mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
123
124 if not args.quiet:
125 print('- Generating conformers for molecule %s...' % mol_id)
126
127 try:
128 # generate conformer ensemble for read molecule
129 status, num_confs = genConfEnsemble(mol, conf_gen)
130
131 # check for severe error reported by status code
132 if status != ConfGen.ReturnCode.SUCCESS and status != ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
133 if args.quiet:
134 print('Error: conformer ensemble generation for molecule %s failed: %s' % (mol_id, status_to_str[status]))
135 else:
136 print(' -> Conformer ensemble generation failed: %s' % status_to_str[status])
137
138 elif not args.quiet: # arrives here only if no severe error occurred
139 if status == ConfGen.ReturnCode.TOO_MUCH_SYMMETRY:
140 print(' -> Generated %s conformers (warning: too much top. symmetry - output ensemble may contain duplicates)' % str(num_confs))
141 else:
142 print(' -> Generated %s conformer(s)' % str(num_confs))
143
144 # output generated ensemble (if available)
145 if num_confs > 0:
146 if not writer.write(mol):
147 sys.exit('Error: output of conformer ensemble for molecule %s failed' % mol_id)
148
149 except Exception as e:
150 sys.exit('Error: conformer ensemble generation or output for molecule %s failed: %s' % (mol_id, str(e)))
151
152 i += 1
153
154 except Exception as e: # handle exception raised in case of severe read errors
155 sys.exit('Error: reading molecule failed: ' + str(e))
156
157 writer.close()
158 sys.exit(0)
159
160if __name__ == '__main__':
161 main()