1.1.5.1. Enumeration of Stereoisomers

The script enum_stereo.py reads molecules from a given input file, enumerates all possible stereoisomers and writes the result to the specified output file.

Synopsis

python enum_stereo.py [-h] -i <file> -o <file> [-m <int>] [-a <true|false>] [-b <true|false>] [-s] [-x] [-n] [-g] [-r] [-R <int>] [-q]

Mandatory options

-i <file>

Molecule input file

-o <file>

Stereoisomer output file

Other options

-h, --help

Show help message and exit

-m <int>

Maximum number of output stereoisomers per molecule (default: 0, must be >= 0, 0 disables limit)

-a <true|false>

Enumerate configurations of atom stereocenters (default: true)

-b <true|false>

Enumerate configurations of bond stereocenters (default: true)

-s

Include specified atom/bond stereocenters (default: false)

-x

Include atom/bond stereocenters with topological symmetry (default: false)

-n

Include invertible nitrogen stereocenters (default: false)

-g

Include bridgehead atom stereocenters (default: false)

-r

Include ring bond stereocenters (default: false)

-R <int>

Minimum size of rings below which the configuration of member bonds shall not be altered (only effective if option -r has been specified; default: 8)

-q

Disable progress output (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5
  6
  7# enumerates and outputs stereoisomers of the argument molecule using
  8# the provided initialized Chem.StereoisomerGenerator instance
  9def enumStereoisomers(mol: Chem.Molecule, iso_gen: Chem.StereoisomerGenerator, mol_id: str,
 10                      writer: Chem.MolecularGraphWriter, args: argparse.Namespace) -> int:
 11
 12    # compute required atom, bond and molecule properties
 13    Chem.calcBasicProperties(mol, False)
 14    Chem.perceiveAtomStereoCenters(mol, False, False, not args.inc_inv_n, not args.inc_inv_n)
 15    Chem.perceiveBondStereoCenters(mol, False, False, True, True, min(8, args.min_ring_size))
 16    Chem.calcAtomStereoDescriptors(mol, False, 0)
 17    Chem.calcBondStereoDescriptors(mol, False, 0)
 18    Chem.calcCIPPriorities(mol, False)
 19    Chem.perceiveComponents(mol, False)
 20    Chem.setAtomSymbolsFromTypes(mol, False)
 21
 22    # setup stereoisomer enumeration for the given molecule
 23    iso_gen.setup(mol)
 24
 25    num_gen_iso = 0
 26
 27    # save original molecule name
 28    mol_name = Chem.getName(mol) 
 29
 30    # enumerate all stereoisomers until done
 31    while args.max_num_iso == 0 or num_gen_iso < args.max_num_iso:
 32        # apply stereoisomer atom config. descriptors
 33        for i in range(mol.numAtoms):
 34            Chem.setStereoDescriptor(mol.atoms[i], iso_gen.atomDescriptors[i])
 35
 36        # apply stereoisomer bond config. descriptors
 37        for i in range(mol.numBonds):
 38            Chem.setStereoDescriptor(mol.bonds[i], iso_gen.bondDescriptors[i])
 39
 40        num_gen_iso += 1
 41
 42        # append stereoisomer number suffix to molecule name
 43        Chem.setName(mol, f'{mol_name}_{num_gen_iso}')
 44
 45        # write stereoisomer to output file
 46        if not writer.write(mol):   
 47            sys.exit(f'Error: output of generated stereoisomer #{num_gen_iso} of molecule {mol_id} failed')
 48
 49        # generate next stereoisomer (if available)
 50        if not iso_gen.generate():
 51            break
 52
 53    return num_gen_iso
 54                        
 55def parseArgs() -> argparse.Namespace:
 56    def strtobool(value: str) -> bool:
 57        value = value.lower()
 58        
 59        if value in ('y', 'yes', 'on', '1', 'true', 't'):
 60            return True
 61        
 62        return False
 63
 64    parser = argparse.ArgumentParser(description='Enumerates stereoisomers of the given input molecules.')
 65
 66    parser.add_argument('-i',
 67                        dest='in_file',
 68                        required=True,
 69                        metavar='<file>',
 70                        help='Molecule input file')
 71    parser.add_argument('-o',
 72                        dest='out_file',
 73                        required=True,
 74                        metavar='<file>',
 75                        help='Stereoisomer output file')
 76    parser.add_argument('-m',
 77                        dest='max_num_iso',
 78                        required=False,
 79                        metavar='<int>',
 80                        type=int,
 81                        default=0,
 82                        help='Maximum number of output stereoisomers per molecule (default: 0, must be >= 0, 0 disables limit)')
 83    parser.add_argument('-a',
 84                        dest='enum_atom_cfg',
 85                        required=False,
 86                        metavar='<true|false>',
 87                        type=lambda x:bool(strtobool(x)),
 88                        default=True,
 89                        help='Enumerate configurations of atom stereocenters (default: true)')
 90    parser.add_argument('-b',
 91                        dest='enum_bond_cfg',
 92                        required=False,
 93                        metavar='<true|false>',
 94                        type=lambda x:bool(strtobool(x)),
 95                        default=True,
 96                        help='Enumerate configurations of bond stereocenters (default: true)')
 97    parser.add_argument('-s',
 98                        dest='inc_spec_ctrs',
 99                        required=False,
100                        action='store_true',
101                        default=False,
102                        help='Include specified atom/bond stereocenters (default: false)')
103    parser.add_argument('-x',
104                        dest='inc_sym_ctrs',
105                        required=False,
106                        action='store_true',
107                        default=False,
108                        help='Include atom/bond stereocenters with topological symmetry (default: false)')
109    parser.add_argument('-n',
110                        dest='inc_inv_n',
111                        required=False,
112                        action='store_true',
113                        default=False,
114                        help='Include invertible nitrogen stereocenters (default: false)')
115    parser.add_argument('-g',
116                        dest='inc_bh_atoms',
117                        required=False,
118                        action='store_true',
119                        default=False,
120                        help='Include bridgehead atom stereocenters (default: false)')
121    parser.add_argument('-r',
122                        dest='inc_ring_bonds',
123                        required=False,
124                        action='store_true',
125                        default=False,
126                        help='Include ring bond stereocenters (default: false)')
127    parser.add_argument('-R',
128                        dest='min_ring_size',
129                        required=False,
130                        metavar='<int>',
131                        type=int,
132                        default=8,
133                        help='Minimum size of rings below which the configuration of member bonds shall not be\
134                              altered (only effective if option -r is true; default: 8)')
135    parser.add_argument('-q',
136                        dest='quiet',
137                        required=False,
138                        action='store_true',
139                        default=False,
140                        help='Disable progress output (default: false)')
141    
142    return parser.parse_args()
143
def main() -> None:
    """
    Script entry point.

    Parses the command line, configures a Chem.StereoisomerGenerator according
    to the given options, then reads the input molecules one by one and writes
    their enumerated stereoisomers to the output file.

    Always terminates via sys.exit(): with status 0 on success, or with an
    error message if reading a molecule or enumerating/writing its
    stereoisomers fails.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # create writer for the generated stereoisomers (format specified by file extension)
    writer = Chem.MolecularGraphWriter(args.out_file)

    # create and initialize an instance of the class Chem.StereoisomerGenerator which
    # will perform the actual stereoisomer enumeration work
    iso_gen = Chem.StereoisomerGenerator()

    iso_gen.enumAtomConfig = args.enum_atom_cfg      # apply option -a
    iso_gen.enumBondConfig = args.enum_bond_cfg      # apply option -b (was erroneously fed from -a)
    iso_gen.incSpecifiedCenters = args.inc_spec_ctrs # apply option -s
    iso_gen.incSymmetricCenters = args.inc_sym_ctrs  # apply option -x
    iso_gen.incBridgeheadAtoms = args.inc_bh_atoms   # apply option -g
    iso_gen.incInvertibleNitrogens = args.inc_inv_n  # apply option -n
    iso_gen.incRingBonds = args.inc_ring_bonds       # apply option -r
    iso_gen.minRingSize = args.min_ring_size         # apply option -R

    # create an instance of the default implementation of the Chem.Molecule interface
    mol = Chem.BasicMolecule()
    i = 1  # 1-based index of the molecule currently being processed

    # read and process molecules one after the other until the end of input has been reached
    try:
        while reader.read(mol):
            # compose a simple molecule identifier
            mol_id = Chem.getName(mol).strip()

            if mol_id == '':
                mol_id = '#' + str(i) # fallback if name is empty
            else:
                mol_id = f'\'{mol_id}\' (#{i})'

            if not args.quiet:
                print(f'- Enumerating stereoisomers for molecule {mol_id}...')

            try:
                # enumerate and output stereoisomers of the read molecule
                num_gen = enumStereoisomers(mol, iso_gen, mol_id, writer, args)

                if not args.quiet:  # arrives here only if no severe error occurred
                    print(f' -> Generated {num_gen} isomer(s)')

            except Exception as e:
                # sys.exit() raises SystemExit, which is not caught by the outer
                # 'except Exception' handler and thus ends the program right here
                sys.exit(f'Error: enumerating stereoisomers of molecule {mol_id} failed: ' + str(e))

            i += 1

    except Exception as e: # handle exception raised in case of severe read errors
        sys.exit('Error: reading molecule failed: ' + str(e))

    writer.close()
    sys.exit(0)


# run the enumeration only if the file is executed as a script
if __name__ == '__main__':
    main()

Download source file