1.1.3.4. Atom/Bond Order Canonicalization

The script canon_mols.py reads molecules from a given input file, canonicalizes the atom and bond order and writes the result to the specified output file.

Synopsis

python canon_mols.py [-h] -i <file> -o <file> [-x] [-q]

Mandatory options

-i <file>

Molecule input file

-o <file>

Canonicalized molecule output file

Other options

-h, --help

Show help message and exit

-x

Ignore atom and bond stereochemistry (default: false)

-q

Disable progress output (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5
  6
  7# calcutates a canonical atom numbering for the provided molecular graph and
  8# then reorders the atoms and bond according to the calculated numbering
  9def canonicalize(mol: Chem.MolecularGraph, ignore_stereo: bool) -> None:
 10    # calculate required basic atom and bond properties
 11    Chem.calcBasicProperties(mol, False)
 12
 13    # if necessary, do some atom/bond stereochemistry related setup work
 14    if not ignore_stereo:
 15        Chem.calcCIPPriorities(mol, False)
 16        Chem.perceiveAtomStereoCenters(mol, False, True)
 17        Chem.perceiveBondStereoCenters(mol, False, True)
 18        Chem.calcAtomStereoDescriptors(mol, False)
 19        Chem.calcBondStereoDescriptors(mol, False)
 20
 21    # flags specifying the default set of atom properties considered by the canonicalization algo. implementation
 22    atom_flags = Chem.CanonicalNumberingCalculator.DEF_ATOM_PROPERTY_FLAGS
 23
 24    # flags specifying the default set of bond properties considered by the canonicalization algo. implementation
 25    bond_flags = Chem.CanonicalNumberingCalculator.DEF_BOND_PROPERTY_FLAGS
 26
 27    # if stereochemistry is not considered then clear the corresponding property flags
 28    if ignore_stereo:
 29        atom_flags ^= Chem.AtomPropertyFlag.CONFIGURATION
 30        bond_flags ^= Chem.BondPropertyFlag.CONFIGURATION
 31
 32    # calculate canonical atom numbers (numbers will be stored as atom property Chem.AtomProperty.CANONICAL_NUMBER)
 33    Chem.calcCanonicalNumbering(mol, True, atom_flags, bond_flags)
 34
 35    # reorder all internal atom and bond lists according to the calculated canonical atom numbering
 36    Chem.canonicalize(mol)
 37        
 38def parseArgs() -> argparse.Namespace:
 39    parser = argparse.ArgumentParser(description='Canonicalized the atom and bond order of the given input molecules.')
 40
 41    parser.add_argument('-i',
 42                        dest='in_file',
 43                        required=True,
 44                        metavar='<file>',
 45                        help='Molecule input file')
 46    parser.add_argument('-o',
 47                        dest='out_file',
 48                        required=True,
 49                        metavar='<file>',
 50                        help='Canonicalized molecule output file')
 51    parser.add_argument('-x',
 52                        dest='ignore_stereo',
 53                        required=False,
 54                        action='store_true',
 55                        default=False,
 56                        help='Ignore atom and bond stereochemistry (default: false)')
 57    parser.add_argument('-q',
 58                        dest='quiet',
 59                        required=False,
 60                        action='store_true',
 61                        default=False,
 62                        help='Disable progress output (default: false)')
 63      
 64    return parser.parse_args()
 65
 66def main() -> None:
 67    args = parseArgs()
 68    
 69    # create reader for input molecules (format specified by file extension)
 70    reader = Chem.MoleculeReader(args.in_file) 
 71
 72    # create writer for the canonicalized molecules (format specified by file extension)
 73    writer = Chem.MolecularGraphWriter(args.out_file) 
 74 
 75    # create an instance of the default implementation of the Chem.Molecule interface
 76    mol = Chem.BasicMolecule()
 77    i = 1
 78    
 79    # read and process molecules one after the other until the end of input has been reached
 80    try:
 81        while reader.read(mol):
 82            # compose a simple molecule identifier
 83            mol_id = Chem.getName(mol).strip() 
 84
 85            if mol_id == '':
 86                mol_id = f'#{i}' # fallback if name is empty
 87            else:
 88                mol_id = f'\'{mol_id}\' (#{i})'
 89
 90            if not args.quiet:
 91                print(f'- Canonicalizing molecule {mol_id}...')
 92
 93            try:
 94                canonicalize(mol, args.ignore_stereo)
 95
 96                # output the laid out molecule
 97                if not writer.write(mol):   
 98                    sys.exit(f'Error: writing molecule {mol_id} failed')
 99                        
100            except Exception as e:
101                sys.exit(f'Error: Canonicalization or output of molecule {mol_id} failed:\n{str(e)}')
102
103            i += 1
104                
105    except Exception as e: # handle exception raised in case of severe read errors
106        sys.exit(f'Error: reading molecule failed:\n{str(e)}')
107
108    writer.close()
109    sys.exit(0)
110
# invoke main() only when the file is executed as a script (not when it is imported)
if __name__ == "__main__":
    main()

Download source file