1.1.3.4. Atom/Bond Order Canonicalization
The script canon_mols.py reads molecules from a given input file, canonicalizes the atom and bond order and writes the result to the specified output file.
Synopsis
python canon_mols.py [-h] -i <file> -o <file> [-x] [-q]
Mandatory options
- -i <file>
Molecule input file
- -o <file>
Canonicalized molecule output file
Other options
- -h, --help
Show help message and exit
- -x
Ignore atom and bond stereochemistry (default: false)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5
6
def canonicalize(mol: Chem.MolecularGraph, ignore_stereo: bool) -> None:
    """Canonicalize the atom and bond order of a molecular graph in place.

    Calculates a canonical atom numbering for the provided molecular graph and
    then reorders the atoms and bonds according to the calculated numbering.

    Args:
        mol: The molecular graph whose atoms and bonds get reordered.
        ignore_stereo: If true, the stereochemistry related setup work is
            skipped and the atom/bond configuration flags are removed from
            the property flag sets passed to the numbering calculation.
    """
    # calculate required basic atom and bond properties
    Chem.calcBasicProperties(mol, False)

    # flags specifying the default sets of atom and bond properties considered
    # by the canonicalization algo. implementation
    atom_flags = Chem.CanonicalNumberingCalculator.DEF_ATOM_PROPERTY_FLAGS
    bond_flags = Chem.CanonicalNumberingCalculator.DEF_BOND_PROPERTY_FLAGS

    if ignore_stereo:
        # stereochemistry is not considered -> clear the corresponding property
        # flags; AND-NOT is used instead of XOR because XOR would *set* the flag
        # if it ever happened to be absent from the default flag sets
        atom_flags &= ~Chem.AtomPropertyFlag.CONFIGURATION
        bond_flags &= ~Chem.BondPropertyFlag.CONFIGURATION
    else:
        # do some atom/bond stereochemistry related setup work
        Chem.calcCIPPriorities(mol, False)
        Chem.perceiveAtomStereoCenters(mol, False, True)
        Chem.perceiveBondStereoCenters(mol, False, True)
        Chem.calcAtomStereoDescriptors(mol, False)
        Chem.calcBondStereoDescriptors(mol, False)

    # calculate canonical atom numbers (numbers will be stored as atom property Chem.AtomProperty.CANONICAL_NUMBER)
    Chem.calcCanonicalNumbering(mol, True, atom_flags, bond_flags)

    # reorder all internal atom and bond lists according to the calculated canonical atom numbering
    Chem.canonicalize(mol)
37
def parseArgs() -> argparse.Namespace:
    """Parse the command line arguments of the script.

    Returns:
        The argparse namespace providing the attributes ``in_file``,
        ``out_file``, ``ignore_stereo`` and ``quiet``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Canonicalized the atom and bond order of the given input molecules.')

    # mandatory options taking a file path
    file_options = (
        ('-i', 'in_file', 'Molecule input file'),
        ('-o', 'out_file', 'Canonicalized molecule output file'),
    )

    # optional boolean switches which are off by default
    switch_options = (
        ('-x', 'ignore_stereo', 'Ignore atom and bond stereochemistry (default: false)'),
        ('-q', 'quiet', 'Disable progress output (default: false)'),
    )

    for option, target, description in file_options:
        arg_parser.add_argument(option,
                                dest=target,
                                required=True,
                                metavar='<file>',
                                help=description)

    for option, target, description in switch_options:
        arg_parser.add_argument(option,
                                dest=target,
                                required=False,
                                action='store_true',
                                default=False,
                                help=description)

    return arg_parser.parse_args()
65
def main() -> None:
    """Canonicalize all molecules of the input file and write them to the output file.

    The molecules are read one after the other, canonicalized and written
    to the output file. The function always terminates via ``sys.exit()``:
    with an error message if reading, canonicalization or output of a
    molecule fails, and with status 0 otherwise. The writer is closed on
    every exit path.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # create writer for the canonicalized molecules (format specified by file extension)
    writer = Chem.MolecularGraphWriter(args.out_file)

    # create an instance of the default implementation of the Chem.Molecule interface
    mol = Chem.BasicMolecule()
    i = 1

    # read and process molecules one after the other until the end of input has been reached
    try:
        while reader.read(mol):
            # compose a simple molecule identifier
            mol_id = Chem.getName(mol).strip()

            if mol_id == '':
                mol_id = f'#{i}'  # fallback if name is empty
            else:
                mol_id = f'\'{mol_id}\' (#{i})'

            if not args.quiet:
                print(f'- Canonicalizing molecule {mol_id}...')

            try:
                canonicalize(mol, args.ignore_stereo)

                # output the canonicalized molecule
                if not writer.write(mol):
                    sys.exit(f'Error: writing molecule {mol_id} failed')

            except Exception as e:
                sys.exit(f'Error: Canonicalization or output of molecule {mol_id} failed:\n{str(e)}')

            i += 1

    except Exception as e:  # handle exception raised in case of severe read errors
        sys.exit(f'Error: reading molecule failed:\n{str(e)}')

    finally:
        # sys.exit() raises SystemExit, so without this clause the writer
        # would stay unclosed whenever processing is aborted with an error
        writer.close()

    sys.exit(0)


if __name__ == '__main__':
    main()