1import sys
2import os
3import argparse
4
5import CDPL.Chem as Chem
6import CDPL.Descr as Descr
7import CDPL.Util as Util
8
9
10# generates the binary ECFP for the given molecule
def genECFP(mol: Chem.Molecule, num_bits: int, radius: int, inc_hs: bool, inc_config: bool) -> Util.BitSet:
    """
    Generate the binary extended connectivity fingerprint (ECFP) of a molecule.

    The molecule is modified in place: the properties required by the
    fingerprint generator are calculated/perceived on it and, if 'inc_hs' is
    set, implicit hydrogens are converted into explicit ones.

    Parameters:
        mol:        the molecule to generate the fingerprint for
        num_bits:   size of the returned fingerprint in bits
        radius:     number of generator iterations (= atom environment radius)
        inc_hs:     if True, hydrogens are included in the fingerprint generation
        inc_config: if True, atom chirality is allowed to influence the fingerprint

    Returns:
        A Util.BitSet of size 'num_bits' with the bits of the extracted
        structural features set.
    """
    # molecule preparation steps, executed strictly in the listed order;
    # NOTE(review): the second argument (False) presumably means "do not
    # overwrite already existing values" - confirm against the CDPL docs
    preparation_steps = (
        Chem.calcImplicitHydrogenCounts,   # implicit hydrogen counts
        Chem.perceiveHybridizationStates,  # atom hybridization states
        Chem.setRingFlags,                 # ring flags of atoms and bonds
        Chem.perceiveSSSR,                 # smallest set of smallest rings
        Chem.setAromaticityFlags,          # aromaticity flags of atoms and bonds
    )

    for prepare in preparation_steps:
        prepare(mol, False)

    generator = Descr.CircularFingerprintGenerator()

    if inc_config:
        # let atom chirality contribute; this requires atom stereo descriptors
        generator.includeChirality(True)
        Chem.calcAtomStereoDescriptors(mol, False)

    if inc_hs:
        # consider hydrogens and therefore turn implicit ones into explicit atoms
        generator.includeHydrogens(True)
        Chem.makeHydrogenComplete(mol)

    # bitset of the requested size that receives the feature bits
    bits = Util.BitSet()
    bits.resize(num_bits)

    generator.setNumIterations(radius)  # num. iterations = atom environment radius
    generator.generate(mol)             # extract characteristic structural features
    generator.setFeatureBits(bits)      # set the bits associated with the extracted features

    # if needed, the bitset could be converted into a numpy single precision float array:
    # bits = numpy.array(bits, dtype=numpy.float32)

    return bits
39
def _intAtLeast(min_value: int):
    """
    Create an argparse 'type' converter that only accepts integers >= min_value.

    The returned callable converts the raw option string to an int and raises
    argparse.ArgumentTypeError (reported by argparse as a usage error) if the
    string is not an integer or the value is smaller than 'min_value'.
    """
    def convert(text: str) -> int:
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError('invalid integer value: %r' % text)

        if value < min_value:
            raise argparse.ArgumentTypeError('value must be >= %d (got %d)' % (min_value, value))

        return value

    return convert


def parseArgs() -> argparse.Namespace:
    """
    Parse the command line arguments of the ECFP generation tool.

    Returns:
        An argparse.Namespace providing the attributes 'in_file', 'out_file',
        'num_bits' (default 1024), 'radius' (default 2), 'inc_hs' (default
        False) and 'inc_config' (default False).

    Raises:
        SystemExit: raised by argparse if required options are missing or an
                    option value is invalid (this includes a fingerprint size
                    < 1 and a radius < 0).
    """
    parser = argparse.ArgumentParser(description='Calculates extended connectivity fingerprints (ECFPs) for given input molecules.')

    parser.add_argument('-i',
                        dest='in_file',
                        required=True,
                        metavar='<file>',
                        help='Input molecule file')
    parser.add_argument('-o',
                        dest='out_file',
                        required=True,
                        metavar='<file>',
                        help='ECFP fingerprint output file')
    # a fingerprint needs at least one bit; reject invalid sizes here instead
    # of failing later while the first molecule gets processed
    parser.add_argument('-n',
                        dest='num_bits',
                        required=False,
                        metavar='<integer>',
                        default=1024,
                        help='Fingerprint size in bits (default: 1024)',
                        type=_intAtLeast(1))
    # a radius of zero (atoms only) is fine, negative values are meaningless
    parser.add_argument('-r',
                        dest='radius',
                        required=False,
                        metavar='<integer>',
                        default=2,
                        help='Max. atom environment radius in number of bonds (default: 2)',
                        type=_intAtLeast(0))
    parser.add_argument('-y',
                        dest='inc_hs',
                        required=False,
                        action='store_true',
                        default=False,
                        help='Do not ignore hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')
    parser.add_argument('-c',
                        dest='inc_config',
                        required=False,
                        action='store_true',
                        default=False,
                        help='Include atom chirality (by default, atom chirality is not considered)')

    return parser.parse_args()
81
def main() -> None:
    """
    Read molecules from the input file and write one ECFP per line to the output file.

    The command line is parsed by parseArgs(); for every molecule read, the
    fingerprint returned by genECFP() is converted with str() and written to
    the output file followed by a newline.

    The function always terminates the process via sys.exit(): with status 0
    on success, or with an error message (printed to stderr, exit status 1) if
    the output file cannot be opened, reading a molecule fails or processing
    a molecule fails.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # open output file storing the generated fingerprints; report failures
    # in the same style as all other errors instead of dumping a traceback
    try:
        out_file = open(args.out_file, 'w')
    except OSError as e:
        sys.exit('Error: opening output file failed: ' + str(e))

    # create an instance of the default implementation of the Chem.Molecule interface
    mol = Chem.BasicMolecule()

    # the context manager guarantees that the output file gets flushed and
    # closed on every exit path, including the sys.exit() calls below
    with out_file:
        # read and process molecules one after the other until the end of input has been reached
        try:
            while reader.read(mol):
                try:
                    fp = genECFP(mol, args.num_bits, args.radius, args.inc_hs, args.inc_config)

                    out_file.write(str(fp) + '\n')

                except Exception as e:
                    # SystemExit is not derived from Exception and thus passes
                    # through the outer handler untouched
                    sys.exit('Error: processing of molecule failed: ' + str(e))

        except Exception as e:  # handle exception raised in case of severe read errors
            sys.exit('Error: reading molecule failed: ' + str(e))

    sys.exit(0)
111
# run the tool only if this file is executed as a script (not when it is imported)
if __name__ == '__main__':
    main()