1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.Descr as Descr
6import CDPL.Util as Util
7
8
9# generates the binary ECFP for the given molecule
def genECFP(mol: Chem.Molecule, num_bits: int, radius: int, inc_hs: bool, inc_config: bool) -> Util.BitSet:
    """
    Compute the binary extended connectivity fingerprint (ECFP) of a molecule.

    The molecule is modified in place: several perception functions store
    their results as properties on it and, when inc_hs is set, hydrogens
    are made complete on it.

    Parameters:
        mol: molecule to fingerprint.
        num_bits: size of the returned bit set.
        radius: number of generator iterations (= atom environment radius).
        inc_hs: if True, hydrogens are included in the fingerprint generation.
        inc_config: if True, atom chirality is included in the fingerprint generation.

    Returns:
        A Util.BitSet of num_bits bits with the feature bits set.
    """
    # Basic property perception, executed strictly in this order. The second
    # argument is always False (presumably "do not overwrite already present
    # values" - confirm against the CDPL API documentation).
    perception_steps = (
        Chem.calcImplicitHydrogenCounts,    # implicit hydrogen counts
        Chem.perceiveHybridizationStates,   # atom hybridization states
        Chem.setRingFlags,                  # ring membership flags of atoms and bonds
        Chem.perceiveSSSR,                  # smallest set of smallest rings
        Chem.setAromaticityFlags,           # aromaticity flags of atoms and bonds
    )

    for step in perception_steps:
        step(mol, False)

    generator = Descr.CircularFingerprintGenerator()

    if inc_config:
        # chirality may only influence the fingerprint if the stereo
        # information of the atoms has been perceived beforehand
        generator.includeChirality(True)
        Chem.calcCIPPriorities(mol, False)                 # atom symmetry classes
        Chem.perceiveAtomStereoCenters(mol, False, True)   # chiral atoms
        Chem.calcAtomStereoDescriptors(mol, False)         # atom stereo descriptors

    if inc_hs:
        generator.includeHydrogens(True)    # explicit hydrogens take part in the generation
        Chem.makeHydrogenComplete(mol)      # turn implicit hydrogens into explicit ones

    generator.setNumIterations(radius)      # num. iterations = atom environment radius
    generator.generate(mol)                 # extract characteristic structural features

    bits = Util.BitSet()
    bits.resize(num_bits)                   # desired fingerprint size
    generator.setFeatureBits(bits)          # set the bits of the extracted features

    # if needed, the bit set could be converted into a numpy single precision
    # float array as follows:
    # bits = numpy.array(bits, dtype=numpy.float32)

    return bits
40
def _intAtLeast(minimum: int):
    """Return an argparse type converter that accepts only integers >= minimum."""
    def convert(text: str) -> int:
        try:
            value = int(text)
        except ValueError:
            # same wording argparse uses for a plain type=int argument
            raise argparse.ArgumentTypeError('invalid int value: {!r}'.format(text)) from None

        if value < minimum:
            raise argparse.ArgumentTypeError('must be an integer >= {} (got {})'.format(minimum, value))

        return value

    return convert


def parseArgs() -> argparse.Namespace:
    """
    Parse the command line arguments of the script.

    Returns:
        A namespace with the attributes in_file, out_file, num_bits (int,
        default 1024), radius (int, default 2), inc_hs (bool, default False)
        and inc_config (bool, default False).

    Raises:
        SystemExit: raised by argparse if a required argument is missing or
            a value is invalid; this includes a fingerprint size below 1 and
            a negative radius, which are rejected here instead of failing
            later for every processed molecule.
    """
    parser = argparse.ArgumentParser(description='Generates extended connectivity fingerprints (ECFPs) for given input molecules.')

    parser.add_argument('-i',
                        dest='in_file',
                        required=True,
                        metavar='<file>',
                        help='Input molecule file')
    parser.add_argument('-o',
                        dest='out_file',
                        required=True,
                        metavar='<file>',
                        help='ECFP fingerprint output file')
    parser.add_argument('-n',
                        dest='num_bits',
                        metavar='<integer>',
                        default=1024,
                        help='Fingerprint size in bits (default: 1024)',
                        type=_intAtLeast(1))    # a fingerprint needs at least one bit
    parser.add_argument('-r',
                        dest='radius',
                        metavar='<integer>',
                        default=2,
                        help='Max. atom environment radius in number of bonds (default: 2)',
                        type=_intAtLeast(0))    # radius 0 (atoms only) is still meaningful
    parser.add_argument('-y',
                        dest='inc_hs',
                        action='store_true',
                        help='Do not ignore hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')
    parser.add_argument('-c',
                        dest='inc_config',
                        action='store_true',
                        help='Include atom chirality (by default, atom chirality is not considered)')

    return parser.parse_args()
82
def main() -> None:
    """
    Script entry point: read molecules, generate their ECFPs and write them out.

    Each fingerprint is written to the output file as the string
    representation of the bit set returned by genECFP(), one per line.
    The function always terminates the process via sys.exit(): status 0 on
    success, an error message (non-zero status) if reading or processing a
    molecule fails.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # the context manager guarantees that the output file gets flushed and
    # closed on every exit path, including the sys.exit() calls below
    with open(args.out_file, 'w') as out_file:
        # create an instance of the default implementation of the Chem.Molecule interface
        mol = Chem.BasicMolecule()

        # read and process molecules one after the other until the end of input has been reached
        try:
            while reader.read(mol):
                try:
                    fp = genECFP(mol, args.num_bits, args.radius, args.inc_hs, args.inc_config)

                    out_file.write(str(fp) + '\n')

                except Exception as e:
                    # SystemExit is not an Exception subclass, so this exit
                    # passes through the outer handler unchanged
                    sys.exit('Error: processing of molecule failed: ' + str(e))

        except Exception as e: # handle exception raised in case of severe read errors
            sys.exit('Error: reading molecule failed: ' + str(e))

    sys.exit(0)
112
113if __name__ == '__main__':
114 main()