4.1.4. Bond-Path Fingerprint
The script gen_path_fp.py generates the bond-path (a.k.a. Daylight) fingerprint of each molecule read from the specified input file and writes the fingerprints, one per line, to the specified output file.
Synopsis
python gen_path_fp.py [-h] -i <file> -o <file> [-n <integer>] [-l <integer>] [-u <integer>] [-H]
Mandatory options
- -i <file>
Input molecule file
- -o <file>
Fingerprint output file
Other options
- -h, --help
Show help message and exit
- -n <integer>
Fingerprint size in bits (default: 1024)
- -l <integer>
Minimum path length to consider (in number of bonds, default: 0)
- -u <integer>
Maximum path length to consider (in number of bonds, default: 5)
- -H
Include hydrogens (by default, the fingerprint is generated for the hydrogen-depleted molecular graph)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.Descr as Descr
6import CDPL.Util as Util
7
8
9# generates the binary path fingerprint of the given molecule
def genPathFingerprint(mol: Chem.Molecule, num_bits: int, min_len: int, max_len: int, inc_hs: bool) -> Util.BitSet:
    """
    Generates the binary path fingerprint of the given molecule.

    Arguments:
    mol      -- the molecule to process (modified in place: basic properties get
                calculated and, if inc_hs is set, implicit hydrogens are made explicit)
    num_bits -- size of the resulting fingerprint in bits
    min_len  -- min. path length passed on to the fingerprint generator
    max_len  -- max. path length passed on to the fingerprint generator
    inc_hs   -- if True, hydrogens are included in the fingerprint generation

    Returns the Util.BitSet instance storing the generated fingerprint.
    """
    # basic molecular properties are required; they get calculated only if not yet done
    Chem.calcBasicProperties(mol, False)

    # set up the path fingerprint generator with the requested path length range
    generator = Descr.PathFingerprintGenerator()

    generator.setMinPathLength(min_len)
    generator.setMaxPathLength(max_len)

    # option -H: the generator excludes hydrogens by default
    if inc_hs:
        Chem.makeHydrogenComplete(mol)    # turn any implicit hydrogens into explicit ones
        generator.includeHydrogens(True)

    # bitset of the desired size that receives the fingerprint
    bits = Util.BitSet()
    bits.resize(num_bits)

    generator.generate(mol, bits)

    # note: if needed, the bitset could be converted into a numpy single precision float array via
    # numpy.array(bits, dtype=numpy.float32)
    return bits
33
def _intAtLeast(lower: int):
    """
    Creates an argparse 'type' converter that accepts only integers >= lower.

    Rejected values raise argparse.ArgumentTypeError so that argparse reports
    them as a regular usage error naming the offending option.
    """
    def convert(text: str) -> int:
        try:
            value = int(text)
        except ValueError:
            # same wording argparse itself uses for type=int
            raise argparse.ArgumentTypeError('invalid int value: ' + repr(text))

        if value < lower:
            raise argparse.ArgumentTypeError('value must be >= ' + str(lower) + ' (got ' + str(value) + ')')

        return value

    return convert


def parseArgs() -> argparse.Namespace:
    """
    Parses the command line arguments of the script.

    Returns a namespace providing the attributes in_file, out_file, num_bits,
    min_path_len, max_path_len and inc_hs.

    Terminates the program with a usage error (raises SystemExit) if mandatory
    options are missing, if the fingerprint size is < 1, if a path length is
    negative or if the min. path length exceeds the max. path length.
    """
    parser = argparse.ArgumentParser(description='Generates path (aka Daylight) fingerprints for given input molecules.')

    parser.add_argument('-i',
                        dest='in_file',
                        required=True,
                        metavar='<file>',
                        help='Input molecule file')
    parser.add_argument('-o',
                        dest='out_file',
                        required=True,
                        metavar='<file>',
                        help='Fingerprint output file')
    parser.add_argument('-n',
                        dest='num_bits',
                        required=False,
                        metavar='<integer>',
                        default=1024,
                        help='Fingerprint size in bits (default: 1024)',
                        type=_intAtLeast(1))       # a fingerprint needs at least one bit
    parser.add_argument('-l',
                        dest='min_path_len',
                        required=False,
                        metavar='<integer>',
                        default=0,
                        help='Minimum path length to consider (in number of bonds, default: 0)',
                        type=_intAtLeast(0))       # bond counts cannot be negative
    parser.add_argument('-u',
                        dest='max_path_len',
                        required=False,
                        metavar='<integer>',
                        default=5,
                        help='Maximum path length to consider (in number of bonds, default: 5)',
                        type=_intAtLeast(0))       # bond counts cannot be negative
    parser.add_argument('-H',
                        dest='inc_hs',
                        required=False,
                        action='store_true',
                        default=False,
                        help='Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')

    args = parser.parse_args()

    # an empty path length range is almost certainly a user mistake -> report it right away
    if args.min_path_len > args.max_path_len:
        parser.error('minimum path length (-l) must not exceed maximum path length (-u)')

    return args
76
def main() -> None:
    """
    Script entry point.

    Reads molecules one after the other from the input file given by option -i,
    generates the path fingerprint of each molecule and writes the string
    representation of the fingerprints line by line to the output file given
    by option -o.

    Always terminates via sys.exit(): with status 0 on success and with an
    error message if reading or processing of a molecule failed.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # open output file storing the generated fingerprints; the with-statement ensures
    # that the file gets closed on every exit path (including the sys.exit() calls below)
    with open(args.out_file, 'w') as out_file:
        # create an instance of the default implementation of the Chem.Molecule interface
        mol = Chem.BasicMolecule()

        # read and process molecules one after the other until the end of input has been reached
        try:
            while reader.read(mol):
                try:
                    fp = genPathFingerprint(mol, args.num_bits, args.min_path_len, args.max_path_len, args.inc_hs)

                    out_file.write(str(fp) + '\n')

                except Exception as e:
                    # SystemExit is not derived from Exception and thus passes the outer handler untouched
                    sys.exit('Error: processing of molecule failed:\n' + str(e))

        except Exception as e: # handle exception raised in case of severe read errors
            sys.exit('Error: reading molecule failed:\n' + str(e))

    sys.exit(0)


# run main() only if the file is executed as a script (not when imported as a module)
if __name__ == '__main__':
    main()