4.1.4. Bond-Path Fingerprints
The script gen_path_fp.py generates bond-path (aka Daylight) fingerprints for the molecules read from a specified input file and writes them to an output file.
Synopsis
python gen_path_fp.py [-h] -i <file> -o <file> [-n <integer>] [-l <integer>] [-u <integer>] [-H]
Mandatory options
- -i <file>
Input molecule file
- -o <file>
Fingerprint output file
Other options
- -h, --help
Show help message and exit
- -n <integer>
Fingerprint size in bits (default: 1024)
- -l <integer>
Minimum path length to consider (in number of bonds, default: 0)
- -u <integer>
Maximum path length to consider (in number of bonds, default: 5)
- -H
Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.Descr as Descr
6import CDPL.Util as Util
7
8
9# generates the binary path fingerprint of the given molecule
def genPathFingerprint(mol: Chem.Molecule, num_bits: int, min_len: int, max_len: int, inc_hs: bool) -> Util.BitSet:
    """Generate the binary bond-path fingerprint of a molecule.

    The molecule is handed to CDPL functions that add or remove explicit
    hydrogens, so it is modified by this call.

    Args:
        mol: Molecule to generate the fingerprint for.
        num_bits: Size the fingerprint bitset is resized to.
        min_len: Minimum path length passed to the generator.
        max_len: Maximum path length passed to the generator.
        inc_hs: If true, implicit hydrogens are made explicit before
            fingerprint generation; otherwise explicit hydrogens are made
            implicit.

    Returns:
        The bitset filled by the path fingerprint generator.
    """
    # calculate basic molecular properties (if not yet done)
    Chem.calcBasicProperties(mol, False)

    # option -H: choose between the H-complete and the H-deplete molecular graph
    adjust_hydrogens = Chem.makeHydrogenComplete if inc_hs else Chem.makeHydrogenDeplete
    adjust_hydrogens(mol)

    # set up the generator with the requested path length range
    generator = Descr.PathFingerprintGenerator()
    generator.setMinPathLength(min_len)
    generator.setMaxPathLength(max_len)

    # the output bitset has to be given the desired size before generation
    bits = Util.BitSet()
    bits.resize(num_bits)

    generator.generate(mol, bits)

    # if needed, the result could be converted into a numpy single precision float array:
    # bits = numpy.array(bits, dtype=numpy.float32)
    return bits
34
def parseArgs() -> argparse.Namespace:
    """Parse the command line arguments of the script.

    Returns:
        Namespace with the attributes ``in_file``, ``out_file``, ``num_bits``,
        ``min_path_len``, ``max_path_len`` and ``inc_hs``.
    """
    # (flag, add_argument() keyword arguments) in the order they appear in the help output
    option_table = (
        ('-i', {'dest': 'in_file',
                'required': True,
                'metavar': '<file>',
                'help': 'Input molecule file'}),
        ('-o', {'dest': 'out_file',
                'required': True,
                'metavar': '<file>',
                'help': 'Fingerprint output file'}),
        ('-n', {'dest': 'num_bits',
                'required': False,
                'metavar': '<integer>',
                'default': 1024,
                'help': 'Fingerprint size in bits (default: 1024)',
                'type': int}),
        ('-l', {'dest': 'min_path_len',
                'required': False,
                'metavar': '<integer>',
                'default': 0,
                'help': 'Minimum path length to consider (in number of bonds, default: 0)',
                'type': int}),
        ('-u', {'dest': 'max_path_len',
                'required': False,
                'metavar': '<integer>',
                'default': 5,
                'help': 'Maximum path length to consider (in number of bonds, default: 5)',
                'type': int}),
        ('-H', {'dest': 'inc_hs',
                'required': False,
                'action': 'store_true',
                'default': False,
                'help': 'Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)'}),
    )

    arg_parser = argparse.ArgumentParser(description='Generates path (aka Daylight) fingerprints for given input molecules.')

    for flag, settings in option_table:
        arg_parser.add_argument(flag, **settings)

    return arg_parser.parse_args()
77
def main() -> None:
    """Read molecules from the input file and write one fingerprint per line.

    The process is terminated via sys.exit(): with status 0 after all
    molecules have been processed, or with an error message (printed to
    stderr, non-zero status) as soon as reading or processing of a molecule
    fails.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # open output file storing the generated fingerprints; the context manager
    # makes sure the file is flushed and closed on every exit path, including
    # the sys.exit() calls in the error handlers below
    with open(args.out_file, 'w') as out_file:
        # create an instance of the default implementation of the Chem.Molecule interface
        mol = Chem.BasicMolecule()

        # read and process molecules one after the other until the end of input has been reached
        try:
            while reader.read(mol):
                try:
                    fp = genPathFingerprint(mol, args.num_bits, args.min_path_len, args.max_path_len, args.inc_hs)

                    out_file.write(str(fp))
                    out_file.write('\n')

                except Exception as e:
                    # SystemExit is not an Exception subclass, so it passes through the outer handler
                    sys.exit('Error: processing of molecule failed: ' + str(e))

        except Exception as e:  # handle exception raised in case of severe read errors
            sys.exit('Error: reading molecule failed: ' + str(e))

    sys.exit(0)
107
# run main() only when the file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()