4.1.2. PubChem Fingerprints
The script gen_pubchem_fp.py generates and outputs the 881-bit PubChem fingerprint of each molecule read from a specified input file.
Synopsis
python gen_pubchem_fp.py [-h] -i <file> -o <file>
Mandatory options
- -i <file>
Input molecule file
- -o <file>
Fingerprint output file
Other options
- -h, --help
Show help message and exit
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.Descr as Descr
6import CDPL.Util as Util
7
8
# generates the PubChem fingerprint of the given molecular graph
def genPubChemFingerprint(molgraph: Chem.MolecularGraph) -> Util.BitSet:
    """Generate the PubChem fingerprint of a molecular graph.

    Args:
        molgraph: Molecular graph to generate the fingerprint for. It is
            passed to Chem.calcBasicProperties() before the fingerprint
            is generated.

    Returns:
        The Util.BitSet instance the fingerprint generator wrote into.
    """
    Chem.calcBasicProperties(molgraph, False)     # calculate basic molecular properties (if not yet done)

    fp_gen = Descr.PubChemFingerprintGenerator()  # create PubChem fingerprint generator instance
    fp = Util.BitSet()                            # create Util.BitSet instance storing the generated fingerprint

    # generate the PubChem fingerprint
    fp_gen.generate(molgraph, fp)

    # if needed, fp could be converted into a numpy single precision float array as follows:
    # fp = numpy.array(fp, dtype=numpy.float32)

    return fp
23
def parseArgs() -> argparse.Namespace:
    """Parse the command line arguments of the script.

    Both options are mandatory; argparse terminates the program with a
    usage message if one of them is missing.

    Returns:
        Namespace with the attributes 'in_file' (value of -i) and
        'out_file' (value of -o).
    """
    parser = argparse.ArgumentParser(description='Generates 881 bit PubChem fingerprints for given input molecules.')

    parser.add_argument('-i',
                        dest='in_file',
                        required=True,
                        metavar='<file>',
                        help='Input molecule file')
    parser.add_argument('-o',
                        dest='out_file',
                        required=True,
                        metavar='<file>',
                        help='Fingerprint output file')

    return parser.parse_args()
39
def main() -> None:
    """Read molecules from the input file and write one fingerprint per line.

    The string representation of each generated fingerprint is written to
    the output file, followed by a newline. The function always terminates
    the program via sys.exit(): with status 0 after all molecules have been
    processed, or with an error message if reading or processing fails.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # open output file storing the generated fingerprints; the 'with' block ensures
    # that the file gets closed on every exit path, including the sys.exit() calls below
    with open(args.out_file, 'w') as out_file:
        # create an instance of the default implementation of the Chem.Molecule interface
        mol = Chem.BasicMolecule()

        # read and process molecules one after the other until the end of input has been reached
        try:
            while reader.read(mol):
                try:
                    fp = genPubChemFingerprint(mol)

                    out_file.write(str(fp))
                    out_file.write('\n')

                except Exception as e:
                    sys.exit('Error: processing of molecule failed: ' + str(e))

        except Exception as e:  # handle exception raised in case of severe read errors
            sys.exit('Error: reading molecule failed: ' + str(e))

    sys.exit(0)
69
# run the fingerprint generation only when the file is executed as a script
if __name__ == '__main__':
    main()