4.1.2. PubChem Fingerprints

The script gen_pubchem_fp.py generates the 881-bit PubChem fingerprint of each molecule read from the specified input file and writes the fingerprints, one per line, to the specified output file.

Synopsis

python gen_pubchem_fp.py [-h] -i <file> -o <file>

Mandatory options

-i <file>

Input molecule file

-o <file>

Fingerprint output file

Other options

-h, --help

Show help message and exit

Code

 1import sys
 2import argparse
 3
 4import CDPL.Chem as Chem
 5import CDPL.Descr as Descr
 6import CDPL.Util as Util
 7
 8
 9# generates the PubChem fingerprint of the given molecular graph
10def genPubChemFingerprint(molgraph: Chem.MolecularGraph) -> Util.BitSet:
11    Chem.calcBasicProperties(molgraph, False)    # calculate basic molecular properties (if not yet done)
12   
13    fp_gen = Descr.PubChemFingerprintGenerator() # create PubChem fingerprint generator instance
14    fp = Util.BitSet()                           # create Util.BitSet instance storing the generated fingerprint
15
16    # generate the PubChem fingerprint
17    fp_gen.generate(molgraph, fp)
18
19    # if needed, fp could be converted into a numpy single precision float array as follows:
20    # fp = numpy.array(fp, dtype=numpy.float32)
21
22    return fp
23    
def parseArgs() -> argparse.Namespace:
    """Parse the command line arguments of the script.

    Returns:
        Namespace with the attributes ``in_file`` (value of -i) and
        ``out_file`` (value of -o). Both options are mandatory.
    """
    cli = argparse.ArgumentParser(description='Generates 881 bit PubChem fingerprints for given input molecules.')

    # (option flag, destination attribute, help text) of the mandatory options
    mandatory_opts = (
        ('-i', 'in_file', 'Input molecule file'),
        ('-o', 'out_file', 'Fingerprint output file'),
    )

    for flag, attr_name, help_text in mandatory_opts:
        cli.add_argument(flag,
                         dest=attr_name,
                         required=True,
                         metavar='<file>',
                         help=help_text)

    return cli.parse_args()
39    
def main() -> None:
    """Generate the PubChem fingerprint of every input molecule.

    Reads molecules one after the other from the file given by -i, generates
    the fingerprint of each molecule and writes its string representation
    to the file given by -o, one fingerprint per line.

    Raises:
        SystemExit: Always. Exit status 0 once all molecules have been
            processed; an error message (non-zero status) if reading or
            processing a molecule fails.
    """
    args = parseArgs()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # open output file storing the generated fingerprints; the with-statement
    # guarantees that the file is flushed and closed on every exit path,
    # including the sys.exit() calls in the error handlers below
    with open(args.out_file, 'w') as out_file:
        # create an instance of the default implementation of the Chem.Molecule interface
        mol = Chem.BasicMolecule()

        # read and process molecules one after the other until the end of input has been reached
        try:
            while reader.read(mol):
                try:
                    fp = genPubChemFingerprint(mol)

                    out_file.write(str(fp))
                    out_file.write('\n')

                except Exception as e:
                    sys.exit('Error: processing of molecule failed: ' + str(e))

        except Exception as e:  # handle exception raised in case of severe read errors
            sys.exit('Error: reading molecule failed: ' + str(e))

    sys.exit(0)
69        
# call main() only when this file is executed as a script (not when imported)
if __name__ == '__main__':
    main()

Download source file