4.1.4. Bond-Path Fingerprints

The script gen_path_fp.py generates the bond-path (aka Daylight) fingerprint of each molecule read from a specified input file and writes the fingerprints to an output file, one per line.

Synopsis

python gen_path_fp.py [-h] -i <file> -o <file> [-n <integer>] [-l <integer>] [-u <integer>] [-H]

Mandatory options

-i <file>

Input molecule file

-o <file>

Fingerprint output file

Other options

-h, --help

Show help message and exit

-n <integer>

Fingerprint size in bits (default: 1024)

-l <integer>

Minimum path length to consider (in number of bonds, default: 0)

-u <integer>

Maximum path length to consider (in number of bonds, default: 5)

-H

Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.Descr as Descr
  6import CDPL.Util as Util
  7
  8
  9# generates the binary path fingerprint of the given molecule
 10def genPathFingerprint(mol: Chem.Molecule, num_bits: int, min_len: int, max_len: int, inc_hs: bool) -> Util.BitSet:
 11    Chem.calcBasicProperties(mol, False)      # calculate basic molecular properties (if not yet done)
 12
 13    # apply option -H
 14    if inc_hs:        
 15        Chem.makeHydrogenComplete(mol)        # make any implicit hydrogens explicit
 16    else:        
 17        Chem.makeHydrogenDeplete(mol)         # make any explicit hydrogens implicit
 18        
 19    fp_gen = Descr.PathFingerprintGenerator() # create path fingerprint generator instance
 20
 21    fp_gen.setMinPathLength(min_len)          # set min. path length
 22    fp_gen.setMaxPathLength(max_len)          # set max. path length
 23    
 24    fp = Util.BitSet()                        # create fingerprint bitset
 25    fp.resize(num_bits)                       # set desired fingerprint size
 26
 27    # generate the fingerprint
 28    fp_gen.generate(mol, fp)                  
 29
 30    # if needed, fp could be converted into a numpy single precision float array as follows:
 31    # fp = numpy.array(fp, dtype=numpy.float32)
 32    
 33    return fp
 34    
 35def parseArgs() -> argparse.Namespace:
 36    parser = argparse.ArgumentParser(description='Generates path (aka Daylight) fingerprints for given input molecules.')
 37
 38    parser.add_argument('-i',
 39                        dest='in_file',
 40                        required=True,
 41                        metavar='<file>',
 42                        help='Input molecule file')
 43    parser.add_argument('-o',
 44                        dest='out_file',
 45                        required=True,
 46                        metavar='<file>',
 47                        help='Fingerprint output file')
 48    parser.add_argument('-n',
 49                        dest='num_bits',
 50                        required=False,
 51                        metavar='<integer>',
 52                        default=1024,
 53                        help='Fingerprint size in bits (default: 1024)',
 54                        type=int)
 55    parser.add_argument('-l',
 56                        dest='min_path_len',
 57                        required=False,
 58                        metavar='<integer>',
 59                        default=0,
 60                        help='Minimum path length to consider (in number of bonds, default: 0)',
 61                        type=int)
 62    parser.add_argument('-u',
 63                        dest='max_path_len',
 64                        required=False,
 65                        metavar='<integer>',
 66                        default=5,
 67                        help='Maximum path length to consider (in number of bonds, default: 5)',
 68                        type=int)
 69    parser.add_argument('-H',
 70                        dest='inc_hs',
 71                        required=False,
 72                        action='store_true',
 73                        default=False,
 74                        help='Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')
 75 
 76    return parser.parse_args()
 77    
 78def main() -> None:
 79    args = parseArgs()
 80
 81    # create reader for input molecules (format specified by file extension)
 82    reader = Chem.MoleculeReader(args.in_file) 
 83
 84    # open output file storing the generated fingerprints
 85    out_file = open(args.out_file, 'w')
 86    
 87    # create an instance of the default implementation of the Chem.Molecule interface
 88    mol = Chem.BasicMolecule()
 89
 90    # read and process molecules one after the other until the end of input has been reached
 91    try:
 92        while reader.read(mol):
 93            try:
 94                fp = genPathFingerprint(mol, args.num_bits, args.min_path_len, args.max_path_len, args.inc_hs)
 95
 96                out_file.write(str(fp))
 97                out_file.write('\n')
 98
 99            except Exception as e:
100                sys.exit('Error: processing of molecule failed: ' + str(e))
101                
102    except Exception as e: # handle exception raised in case of severe read errors
103        sys.exit('Error: reading molecule failed: ' + str(e))
104
105    out_file.close()
106    sys.exit(0)
107        
108if __name__ == '__main__':
109    main()

Download source file