4.1.4. Bond-Path Fingerprint

The script gen_path_fp.py generates the bond-path (aka Daylight) fingerprint of each molecule read from the specified input file and writes the fingerprints to the specified output file, one per line.

Synopsis

python gen_path_fp.py [-h] -i <file> -o <file> [-n <integer>] [-l <integer>] [-u <integer>] [-H]

Mandatory options

-i <file>

Input molecule file

-o <file>

Fingerprint output file

Other options

-h, --help

Show help message and exit

-n <integer>

Fingerprint size in bits (default: 1024)

-l <integer>

Minimum path length to consider (in number of bonds, default: 0)

-u <integer>

Maximum path length to consider (in number of bonds, default: 5)

-H

Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.Descr as Descr
  6import CDPL.Util as Util
  7
  8
  9# generates the binary path fingerprint of the given molecule
 10def genPathFingerprint(mol: Chem.Molecule, num_bits: int, min_len: int, max_len: int, inc_hs: bool) -> Util.BitSet:
 11    Chem.calcBasicProperties(mol, False)      # calculate basic molecular properties (if not yet done)
 12
 13    fp_gen = Descr.PathFingerprintGenerator() # create path fingerprint generator instance
 14    
 15    # apply option -H
 16    if inc_hs:        
 17        Chem.makeHydrogenComplete(mol)        # make any implicit hydrogens explicit
 18        fp_gen.includeHydrogens(True)         # default = exclude hydrogens
 19
 20    fp_gen.setMinPathLength(min_len)          # set min. path length
 21    fp_gen.setMaxPathLength(max_len)          # set max. path length
 22    
 23    fp = Util.BitSet()                        # create fingerprint bitset
 24    fp.resize(num_bits)                       # set desired fingerprint size
 25
 26    # generate the fingerprint
 27    fp_gen.generate(mol, fp)                  
 28
 29    # if needed, fp could be converted into a numpy single precision float array as follows:
 30    # fp = numpy.array(fp, dtype=numpy.float32)
 31    
 32    return fp
 33    
 34def parseArgs() -> argparse.Namespace:
 35    parser = argparse.ArgumentParser(description='Generates path (aka Daylight) fingerprints for given input molecules.')
 36
 37    parser.add_argument('-i',
 38                        dest='in_file',
 39                        required=True,
 40                        metavar='<file>',
 41                        help='Input molecule file')
 42    parser.add_argument('-o',
 43                        dest='out_file',
 44                        required=True,
 45                        metavar='<file>',
 46                        help='Fingerprint output file')
 47    parser.add_argument('-n',
 48                        dest='num_bits',
 49                        required=False,
 50                        metavar='<integer>',
 51                        default=1024,
 52                        help='Fingerprint size in bits (default: 1024)',
 53                        type=int)
 54    parser.add_argument('-l',
 55                        dest='min_path_len',
 56                        required=False,
 57                        metavar='<integer>',
 58                        default=0,
 59                        help='Minimum path length to consider (in number of bonds, default: 0)',
 60                        type=int)
 61    parser.add_argument('-u',
 62                        dest='max_path_len',
 63                        required=False,
 64                        metavar='<integer>',
 65                        default=5,
 66                        help='Maximum path length to consider (in number of bonds, default: 5)',
 67                        type=int)
 68    parser.add_argument('-H',
 69                        dest='inc_hs',
 70                        required=False,
 71                        action='store_true',
 72                        default=False,
 73                        help='Include hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')
 74 
 75    return parser.parse_args()
 76    
 77def main() -> None:
 78    args = parseArgs()
 79
 80    # create reader for input molecules (format specified by file extension)
 81    reader = Chem.MoleculeReader(args.in_file) 
 82
 83    # open output file storing the generated fingerprints
 84    out_file = open(args.out_file, 'w')
 85    
 86    # create an instance of the default implementation of the Chem.Molecule interface
 87    mol = Chem.BasicMolecule()
 88
 89    # read and process molecules one after the other until the end of input has been reached
 90    try:
 91        while reader.read(mol):
 92            try:
 93                fp = genPathFingerprint(mol, args.num_bits, args.min_path_len, args.max_path_len, args.inc_hs)
 94
 95                out_file.write(str(fp))
 96                out_file.write('\n')
 97
 98            except Exception as e:
 99                sys.exit('Error: processing of molecule failed:\n' + str(e))
100                
101    except Exception as e: # handle exception raised in case of severe read errors
102        sys.exit('Error: reading molecule failed:\n' + str(e))
103
104    out_file.close()
105    sys.exit(0)
106        
# run the fingerprint generation only when executed as a script (not when imported)
if __name__ == "__main__":
    main()

Download source file