4.1.1. Extended connectivity fingerprints (ECFPs)

  1import sys
  2import os
  3import argparse
  4
  5import CDPL.Chem as Chem
  6import CDPL.Descr as Descr
  7import CDPL.Util as Util
  8
  9
 10# generates the binary ECFP for the given molecule
 11def genECFP(mol: Chem.Molecule, num_bits: int, radius: int, inc_hs: bool, inc_config: bool) -> Util.BitSet:
 12    Chem.calcImplicitHydrogenCounts(mol, False)        # calculate implicit hydrogen counts (if not yet done)
 13    Chem.perceiveHybridizationStates(mol, False)       # perceive atom hybridization states and set corresponding property for all atoms
 14    Chem.setRingFlags(mol, False)                      # perceive cycles and set corresponding atom and bond properties
 15    Chem.perceiveSSSR(mol, False)                      # perceive smallest set of smallest rings and store as Chem.MolecularGraph property
 16    Chem.setAromaticityFlags(mol, False)               # perceive aromaticity and set corresponding atom and bond properties
 17    
 18    ecfp_gen = Descr.CircularFingerprintGenerator()    # create ECFP generator instance
 19
 20    if inc_config:
 21        ecfp_gen.includeChirality(True)                # allow atom chirality to have an impact on the ECFP generation
 22        Chem.calcAtomStereoDescriptors(mol, False)     # calculate atom stereo descriptors and set corresponding property for all atoms
 23
 24    if inc_hs:        
 25        ecfp_gen.includeHydrogens(True)                # include explicit hydrogens in the ECFP generation
 26        Chem.makeHydrogenComplete(mol)                 # make any implicit hydrogens explicit
 27         
 28    fp = Util.BitSet()                                 # create fingerprint bitset
 29    fp.resize(num_bits)                                # set desired fingerprint size
 30
 31    ecfp_gen.setNumIterations(radius)                  # set num. iterations (=atom. env. radius)
 32    ecfp_gen.generate(mol)                             # extract chracteristic structural features
 33    ecfp_gen.setFeatureBits(fp)                        # set bits associated with the extracted structural features
 34
 35    # if needed, fp could be converted into a numpy single precision float array as follows:
 36    # fp = numpy.array(fp, dtype=numpy.float32)
 37    
 38    return fp
 39    
 40def parseArgs() -> argparse.Namespace:
 41    parser = argparse.ArgumentParser(description='Calculates extended connectivity fingerprints (ECFPs) for given input molecules.')
 42
 43    parser.add_argument('-i',
 44                        dest='in_file',
 45                        required=True,
 46                        metavar='<file>',
 47                        help='Input molecule file')
 48    parser.add_argument('-o',
 49                        dest='out_file',
 50                        required=True,
 51                        metavar='<file>',
 52                        help='ECFP fingerprint output file')
 53    parser.add_argument('-n',
 54                        dest='num_bits',
 55                        required=False,
 56                        metavar='<integer>',
 57                        default=1024,
 58                        help='Fingerprint size in bits (default: 1024)',
 59                        type=int)
 60    parser.add_argument('-r',
 61                        dest='radius',
 62                        required=False,
 63                        metavar='<integer>',
 64                        default=2,
 65                        help='Max. atom environment radius in number of bonds (default: 2)',
 66                        type=int)
 67    parser.add_argument('-y',
 68                        dest='inc_hs',
 69                        required=False,
 70                        action='store_true',
 71                        default=False,
 72                        help='Do not ignore hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')
 73    parser.add_argument('-c',
 74                        dest='inc_config',
 75                        required=False,
 76                        action='store_true',
 77                        default=False,
 78                        help='Include atom chirality (by default, atom chirality is not considered)')
 79
 80    return parser.parse_args()
 81    
 82def main() -> None:
 83    args = parseArgs()
 84
 85    # create reader for input molecules (format specified by file extension)
 86    reader = Chem.MoleculeReader(args.in_file) 
 87
 88    # open output file storing the generated fingerprints
 89    out_file = open(args.out_file, 'w')
 90    
 91    # create an instance of the default implementation of the Chem.Molecule interface
 92    mol = Chem.BasicMolecule()
 93
 94    # read and process molecules one after the other until the end of input has been reached
 95    try:
 96        while reader.read(mol):
 97            try:
 98                fp = genECFP(mol, args.num_bits, args.radius, args.inc_hs, args.inc_config)
 99
100                out_file.write(str(fp))
101                out_file.write('\n')
102
103            except Exception as e:
104                sys.exit('Error: processing of molecule failed: ' + str(e))
105                
106    except Exception as e: # handle exception raised in case of severe read errors
107        sys.exit('Error: reading molecule failed: ' + str(e))
108
109    out_file.close()
110    sys.exit(0)
111        
# start the fingerprint calculation only if the file is executed as a script
# (and not when it gets imported as a module)
if __name__ == '__main__':
    main()

Download source file