4.1.1. Extended Connectivity Fingerprints (ECFPs)

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.Descr as Descr
  6import CDPL.Util as Util
  7
  8
  9# generates the binary ECFP for the given molecule
 10def genECFP(mol: Chem.Molecule, num_bits: int, radius: int, inc_hs: bool, inc_config: bool) -> Util.BitSet:
 11    Chem.calcImplicitHydrogenCounts(mol, False)        # calculate implicit hydrogen counts (if not yet done)
 12    Chem.perceiveHybridizationStates(mol, False)       # perceive atom hybridization states and set corresponding property for all atoms
 13    Chem.setRingFlags(mol, False)                      # perceive cycles and set corresponding atom and bond properties
 14    Chem.perceiveSSSR(mol, False)                      # perceive smallest set of smallest rings and store as Chem.MolecularGraph property
 15    Chem.setAromaticityFlags(mol, False)               # perceive aromaticity and set corresponding atom and bond properties
 16    
 17    ecfp_gen = Descr.CircularFingerprintGenerator()    # create ECFP generator instance
 18
 19    if inc_config:
 20        ecfp_gen.includeChirality(True)                  # allow atom chirality to have an impact on the ECFP generation
 21        Chem.calcCIPPriorities(mol, False)               # calculate atom symmetry classes for chiral atom perception and set corresponding property for all atoms
 22        Chem.perceiveAtomStereoCenters(mol, False, True) # perceive chiral atoms and set corresponding property for all atoms
 23        Chem.calcAtomStereoDescriptors(mol, False)       # calculate atom stereo descriptors and set corresponding property for all atoms
 24
 25    if inc_hs:        
 26        ecfp_gen.includeHydrogens(True)                # include explicit hydrogens in the ECFP generation
 27        Chem.makeHydrogenComplete(mol)                 # make any implicit hydrogens explicit
 28         
 29    fp = Util.BitSet()                                 # create fingerprint bitset
 30    fp.resize(num_bits)                                # set desired fingerprint size
 31
 32    ecfp_gen.setNumIterations(radius)                  # set num. iterations (=atom. env. radius)
 33    ecfp_gen.generate(mol)                             # extract chracteristic structural features
 34    ecfp_gen.setFeatureBits(fp)                        # set bits associated with the extracted structural features
 35
 36    # if needed, fp could be converted into a numpy single precision float array as follows:
 37    # fp = numpy.array(fp, dtype=numpy.float32)
 38    
 39    return fp
 40    
 41def parseArgs() -> argparse.Namespace:
 42    parser = argparse.ArgumentParser(description='Generates extended connectivity fingerprints (ECFPs) for given input molecules.')
 43
 44    parser.add_argument('-i',
 45                        dest='in_file',
 46                        required=True,
 47                        metavar='<file>',
 48                        help='Input molecule file')
 49    parser.add_argument('-o',
 50                        dest='out_file',
 51                        required=True,
 52                        metavar='<file>',
 53                        help='ECFP fingerprint output file')
 54    parser.add_argument('-n',
 55                        dest='num_bits',
 56                        required=False,
 57                        metavar='<integer>',
 58                        default=1024,
 59                        help='Fingerprint size in bits (default: 1024)',
 60                        type=int)
 61    parser.add_argument('-r',
 62                        dest='radius',
 63                        required=False,
 64                        metavar='<integer>',
 65                        default=2,
 66                        help='Max. atom environment radius in number of bonds (default: 2)',
 67                        type=int)
 68    parser.add_argument('-y',
 69                        dest='inc_hs',
 70                        required=False,
 71                        action='store_true',
 72                        default=False,
 73                        help='Do not ignore hydrogens (by default, the fingerprint is generated for the H-deplete molecular graph)')
 74    parser.add_argument('-c',
 75                        dest='inc_config',
 76                        required=False,
 77                        action='store_true',
 78                        default=False,
 79                        help='Include atom chirality (by default, atom chirality is not considered)')
 80
 81    return parser.parse_args()
 82    
 83def main() -> None:
 84    args = parseArgs()
 85
 86    # create reader for input molecules (format specified by file extension)
 87    reader = Chem.MoleculeReader(args.in_file) 
 88
 89    # open output file storing the generated fingerprints
 90    out_file = open(args.out_file, 'w')
 91    
 92    # create an instance of the default implementation of the Chem.Molecule interface
 93    mol = Chem.BasicMolecule()
 94
 95    # read and process molecules one after the other until the end of input has been reached
 96    try:
 97        while reader.read(mol):
 98            try:
 99                fp = genECFP(mol, args.num_bits, args.radius, args.inc_hs, args.inc_config)
100
101                out_file.write(str(fp))
102                out_file.write('\n')
103
104            except Exception as e:
105                sys.exit('Error: processing of molecule failed: ' + str(e))
106                
107    except Exception as e: # handle exception raised in case of severe read errors
108        sys.exit('Error: reading molecule failed: ' + str(e))
109
110    out_file.close()
111    sys.exit(0)
112        
# run main() only if this file is executed as a script (and not when it is imported)
if __name__ == "__main__":
    main()

Download source file