4.1.6. 2D and 3D Pharmacophore Fingerprints

The script gen_ph4_fp.py generates 2D or 3D pharmacophore fingerprints for the molecules read from a specified input file and writes them, one fingerprint per line, to the specified output file.

Synopsis

python gen_ph4_fp.py [-h] -i <file> -o <file> [-n <integer>] [-d <integer>] [-b <float>]

Mandatory options

-i <file>

Input molecule file

-o <file>

Fingerprint output file

Other options

-h, --help

Show help message and exit

-n <integer>

Fingerprint size in bits (default: 4096)

-d <integer>

Type of feature distances to consider: 2 -> topological distance, 3 -> 3D distance (default: 2)

-b <float>

Feature distance bin size (default: 1.0)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.Descr as Descr
  6import CDPL.Util as Util
  7import CDPL.Pharm as Pharm
  8
  9
 10# generates the binary pharmacophore fingerprint of the given molecule
 11def genPharmFingerprint(mol: Chem.Molecule, num_bits: int, bin_size: float, dim: int) -> Util.BitSet:
 12    Pharm.prepareForPharmacophoreGeneration(mol)  # prepare molecule for pharmacophore generation
 13
 14    if dim == 2:
 15        fp_gen = Descr.NPoint2DPharmacophoreFingerprintGenerator() # create 2D pham. fingerprint generator instance
 16    else:
 17        fp_gen = Descr.NPoint3DPharmacophoreFingerprintGenerator() # create 3D pham. fingerprint generator instance
 18
 19    fp_gen.setBinSize(bin_size) # set feature distance bin size
 20        
 21    fp = Util.BitSet()          # create fingerprint bitset
 22    fp.resize(num_bits)         # set desired fingerprint size
 23
 24    fp_gen.generate(mol, fp)    # generate the fingerprint
 25
 26    # if needed, fp could be converted into a numpy single precision float array as follows:
 27    # fp = numpy.array(fp, dtype=numpy.float32)
 28    
 29    return fp
 30    
 31def parseArgs() -> argparse.Namespace:
 32    parser = argparse.ArgumentParser(description='Generates 2D or 3D pharmacophore fingerprints for given input molecules.')
 33
 34    parser.add_argument('-i',
 35                        dest='in_file',
 36                        required=True,
 37                        metavar='<file>',
 38                        help='Input molecule file')
 39    parser.add_argument('-o',
 40                        dest='out_file',
 41                        required=True,
 42                        metavar='<file>',
 43                        help='Fingerprint output file')
 44    parser.add_argument('-n',
 45                        dest='num_bits',
 46                        required=False,
 47                        metavar='<integer>',
 48                        default=4096,
 49                        help='Fingerprint size in bits (default: 4096)',
 50                        type=int)
 51    parser.add_argument('-d',
 52                        dest='dim',
 53                        required=False,
 54                        metavar='<integer>',
 55                        default=2,
 56                        choices=[2, 3],
 57                        help='Type of feature distances to consider: 2 -> topological distance, 3 -> 3D distance (default: 2)',
 58                        type=int)
 59    parser.add_argument('-b',
 60                        dest='bin_size',
 61                        required=False,
 62                        metavar='<float>',
 63                        default=1.0,
 64                        help='Feature distance bin size (default: 1.0)',
 65                        type=float)
 66  
 67    return parser.parse_args()
 68    
 69def main() -> None:
 70    args = parseArgs()
 71
 72    # create reader for input molecules (format specified by file extension)
 73    reader = Chem.MoleculeReader(args.in_file) 
 74
 75    if args.dim == 3:
 76        # disable reading of multi. conf. molecules -> each conf. will be read as separate molecule
 77        Chem.setMultiConfImportParameter(reader, False)
 78    
 79    # open output file storing the generated fingerprints
 80    out_file = open(args.out_file, 'w')
 81    
 82    # create an instance of the default implementation of the Chem.Molecule interface
 83    mol = Chem.BasicMolecule()
 84
 85    # read and process molecules one after the other until the end of input has been reached
 86    try:
 87        while reader.read(mol):
 88            try:
 89                fp = genPharmFingerprint(mol, args.num_bits, args.bin_size, args.dim)
 90
 91                out_file.write(str(fp))
 92                out_file.write('\n')
 93
 94            except Exception as e:
 95                sys.exit('Error: processing of molecule failed: ' + str(e))
 96                
 97    except Exception as e: # handle exception raised in case of severe read errors
 98        sys.exit('Error: reading molecule failed: ' + str(e))
 99
100    out_file.close()
101    sys.exit(0)
102        
103if __name__ == '__main__':
104    main()

Download source file