4.1.5. FAME Atom Environment Fingerprints

The script gen_fame_fp.py generates and prints the FAME atom environment descriptor [8, 16] for each atom of every molecule read from the specified input file.

Synopsis

python gen_fame_fp.py <file>

Code

 1import sys
 2import numpy
 3
 4import CDPL.Chem as Chem
 5
 6
 7# generates the FAME descriptor for a given atom of the provided molecular graph
 8def genFAMEDescriptor(ctr_atom: Chem.Atom, molgraph: Chem.MolecularGraph, radius: int) -> numpy.array:
 9    env = Chem.Fragment()                                                      # for storing of extracted environment atoms
10    descr = numpy.zeros((Chem.SybylAtomType.MAX_TYPE + 1) * (radius + 1))
11    
12    Chem.getEnvironment(ctr_atom, molgraph, radius, env)                       # extract environment of center atom reaching
13                                                                               # out up to 'radius' bonds
14    for atom in env.atoms:                                                     # iterate over extracted environment atoms
15        sybyl_type = Chem.getSybylType(atom)                                   # retrieve Sybyl type of environment atom
16        top_dist = Chem.getTopologicalDistance(ctr_atom, atom, molgraph)       # get top. distance between center atom and environment atom
17        descr[top_dist * (Chem.SybylAtomType.MAX_TYPE + 1) + sybyl_type] += 1  # instead of 1 (= Sybyl type presence) also any other numeric atom
18                                                                               # property could be summed up here
19    return descr
20        
21# function called for each read molecule
def procMolecule(molgraph: Chem.MolecularGraph, radius: int = 5) -> None:
    """Print the FAME environment descriptor of every atom of a molecule.

    Prepares the data the descriptor generation looks up (basic properties,
    Sybyl atom types, topological distance matrix) and then writes one
    descriptor per atom to standard output.

    Args:
        molgraph: Molecular graph to process.
        radius: Environment radius in bonds passed on to the descriptor
            generation. Defaults to five bonds.
    """
    Chem.calcBasicProperties(molgraph, False)            # calculate basic molecular properties (if not yet done)
    Chem.perceiveSybylAtomTypes(molgraph, False)         # perceive Sybyl atom types and set corresponding property for all atoms
    Chem.calcTopologicalDistanceMatrix(molgraph, False)  # calculate topological distance matrix and store as Chem.MolecularGraph property

    for atom in molgraph.atoms:
        print(genFAMEDescriptor(atom, molgraph, radius))
31
def main() -> None:
    """Script entry point: print FAME descriptors for all atoms of all input molecules.

    Expects the path of the molecule input file as first command line
    argument. Terminates the process via ``sys.exit``: with a usage or error
    message on failure, with status 0 once the end of input has been reached.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <input mol. file>' % sys.argv[0])

    # reader for the input molecules (format specified by file extension)
    mol_reader = Chem.MoleculeReader(sys.argv[1])

    # instance of the default implementation of the Chem.Molecule interface,
    # reused for every molecule that gets read
    curr_mol = Chem.BasicMolecule()

    # read and process molecules one after the other until the end of input
    while True:
        try:
            have_mol = reader_ok = mol_reader.read(curr_mol)
        except Exception as err:  # raised in case of severe read errors
            sys.exit('Error: reading molecule failed: ' + str(err))

        if not have_mol:
            break

        try:
            procMolecule(curr_mol)
        except Exception as err:
            sys.exit('Error: processing of molecule failed: ' + str(err))

    sys.exit(0)
54        
# run main() only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()

Download source file