2.2.2. Ligand-receptor Interaction Pharmacophores

The script gen_ia_ph4s.py generates pharmacophores describing the interactions between a given receptor structure and a set of ligand molecules.

Synopsis

python gen_ia_ph4s.py [-h] -r <file> -l <file> -o <file> [-i <file>] [-s <res-id> [<res-id> …]] [-q] [-x]

Mandatory options

-r <file>

Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)

-l <file>

Ligand structure input file (*.sdf, *.mol2, *.cdf)

-o <file>

Pharmacophore output file (*.pml, *.cdf)

Other options

-h, --help

Show help message and exit

-i

Interaction data output file

-s <res-id> [<res-id> …]

Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. The individual components are optional and the whole string is interpreted as a regular expression that gets matched against the residue id of each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, _300$ -> remove all residues with sequ. number 300

-q

Disable progress output (default: false)

-x

Generate exclusion volumes (default: false)

Code

  1import sys
  2import argparse
  3import re
  4import os
  5
  6import CDPL.Chem as Chem
  7import CDPL.Biomol as Biomol
  8import CDPL.Pharm as Pharm
  9import CDPL.MolProp as MolProp
 10
 11
 12# reads and preprocesses the specified receptor structure
 13def readAndPrepareReceptorStructure(args: argparse.Namespace) -> Chem.Molecule:
 14    # create reader for receptor structure (format specified by file extension)
 15    reader = Chem.MoleculeReader(args.receptor_file) 
 16    
 17    sup_fmts = [ Chem.DataFormat.MOL2,
 18                 Biomol.DataFormat.PDB,
 19                 Biomol.DataFormat.MMTF,
 20                 Biomol.DataFormat.MMCIF ]
 21                        
 22    if reader.getDataFormat() not in sup_fmts:   # check if the format is supported by this script
 23        sys.exit('Error: receptor input file format not supported')
 24
 25    rec_mol = Chem.BasicMolecule()    # create an instance of the default implementation of the
 26                                      # Chem.Molecule interface that will store the receptor struct.
 27    try:
 28        if not reader.read(rec_mol):  # read receptor structure
 29            sys.exit('Error: reading receptor structure failed')
 30
 31    except Exception as e:
 32        sys.exit('Error: reading receptor structure failed:\n' + str(e))            
 33
 34    # preprocess the receptor structure (removal of residues and
 35    # calculation of properties required by the pharm. generation procedure)
 36    try:
 37        rem_atoms = False
 38
 39        # delete atoms belonging to residues that should be stripped
 40        if args.strip_res_list:            
 41            atoms_to_rem = Chem.Fragment() # will store the atoms to delete
 42            res_to_strip_ptns = { re.compile(tlc, re.IGNORECASE) for tlc in args.strip_res_list }
 43        
 44            # identify and note atoms belonging to the stripped residues
 45            for atom in rec_mol.atoms:
 46                # compose identifier of the residue the atom belongs to
 47                atom_res_id = f'{Biomol.getChainID(atom).upper()}_{Biomol.getResidueCode(atom).upper()}_{Biomol.getResidueSequenceNumber(atom)}'
 48
 49                # check if atom belongs to an excluded residue
 50                for res_ptn in res_to_strip_ptns:
 51                    if res_ptn.search(atom_res_id):
 52                        atoms_to_rem.addAtom(atom)
 53                        break
 54
 55            if atoms_to_rem.numAtoms > 0:
 56                rec_mol -= atoms_to_rem    # delete atoms from the receptor structure
 57                rem_atoms = True
 58
 59                if not args.quiet:
 60                    print('! Removed %s atoms from the receptor structure' % str(atoms_to_rem.numAtoms))
 61
 62        # prepares the receptor structure for pharmacophore generation
 63        Chem.perceiveSSSR(rec_mol, rem_atoms)
 64        Chem.setRingFlags(rec_mol, rem_atoms)
 65        Chem.calcImplicitHydrogenCounts(rec_mol, rem_atoms)
 66        Chem.perceiveHybridizationStates(rec_mol, rem_atoms)
 67        Chem.setAromaticityFlags(rec_mol, rem_atoms)
 68
 69        if Chem.makeHydrogenComplete(rec_mol):                    # make implicit hydrogens (if any) explicit
 70            Chem.calcHydrogen3DCoordinates(rec_mol)               # calculate 3D coordinates for the added expl. hydrogens
 71            Biomol.setHydrogenResidueSequenceInfo(rec_mol, False) # set residue information for the added expl. hydrogens
 72
 73        MolProp.calcAtomHydrophobicities(rec_mol, False)          # calculate atom hydrophobicity values (needed for hydrophobic
 74                                                                  # pharm. feature generation)
 75    except Exception as e:
 76        sys.exit('Error: processing of receptor structure failed:\n' + str(e))            
 77
 78    return rec_mol
 79
 80# outputs useful information about the ligand atoms and pocket residues that are involved in the
 81# interactions described by generated pharmacophoric features
 82def outputInteractionData(ligand_idx: int, lig_mol: Chem.MolecularGraph, ia_ph4: Pharm.FeatureContainer, out_file) -> None:
 83    for ftr in ia_ph4:
 84        if Pharm.getType(ftr) == Pharm.FeatureType.EXCLUSION_VOLUME: # only regular features cover ligand atoms
 85            continue
 86        
 87        lig_atom_inds = None
 88        lig_substruct = Pharm.getSubstructure(ftr)
 89
 90        # generate ligand atom index list
 91        for atom_idx in sorted(lig_mol.getAtomIndex(atom) for atom in lig_substruct.atoms):
 92            if lig_atom_inds:
 93                lig_atom_inds = f'{lig_atom_inds}, {atom_idx}'
 94            else:
 95                lig_atom_inds = str(atom_idx)
 96
 97        out_file.write(f'{ligand_idx}\t{ia_ph4.getFeatureIndex(ftr)}\t{Pharm.getType(ftr)}\t{lig_atom_inds}\t{Pharm.getEnvironmentResidueInfo(ftr)}\n')
 98
 99def parseArgs() -> argparse.Namespace:
100    parser = argparse.ArgumentParser(description='Generates pharmacophores describing the interactions between a given receptor structure and a set of ligand molecules.')
101
102    parser.add_argument('-r',
103                        dest='receptor_file',
104                        required=True,
105                        metavar='<file>',
106                        help='Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)')
107    parser.add_argument('-l',
108                        dest='ligands_file',
109                        required=True,
110                        metavar='<file>',
111                        help='Ligand structure input file (*.sdf, *.mol2, *.cdf)')
112    parser.add_argument('-o',
113                        dest='ph4_out_file',
114                        required=True,
115                        metavar='<file>',
116                        help='Pharmacophore output file (*.pml, *.cdf)')
117    parser.add_argument('-i',
118                        dest='ia_out_file',
119                        required=False,
120                        metavar='<file>',
121                        help='Interaction data output file')
122    parser.add_argument('-s',
123                        dest='strip_res_list',
124                        required=False,
125                        metavar='<res-id>',
126                        nargs='+',
127                        help='Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). '\
128                        'Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. '\
129                        'The individual components are optional and the whole string is interpreted '\
130                        'as a regular expression that gets matched against the residue id of '\
131                        'each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, '\
132                        '_300$ -> remove all residues with sequ. number 300')
133    parser.add_argument('-q',
134                        dest='quiet',
135                        required=False,
136                        action='store_true',
137                        default=False,
138                        help='Disable progress output (default: false)')
139    parser.add_argument('-x',
140                        dest='gen_x_vols',
141                        required=False,
142                        action='store_true',
143                        default=False,
144                        help='Generate exclusion volumes (default: false)')
145
146    return parser.parse_args()
147
148def main() -> None:
149    args = parseArgs()
150
151    # read and preprocess the receptor structure
152    rec_mol = readAndPrepareReceptorStructure(args)
153
154    # create reader for the ligand input file (format specified by file extension)
155    lig_reader = Chem.MoleculeReader(args.ligands_file)
156
157     # create writer for the generated pharmacophores (format specified by file extension)
158    ph4_writer = Pharm.FeatureContainerWriter(args.ph4_out_file)
159
160    if args.ia_out_file:
161        ia_out_file = open(args.ia_out_file, 'w')
162        
163        # write TSV file column headers
164        ia_out_file.write('Input Ligand Index\tPharm. Feature Index\tPharm. Feature Type\tLigand Atom Indices\tPocket Residues\n')
165
166    lig_mol = Chem.BasicMolecule()          # create an instance of the default implementation of the
167                                            # Chem.Molecule interface that will store the ligand structures
168    ia_ph4 = Pharm.BasicPharmacophore()     # create an instance of the default implementation of the Pharm.Pharmacophore
169                                            # interface that will store the generated pharmacophores
170
171    ph4_gen = Pharm.InteractionPharmacophoreGenerator() # create an instance of the pharmacophore generator
172
173    ph4_gen.addExclusionVolumes(args.gen_x_vols)        # specify whether to generate exclusion volume spheres 
174                                                        # on pharm. feature atoms of interacting residues
175    try:
176        i = 1
177
178        # read and process ligand molecules one after the other until the end of input has been reached (or a severe error occurs)
179        while lig_reader.read(lig_mol):
180            mol_id = Chem.getName(lig_mol).strip()
181
182            # compose a ligand identifier (for messages) and a name for the created pharmacophore
183            if mol_id == '':   # fallback if ligand name is empty or not available
184                pharm_id = f'{os.path.splitext(os.path.basename(args.ligands_file))[0]}#{i}'
185                mol_id = f'#{i}'  
186            else:
187                pharm_id = mol_id
188                mol_id = f'\'{mol_id}\' (#{i})'
189
190            if not args.quiet:
191                print('- Generating interaction pharmacophore of molecule %s...' % mol_id)
192
193            try:
194                Pharm.prepareForPharmacophoreGeneration(lig_mol) # make ligand ready for pharm. generation
195
196                ph4_gen.generate(lig_mol, rec_mol, ia_ph4, True) # generate the pharmacophore (True = extract ligand environment residues on-the-fly)
197
198                Pharm.setName(ia_ph4, pharm_id)                  # set pharmacophore name
199                
200                if not args.quiet:
201                     print(' -> Generated %s features: %s' % (str(ia_ph4.numFeatures), Pharm.generateFeatureTypeHistogramString(ia_ph4)))
202
203                if args.ia_out_file:                             # if desired, output interaction data 
204                     outputInteractionData(i - 1, lig_mol, ia_ph4, ia_out_file)
205                    
206                try:
207                    if not ph4_writer.write(ia_ph4): # output pharmacophore
208                        sys.exit('Error: writing interaction pharmacophore of molecule %s failed' % mol_id)
209
210                except Exception as e:               # handle exception raised in case of severe write errors
211                    sys.exit('Error: writing interaction pharmacophore of molecule %s failed:\n%s' % (mol_id, str(e)))
212                
213            except Exception as e:                   # handle exception raised in case of severe processing errors
214                sys.exit('Error: interaction pharmacophore generation for molecule %s failed:\n%s' % (mol_id, str(e)))
215
216            i += 1
217            
218    except Exception as e:                           # handle exception raised in case of severe read errors
219        sys.exit('Error: reading molecule %s failed:\n%s' % (str(i), str(e)))
220
221    if not args.quiet:
222        print('Done!')
223        
224    if args.ia_out_file:
225        ia_out_file.close()
226
227    ph4_writer.close()
228    sys.exit(0)
229        
230if __name__ == '__main__':
231    main()

Download source file