2.2.2. Ligand-receptor Interaction Pharmacophores
The script gen_ia_ph4s.py generates pharmacophores describing the interactions between a given receptor structure and a set of ligand molecules.
Synopsis
python gen_ia_ph4s.py [-h] -r <file> -l <file> -o <file> [-i <file>] [-s <res-id> [<res-id> …]] [-q] [-x]
Mandatory options
- -r <file>
Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)
- -l <file>
Ligand structure input file (*.sdf, *.mol2, *.cdf)
- -o <file>
Pharmacophore output file (*.pml, *.cdf)
Other options
- -h, --help
Show help message and exit
- -i
Interaction data output file
-s <res-id> [<res-id> …]
Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. The individual components are optional and the whole string is interpreted as a regular expression that gets matched against the residue id of each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, _300$ -> remove all residues with sequ. number 300
- -q
Disable progress output (default: false)
- -x
Generate exclusion volumes (default: false)
Code
1import sys
2import argparse
3import re
4import os
5
6import CDPL.Chem as Chem
7import CDPL.Biomol as Biomol
8import CDPL.Pharm as Pharm
9import CDPL.MolProp as MolProp
10
11
12# reads and preprocesses the specified receptor structure
13def readAndPrepareReceptorStructure(args: argparse.Namespace) -> Chem.Molecule:
14 # create reader for receptor structure (format specified by file extension)
15 reader = Chem.MoleculeReader(args.receptor_file)
16
17 sup_fmts = [ Chem.DataFormat.MOL2,
18 Biomol.DataFormat.PDB,
19 Biomol.DataFormat.MMTF,
20 Biomol.DataFormat.MMCIF ]
21
22 if reader.getDataFormat() not in sup_fmts: # check if the format is supported by this script
23 sys.exit('Error: receptor input file format not supported')
24
25 rec_mol = Chem.BasicMolecule() # create an instance of the default implementation of the
26 # Chem.Molecule interface that will store the receptor struct.
27 try:
28 if not reader.read(rec_mol): # read receptor structure
29 sys.exit('Error: reading receptor structure failed')
30
31 except Exception as e:
32 sys.exit('Error: reading receptor structure failed:\n' + str(e))
33
34 # preprocess the receptor structure (removal of residues and
35 # calculation of properties required by the pharm. generation procedure)
36 try:
37 rem_atoms = False
38
39 # delete atoms belonging to residues that should be stripped
40 if args.strip_res_list:
41 atoms_to_rem = Chem.Fragment() # will store the atoms to delete
42 res_to_strip_ptns = { re.compile(tlc, re.IGNORECASE) for tlc in args.strip_res_list }
43
44 # identify and note atoms belonging to the stripped residues
45 for atom in rec_mol.atoms:
46 # compose identifier of the residue the atom belongs to
47 atom_res_id = f'{Biomol.getChainID(atom).upper()}_{Biomol.getResidueCode(atom).upper()}_{Biomol.getResidueSequenceNumber(atom)}'
48
49 # check if atom belongs to an excluded residue
50 for res_ptn in res_to_strip_ptns:
51 if res_ptn.search(atom_res_id):
52 atoms_to_rem.addAtom(atom)
53 break
54
55 if atoms_to_rem.numAtoms > 0:
56 rec_mol -= atoms_to_rem # delete atoms from the receptor structure
57 rem_atoms = True
58
59 if not args.quiet:
60 print('! Removed %s atoms from the receptor structure' % str(atoms_to_rem.numAtoms))
61
62 # prepares the receptor structure for pharmacophore generation
63 Chem.perceiveSSSR(rec_mol, rem_atoms)
64 Chem.setRingFlags(rec_mol, rem_atoms)
65 Chem.calcImplicitHydrogenCounts(rec_mol, rem_atoms)
66 Chem.perceiveHybridizationStates(rec_mol, rem_atoms)
67 Chem.setAromaticityFlags(rec_mol, rem_atoms)
68
69 if Chem.makeHydrogenComplete(rec_mol): # make implicit hydrogens (if any) explicit
70 Chem.calcHydrogen3DCoordinates(rec_mol) # calculate 3D coordinates for the added expl. hydrogens
71 Biomol.setHydrogenResidueSequenceInfo(rec_mol, False) # set residue information for the added expl. hydrogens
72
73 MolProp.calcAtomHydrophobicities(rec_mol, False) # calculate atom hydrophobicity values (needed for hydrophobic
74 # pharm. feature generation)
75 except Exception as e:
76 sys.exit('Error: processing of receptor structure failed:\n' + str(e))
77
78 return rec_mol
79
80# outputs useful information about the ligand atoms and pocket residues that are involved in the
81# interactions described by generated pharmacophoric features
82def outputInteractionData(ligand_idx: int, lig_mol: Chem.MolecularGraph, ia_ph4: Pharm.FeatureContainer, out_file) -> None:
83 for ftr in ia_ph4:
84 if Pharm.getType(ftr) == Pharm.FeatureType.EXCLUSION_VOLUME: # only regular features cover ligand atoms
85 continue
86
87 lig_atom_inds = None
88 lig_substruct = Pharm.getSubstructure(ftr)
89
90 # generate ligand atom index list
91 for atom_idx in sorted(lig_mol.getAtomIndex(atom) for atom in lig_substruct.atoms):
92 if lig_atom_inds:
93 lig_atom_inds = f'{lig_atom_inds}, {atom_idx}'
94 else:
95 lig_atom_inds = str(atom_idx)
96
97 out_file.write(f'{ligand_idx}\t{ia_ph4.getFeatureIndex(ftr)}\t{Pharm.getType(ftr)}\t{lig_atom_inds}\t{Pharm.getEnvironmentResidueInfo(ftr)}\n')
98
99def parseArgs() -> argparse.Namespace:
100 parser = argparse.ArgumentParser(description='Generates pharmacophores describing the interactions between a given receptor structure and a set of ligand molecules.')
101
102 parser.add_argument('-r',
103 dest='receptor_file',
104 required=True,
105 metavar='<file>',
106 help='Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)')
107 parser.add_argument('-l',
108 dest='ligands_file',
109 required=True,
110 metavar='<file>',
111 help='Ligand structure input file (*.sdf, *.mol2, *.cdf)')
112 parser.add_argument('-o',
113 dest='ph4_out_file',
114 required=True,
115 metavar='<file>',
116 help='Pharmacophore output file (*.pml, *.cdf)')
117 parser.add_argument('-i',
118 dest='ia_out_file',
119 required=False,
120 metavar='<file>',
121 help='Interaction data output file')
122 parser.add_argument('-s',
123 dest='strip_res_list',
124 required=False,
125 metavar='<res-id>',
126 nargs='+',
127 help='Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). '\
128 'Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. '\
129 'The individual components are optional and the whole string is interpreted '\
130 'as a regular expression that gets matched against the residue id of '\
131 'each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, '\
132 '_300$ -> remove all residues with sequ. number 300')
133 parser.add_argument('-q',
134 dest='quiet',
135 required=False,
136 action='store_true',
137 default=False,
138 help='Disable progress output (default: false)')
139 parser.add_argument('-x',
140 dest='gen_x_vols',
141 required=False,
142 action='store_true',
143 default=False,
144 help='Generate exclusion volumes (default: false)')
145
146 return parser.parse_args()
147
148def main() -> None:
149 args = parseArgs()
150
151 # read and preprocess the receptor structure
152 rec_mol = readAndPrepareReceptorStructure(args)
153
154 # create reader for the ligand input file (format specified by file extension)
155 lig_reader = Chem.MoleculeReader(args.ligands_file)
156
157 # create writer for the generated pharmacophores (format specified by file extension)
158 ph4_writer = Pharm.FeatureContainerWriter(args.ph4_out_file)
159
160 if args.ia_out_file:
161 ia_out_file = open(args.ia_out_file, 'w')
162
163 # write TSV file column headers
164 ia_out_file.write('Input Ligand Index\tPharm. Feature Index\tPharm. Feature Type\tLigand Atom Indices\tPocket Residues\n')
165
166 lig_mol = Chem.BasicMolecule() # create an instance of the default implementation of the
167 # Chem.Molecule interface that will store the ligand structures
168 ia_ph4 = Pharm.BasicPharmacophore() # create an instance of the default implementation of the Pharm.Pharmacophore
169 # interface that will store the generated pharmacophores
170
171 ph4_gen = Pharm.InteractionPharmacophoreGenerator() # create an instance of the pharmacophore generator
172
173 ph4_gen.addExclusionVolumes(args.gen_x_vols) # specify whether to generate exclusion volume spheres
174 # on pharm. feature atoms of interacting residues
175 try:
176 i = 1
177
178 # read and process ligand molecules one after the other until the end of input has been reached (or a severe error occurs)
179 while lig_reader.read(lig_mol):
180 mol_id = Chem.getName(lig_mol).strip()
181
182 # compose a ligand identifier (for messages) and a name for the created pharmacophore
183 if mol_id == '': # fallback if ligand name is empty or not available
184 pharm_id = f'{os.path.splitext(os.path.basename(args.ligands_file))[0]}#{i}'
185 mol_id = f'#{i}'
186 else:
187 pharm_id = mol_id
188 mol_id = f'\'{mol_id}\' (#{i})'
189
190 if not args.quiet:
191 print('- Generating interaction pharmacophore of molecule %s...' % mol_id)
192
193 try:
194 Pharm.prepareForPharmacophoreGeneration(lig_mol) # make ligand ready for pharm. generation
195
196 ph4_gen.generate(lig_mol, rec_mol, ia_ph4, True) # generate the pharmacophore (True = extract ligand environment residues on-the-fly)
197
198 Pharm.setName(ia_ph4, pharm_id) # set pharmacophore name
199
200 if not args.quiet:
201 print(' -> Generated %s features: %s' % (str(ia_ph4.numFeatures), Pharm.generateFeatureTypeHistogramString(ia_ph4)))
202
203 if args.ia_out_file: # if desired, output interaction data
204 outputInteractionData(i - 1, lig_mol, ia_ph4, ia_out_file)
205
206 try:
207 if not ph4_writer.write(ia_ph4): # output pharmacophore
208 sys.exit('Error: writing interaction pharmacophore of molecule %s failed' % mol_id)
209
210 except Exception as e: # handle exception raised in case of severe write errors
211 sys.exit('Error: writing interaction pharmacophore of molecule %s failed:\n%s' % (mol_id, str(e)))
212
213 except Exception as e: # handle exception raised in case of severe processing errors
214 sys.exit('Error: interaction pharmacophore generation for molecule %s failed:\n%s' % (mol_id, str(e)))
215
216 i += 1
217
218 except Exception as e: # handle exception raised in case of severe read errors
219 sys.exit('Error: reading molecule %s failed:\n%s' % (str(i), str(e)))
220
221 if not args.quiet:
222 print('Done!')
223
224 if args.ia_out_file:
225 ia_out_file.close()
226
227 ph4_writer.close()
228 sys.exit(0)
229
230if __name__ == '__main__':
231 main()