2.2.2. Ligand-receptor Interaction Pharmacophores
The script gen_ia_ph4s.py generates pharmacophores describing the interactions between a given receptor structure and a set of ligand molecules.
Synopsis
python gen_ia_ph4s.py [-h] -r <file> -l <file> -o <file> [-i <file>] [-s <res-id> [<res-id> …]] [-q] [-x]
Mandatory options
- -r <file>
Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)
- -l <file>
Ligand structure input file (*.sdf, *.mol2, *.cdf)
- -o <file>
Pharmacophore output file (*.pml, *.cdf)
Other options
- -h, --help
Show help message and exit
- -i
Interaction data output file
-s <res-id> [<res-id> …]
Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. The individual components are optional and the whole string is interpreted as a regular expression that gets matched against the residue id of each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, _300$ -> remove all residues with sequ. number 300
- -q
Disable progress output (default: false)
- -x
Generate exclusion volumes (default: false)
Code
1import sys
2import argparse
3import re
4import os
5
6import CDPL.Chem as Chem
7import CDPL.Biomol as Biomol
8import CDPL.Pharm as Pharm
9import CDPL.MolProp as MolProp
10
11
12# reads and preprocesses the specified receptor structure
13def readAndPrepareReceptorStructure(args: argparse.Namespace) -> Chem.Molecule:
14 # create reader for receptor structure (format specified by file extension)
15 reader = Chem.MoleculeReader(args.receptor_file)
16
17 sup_fmts = [ Chem.DataFormat.MOL2,
18 Biomol.DataFormat.PDB,
19 Biomol.DataFormat.MMTF,
20 Biomol.DataFormat.MMCIF ]
21
22 if reader.getDataFormat() not in sup_fmts: # check if the format is supported by this script
23 sys.exit('Error: receptor input file format not supported')
24
25 rec_mol = Chem.BasicMolecule() # create an instance of the default implementation of the
26 # Chem.Molecule interface that will store the receptor struct.
27 try:
28 if not reader.read(rec_mol): # read receptor structure
29 sys.exit('Error: reading receptor structure failed')
30
31 except Exception as e:
32 sys.exit('Error: reading receptor structure failed:\n' + str(e))
33
34 # preprocess the receptor structure (removal of residues and
35 # calculation of properties required by the pharm. generation procedure)
36 try:
37 rem_atoms = False
38
39 # delete atoms belonging to residues that should be stripped
40 if args.strip_res_list:
41 atoms_to_rem = Chem.Fragment() # will store the atoms to delete
42 res_to_strip_ptns = { re.compile(tlc, re.IGNORECASE) for tlc in args.strip_res_list }
43
44 # identify and note atoms belonging to the stripped residues
45 for atom in rec_mol.atoms:
46 # compose identifier of the residue the atom belongs to
47 atom_res_id = f'{Biomol.getChainID(atom).upper()}_{Biomol.getResidueCode(atom).upper()}_{Biomol.getResidueSequenceNumber(atom)}'
48
49 # check if atom belongs to an excluded residue
50 for res_ptn in res_to_strip_ptns:
51 if res_ptn.search(atom_res_id):
52 atoms_to_rem.addAtom(atom)
53 break
54
55 if atoms_to_rem.numAtoms > 0:
56 rec_mol -= atoms_to_rem # delete atoms from the receptor structure
57 rem_atoms = True
58
59 if not args.quiet:
60 print('! Removed %s atoms from the receptor structure' % str(atoms_to_rem.numAtoms))
61
62 # prepares the receptor structure for pharmacophore generation
63 Chem.perceiveSSSR(rec_mol, rem_atoms)
64 Chem.setRingFlags(rec_mol, rem_atoms)
65 Chem.calcImplicitHydrogenCounts(rec_mol, rem_atoms)
66 Chem.perceiveHybridizationStates(rec_mol, rem_atoms)
67 Chem.setAromaticityFlags(rec_mol, rem_atoms)
68
69 if Chem.makeHydrogenComplete(rec_mol): # make implicit hydrogens (if any) explicit
70 Chem.calcHydrogen3DCoordinates(rec_mol) # calculate 3D coordinates for the added expl. hydrogens
71 Biomol.setHydrogenResidueSequenceInfo(rec_mol, False) # set residue information for the added expl. hydrogens
72
73 MolProp.calcAtomHydrophobicities(rec_mol, False) # calculate atom hydrophobicity values (needed for hydrophobic
74 # pharm. feature generation)
75 except Exception as e:
76 sys.exit('Error: processing of receptor structure failed:\n' + str(e))
77
78 return rec_mol
79
80# outputs useful information about the ligand atoms and pocket residues that are involved in the
81# interactions described by generated pharmacophoric features
82def outputInteractionData(ligand_idx: int, lig_mol: Chem.MolecularGraph, ia_ph4: Pharm.FeatureContainer, out_file) -> None:
83 for ftr in ia_ph4:
84 if Pharm.getType(ftr) == Pharm.FeatureType.EXCLUSION_VOLUME: # only regular features cover ligand atoms
85 continue
86
87 lig_atom_inds = None
88 lig_substruct = Pharm.getSubstructure(ftr)
89
90 # generate ligand atom index list
91 for atom_idx in sorted(lig_mol.getAtomIndex(atom) for atom in lig_substruct.atoms):
92 if lig_atom_inds:
93 lig_atom_inds = f'{lig_atom_inds}, {atom_idx}'
94 else:
95 lig_atom_inds = str(atom_idx)
96
97 out_file.write(f'{ligand_idx}\t{ia_ph4.getFeatureIndex(ftr)}\t{Pharm.getType(ftr)}' \
98 f'\t{lig_atom_inds}\t{Pharm.getEnvironmentResidueInfo(ftr)}\t{Pharm.getEnvironmentResidueAtomInfo(ftr)}\n')
99
100def parseArgs() -> argparse.Namespace:
101 parser = argparse.ArgumentParser(description='Generates pharmacophores describing the interactions between a given receptor structure and a set of ligand molecules.')
102
103 parser.add_argument('-r',
104 dest='receptor_file',
105 required=True,
106 metavar='<file>',
107 help='Receptor structure input file (*.mol2, *.pdb, *.mmtf, *.cif, *.mmcif)')
108 parser.add_argument('-l',
109 dest='ligands_file',
110 required=True,
111 metavar='<file>',
112 help='Ligand structure input file (*.sdf, *.mol2, *.cdf)')
113 parser.add_argument('-o',
114 dest='ph4_out_file',
115 required=True,
116 metavar='<file>',
117 help='Pharmacophore output file (*.pml, *.cdf)')
118 parser.add_argument('-i',
119 dest='ia_out_file',
120 required=False,
121 metavar='<file>',
122 help='Interaction data output file')
123 parser.add_argument('-s',
124 dest='strip_res_list',
125 required=False,
126 metavar='<res-id>',
127 nargs='+',
128 help='Whitespace separated list of identifiers of residues to remove from the receptor structure (e.g. an existing ligand). '\
129 'Residue identifiers consist of three components separated by an underscore: [chain id]_[tlc]_[res. seq. no.]. '\
130 'The individual components are optional and the whole string is interpreted '\
131 'as a regular expression that gets matched against the residue id of '\
132 'each receptor atom. Examples: HOH -> rem. all waters, A_MET -> remove all MET residues of chain A, '\
133 '_300$ -> remove all residues with sequ. number 300')
134 parser.add_argument('-q',
135 dest='quiet',
136 required=False,
137 action='store_true',
138 default=False,
139 help='Disable progress output (default: false)')
140 parser.add_argument('-x',
141 dest='gen_x_vols',
142 required=False,
143 action='store_true',
144 default=False,
145 help='Generate exclusion volumes (default: false)')
146
147 return parser.parse_args()
148
149def main() -> None:
150 args = parseArgs()
151
152 # read and preprocess the receptor structure
153 rec_mol = readAndPrepareReceptorStructure(args)
154
155 # create reader for the ligand input file (format specified by file extension)
156 lig_reader = Chem.MoleculeReader(args.ligands_file)
157
158 # create writer for the generated pharmacophores (format specified by file extension)
159 ph4_writer = Pharm.FeatureContainerWriter(args.ph4_out_file)
160
161 if args.ia_out_file:
162 ia_out_file = open(args.ia_out_file, 'w')
163
164 # write TSV file column headers
165 ia_out_file.write('Input Ligand Index\tPharm. Feature Index\tPharm. Feature Type\tLigand Atom Indices\tPocket Residues\tPocket Residue Atoms\n')
166
167 lig_mol = Chem.BasicMolecule() # create an instance of the default implementation of the
168 # Chem.Molecule interface that will store the ligand structures
169 ia_ph4 = Pharm.BasicPharmacophore() # create an instance of the default implementation of the Pharm.Pharmacophore
170 # interface that will store the generated pharmacophores
171
172 ph4_gen = Pharm.InteractionPharmacophoreGenerator() # create an instance of the pharmacophore generator
173
174 ph4_gen.addExclusionVolumes(args.gen_x_vols) # specify whether to generate exclusion volume spheres
175 # on pharm. feature atoms of interacting residues
176 try:
177 i = 1
178
179 # read and process ligand molecules one after the other until the end of input has been reached (or a severe error occurs)
180 while lig_reader.read(lig_mol):
181 mol_id = Chem.getName(lig_mol).strip()
182
183 # compose a ligand identifier (for messages) and a name for the created pharmacophore
184 if mol_id == '': # fallback if ligand name is empty or not available
185 pharm_id = f'{os.path.splitext(os.path.basename(args.ligands_file))[0]}#{i}'
186 mol_id = f'#{i}'
187 else:
188 pharm_id = mol_id
189 mol_id = f'\'{mol_id}\' (#{i})'
190
191 if not args.quiet:
192 print('- Generating interaction pharmacophore of molecule %s...' % mol_id)
193
194 try:
195 Pharm.prepareForPharmacophoreGeneration(lig_mol) # make ligand ready for pharm. generation
196
197 ph4_gen.generate(lig_mol, rec_mol, ia_ph4, True) # generate the pharmacophore (True = extract ligand environment residues on-the-fly)
198
199 Pharm.setName(ia_ph4, pharm_id) # set pharmacophore name
200
201 if not args.quiet:
202 print(' -> Generated %s features: %s' % (str(ia_ph4.numFeatures), Pharm.generateFeatureTypeHistogramString(ia_ph4)))
203
204 if args.ia_out_file: # if desired, output interaction data
205 outputInteractionData(i - 1, lig_mol, ia_ph4, ia_out_file)
206
207 try:
208 if not ph4_writer.write(ia_ph4): # output pharmacophore
209 sys.exit('Error: writing interaction pharmacophore of molecule %s failed' % mol_id)
210
211 except Exception as e: # handle exception raised in case of severe write errors
212 sys.exit('Error: writing interaction pharmacophore of molecule %s failed:\n%s' % (mol_id, str(e)))
213
214 except Exception as e: # handle exception raised in case of severe processing errors
215 sys.exit('Error: interaction pharmacophore generation for molecule %s failed:\n%s' % (mol_id, str(e)))
216
217 i += 1
218
219 except Exception as e: # handle exception raised in case of severe read errors
220 sys.exit('Error: reading molecule %s failed:\n%s' % (str(i), str(e)))
221
222 if not args.quiet:
223 print('Done!')
224
225 if args.ia_out_file:
226 ia_out_file.close()
227
228 ph4_writer.close()
229 sys.exit(0)
230
231if __name__ == '__main__':
232 main()