1.1.2.3. Bemis-Murcko Framework Analysis
The script gen_bm_frags.py analyzes the framework of input molecules according to the rules established by Bemis and Murcko [17].
Synopsis
python gen_bm_frags.py [-h] -i <file> -o <file> [-v] [-m] [-H] [-f <true|false>] [-r] [-s] [-l] [-x] [-c]
Mandatory options
- -i <file>
Input molecule file
- -o <file>
Fragment output file
Other options
- -h, --help
Show help message and exit
- -v
Verbose output (default: false)
- -m
Output the input molecule before the resulting fragments (default: false)
- -H
Keep hydrogen atoms (default: false)
- -f <true|false>
Output molecule frameworks (default: true)
- -r
Output molecule ring systems (default: false)
- -s
Output molecule side chains (default: false)
- -l
Output ring system linkers (default: false)
- -x
Label atoms with atom mapping IDs (default: false)
- -c
Transform fragments into carbon skeletons (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5
6
7# performs Bemis-Murcko framework analyis for a given input molecule
8def bmAnalyze(molgraph: Chem.MolecularGraph, keep_hs: bool) -> Chem.BemisMurckoAnalyzer:
9 Chem.calcBasicProperties(molgraph, False) # calculate basic molecular properties (if not yet done)
10
11 # create an instance of the class implementing the Bemis-Murcko analyis rules
12 bm_analyzer = Chem.BemisMurckoAnalyzer()
13
14 # apply -H option
15 bm_analyzer.stripHydrogens(not keep_hs)
16
17 # perform analysis
18 bm_analyzer.analyze(molgraph)
19
20 return bm_analyzer
21
22def getLogMessage(bm_analyzer: Chem.BemisMurckoAnalyzer, mol_id: str) -> str:
23 msg = f'- Molecule {mol_id}\n'
24 msg += f' Frameworks: {bm_analyzer.getFrameworks().getSize()}\n'
25 msg += f' Ring Systems: {bm_analyzer.getRingSystems().getSize()}\n'
26 msg += f' Linkers: {bm_analyzer.getLinkers().getSize()}\n'
27 msg += f' Side Chains: {bm_analyzer.getSideChains().getSize()}'
28
29 return msg
30
31def parseArgs() -> argparse.Namespace:
32 def strtobool(value: str) -> bool:
33 value = value.lower()
34
35 if value in ("y", "yes", "on", "1", "true", "t"):
36 return True
37
38 return False
39
40 parser = argparse.ArgumentParser(description='Analyzes the framework of given input molecules according to the rules established by Bemis and Murcko.')
41
42 parser.add_argument('-i',
43 dest='in_file',
44 required=True,
45 metavar='<file>',
46 help='Input molecule file')
47 parser.add_argument('-o',
48 dest='out_file',
49 required=True,
50 metavar='<file>',
51 help='Fragment output file')
52 parser.add_argument('-v',
53 dest='verbose',
54 required=False,
55 action='store_true',
56 default=False,
57 help='Verbose output (default: false)')
58 parser.add_argument('-m',
59 dest='output_mol',
60 required=False,
61 action='store_true',
62 default=False,
63 help='Output the input molecule before the resulting fragments (default: false)')
64 parser.add_argument('-H',
65 dest='keep_hs',
66 required=False,
67 action='store_true',
68 default=False,
69 help='Keep hydrogen atoms (default: false)')
70 parser.add_argument('-f',
71 dest='output_fws',
72 required=False,
73 metavar='<true|false>',
74 type=lambda x:bool(strtobool(x)),
75 default=True,
76 help='Output molecule frameworks (default: true)')
77 parser.add_argument('-r',
78 dest='output_rsys',
79 required=False,
80 action='store_true',
81 default=False,
82 help='Output molecule ring systems (default: false)')
83 parser.add_argument('-s',
84 dest='output_schns',
85 required=False,
86 action='store_true',
87 default=False,
88 help='Output molecule side chains (default: false)')
89 parser.add_argument('-l',
90 dest='output_lnks',
91 required=False,
92 action='store_true',
93 default=False,
94 help='Output ring system linkers (default: false)')
95 parser.add_argument('-x',
96 dest='atom_mpg',
97 required=False,
98 action='store_true',
99 default=False,
100 help='Label atoms with atom mapping IDs (default: false)')
101 parser.add_argument('-c',
102 dest='carbonize',
103 required=False,
104 action='store_true',
105 default=False,
106 help='Transform fragments into carbon skeletons (default: false)')
107
108 return parser.parse_args()
109
110# converts all atoms into carbons and equalizes other basic atom/bond properties
111def carbonize(molgraph: Chem.MolecularGraph) -> None:
112 for atom in molgraph.atoms:
113 Chem.setSymbol(atom, 'C')
114 Chem.setType(atom, Chem.AtomType.C)
115 Chem.clearFormalCharge(atom)
116 Chem.setImplicitHydrogenCount(atom, 0)
117 Chem.setAromaticityFlag(atom, False)
118 Chem.setHybridizationState(atom, Chem.HybridizationState.UNKNOWN)
119 Chem.clearStereoDescriptor(atom)
120 Chem.clear3DCoordinates(atom)
121 Chem.clear3DCoordinatesArray(atom)
122
123 for bond in molgraph.bonds:
124 Chem.setOrder(bond, 1)
125 Chem.setAromaticityFlag(bond, False)
126 Chem.clearStereoDescriptor(bond)
127 Chem.set2DStereoFlag(bond, Chem.BondStereoFlag.PLAIN)
128
129def outputFragments(frags: Chem.FragmentList, writer: Chem.MolecularGraphWriter, mol_name: str, mol_id: str, frag_type: str) -> None:
130 for i in range(len(frags)):
131 frag = frags[i]
132
133 # set fragment name encoding the parent molecule and fragment number
134 Chem.setName(frag, mol_name + f'_{frag_type}#{i + 1}')
135
136 # calculate (if not present) some basic properties of the output fragments
137 # that might be required for writing (output format dependent)
138 Chem.perceiveSSSR(frag, False)
139 Chem.perceiveComponents(frag, False)
140
141 try:
142 # write fragment
143 if not writer.write(frag):
144 sys.exit('Error: writing fragment of molecule {mol_id} failed')
145
146 except Exception as e: # handle exception raised in case of severe write errors
147 sys.exit(f'Error: writing fragment of molecule {mol_id} failed: {str(e)}')
148
149def main() -> None:
150 args = parseArgs() # process command line arguments
151
152 # create reader for input molecules (format specified by file extension)
153 reader = Chem.MoleculeReader(args.in_file)
154
155 # create writer for output fragments (format specified by file extension)
156 writer = Chem.MolecularGraphWriter(args.out_file)
157
158 if args.atom_mpg: # enable atom mapping IDs for SMILES output
159 Chem.setSMILESMolOutputAtomMappingIDParameter(writer, True)
160
161 # write canonical SMILES
162 Chem.setSMILESOutputCanonicalFormParameter(writer, True)
163
164 # create instances of the default implementation of the Chem.Molecule interface for the input molecule
165 in_mol = Chem.BasicMolecule()
166 i = 1
167
168 try:
169 # read and process molecules one after the other until the end of input has been reached (or a severe error occurs)
170 while reader.read(in_mol):
171 # compose a molecule identifier
172 mol_name = Chem.getName(in_mol).strip()
173
174 if mol_name == '':
175 mol_name = f'Mol#{i}'
176 mol_id = f'#{i}' # fallback if name is empty or not available
177 else:
178 mol_id = f'\'{mol_name}\' (#{i})'
179
180 try:
181 # perform Bemis-Murcko analysis of in_mol
182 bm_analyzer = bmAnalyze(in_mol, args.keep_hs)
183
184 if args.verbose:
185 print(getLogMessage(bm_analyzer, mol_id))
186
187 # label atoms with atom mapping IDs to see correspondences between fragment and parent molecule atoms
188 if args.atom_mpg:
189 for atom in in_mol.atoms:
190 Chem.setAtomMappingID(atom, atom.getIndex() + 1)
191
192 try:
193 if args.output_mol: # write input molecule before the obtained fragments
194 if not writer.write(in_mol):
195 sys.exit(f'Error: writing molecule {mol_id} failed')
196
197 if args.carbonize: # convert all atoms into carbons and equalize other basic atom/bond properties
198 carbonize(in_mol)
199
200 if args.output_fws:
201 outputFragments(bm_analyzer.getFrameworks(), writer, mol_name, mol_id, 'F')
202
203 if args.output_rsys:
204 outputFragments(bm_analyzer.getRingSystems(), writer, mol_name, mol_id, 'R')
205
206 if args.output_lnks:
207 outputFragments(bm_analyzer.getLinkers(), writer, mol_name, mol_id, 'L')
208
209 if args.output_schns:
210 outputFragments(bm_analyzer.getSideChains(), writer, mol_name, mol_id, 'S')
211
212 except Exception as e: # handle exception raised in case of severe write errors
213 sys.exit(f'Error: writing molecule {mol_id} failed: {str(e)}')
214
215 except Exception as e: # handle exception raised in case of severe structure processing errors
216 sys.exit(f'Error: processing of molecule {mol_id} failed: {str(e)}')
217
218 i += 1
219
220 except Exception as e: # handle exception raised in case of severe read errors
221 sys.exit(f'Error: reading of molecule {mol_id} failed: {str(e)}')
222
223 writer.close()
224 sys.exit(0)
225
226if __name__ == '__main__':
227 main()