1.1.2.3. Bemis-Murcko Framework Analysis

The script gen_bm_frags.py analyzes the framework of input molecules according to the rules established by Bemis and Murcko [17].

Synopsis

python gen_bm_frags.py [-h] -i <file> -o <file> [-v] [-m] [-H] [-f <true|false>] [-r] [-s] [-l] [-x] [-c]

Mandatory options

-i <file>

Input molecule file

-o <file>

Fragment output file

Other options

-h, --help

Show help message and exit

-v

Verbose output (default: false)

-m

Output the input molecule before the resulting fragments (default: false)

-H

Keep hydrogen atoms (default: false)

-f <true|false>

Output molecule frameworks (default: true)

-r

Output molecule ring systems (default: false)

-s

Output molecule side chains (default: false)

-l

Output ring system linkers (default: false)

-x

Label atoms with atom mapping IDs (default: false)

-c

Transform fragments into carbon skeletons (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5
  6    
  7# performs Bemis-Murcko framework analyis for a given input molecule
  8def bmAnalyze(molgraph: Chem.MolecularGraph, keep_hs: bool) -> Chem.BemisMurckoAnalyzer:
  9    Chem.calcBasicProperties(molgraph, False)  # calculate basic molecular properties (if not yet done)
 10    
 11    # create an instance of the class implementing the Bemis-Murcko analyis rules
 12    bm_analyzer = Chem.BemisMurckoAnalyzer()
 13
 14    # apply -H option
 15    bm_analyzer.stripHydrogens(not keep_hs)
 16
 17    # perform analysis
 18    bm_analyzer.analyze(molgraph)
 19
 20    return bm_analyzer
 21    
 22def getLogMessage(bm_analyzer: Chem.BemisMurckoAnalyzer, mol_id: str) -> str:
 23    msg =  f'- Molecule {mol_id}\n'
 24    msg += f'  Frameworks:   {bm_analyzer.getFrameworks().getSize()}\n'
 25    msg += f'  Ring Systems: {bm_analyzer.getRingSystems().getSize()}\n'
 26    msg += f'  Linkers:      {bm_analyzer.getLinkers().getSize()}\n'
 27    msg += f'  Side Chains:  {bm_analyzer.getSideChains().getSize()}'
 28     
 29    return msg
 30
 31def parseArgs() -> argparse.Namespace:
 32    def strtobool(value: str) -> bool:
 33        value = value.lower()
 34        
 35        if value in ("y", "yes", "on", "1", "true", "t"):
 36            return True
 37        
 38        return False
 39    
 40    parser = argparse.ArgumentParser(description='Analyzes the framework of given input molecules according to the rules established by Bemis and Murcko.')
 41
 42    parser.add_argument('-i',
 43                        dest='in_file',
 44                        required=True,
 45                        metavar='<file>',
 46                        help='Input molecule file')
 47    parser.add_argument('-o',
 48                        dest='out_file',
 49                        required=True,
 50                        metavar='<file>',
 51                        help='Fragment output file')
 52    parser.add_argument('-v',
 53                        dest='verbose',
 54                        required=False,
 55                        action='store_true',
 56                        default=False,
 57                        help='Verbose output (default: false)')
 58    parser.add_argument('-m',
 59                        dest='output_mol',
 60                        required=False,
 61                        action='store_true',
 62                        default=False,
 63                        help='Output the input molecule before the resulting fragments (default: false)')
 64    parser.add_argument('-H',
 65                        dest='keep_hs',
 66                        required=False,
 67                        action='store_true',
 68                        default=False,
 69                        help='Keep hydrogen atoms (default: false)')
 70    parser.add_argument('-f',
 71                        dest='output_fws',
 72                        required=False,
 73                        metavar='<true|false>',
 74                        type=lambda x:bool(strtobool(x)),
 75                        default=True,
 76                        help='Output molecule frameworks (default: true)')
 77    parser.add_argument('-r',
 78                        dest='output_rsys',
 79                        required=False,
 80                        action='store_true',
 81                        default=False,
 82                        help='Output molecule ring systems (default: false)')
 83    parser.add_argument('-s',
 84                        dest='output_schns',
 85                        required=False,
 86                        action='store_true',
 87                        default=False,
 88                        help='Output molecule side chains (default: false)')
 89    parser.add_argument('-l',
 90                        dest='output_lnks',
 91                        required=False,
 92                        action='store_true',
 93                        default=False,
 94                        help='Output ring system linkers (default: false)')
 95    parser.add_argument('-x',
 96                        dest='atom_mpg',
 97                        required=False,
 98                        action='store_true',
 99                        default=False,
100                        help='Label atoms with atom mapping IDs (default: false)')
101    parser.add_argument('-c',
102                        dest='carbonize',
103                        required=False,
104                        action='store_true',
105                        default=False,
106                        help='Transform fragments into carbon skeletons (default: false)')
107   
108    return parser.parse_args()
109
110# converts all atoms into carbons and equalizes other basic atom/bond properties
111def carbonize(molgraph: Chem.MolecularGraph) -> None:
112    for atom in molgraph.atoms:
113        Chem.setSymbol(atom, 'C')
114        Chem.setType(atom, Chem.AtomType.C)
115        Chem.clearFormalCharge(atom)
116        Chem.setImplicitHydrogenCount(atom, 0)
117        Chem.setAromaticityFlag(atom, False)
118        Chem.setHybridizationState(atom, Chem.HybridizationState.UNKNOWN)
119        Chem.clearStereoDescriptor(atom)
120        Chem.clear3DCoordinates(atom)
121        Chem.clear3DCoordinatesArray(atom)
122
123    for bond in molgraph.bonds:
124        Chem.setOrder(bond, 1)
125        Chem.setAromaticityFlag(bond, False)
126        Chem.clearStereoDescriptor(bond)
127        Chem.set2DStereoFlag(bond, Chem.BondStereoFlag.PLAIN)
128        
129def outputFragments(frags: Chem.FragmentList, writer: Chem.MolecularGraphWriter, mol_name: str, mol_id: str, frag_type: str) -> None:
130    for i in range(len(frags)):
131        frag = frags[i]
132
133        # set fragment name encoding the parent molecule and fragment number
134        Chem.setName(frag, mol_name + f'_{frag_type}#{i + 1}') 
135                        
136        # calculate (if not present) some basic properties of the output fragments
137        # that might be required for writing (output format dependent)
138        Chem.perceiveSSSR(frag, False)
139        Chem.perceiveComponents(frag, False)
140
141        try:
142            # write fragment
143            if not writer.write(frag):
144                sys.exit('Error: writing fragment of molecule {mol_id} failed')
145
146        except Exception as e: # handle exception raised in case of severe write errors
147            sys.exit(f'Error: writing fragment of molecule {mol_id} failed: {str(e)}')  
148
149def main() -> None:
150    args = parseArgs() # process command line arguments
151        
152    # create reader for input molecules (format specified by file extension)
153    reader = Chem.MoleculeReader(args.in_file) 
154
155    # create writer for output fragments (format specified by file extension)
156    writer = Chem.MolecularGraphWriter(args.out_file) 
157  
158    if args.atom_mpg: # enable atom mapping IDs for SMILES output
159        Chem.setSMILESMolOutputAtomMappingIDParameter(writer, True)
160
161    # write canonical SMILES
162    Chem.setSMILESOutputCanonicalFormParameter(writer, True)
163    
164    # create instances of the default implementation of the Chem.Molecule interface for the input molecule
165    in_mol = Chem.BasicMolecule()
166    i = 1
167    
168    try:
169        # read and process molecules one after the other until the end of input has been reached (or a severe error occurs)
170        while reader.read(in_mol):
171            # compose a molecule identifier
172            mol_name = Chem.getName(in_mol).strip() 
173
174            if mol_name == '':
175                mol_name = f'Mol#{i}'
176                mol_id = f'#{i}'  # fallback if name is empty or not available
177            else:
178                mol_id = f'\'{mol_name}\' (#{i})'
179         
180            try:
181                # perform Bemis-Murcko analysis of in_mol
182                bm_analyzer = bmAnalyze(in_mol, args.keep_hs) 
183
184                if args.verbose:
185                    print(getLogMessage(bm_analyzer, mol_id))
186
187                # label atoms with atom mapping IDs to see correspondences between fragment and parent molecule atoms
188                if args.atom_mpg: 
189                    for atom in in_mol.atoms:
190                        Chem.setAtomMappingID(atom, atom.getIndex() + 1)
191
192                try:
193                    if args.output_mol: # write input molecule before the obtained fragments
194                        if not writer.write(in_mol):
195                            sys.exit(f'Error: writing molecule {mol_id} failed')
196
197                    if args.carbonize:  # convert all atoms into carbons and equalize other basic atom/bond properties
198                        carbonize(in_mol)
199
200                    if args.output_fws:
201                        outputFragments(bm_analyzer.getFrameworks(), writer, mol_name, mol_id, 'F')
202
203                    if args.output_rsys:
204                        outputFragments(bm_analyzer.getRingSystems(), writer, mol_name, mol_id, 'R')
205
206                    if args.output_lnks:
207                        outputFragments(bm_analyzer.getLinkers(), writer, mol_name, mol_id, 'L')
208                     
209                    if args.output_schns:
210                        outputFragments(bm_analyzer.getSideChains(), writer, mol_name, mol_id, 'S')
211                             
212                except Exception as e: # handle exception raised in case of severe write errors
213                    sys.exit(f'Error: writing molecule {mol_id} failed: {str(e)}')
214                
215            except Exception as e: # handle exception raised in case of severe structure processing errors
216                sys.exit(f'Error: processing of molecule {mol_id} failed: {str(e)}')
217
218            i += 1
219            
220    except Exception as e: # handle exception raised in case of severe read errors
221        sys.exit(f'Error: reading of molecule {mol_id} failed: {str(e)}')
222
223    writer.close()
224    sys.exit(0)
225        
226if __name__ == '__main__':
227    main()

Download source file