1.1.2.2. Rule-based Fragmentation

The script gen_mol_frags.py performs molecule fragmentation according to BRICS [2] or RECAP rules [1].

Synopsis

python gen_mol_frags.py [-h] -i <file> -o <file> -r <string> [-v <0|1|2>] [-m] [-b] [-x]

Mandatory options

-i <file>

Input molecule file

-o <file>

Fragment output file

-r <string>

Fragmentation rule set (BRICS or RECAP)

Other options

-h, --help

Show help message and exit

-v <0|1|2>

Verbosity level (default: 1; 0 -> no console output, 1 -> verbose, 2 -> extra verbose)

-m

Output the fragmented molecule before its fragments (default: false)

-b

Include bonds that were split during fragmentation in the obtained fragments (default: false)

-x

Label atoms with atom mapping IDs (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5
  6    
  7# splits a given input molecule into fragments using the provided Chem.FragmentGenerator instance implementing
  8# the fragmentation rules
  9def genFragments(frag_gen: Chem.FragmentGenerator, in_mol: Chem.Molecule, out_frags: Chem.FragmentList) -> None:
 10    Chem.initSubstructureSearchTarget(in_mol, False) # calculate required properties
 11
 12    frag_gen.generate(in_mol, out_frags)
 13
 14def getLogMessage(verb_level: int, mol_id: str, num_frags: int, frag_gen: Chem.FragmentGenerator) -> str:
 15    if verb_level == 0:
 16        return None
 17
 18    if num_frags <= 1:
 19        return  f'- Molecule {mol_id}: no fragmentation'
 20
 21    msg = f'- Molecule {mol_id}: {num_frags} fragments'
 22
 23    if verb_level < 2:
 24        return msg
 25
 26    msg += '\n  Cut bonds (Rule ID, Bond index, Atom#1 index, Atom#2 index, Fragment#1 index, Fragment#2 index):'
 27    
 28    # gather information about the bonds that were split during fragmentation
 29    for i in range(frag_gen.numFragmentLinks):
 30        frag_link = frag_gen.getFragmentLink(i)
 31        msg += (f'\n  {frag_link.ruleID}, {frag_link.bond.index}, {frag_link.bond.begin.index}, '
 32                f'{frag_link.bond.end.index}, {frag_link.fragment1Index}, {frag_link.fragment2Index}')
 33    
 34    return msg
 35
 36def parseArgs() -> argparse.Namespace:
 37    parser = argparse.ArgumentParser(description='Performs molecule fragmentation using BRICS or RECAP rules.')
 38
 39    parser.add_argument('-i',
 40                        dest='in_file',
 41                        required=True,
 42                        metavar='<file>',
 43                        help='Input molecule file')
 44    parser.add_argument('-o',
 45                        dest='out_file',
 46                        required=True,
 47                        metavar='<file>',
 48                        help='Fragment output file')
 49    parser.add_argument('-r',
 50                        dest='rule_set',
 51                        required=True,
 52                        metavar='<string>',
 53                        help='Fragmentation rule set (BRICS or RECAP)')
 54    parser.add_argument('-v',
 55                        dest='verb_level',
 56                        required=False,
 57                        metavar='<0|1|2>',
 58                        choices=range(0, 3),
 59                        default=1,
 60                        help='Verbosity level (default: 1; 0 -> no console output, 1 -> verbose, 2 -> extra verbose)',
 61                        type=int)
 62    parser.add_argument('-m',
 63                        dest='output_mol',
 64                        required=False,
 65                        action='store_true',
 66                        default=False,
 67                        help='Output the fragmented molecule before its fragments (default: false)')
 68    parser.add_argument('-b',
 69                        dest='inc_split_bonds',
 70                        required=False,
 71                        action='store_true',
 72                        default=False,
 73                        help='Include bonds that were split during fragmentation in the obtained fragments (default: false)')
 74    parser.add_argument('-x',
 75                        dest='atom_mpg',
 76                        required=False,
 77                        action='store_true',
 78                        default=False,
 79                        help='Label atoms with atom mapping IDs (default: false)')
 80
 81    return parser.parse_args()
 82
 83def main() -> None:
 84    args = parseArgs() # process command line arguments
 85    
 86    # create an instance of the class implementing the desired fragmentation rule set
 87    if args.rule_set.lower() == "brics":
 88        frag_gen = Chem.BRICSFragmentGenerator()
 89    elif args.rule_set.lower() == "recap":
 90        frag_gen = Chem.RECAPFragmentGenerator()
 91    else:
 92        sys.exit('Error: invalid fragmentation rule set')
 93
 94    frag_gen.includeSplitBonds(args.inc_split_bonds) # apply -b option
 95        
 96    # create reader for input molecules (format specified by file extension)
 97    reader = Chem.MoleculeReader(args.in_file) 
 98
 99    # create writer for output fragments (format specified by file extension)
100    writer = Chem.MolecularGraphWriter(args.out_file) 
101  
102    if args.atom_mpg: # enable atom mapping IDs for SMILES output
103        Chem.setSMILESMolOutputAtomMappingIDParameter(writer, True)
104    
105    # create instances of the default implementation of the Chem.Molecule interface for the input molecule
106    in_mol = Chem.BasicMolecule()
107
108    # create instances of the Chem.FragmentList data structure storing the generated fragments
109    out_frags = Chem.FragmentList()
110        
111    i = 1
112    
113    try:
114        # read and process molecules one after the other until the end of input has been reached (or a severe error occurs)
115        while reader.read(in_mol):
116            # compose a molecule identifier
117            mol_name = Chem.getName(in_mol).strip() 
118
119            if mol_name == '':
120                mol_name = f'Mol#{i}'
121                mol_id = f'#{i}'  # fallback if name is empty or not available
122            else:
123                mol_id = f'\'{mol_name}\' (#{i})'
124         
125            try:
126                # perform molecule fragmentation
127                genFragments(frag_gen, in_mol, out_frags) 
128
129                log_msg = getLogMessage(args.verb_level, mol_id, len(out_frags), frag_gen)
130
131                if log_msg:
132                    print(log_msg)
133
134                if args.atom_mpg: # label atoms with atom mapping IDs to see correspondences between fragment and parent molecule atoms
135                    for atom in in_mol.atoms:
136                        Chem.setAtomMappingID(atom, atom.getIndex() + 1)
137
138                try:
139                    if args.output_mol: # write input molecule before the generated fragments
140                        if not writer.write(in_mol):
141                            sys.exit(f'Error: writing molecule {mol_id} failed')
142
143                    for j in range(len(out_frags)):
144                        frag = out_frags[j]
145
146                        # set fragment name encoding the parent molecule and fragment number
147                        Chem.setName(frag, mol_name + f'_F#{j + 1}') 
148                        
149                        # calculate (if not present) some basic properties of the output fragments
150                        # that might be required for writing (output format dependent)
151                        Chem.perceiveSSSR(frag, False)
152                        Chem.perceiveComponents(frag, False)
153
154                        try:
155                            # write fragment
156                            if not writer.write(frag):
157                                sys.exit('Error: writing fragment of molecule {mol_id} failed')
158                        except Exception as e: # handle exception raised in case of severe write errors
159                             sys.exit(f'Error: writing fragment of molecule {mol_id} failed: {str(e)}')
160                             
161                except Exception as e: # handle exception raised in case of severe write errors
162                    sys.exit(f'Error: writing molecule {mol_id} failed: {str(e)}')
163                
164            except Exception as e: # handle exception raised in case of severe structure processing errors
165                sys.exit(f'Error: processing of molecule {mol_id} failed: {str(e)}')
166
167            i += 1
168            
169    except Exception as e: # handle exception raised in case of severe read errors
170        sys.exit(f'Error: reading of molecule {mol_id} failed: {str(e)}')
171
172    writer.close()
173    sys.exit(0)
174        
175if __name__ == '__main__':
176    main()

Download source file