1.1.2.2. Rule-based Fragmentation
The script gen_mol_frags.py performs molecule fragmentation according to BRICS [2] or RECAP rules [1].
Synopsis
python gen_mol_frags.py [-h] -i <file> -o <file> -r <string> [-v <0|1|2>] [-m] [-b] [-x]
Mandatory options
- -i <file>
Input molecule file
- -o <file>
Fragment output file
- -r <string>
Fragmentation rule set (BRICS or RECAP)
Other options
- -h, --help
Show help message and exit
- -v <0|1|2>
Verbosity level (default: 1; 0 -> no console output, 1 -> verbose, 2 -> extra verbose)
- -m
Output the fragmented molecule before its fragments (default: false)
- -b
Include bonds that were split during fragmentation in the obtained fragments (default: false)
- -x
Label atoms with atom mapping IDs (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5
6
7# splits a given input molecule into fragments using the provided Chem.FragmentGenerator instance implementing
8# the fragmentation rules
9def genFragments(frag_gen: Chem.FragmentGenerator, in_mol: Chem.Molecule, out_frags: Chem.FragmentList) -> None:
10 Chem.initSubstructureSearchTarget(in_mol, False) # calculate required properties
11
12 frag_gen.generate(in_mol, out_frags)
13
14def getLogMessage(verb_level: int, mol_id: str, num_frags: int, frag_gen: Chem.FragmentGenerator) -> str:
15 if verb_level == 0:
16 return None
17
18 if num_frags <= 1:
19 return f'- Molecule {mol_id}: no fragmentation'
20
21 msg = f'- Molecule {mol_id}: {num_frags} fragments'
22
23 if verb_level < 2:
24 return msg
25
26 msg += '\n Cut bonds (Rule ID, Bond index, Atom#1 index, Atom#2 index, Fragment#1 index, Fragment#2 index):'
27
28 # gather information about the bonds that were split during fragmentation
29 for i in range(frag_gen.numFragmentLinks):
30 frag_link = frag_gen.getFragmentLink(i)
31 msg += (f'\n {frag_link.ruleID}, {frag_link.bond.index}, {frag_link.bond.begin.index}, '
32 f'{frag_link.bond.end.index}, {frag_link.fragment1Index}, {frag_link.fragment2Index}')
33
34 return msg
35
36def parseArgs() -> argparse.Namespace:
37 parser = argparse.ArgumentParser(description='Performs molecule fragmentation using BRICS or RECAP rules.')
38
39 parser.add_argument('-i',
40 dest='in_file',
41 required=True,
42 metavar='<file>',
43 help='Input molecule file')
44 parser.add_argument('-o',
45 dest='out_file',
46 required=True,
47 metavar='<file>',
48 help='Fragment output file')
49 parser.add_argument('-r',
50 dest='rule_set',
51 required=True,
52 metavar='<string>',
53 help='Fragmentation rule set (BRICS or RECAP)')
54 parser.add_argument('-v',
55 dest='verb_level',
56 required=False,
57 metavar='<0|1|2>',
58 choices=range(0, 3),
59 default=1,
60 help='Verbosity level (default: 1; 0 -> no console output, 1 -> verbose, 2 -> extra verbose)',
61 type=int)
62 parser.add_argument('-m',
63 dest='output_mol',
64 required=False,
65 action='store_true',
66 default=False,
67 help='Output the fragmented molecule before its fragments (default: false)')
68 parser.add_argument('-b',
69 dest='inc_split_bonds',
70 required=False,
71 action='store_true',
72 default=False,
73 help='Include bonds that were split during fragmentation in the obtained fragments (default: false)')
74 parser.add_argument('-x',
75 dest='atom_mpg',
76 required=False,
77 action='store_true',
78 default=False,
79 help='Label atoms with atom mapping IDs (default: false)')
80
81 return parser.parse_args()
82
83def main() -> None:
84 args = parseArgs() # process command line arguments
85
86 # create an instance of the class implementing the desired fragmentation rule set
87 if args.rule_set.lower() == "brics":
88 frag_gen = Chem.BRICSFragmentGenerator()
89 elif args.rule_set.lower() == "recap":
90 frag_gen = Chem.RECAPFragmentGenerator()
91 else:
92 sys.exit('Error: invalid fragmentation rule set')
93
94 frag_gen.includeSplitBonds(args.inc_split_bonds) # apply -b option
95
96 # create reader for input molecules (format specified by file extension)
97 reader = Chem.MoleculeReader(args.in_file)
98
99 # create writer for output fragments (format specified by file extension)
100 writer = Chem.MolecularGraphWriter(args.out_file)
101
102 if args.atom_mpg: # enable atom mapping IDs for SMILES output
103 Chem.setSMILESMolOutputAtomMappingIDParameter(writer, True)
104
105 # create instances of the default implementation of the Chem.Molecule interface for the input molecule
106 in_mol = Chem.BasicMolecule()
107
108 # create instances of the Chem.FragmentList data structure storing the generated fragments
109 out_frags = Chem.FragmentList()
110
111 i = 1
112
113 try:
114 # read and process molecules one after the other until the end of input has been reached (or a severe error occurs)
115 while reader.read(in_mol):
116 # compose a molecule identifier
117 mol_name = Chem.getName(in_mol).strip()
118
119 if mol_name == '':
120 mol_name = f'Mol#{i}'
121 mol_id = f'#{i}' # fallback if name is empty or not available
122 else:
123 mol_id = f'\'{mol_name}\' (#{i})'
124
125 try:
126 # perform molecule fragmentation
127 genFragments(frag_gen, in_mol, out_frags)
128
129 log_msg = getLogMessage(args.verb_level, mol_id, len(out_frags), frag_gen)
130
131 if log_msg:
132 print(log_msg)
133
134 if args.atom_mpg: # label atoms with atom mapping IDs to see correspondences between fragment and parent molecule atoms
135 for atom in in_mol.atoms:
136 Chem.setAtomMappingID(atom, atom.getIndex() + 1)
137
138 try:
139 if args.output_mol: # write input molecule before the generated fragments
140 if not writer.write(in_mol):
141 sys.exit(f'Error: writing molecule {mol_id} failed')
142
143 for j in range(len(out_frags)):
144 frag = out_frags[j]
145
146 # set fragment name encoding the parent molecule and fragment number
147 Chem.setName(frag, mol_name + f'_F#{j + 1}')
148
149 # calculate (if not present) some basic properties of the output fragments
150 # that might be required for writing (output format dependent)
151 Chem.perceiveSSSR(frag, False)
152 Chem.perceiveComponents(frag, False)
153
154 try:
155 # write fragment
156 if not writer.write(frag):
157 sys.exit('Error: writing fragment of molecule {mol_id} failed')
158 except Exception as e: # handle exception raised in case of severe write errors
159 sys.exit(f'Error: writing fragment of molecule {mol_id} failed: {str(e)}')
160
161 except Exception as e: # handle exception raised in case of severe write errors
162 sys.exit(f'Error: writing molecule {mol_id} failed: {str(e)}')
163
164 except Exception as e: # handle exception raised in case of severe structure processing errors
165 sys.exit(f'Error: processing of molecule {mol_id} failed: {str(e)}')
166
167 i += 1
168
169 except Exception as e: # handle exception raised in case of severe read errors
170 sys.exit(f'Error: reading of molecule {mol_id} failed: {str(e)}')
171
172 writer.close()
173 sys.exit(0)
174
175if __name__ == '__main__':
176 main()