3.2.1. Functional Groups
The script count_func_groups.py perceives the functional groups of molecules read from a specified input file using Ertl’s algorithm [18] and writes the abundances of the detected functional groups to a CSV file.
Synopsis
python count_func_groups.py [-h] -i <file> -o <file> [-v <0|1|2|3>] [-m]
Mandatory options
- -i <file>
Input molecule file
- -o <file>
Functional group abundance output file
Other options
- -h, --help
Show help message and exit
- -v <0|1|2|3>
Verbosity level (default: 1; 0 -> no console output, 1 -> info, 2 -> verbose, 3 -> extra verbose)
- -m
Consider all possible types of aromatic nitrogens as pyridine nitrogen (default: false)
Code
import argparse
import sys
from typing import Optional

import CDPL.Chem as Chem
import CDPL.MolProp as MolProp
6
7
def extractFunctionalGroups(func_grp_list: MolProp.FunctionalGroupList, in_mol: Chem.Molecule, func_grp_counts: dict, merge_aro_n: bool) -> int:
    """Perceive the functional groups of a molecule and update the global counts.

    Args:
        func_grp_list: MolProp.FunctionalGroupList instance used for the perception;
            after the call it holds the functional groups of in_mol.
        in_mol: The molecule to analyze.
        func_grp_counts: Mapping of functional group name to the number of times
            it was encountered so far; updated in place.
        merge_aro_n: If true, groups named '[n+]' or 'n[R]' are renamed to 'n'
            and counted under that name.

    Returns:
        The number of functional groups found in in_mol.
    """
    # calculate the properties required by the perception step
    Chem.calcBasicProperties(in_mol, False)

    # perceive and extract the functional groups of the molecule
    func_grp_list.extract(in_mol)

    for func_grp in func_grp_list:
        fg_name = Chem.getName(func_grp)

        # if requested, regard all possible types of arom. nitrogens as pyridine N
        if merge_aro_n and fg_name in ('[n+]', 'n[R]'):
            fg_name = 'n'
            # store the changed name on the group so later readers of the list see it
            Chem.setName(func_grp, fg_name)

        # bump the global number of times this functional group was encountered
        func_grp_counts[fg_name] = func_grp_counts.get(fg_name, 0) + 1

    return len(func_grp_list)
29
def getLogMessage(verb_level: int, mol_id: str, func_grp_list: MolProp.FunctionalGroupList) -> Optional[str]:
    """Compose the per-molecule console message for the given verbosity level.

    Args:
        verb_level: Verbosity level; values <= 1 produce no per-molecule message.
        mol_id: Molecule identifier inserted into the message.
        func_grp_list: The functional groups perceived for the molecule.

    Returns:
        None if verb_level <= 1. Otherwise a message stating that no functional
        groups were found (empty list), the number of functional groups
        (verb_level == 2), or the comma-separated functional group names
        (any higher level).
    """
    if verb_level <= 1:
        return None

    if len(func_grp_list) == 0:
        return f'- Molecule {mol_id}: no functional groups'

    if verb_level == 2:
        return f'- Molecule {mol_id}: {len(func_grp_list)} functional group(s)'

    # Join the names directly. Stripping '[', ']' and quotes from the list's
    # string representation would also strip brackets that belong to the
    # names themselves (e.g. '[n+]' or 'n[R]').
    fg_names = ', '.join(Chem.getName(fg) for fg in func_grp_list)

    return f'- Molecule {mol_id}: {fg_names}'
43
44
def parseArgs() -> argparse.Namespace:
    """Parse the command line options of the script.

    Returns:
        The parsed arguments with the attributes in_file, out_file,
        verb_level (int in 0..3, default 1) and merge_arom_n (bool, default False).
    """
    # Adjacent string literals are concatenated at compile time. A backslash
    # continuation inside the literal would embed the indentation of the
    # following source line in the description text.
    parser = argparse.ArgumentParser(description='Perceives the functional groups of molecules read from a specified '
                                                 'input file and outputs the abundances as a CSV-file.')

    parser.add_argument('-i',
                        dest='in_file',
                        required=True,
                        metavar='<file>',
                        help='Input molecule file')
    parser.add_argument('-o',
                        dest='out_file',
                        required=True,
                        metavar='<file>',
                        help='Functional group abundance output file')
    parser.add_argument('-v',
                        dest='verb_level',
                        required=False,
                        metavar='<0|1|2|3>',
                        choices=range(0, 4),
                        default=1,
                        help='Verbosity level (default: 1; 0 -> no console output, 1 -> info, 2 -> verbose, 3 -> extra verbose)',
                        type=int)
    parser.add_argument('-m',
                        dest='merge_arom_n',
                        required=False,
                        action='store_true',
                        default=False,
                        help='Consider all possible types of aromatic nitrogens as pyridine nitrogen (default: false)')

    return parser.parse_args()
75
def main() -> None:
    """Count the functional groups of all input molecules and write a CSV summary.

    Parses the command line, reads the molecules of the input file one after
    the other, perceives their functional groups and accumulates the per-group
    counts. The counts are then written to the output file as CSV with the
    columns 'Functional Group', 'Count' and 'Percentage'. The process is always
    terminated via sys.exit(): with an error message if reading or processing
    of a molecule fails, with status 0 otherwise.
    """
    args = parseArgs()  # process command line arguments

    # create instance of the class implementing Ertl's algorithm for functional group perception
    func_grp_list = MolProp.FunctionalGroupList()

    # create reader for input molecules (format specified by file extension)
    reader = Chem.MoleculeReader(args.in_file)

    # create an instance of the default implementation of the Chem.Molecule interface for the input molecule
    in_mol = Chem.BasicMolecule()

    i = 0                   # number of molecules processed so far
    tot_func_grp_count = 0  # number of functional groups found over all molecules
    func_grp_counts = {}    # functional group name -> number of occurrences

    try:
        # read and process molecules one after the other until the end of input has been reached (or a severe error occurs)
        while reader.read(in_mol):
            # compose a molecule identifier
            mol_id = Chem.getName(in_mol).strip()

            if mol_id == '':
                mol_id = f'#{i + 1}'  # fallback if name is empty or not available
            else:
                mol_id = f'\'{mol_id}\' (#{i + 1})'

            try:
                # extract the functional groups of the read molecule
                tot_func_grp_count += extractFunctionalGroups(func_grp_list, in_mol, func_grp_counts, args.merge_arom_n)

                # generate log message
                log_msg = getLogMessage(args.verb_level, mol_id, func_grp_list)

                if log_msg:
                    print(log_msg)

            except Exception as e:  # handle exception raised in case of severe structure processing errors
                sys.exit(f'Error: processing of molecule {mol_id} failed: {str(e)}')

            i += 1

    except Exception as e:  # handle exception raised in case of severe read errors
        # Report the index of the molecule whose read failed. mol_id cannot be
        # used here: it is unbound if the very first read fails and otherwise
        # still names the previously processed molecule.
        sys.exit(f'Error: reading of molecule #{i + 1} failed: {str(e)}')

    with open(args.out_file, 'w') as out_file:
        out_file.write('Functional Group,Count,Percentage\n')

        # rows are ordered by descending count (ties: descending name);
        # tot_func_grp_count cannot be zero when there is at least one row
        for count, name in sorted(((v, k) for k, v in func_grp_counts.items()), reverse=True):
            out_file.write(f'{name},{count},{count / tot_func_grp_count * 100.0:.3f}\n')

    if args.verb_level >= 1:
        print(f'Processed {i} molecule(s)')
        print(f'Found {tot_func_grp_count} functional group(s)')

    sys.exit(0)


if __name__ == '__main__':
    main()