3.2.1. Functional Groups

The script count_func_groups.py perceives the functional groups of molecules read from a specified input file using Ertl’s algorithm [18] and outputs the abundances of the detected functional groups as a CSV-file.

Synopsis

python count_func_groups.py [-h] -i <file> -o <file> [-v <0|1|2|3>] [-m]

Mandatory options

-i <file>

Input molecule file

-o <file>

Functional group abundance output file

Other options

-h, --help

Show help message and exit

-v <0|1|2|3>

Verbosity level (default: 1; 0 -> no console output, 1 -> info, 2 -> verbose, 3 -> extra verbose)

-m

Consider all possible types of aromatic nitrogens as pyridine nitrogen (default: false)

Code

import argparse
import sys
from typing import Optional

import CDPL.Chem as Chem
import CDPL.MolProp as MolProp
  6
  7    
  8# extracts all functional groups of a given input molecule using the provided MolProp.FunctionalGroupsList instance
  9# and updates their global counts
 10def extractFunctionalGroups(func_grp_list: MolProp.FunctionalGroupList, in_mol: Chem.Molecule, func_grp_counts: dict, merge_aro_n: bool) -> int:
 11    Chem.calcBasicProperties(in_mol, False) # calculate required properties 
 12
 13    func_grp_list.extract(in_mol) # perceive and extract functional groups of the molecule
 14
 15    for fg in func_grp_list:      # iterate over the functional groups
 16        name = Chem.getName(fg)
 17
 18        # if requested, regard all possible types of arom. nitrogens as pyridine N
 19        if merge_aro_n and (name == '[n+]' or name == 'n[R]'):
 20            name = 'n'              # change to pyridine nitrogen type
 21            Chem.setName(fg, name)  # save modified name for later use
 22
 23        if name in func_grp_counts: # update the global number of times this functional group was encountered
 24            func_grp_counts[name] += 1
 25        else:
 26            func_grp_counts[name] = 1
 27    
 28    return len(func_grp_list)
 29    
 30def getLogMessage(verb_level: int, mol_id: str, func_grp_list: MolProp.FunctionalGroupList) -> str:
 31    if verb_level <= 1:
 32        return None
 33
 34    if len(func_grp_list) == 0:
 35        return  f'- Molecule {mol_id}: no functional groups'
 36    
 37    if verb_level == 2:
 38        return f'- Molecule {mol_id}: {len(func_grp_list)} functional group(s)'
 39
 40    fg_names = [Chem.getName(fg) for fg in func_grp_list].__str__().replace('[', '').replace(']', '').replace('\'', '')
 41    
 42    return f'- Molecule {mol_id}: {fg_names}'
 43
 44    
 45def parseArgs() -> argparse.Namespace:
 46    parser = argparse.ArgumentParser(description='Perceives the functional groups of molecules read from a specified \
 47    input file and outputs the abundances as a CSV-file.')
 48
 49    parser.add_argument('-i',
 50                        dest='in_file',
 51                        required=True,
 52                        metavar='<file>',
 53                        help='Input molecule file')
 54    parser.add_argument('-o',
 55                        dest='out_file',
 56                        required=True,
 57                        metavar='<file>',
 58                        help='Functional group abundance output file')
 59    parser.add_argument('-v',
 60                        dest='verb_level',
 61                        required=False,
 62                        metavar='<0|1|2|3>',
 63                        choices=range(0, 4),
 64                        default=1,
 65                        help='Verbosity level (default: 1; 0 -> no console output, 1 -> info, 2 -> verbose, 3 -> extra verbose)',
 66                        type=int)
 67    parser.add_argument('-m',
 68                        dest='merge_arom_n',
 69                        required=False,
 70                        action='store_true',
 71                        default=False,
 72                        help='Consider all possible types of aromatic nitrogens as pyridine nitrogen (default: false)')
 73
 74    return parser.parse_args()
 75
 76def main() -> None:
 77    args = parseArgs() # process command line arguments
 78    
 79    # create instance of the class implementing Ertl's alorithm for functional group perception
 80    func_grp_list = MolProp.FunctionalGroupList()
 81
 82    # create reader for input molecules (format specified by file extension)
 83    reader = Chem.MoleculeReader(args.in_file) 
 84    
 85    # create instances of the default implementation of the Chem.Molecule interface for the input molecule
 86    in_mol = Chem.BasicMolecule()
 87        
 88    i = 0
 89    tot_func_grp_count = 0
 90    func_grp_counts = {}
 91
 92    try:
 93        # read and process molecules one after the other until the end of input has been reached (or a severe error occurs)
 94        while reader.read(in_mol):
 95            # compose a molecule identifier
 96            mol_id = Chem.getName(in_mol).strip() 
 97
 98            if mol_id == '':
 99                mol_id = f'#{i + 1}'  # fallback if name is empty or not available
100            else:
101                mol_id = f'\'{mol_id}\' (#{i + 1})'
102         
103            try:
104                # extract the functional groups of the read molecule
105                tot_func_grp_count += extractFunctionalGroups(func_grp_list, in_mol, func_grp_counts, args.merge_arom_n) 
106
107                # generate log message
108                log_msg = getLogMessage(args.verb_level, mol_id, func_grp_list)
109
110                if log_msg:
111                    print(log_msg)
112                
113            except Exception as e: # handle exception raised in case of severe structure processing errors
114                sys.exit(f'Error: processing of molecule {mol_id} failed: {str(e)}')
115
116            i += 1
117            
118    except Exception as e: # handle exception raised in case of severe read errors
119        sys.exit(f'Error: reading of molecule {mol_id} failed: {str(e)}')
120
121    with open(args.out_file, 'w') as out_file:
122        out_file.write('Functional Group,Count,Percentage\n')
123
124        for count, name in sorted( ((v,k) for k,v in func_grp_counts.items()), reverse=True):
125            out_file.write(f'{name},{count},{count / tot_func_grp_count * 100.0:.3f}\n')
126
127    if args.verb_level >= 1:
128        print(f'Processed {i} molecules(s)')
129        print(f'Found {tot_func_grp_count} functional group(s)')
130            
131    sys.exit(0)
132        
# run main() only if this file is executed as a script (not when it is imported as a module)
if __name__ == '__main__':
    main()

Download source file