1.1.4.1. Testing for the Presence of Substructures

 1import sys
 2import argparse
 3
 4import CDPL.Chem as Chem
 5
 6
 7def filterMolecules() -> None:
 8    args = parseArgs()
 9    
10    # create reader for input molecules (format specified by file extension)
11    reader = Chem.MoleculeReader(args.in_file) 
12
13    # create writer for the output of matching molecules (format specified by file extension)
14    writer = Chem.MolecularGraphWriter(args.out_file) 
15
16    # parse the substructure SMARTS pattern
17    try:
18        sub_srch_ptn = Chem.parseSMARTS(args.smarts_ptn)
19    except Exception as e:
20        sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))
21
22    # create and initialize an instance of the class Chem.SubstructureSearch that
23    # implements the substructure searching algorithm
24    substr_srch = Chem.SubstructureSearch(sub_srch_ptn)
25
26    # create an instance of the default implementation of the Chem.Molecule interface
27    mol = Chem.BasicMolecule()
28    i = 1
29    
30    # read and process molecules one after the other until the end of input has been reached
31    try:
32        while reader.read(mol):
33            # compose a simple molecule identifier
34            mol_id = Chem.getName(mol).strip() 
35
36            if mol_id == '':
37                mol_id = '#' + str(i) # fallback if name is empty
38            else:
39                mol_id = '\'%s\' (#%s)' % (mol_id, str(i))
40
41            if not args.quiet:
42                print('- Searching for a matching substructure in molecule %s...' % mol_id)
43
44            try:
45                Chem.initSubstructureSearchTarget(mol, False)
46
47                if substr_srch.mappingExists(mol):
48                    if not args.quiet:
49                        print(' -> substructure found, forwarding molecule to output file')
50
51                    # output the matched molecule                 
52                    if not writer.write(mol):   
53                        sys.exit('Error: output of molecule failed')
54
55                elif not args.quiet: 
56                    print(' -> substructure not found')
57                        
58            except Exception as e:
59                sys.exit('Error: substructure search or output of molecule %s failed: %s' % (mol_id, str(e)))
60
61            i += 1
62                
63    except Exception as e: # handle exception raised in case of severe read errors
64        sys.exit('Error: reading molecule failed: ' + str(e))
65
66    writer.close()
67    sys.exit(0)
68        
def parseArgs() -> argparse.Namespace:
    """
    Parses the command line arguments of the script.

    Returns an argparse.Namespace providing the attributes in_file, out_file and
    smarts_ptn (all three mandatory) as well as the optional flag quiet, which is
    False unless -q was specified.
    """
    cl_parser = argparse.ArgumentParser(
        description='Writes input molecules that match the specified SMARTS substructure pattern to an output file.')

    # mandatory options taking a value: (option, destination attribute, metavar, help text)
    value_opts = (
        ('-i', 'in_file', '<file>', 'Molecule input file'),
        ('-o', 'out_file', '<file>', 'Molecule output file'),
        ('-p', 'smarts_ptn', '<SMARTS>', 'SMARTS pattern describing the substructure to search for'),
    )

    for opt, attr_name, meta, help_text in value_opts:
        cl_parser.add_argument(opt, dest=attr_name, required=True, metavar=meta, help=help_text)

    # optional switch that turns off the progress messages
    cl_parser.add_argument('-q', dest='quiet', required=False, action='store_true', default=False,
                           help='Disable progress output (default: false)')

    return cl_parser.parse_args()
95
# run the molecule filter only if this file is executed as a script (not on import)
if __name__ == "__main__":
    filterMolecules()

Download source file