1import sys
2import argparse
3
4import CDPL.Chem as Chem
5
6
def filterMolecules() -> None:
    """
    Writes all input molecules that match a SMARTS substructure pattern to an output file.

    The command line arguments are obtained via parseArgs(). Molecules are read one
    after the other from the input file; each molecule for which a substructure
    mapping exists is forwarded to the output file. Unless disabled by the quiet
    option, progress information is printed for every processed molecule.

    The function never returns normally: it terminates the script via sys.exit(),
    with status 0 on success or with an error message if parsing the pattern,
    creating the reader/writer, reading, searching or writing fails.
    """
    args = parseArgs()

    # parse the substructure SMARTS pattern first so that an invalid pattern is
    # reported before the molecule reader and writer get created
    try:
        sub_srch_ptn = Chem.parseSMARTS(args.smarts_ptn)
    except Exception as e:
        sys.exit('Error: parsing of SMARTS pattern failed: %s' % str(e))

    # create and initialize an instance of the class Chem.SubstructureSearch that
    # implements the substructure searching algorithm
    substr_srch = Chem.SubstructureSearch(sub_srch_ptn)

    # create reader for input molecules (format specified by file extension)
    try:
        reader = Chem.MoleculeReader(args.in_file)
    except Exception as e:
        sys.exit('Error: creation of molecule reader failed: %s' % str(e))

    # create writer for the output of matching molecules (format specified by file extension)
    try:
        writer = Chem.MolecularGraphWriter(args.out_file)
    except Exception as e:
        sys.exit('Error: creation of molecule writer failed: %s' % str(e))

    # create an instance of the default implementation of the Chem.Molecule interface
    mol = Chem.BasicMolecule()
    i = 1

    # read and process molecules one after the other until the end of input has been reached
    try:
        while reader.read(mol):
            # compose a simple molecule identifier
            mol_id = Chem.getName(mol).strip()

            if mol_id == '':
                mol_id = '#' + str(i)  # fallback if name is empty
            else:
                mol_id = '\'%s\' (#%s)' % (mol_id, str(i))

            if not args.quiet:
                print('- Searching for a matching substructure in molecule %s...' % mol_id)

            try:
                Chem.initSubstructureSearchTarget(mol, False)

                if substr_srch.mappingExists(mol):
                    if not args.quiet:
                        print(' -> substructure found, forwarding molecule to output file')

                    # output the matched molecule
                    if not writer.write(mol):
                        sys.exit('Error: output of molecule failed')

                elif not args.quiet:
                    print(' -> substructure not found')

            except Exception as e:
                sys.exit('Error: substructure search or output of molecule %s failed: %s' % (mol_id, str(e)))

            i += 1

    except Exception as e:  # handle exception raised in case of severe read errors
        sys.exit('Error: reading molecule failed: ' + str(e))

    finally:
        # sys.exit() raises SystemExit, which is not caught by 'except Exception';
        # closing the writer here guarantees that it is also closed on every
        # error exit taken inside the loop and not only after a successful run
        writer.close()

    sys.exit(0)
68
def parseArgs() -> argparse.Namespace:
    """
    Defines the command line options of the script and parses the process arguments.

    Returns:
        The namespace produced by argparse with the attributes 'in_file',
        'out_file', 'smarts_ptn' (all mandatory) and 'quiet' (optional flag).
    """
    cli = argparse.ArgumentParser(
        description='Writes input molecules that match the specified SMARTS substructure pattern to an output file.')

    # mandatory options as (flag, destination attribute, value placeholder, help text)
    mandatory_opts = (
        ('-i', 'in_file', '<file>', 'Molecule input file'),
        ('-o', 'out_file', '<file>', 'Molecule output file'),
        ('-p', 'smarts_ptn', '<SMARTS>', 'SMARTS pattern describing the substructure to search for'),
    )

    for flag, target, placeholder, help_txt in mandatory_opts:
        cli.add_argument(flag, dest=target, required=True, metavar=placeholder, help=help_txt)

    # optional switch that suppresses the per-molecule progress messages
    cli.add_argument('-q',
                     dest='quiet',
                     required=False,
                     action='store_true',
                     default=False,
                     help='Disable progress output (default: false)')

    return cli.parse_args()
95
# script entry point: run the molecule filter only when this file is executed
# directly and not when it is imported as a module
if __name__ == "__main__":
    filterMolecules()