7.1. Gaussian Shape-based Molecule Alignment
The script align_mols_by_shape.py overlays a set of input molecules with a given reference molecule based on their Gaussian shape representations and outputs the molecules translated/rotated to the calculated alignment pose(s).
Synopsis
python align_mols_by_shape.py [-h] -r <file> -i <file> -o <file> [-s <string>] [-p] [-f] [-m <integer>] [-q]
Mandatory options
- -r <file>
Reference molecule input file
- -i <file>
Molecule input file
- -o <file>
Aligned molecule output file
Other options
- -h, --help
Show help message and exit
- -p
Regard pharmacophoric (= color) features (default: false)
- -f
Perform a fast but less accurate shape alignment (default: false)
- -s <string>
Scoring function to use for assessing computed alignments (default: ShapeTanimotoScore, valid other values: TotalOverlapTanimotoScore, ColorTanimotoScore, TanimotoComboScore, TotalOverlapTverskyScore, ShapeTverskyScore, ColorTverskyScore, TverskyComboScore, ReferenceTotalOverlapTverskyScore, ReferenceShapeTverskyScore, ReferenceColorTverskyScore, ReferenceTverskyComboScore, AlignedTotalOverlapTverskyScore, AlignedShapeTverskyScore, AlignedColorTverskyScore, AlignedTverskyComboScore)
- -m <integer>
Maximum order of the Gaussian sphere overlap products (only effective in absence of option -f, default: 4)
- -d <float>
Minimum required RMSD between two consecutively output molecule alignment poses (default: 0.0)
- -q
Disable progress output (default: false)
Code
1import sys
2import argparse
3
4import CDPL.Chem as Chem
5import CDPL.Shape as Shape
6import CDPL.Pharm as Pharm
7
8
9# reads and returns the specified alignment reference molecule
10def readRefMolecule(filename: str) -> Chem.Molecule:
11 # create molecule reader instance
12 reader = Chem.MoleculeReader(filename)
13
14 # create an instance of the default implementation of the Chem.Molecule interface
15 mol = Chem.BasicMolecule()
16
17 try:
18 if not reader.read(mol): # read reference molecule
19 sys.exit('Error: reading reference molecule failed')
20
21 except Exception as e: # handle exception raised in case of severe read errors
22 sys.exit('Error: reading reference molecule failed: ' + str(e))
23
24 return mol
25
26def parseArgs() -> argparse.Namespace:
27 parser = argparse.ArgumentParser(description='Aligns a set of input molecules onto a given reference molecule by means of Gaussian shape alignment.')
28
29 parser.add_argument('-r',
30 dest='ref_mol_file',
31 required=True,
32 metavar='<file>',
33 help='Reference molecule input file')
34 parser.add_argument('-i',
35 dest='in_file',
36 required=True,
37 metavar='<file>',
38 help='Molecule input file')
39 parser.add_argument('-o',
40 dest='out_file',
41 required=True,
42 metavar='<file>',
43 help='Aligned molecule output file')
44 parser.add_argument('-s',
45 dest='scoring_func',
46 required=False,
47 default='ShapeTanimotoScore',
48 metavar='<string>',
49 help='Scoring function to use for assessing computed alignments (default: ShapeTanimotoScore, valid other \
50 values: TotalOverlapTanimotoScore, ColorTanimotoScore, TanimotoComboScore, TotalOverlapTverskyScore, \
51 ShapeTverskyScore, ColorTverskyScore, TverskyComboScore, ReferenceTotalOverlapTverskyScore, \
52 ReferenceShapeTverskyScore, ReferenceColorTverskyScore, ReferenceTverskyComboScore, \
53 AlignedTotalOverlapTverskyScore, AlignedShapeTverskyScore, AlignedColorTverskyScore, \
54 AlignedTverskyComboScore)')
55 parser.add_argument('-p',
56 dest='inc_ph4_ftrs',
57 required=False,
58 action='store_true',
59 default=False,
60 help='Regard pharmacophoric (= color) features (default: false)')
61 parser.add_argument('-f',
62 dest='fast',
63 required=False,
64 action='store_true',
65 default=False,
66 help='Perform a fast but less accurate shape alignment (default: false)')
67 parser.add_argument('-m',
68 dest='max_order',
69 required=False,
70 metavar='<integer>',
71 default=4,
72 help='Maximum order of the Gaussian sphere overlap products (only effective in absence of option -f, default: 4)',
73 type=int)
74 parser.add_argument('-q',
75 dest='quiet',
76 required=False,
77 action='store_true',
78 default=False,
79 help='Disable progress output (default: false)')
80
81 return parser.parse_args()
82
83def main() -> None:
84 args = parseArgs()
85
86 # read the reference molecule
87 ref_mol = readRefMolecule(args.ref_mol_file)
88
89 # if necessary, prepare the ref. molecule for pharm. feature perception
90 if args.inc_ph4_ftrs:
91 Pharm.prepareForPharmacophoreGeneration(ref_mol)
92
93 # create reader for input molecules (format specified by file extension)
94 mol_reader = Chem.MoleculeReader(args.in_file)
95
96 Chem.setMultiConfImportParameter(mol_reader, False) # treat input molecule conformers as individual molecules
97
98 # create writer for aligned molecules (format specified by file extension)
99 mol_writer = Chem.MolecularGraphWriter(args.out_file)
100
101 # create an instance of the default implementation of the Chem.Molecule interface
102 mol = Chem.BasicMolecule()
103
104 # create an instance of the specified alignment scoring function
105 scoring_func = getattr(Shape, args.scoring_func)()
106
107 # create an instance of the class implementing the Gaussian shape alignment algorithm
108 if args.fast:
109 almnt = Shape.FastGaussianShapeAlignment()
110 else:
111 almnt = Shape.GaussianShapeAlignment()
112
113 # check and apply value of the -d option
114 if args.max_order <= 0:
115 sys.exit('Error: -d argument value has to be > 0')
116
117 almnt.setMaxOrder(args.max_order)
118
119 # set scoring functions
120 almnt.setScoringFunction(scoring_func)
121 almnt.setResultCompareFunction(scoring_func)
122
123 # create and setup an instance of the class implementing Gaussian shape generation from molecular graphs
124 shape_gen = Shape.GaussianShapeGenerator()
125
126 shape_gen.generatePharmacophoreShape(args.inc_ph4_ftrs) # apply -p option
127 shape_gen.multiConformerMode(False) # only one shape per molecule (input molecule conformers are read separately)
128
129 # set the alignment reference shape
130 almnt.addReferenceShapes(shape_gen.generate(ref_mol))
131
132 # read and process input molecules one after the other until the end of input has been reached
133 try:
134 i = 1
135
136 while mol_reader.read(mol):
137 # compose a simple molecule identifier
138 mol_id = Chem.getName(mol).strip()
139
140 if mol_id == '':
141 mol_id = '#' + str(i) # fallback if name is empty
142 else:
143 mol_id = f'\'{mol_id}\' (#{i})'
144
145 if not args.quiet:
146 print(f'- Aligning molecule {mol_id}...')
147
148 try:
149 # if necessary, prepare the molecule to align for pharm. feature perception
150 if args.inc_ph4_ftrs:
151 Pharm.prepareForPharmacophoreGeneration(mol)
152
153 mol_shape = shape_gen.generate(mol) # generate Gaussian shape of input molecule to align
154
155 if not almnt.align(mol_shape): # perform the shape alignment and check for errors
156 if not args.quiet:
157 print(' -> alignment failed')
158 continue
159
160 if Chem.hasStructureData(mol): # get existing structure data if available
161 struct_data = Chem.getStructureData(mol)
162 else: # otherwise create and set new structure data
163 struct_data = Chem.StringDataBlock()
164
165 Chem.setStructureData(mol, struct_data)
166
167 # add alignment score entry to struct. data
168 struct_data.addEntry(f'<{args.scoring_func}>', format(almnt[0].getScore(), '.4f'))
169
170 if not args.quiet:
171 print(f' -> computed alignment with score {almnt[0].getScore()}')
172
173 # transform input molecule coordinates according to the computed shape alignment
174 Chem.transform3DCoordinates(mol, almnt[0].getTransform())
175
176 try:
177 if not mol_writer.write(mol): # output the alignment pose of the molecule
178 sys.exit(f'Error: writing alignment pose of molecule {mol_id} failed')
179
180 except Exception as e: # handle exception raised in case of severe write errors
181 sys.exit(f'Error: writing alignment pose of molecule {mol_id} failed: {str(e)}')
182
183 except Exception as e:
184 sys.exit(f'Error: shape alignment of molecule {mol_id} failed: {str(e)}')
185
186 i += 1
187
188 except Exception as e: # handle exception raised in case of severe read errors
189 sys.exit(f'Error: reading input molecule failed: {str(e)}')
190
191 mol_writer.close()
192 sys.exit(0)
193
194if __name__ == '__main__':
195 main()