7.1. Gaussian Shape-based Molecule Alignment

The script align_mols_by_shape.py overlays a set of input molecules with a given reference molecule based on their Gaussian shape representations and outputs the molecules translated/rotated to the calculated alignment pose(s).

Synopsis

python align_mols_by_shape.py [-h] -r <file> -i <file> -o <file> [-s <string>] [-p] [-f] [-m <integer>] [-q]

Mandatory options

-r <file>

Reference molecule input file

-i <file>

Molecule input file

-o <file>

Aligned molecule output file

Other options

-h, --help

Show help message and exit

-p

Regard pharmacophoric (= color) features (default: false)

-f

Perform a fast but less accurate shape alignment (default: false)

-s <string>

Scoring function to use for assessing computed alignments (default: ShapeTanimotoScore, valid other values: TotalOverlapTanimotoScore, ColorTanimotoScore, TanimotoComboScore, TotalOverlapTverskyScore, ShapeTverskyScore, ColorTverskyScore, TverskyComboScore, ReferenceTotalOverlapTverskyScore, ReferenceShapeTverskyScore, ReferenceColorTverskyScore, ReferenceTverskyComboScore, AlignedTotalOverlapTverskyScore, AlignedShapeTverskyScore, AlignedColorTverskyScore, AlignedTverskyComboScore)

-m <integer>

Maximum order of the Gaussian sphere overlap products (only effective in absence of option -f, default: 4)

-d <float>

Minimum required RMSD between two consecutively output molecule alignment poses (default: 0.0)

-q

Disable progress output (default: false)

Code

  1import sys
  2import argparse
  3
  4import CDPL.Chem as Chem
  5import CDPL.Shape as Shape
  6import CDPL.Pharm as Pharm
  7
  8
  9# reads and returns the specified alignment reference molecule
 10def readRefMolecule(filename: str) -> Chem.Molecule:
 11    # create molecule reader instance
 12    reader = Chem.MoleculeReader(filename)
 13
 14    # create an instance of the default implementation of the Chem.Molecule interface
 15    mol = Chem.BasicMolecule()
 16
 17    try:
 18        if not reader.read(mol): # read reference molecule
 19            sys.exit('Error: reading reference molecule failed')
 20                
 21    except Exception as e:       # handle exception raised in case of severe read errors
 22        sys.exit('Error: reading reference molecule failed: ' + str(e))
 23
 24    return mol
 25
 26def parseArgs() -> argparse.Namespace:
 27    parser = argparse.ArgumentParser(description='Aligns a set of input molecules onto a given reference molecule by means of Gaussian shape alignment.')
 28
 29    parser.add_argument('-r',
 30                        dest='ref_mol_file',
 31                        required=True,
 32                        metavar='<file>',
 33                        help='Reference molecule input file')
 34    parser.add_argument('-i',
 35                        dest='in_file',
 36                        required=True,
 37                        metavar='<file>',
 38                        help='Molecule input file')
 39    parser.add_argument('-o',
 40                        dest='out_file',
 41                        required=True,
 42                        metavar='<file>',
 43                        help='Aligned molecule output file')
 44    parser.add_argument('-s',
 45                        dest='scoring_func',
 46                        required=False,
 47                        default='ShapeTanimotoScore',
 48                        metavar='<string>',
 49                        help='Scoring function to use for assessing computed alignments (default: ShapeTanimotoScore, valid other \
 50                        values: TotalOverlapTanimotoScore, ColorTanimotoScore, TanimotoComboScore, TotalOverlapTverskyScore, \
 51                        ShapeTverskyScore, ColorTverskyScore, TverskyComboScore, ReferenceTotalOverlapTverskyScore, \
 52                        ReferenceShapeTverskyScore, ReferenceColorTverskyScore, ReferenceTverskyComboScore, \
 53                        AlignedTotalOverlapTverskyScore, AlignedShapeTverskyScore, AlignedColorTverskyScore, \
 54                        AlignedTverskyComboScore)')
 55    parser.add_argument('-p',
 56                        dest='inc_ph4_ftrs',
 57                        required=False,
 58                        action='store_true',
 59                        default=False,
 60                        help='Regard pharmacophoric (= color) features (default: false)')
 61    parser.add_argument('-f',
 62                        dest='fast',
 63                        required=False,
 64                        action='store_true',
 65                        default=False,
 66                        help='Perform a fast but less accurate shape alignment (default: false)')
 67    parser.add_argument('-m',
 68                        dest='max_order',
 69                        required=False,
 70                        metavar='<integer>',
 71                        default=4,
 72                        help='Maximum order of the Gaussian sphere overlap products (only effective in absence of option -f, default: 4)',
 73                        type=int)
 74    parser.add_argument('-q',
 75                        dest='quiet',
 76                        required=False,
 77                        action='store_true',
 78                        default=False,
 79                        help='Disable progress output (default: false)')
 80  
 81    return parser.parse_args()
 82
 83def main() -> None:
 84    args = parseArgs()
 85
 86    # read the reference molecule
 87    ref_mol = readRefMolecule(args.ref_mol_file) 
 88
 89    # if necessary, prepare the ref. molecule for pharm. feature perception
 90    if args.inc_ph4_ftrs:
 91        Pharm.prepareForPharmacophoreGeneration(ref_mol)
 92    
 93    # create reader for input molecules (format specified by file extension)
 94    mol_reader = Chem.MoleculeReader(args.in_file) 
 95
 96    Chem.setMultiConfImportParameter(mol_reader, False) # treat input molecule conformers as individual molecules
 97    
 98    # create writer for aligned molecules (format specified by file extension)
 99    mol_writer = Chem.MolecularGraphWriter(args.out_file) 
100
101    # create an instance of the default implementation of the Chem.Molecule interface
102    mol = Chem.BasicMolecule()
103
104    # create an instance of the specified alignment scoring function
105    scoring_func = getattr(Shape, args.scoring_func)()
106
107    # create an instance of the class implementing the Gaussian shape alignment algorithm
108    if args.fast:
109        almnt = Shape.FastGaussianShapeAlignment()
110    else:
111        almnt = Shape.GaussianShapeAlignment()
112
113         # check and apply value of the -d option
114        if args.max_order <= 0:
115            sys.exit('Error: -d argument value has to be > 0')
116            
117        almnt.setMaxOrder(args.max_order)
118
119    # set scoring functions
120    almnt.setScoringFunction(scoring_func)
121    almnt.setResultCompareFunction(scoring_func)
122    
123    # create and setup an instance of the class implementing Gaussian shape generation from molecular graphs
124    shape_gen = Shape.GaussianShapeGenerator()
125
126    shape_gen.generatePharmacophoreShape(args.inc_ph4_ftrs) # apply -p option
127    shape_gen.multiConformerMode(False)                     # only one shape per molecule (input molecule conformers are read separately)
128
129    # set the alignment reference shape
130    almnt.addReferenceShapes(shape_gen.generate(ref_mol))
131    
132    # read and process input molecules one after the other until the end of input has been reached
133    try:
134        i = 1
135
136        while mol_reader.read(mol):
137            # compose a simple molecule identifier
138            mol_id = Chem.getName(mol).strip() 
139
140            if mol_id == '':
141                mol_id = '#' + str(i)  # fallback if name is empty
142            else:
143                mol_id = f'\'{mol_id}\' (#{i})'
144
145            if not args.quiet:
146                print(f'- Aligning molecule {mol_id}...')
147
148            try:
149                # if necessary, prepare the molecule to align for pharm. feature perception
150                if args.inc_ph4_ftrs:
151                    Pharm.prepareForPharmacophoreGeneration(mol)
152
153                mol_shape = shape_gen.generate(mol) # generate Gaussian shape of input molecule to align
154
155                if not almnt.align(mol_shape):      # perform the shape alignment and check for errors
156                    if not args.quiet:
157                        print(' -> alignment failed')
158                        continue
159                
160                if Chem.hasStructureData(mol):      # get existing structure data if available
161                    struct_data = Chem.getStructureData(mol)
162                else:                               # otherwise create and set new structure data
163                    struct_data = Chem.StringDataBlock()
164
165                    Chem.setStructureData(mol, struct_data)
166
167                # add alignment score entry to struct. data
168                struct_data.addEntry(f'<{args.scoring_func}>', format(almnt[0].getScore(), '.4f')) 
169
170                if not args.quiet:
171                    print(f' -> computed alignment with score {almnt[0].getScore()}')
172
173                # transform input molecule coordinates according to the computed shape alignment
174                Chem.transform3DCoordinates(mol, almnt[0].getTransform())
175                 
176                try:
177                    if not mol_writer.write(mol): # output the alignment pose of the molecule
178                        sys.exit(f'Error: writing alignment pose of molecule {mol_id} failed')
179
180                except Exception as e:            # handle exception raised in case of severe write errors
181                    sys.exit(f'Error: writing alignment pose of molecule {mol_id} failed: {str(e)}')
182
183            except Exception as e:
184                sys.exit(f'Error: shape alignment of molecule {mol_id} failed: {str(e)}')
185
186            i += 1
187                
188    except Exception as e: # handle exception raised in case of severe read errors
189        sys.exit(f'Error: reading input molecule failed: {str(e)}')
190
191    mol_writer.close()
192    sys.exit(0)
193        
194if __name__ == '__main__':
195    main()

Download source file