Chemical Data Processing Library C++ API - Version 1.4.0
CanonicalNumberingCalculator.hpp
Go to the documentation of this file.
1 /*
2  * CanonicalNumberingCalculator.hpp
3  *
4  * B.D. McKay, Practical graph isomorphism. Proceedings of
5  * the 10th Manitoba Conference on Numerical Maths and
6  * Computing. Congressus Numerantium, 30 (1981) 45-87.
7  *
8  * This file is part of the Chemical Data Processing Toolkit
9  *
10  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
11  *
12  * This library is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2 of the License, or (at your option) any later version.
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this library; see the file COPYING. If not, write to
24  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25  * Boston, MA 02111-1307, USA.
26  */
27 
33 #ifndef CDPL_CHEM_CANONICALNUMBERINGCALCULATOR_HPP
34 #define CDPL_CHEM_CANONICALNUMBERINGCALCULATOR_HPP
35 
36 #include <cstddef>
37 #include <cstdint>
38 #include <vector>
39 #include <utility>
40 #include <functional>
41 
42 #include "CDPL/Chem/APIPrefix.hpp"
46 #include "CDPL/Util/Array.hpp"
47 #include "CDPL/Util/BitSet.hpp"
49 
50 
51 namespace CDPL
52 {
53 
54  namespace Chem
55  {
56 
57  class MolecularGraph;
58  class Fragment;
59  class Atom;
60  class Bond;
61 
72  {
73 
74  public:
79  static constexpr unsigned int DEF_ATOM_PROPERTY_FLAGS =
82 
87  static constexpr unsigned int DEF_BOND_PROPERTY_FLAGS =
89 
/**
 * Type of the generic functor used to retrieve the (implicit + explicit)
 * hydrogen count of an atom. The functor receives the atom and a molecular
 * graph and returns the count as a std::size_t.
 */
93  typedef std::function<std::size_t(const Atom&, const MolecularGraph&)> HydrogenCountFunction;
94 
99 
109 
111 
113 
/**
 * Specifies the set of atomic properties that has to be considered by the
 * canonical numbering algorithm.
 * \param flags The set of atomic properties to consider.
 *              NOTE(review): presumably a bitwise OR combination of the
 *              constants in namespace Chem::AtomPropertyFlag - confirm.
 */
130  void setAtomPropertyFlags(unsigned int flags);
131 
     /**
      * Returns the set of atomic properties that gets considered by the
      * canonical numbering algorithm.
      * \return The currently configured atomic property flags.
      */
138  unsigned int getAtomPropertyFlags() const;
139 
     /**
      * Specifies the set of bond properties that has to be considered by the
      * canonical numbering algorithm.
      * \param flags The set of bond properties to consider.
      *              NOTE(review): presumably a bitwise OR combination of the
      *              constants in namespace Chem::BondPropertyFlag - confirm.
      */
153  void setBondPropertyFlags(unsigned int flags);
154 
     /**
      * Returns the set of bond properties that gets considered by the
      * canonical numbering algorithm.
      * \return The currently configured bond property flags.
      */
161  unsigned int getBondPropertyFlags() const;
162 
168 
174 
     /**
      * Performs a canonical numbering of the atoms in the molecular graph
      * molgraph.
      * \param molgraph The molecular graph whose atoms are numbered.
      * \param numbering An array of std::size_t values passed by non-const
      *                  reference. NOTE(review): presumably receives the
      *                  resulting canonical numbers; the exact layout is not
      *                  visible in this header - confirm.
      */
182  void calculate(const MolecularGraph& molgraph, Util::STArray& numbering);
183 
184  private:
     // Nested helper classes; both are defined further down in this class.
185  class AtomNode;
186  class Edge;
187 
     // Sequence of raw pointers to Edge objects.
189  typedef std::vector<Edge*> EdgeList;
190 
     // Flat sequence of 64-bit unsigned values. AtomNode and Edge expose
     // append*() members that take a ConnectionTable by reference.
191  typedef std::vector<std::uint64_t> ConnectionTable;
192 
     // NOTE(review): the definitions of the member functions below are not part
     // of this header, so the remarks that follow are based on the signatures
     // only and have to be confirmed against the implementation file.

     // Take the same argument types as the public calculate() member.
193  void init(const MolecularGraph&, Util::STArray&);
194  void setup(const MolecularGraph&);
195 
     // NOTE(review): the meaning of the std::size_t argument is not visible here.
197  void canonicalize(std::size_t);
198 
199  void processNewSolution();
200 
     // NOTE(review): the meaning of the int result is not visible here
     // (presumably the outcome of a comparison) - confirm.
201  int testNewSolution();
202 
     // Writers into a caller-supplied ConnectionTable. Only
     // buildConnectionTable() is const; the two config appenders are not.
203  void buildConnectionTable(ConnectionTable& ctab) const;
204  void appendAtomConfigs(ConnectionTable& ctab);
205  void appendBondConfigs(ConnectionTable& ctab);
206 
207  void establishCanonNumbering(Util::STArray&);
208 
209  void saveState();
210  void restoreState();
211 
     // The parameter lists of allocNode()/allocEdge() mirror those of
     // AtomNode::init()/Edge::init().
     // NOTE(review): the type name `Calculator` is not declared anywhere in the
     // visible part of this header; presumably it denotes the enclosing
     // calculator class - confirm against the original source file.
212  AtomNode* allocNode(Calculator* calculator, const Atom* atom, std::uint64_t label, std::size_t id);
213 
214  Edge* allocEdge(const Calculator* calculator, const Bond* bond, std::uint64_t label,
215  AtomNode* nbr_node, std::size_t id);
216 
// Private helper class that ties a Chem::Atom pointer to labeling state
// (three label fields and an ID), a list of Edge pointers and stereo-related
// bookkeeping flags. Only declarations are visible in this header, so remarks
// about behavior are marked as assumptions.
217  class AtomNode
218  {
219 
220  public:
     // Read-only iterator over the node's edge list.
221  typedef EdgeList::const_iterator EdgeIterator;
222 
223  AtomNode();
224 
     // Takes the calculator pointer, the atom, a label and an ID.
     // NOTE(review): presumably (re)initializes the corresponding data
     // members declared at the end of this class - confirm.
225  void init(Calculator* calculator, const Atom* atom, std::uint64_t label, std::size_t id);
226 
227  const Atom* getAtom() const;
228 
229  void addEdge(Edge* edge);
230 
231  std::uint64_t getLabel() const;
232 
     // Note the differing argument types: setLabel() takes a 64-bit value,
     // setNewLabel() a std::size_t (matching the `label` and `newLabel` members).
233  void setLabel(std::uint64_t label);
234  void setNewLabel(std::size_t label);
235 
     // NOTE(review): presumably transfers the pending new label into the
     // current label - confirm.
236  void updateLabel();
237 
238  std::size_t getID() const;
239 
     // NOTE(review): presumably orders `edges` using Edge::LessCmpFunc - confirm.
240  void sortEdges();
241 
242  std::size_t getNumEdges() const;
243 
244  EdgeIterator getEdgesBegin() const;
245  EdgeIterator getEdgesEnd() const;
246 
     // Writers into a caller-supplied ConnectionTable. appendAtomConfigData()
     // is the only non-const one of the three.
247  void appendConnectivityData(ConnectionTable& ctab) const;
248  void appendBondConfigData(ConnectionTable& ctab) const;
249  void appendAtomConfigData(ConnectionTable& ctab);
250 
     // Non-const query. NOTE(review): presumably caches its result in
     // partOfStereocenter/partOfStereocenterValid - confirm.
251  bool involvedInStereocenter();
252 
253  bool isEquivalent(const AtomNode* node) const;
254  bool isNonEquivalent(const AtomNode* node) const;
255 
256  void addToEquivalenceSet(const AtomNode* node);
257  void addToNonEquivalenceSet(const AtomNode* node);
258 
259  static bool terminalAndOnCommonNonStereoNode(const AtomNode* node1, const AtomNode* node2);
260 
     // Binary predicate over two node pointers; the ordering criterion is
     // defined outside this header.
261  struct LessCmpFunc
262  {
263 
264  bool operator()(const AtomNode*, const AtomNode*) const;
265  };
266 
267  private:
268  bool initConfigurationData();
269 
     // NOTE(review): `Calculator` is not declared in the visible part of this
     // header; presumably it denotes the enclosing calculator class - confirm.
270  Calculator* calculator;
271  const Atom* atom;
     // Label state: two 64-bit labels plus a std::size_t "new" label.
272  std::uint64_t initialLabel;
273  std::uint64_t label;
274  std::size_t newLabel;
275  std::size_t id;
     // NOTE(review): presumably backs the (non-)equivalence members above - confirm.
276  Util::BitSet equivNodeMask;
277  EdgeList edges;
278  StereoDescriptor stereoDescr;
     // Boolean state flags. NOTE(review): the *Valid names suggest that they
     // mark lazily computed values as up to date - confirm.
279  bool hasConfiguration;
280  bool configDataValid;
281  bool partOfStereocenter;
282  bool partOfStereocenterValid;
283  };
284 
// Private helper class that ties a Chem::Bond pointer to a neighboring
// AtomNode, a 64-bit label, an ID and stereo-related bookkeeping. Only
// declarations are visible in this header, so remarks about behavior are
// marked as assumptions.
285  class Edge
286  {
287 
288  public:
289  Edge();
290 
     // Takes the calculator pointer (const here), the bond, a label, the
     // neighbor node and an ID.
     // NOTE(review): presumably (re)initializes the corresponding data
     // members declared at the end of this class - confirm.
291  void init(const Calculator* calculator, const Bond* bond, std::uint64_t label,
292  AtomNode* nbr_node, std::size_t id);
293 
     // Writers into a caller-supplied ConnectionTable. appendBondData() is
     // const; appendConfigurationData() is not and additionally takes a node.
294  void appendBondData(ConnectionTable&) const;
295  void appendConfigurationData(const AtomNode* node, ConnectionTable& ctab);
296 
297  AtomNode* getNeighborNode() const;
298 
     // Non-const query taking a node.
     // NOTE(review): the role of the node argument is not visible here.
299  bool representsStereoBond(const AtomNode* node);
300 
301  std::size_t getID() const;
302 
     // Binary predicate over two edge pointers; the ordering criterion is
     // defined outside this header.
303  struct LessCmpFunc
304  {
305 
306  bool operator()(const Edge*, const Edge*) const;
307  };
308 
309  private:
310  bool initConfigurationData(const AtomNode* node);
311 
     // NOTE(review): `Calculator` is not declared in the visible part of this
     // header; presumably it denotes the enclosing calculator class - confirm.
312  const Calculator* calculator;
313  const Bond* bond;
314  AtomNode* nbrNode;
315  std::uint64_t label;
316  std::size_t id;
317  StereoDescriptor stereoDescr;
     // Boolean state flags. NOTE(review): configDataValid presumably marks the
     // configuration data as already computed - confirm.
318  bool hasConfiguration;
319  bool configDataValid;
320  };
321 
// Pairs a pointer to a Fragment with a pointer to a ConnectionTable; both
// pointees are const.
322  typedef std::pair<const Fragment*, const ConnectionTable*> CanonComponentInfo;
323 
     // Binary predicate over two CanonComponentInfo pairs; the comparison
     // criterion is defined outside this header.
324  struct ComponentCmpFunc
325  {
326 
327  bool operator()(const CanonComponentInfo&, const CanonComponentInfo&) const;
328  };
329 
// Container and helper typedefs used by the data members below.
// NodeLabelingState pairs a node pointer with a 64-bit value.
// NOTE(review): presumably a saved label, see saveState()/restoreState() - confirm.
330  typedef std::pair<AtomNode*, std::uint64_t> NodeLabelingState;
331  typedef std::vector<NodeLabelingState> NodeLabelingStack;
332  typedef std::vector<AtomNode*> NodeList;
333  typedef std::vector<ConnectionTable> ConnectionTableList;
334  typedef std::vector<CanonComponentInfo> CanonComponentList;
     // Util::ObjectStack instantiations for the two helper classes.
     // NOTE(review): presumably object caches backing allocNode()/allocEdge() - confirm.
335  typedef Util::ObjectStack<AtomNode> NodeCache;
336  typedef Util::ObjectStack<Edge> EdgeCache;
337 
     // Data members. Their declaration order determines construction order,
     // so it must not be changed casually.
338  NodeCache nodeCache;
339  EdgeCache edgeCache;
     // Values accessed via the public get/set*PropertyFlags() members
     // (assumed from the names - definitions are not visible here).
340  unsigned int atomPropertyFlags;
341  unsigned int bondPropertyFlags;
342  HydrogenCountFunction hCountFunc;
343  bool foundStereogenicAtoms;
344  bool foundStereogenicBonds;
     // Pointer to a const molecular graph.
     // NOTE(review): presumably the graph passed to calculate() - confirm.
345  const MolecularGraph* molGraph;
346  NodeList allocNodes;
347  EdgeList allocEdges;
348  NodeList nodeList;
349  NodeList equivNodeStack;
350  NodeLabelingStack nodeLabelingStack;
351  ConnectionTableList compConnectionTables;
352  ConnectionTableList levelConnectionTables;
353  ConnectionTable testConnectionTable;
354  NodeList minNodeList;
355  CanonComponentList canonComponentList;
356  Util::BitSet visitedEdgeMask;
357  };
358  } // namespace Chem
359 } // namespace CDPL
360 
361 #endif // CDPL_CHEM_CANONICALNUMBERINGCALCULATOR_HPP
Definition of class CDPL::Util::Array.
Declaration of type CDPL::Util::BitSet.
Definition of constants in namespace CDPL::Chem::BondPropertyFlag.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of constants in namespace CDPL::Chem::AtomPropertyFlag.
Definition of class CDPL::Util::ObjectStack.
Definition of the type CDPL::Chem::StereoDescriptor.
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Abstract base class representing a chemical bond between two Chem::Atom instances.
Definition: Bond.hpp:54
Calculation of canonical atom numberings for molecular graphs using McKay's algorithm.
Definition: CanonicalNumberingCalculator.hpp:72
void setHydrogenCountFunction(const HydrogenCountFunction &func)
Specifies a function for the retrieval of the hydrogen count of an atom.
void setAtomPropertyFlags(unsigned int flags)
Allows to specify the set of atomic properties that has to be considered by the canonical numbering algorithm.
const HydrogenCountFunction & getHydrogenCountFunction()
Returns the function used for the retrieval of the hydrogen count of an atom.
unsigned int getBondPropertyFlags() const
Returns the set of bond properties that gets considered by the canonical numbering algorithm.
unsigned int getAtomPropertyFlags() const
Returns the set of atomic properties that gets considered by the canonical numbering algorithm.
std::function< std::size_t(const Atom &, const MolecularGraph &)> HydrogenCountFunction
Type of the generic functor used to retrieve the (implicit + explicit) hydrogen count of an atom.
Definition: CanonicalNumberingCalculator.hpp:93
CanonicalNumberingCalculator & operator=(const CanonicalNumberingCalculator &)=delete
CanonicalNumberingCalculator()
Constructs the CanonicalNumberingCalculator instance.
CanonicalNumberingCalculator(const CanonicalNumberingCalculator &)=delete
CanonicalNumberingCalculator(const MolecularGraph &molgraph, Util::STArray &numbering)
Constructs the CanonicalNumberingCalculator instance and performs a canonical numbering of the atoms in the molecular graph molgraph.
void calculate(const MolecularGraph &molgraph, Util::STArray &numbering)
Performs a canonical numbering of the atoms in the molecular graph molgraph.
void setBondPropertyFlags(unsigned int flags)
Allows to specify the set of bond properties that has to be considered by the canonical numbering algorithm.
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
Data structure for the storage and retrieval of stereochemical information about atoms and bonds.
Definition: StereoDescriptor.hpp:102
constexpr unsigned int FORMAL_CHARGE
Specifies the formal charge of an atom.
Definition: Chem/AtomPropertyFlag.hpp:73
constexpr unsigned int H_COUNT
Specifies the hydrogen count of an atom.
Definition: Chem/AtomPropertyFlag.hpp:78
constexpr unsigned int AROMATICITY
Specifies the membership of an atom in aromatic rings.
Definition: Chem/AtomPropertyFlag.hpp:93
constexpr unsigned int ISOTOPE
Specifies the isotopic mass of an atom.
Definition: Chem/AtomPropertyFlag.hpp:68
constexpr unsigned int CONFIGURATION
Specifies the configuration of a stereogenic atom.
Definition: Chem/AtomPropertyFlag.hpp:98
constexpr unsigned int TYPE
Specifies the generic type or element of an atom.
Definition: Chem/AtomPropertyFlag.hpp:63
constexpr unsigned int AROMATICITY
Specifies the membership of a bond in aromatic rings.
Definition: BondPropertyFlag.hpp:73
constexpr unsigned int ORDER
Specifies the order of a bond.
Definition: BondPropertyFlag.hpp:63
constexpr unsigned int CONFIGURATION
Specifies the steric configuration of a double bond.
Definition: BondPropertyFlag.hpp:78
CDPL_CHEM_API void canonicalize(MolecularGraph &molgraph, const AtomCompareFunction &func, bool atoms=true, bool atom_nbrs=true, bool bonds=true, bool bond_atoms=false)
Reorders the atoms (and optionally their neighbors and bonds) of molgraph according to a user-supplie...
Array< std::size_t > STArray
Array storing unsigned integers of type std::size_t.
Definition: Array.hpp:575
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.
Definition: CanonicalNumberingCalculator.hpp:262
bool operator()(const AtomNode *, const AtomNode *) const
Definition: CanonicalNumberingCalculator.hpp:304
bool operator()(const Edge *, const Edge *) const