Chemical Data Processing Library C++ API - Version 1.2.1
CanonicalNumberingCalculator.hpp
Go to the documentation of this file.
1 /*
2  * CanonicalNumberingCalculator.hpp
3  *
4  * B.D. McKay, Practical graph isomorphism. Proceedings of
5  * the 10th Manitoba Conference on Numerical Maths and
6  * Computing. Congressus Numerantium, 30 (1981) 45-87.
7  *
8  * This file is part of the Chemical Data Processing Toolkit
9  *
10  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
11  *
12  * This library is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2 of the License, or (at your option) any later version.
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this library; see the file COPYING. If not, write to
24  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25  * Boston, MA 02111-1307, USA.
26  */
27 
33 #ifndef CDPL_CHEM_CANONICALNUMBERINGCALCULATOR_HPP
34 #define CDPL_CHEM_CANONICALNUMBERINGCALCULATOR_HPP
35 
36 #include <cstddef>
37 #include <cstdint>
38 #include <vector>
39 #include <utility>
40 #include <functional>
41 
42 #include "CDPL/Chem/APIPrefix.hpp"
46 #include "CDPL/Util/Array.hpp"
47 #include "CDPL/Util/BitSet.hpp"
49 
50 
51 namespace CDPL
52 {
53 
54  namespace Chem
55  {
56 
57  class MolecularGraph;
58  class Fragment;
59  class Atom;
60  class Bond;
61 
67  {
68 
69  public:
74  static constexpr unsigned int DEF_ATOM_PROPERTY_FLAGS =
77 
82  static constexpr unsigned int DEF_BOND_PROPERTY_FLAGS =
84 
/**
 * Callable type that takes an atom and a molecular graph and returns a std::size_t.
 * NOTE(review): judging by the name, the returned value is the hydrogen count of
 * the given atom within the given molecular graph - confirm against the implementation.
 */
85  typedef std::function<std::size_t(const Atom&, const MolecularGraph&)> HydrogenCountFunction;
86 
91 
101 
103 
105 
/**
 * \brief Specifies the set of atomic properties that has to be considered by the
 *        canonical numbering algorithm.
 * \param flags The atomic properties to consider (presumably a bitwise OR
 *              combination of Chem::AtomPropertyFlag constants - TODO confirm).
 */
122  void setAtomPropertyFlags(unsigned int flags);
123 
/**
 * \brief Returns the set of atomic properties that gets considered by the
 *        canonical numbering algorithm.
 * \return The currently configured atomic property flags.
 */
130  unsigned int getAtomPropertyFlags() const;
131 
/**
 * \brief Specifies the set of bond properties that has to be considered by the
 *        canonical numbering algorithm.
 * \param flags The bond properties to consider (presumably a bitwise OR
 *              combination of Chem::BondPropertyFlag constants - TODO confirm).
 */
145  void setBondPropertyFlags(unsigned int flags);
146 
/**
 * \brief Returns the set of bond properties that gets considered by the
 *        canonical numbering algorithm.
 * \return The currently configured bond property flags.
 */
153  unsigned int getBondPropertyFlags() const;
154 
156 
158 
/**
 * \brief Performs a canonical numbering of the atoms in the molecular graph \a molgraph.
 * \param molgraph The molecular graph whose atoms are numbered.
 * \param numbering An array of std::size_t values passed by non-const reference
 *                  (presumably receives the calculated canonical atom numbers -
 *                  TODO confirm layout and indexing against the implementation).
 */
166  void calculate(const MolecularGraph& molgraph, Util::STArray& numbering);
167 
168  private:
// Forward declarations of the nested helper types; both are defined further
// down in this class.
169  class AtomNode;
170  class Edge;
171 
// List of non-owning-looking raw Edge pointers (ownership is not visible in
// this header - TODO confirm who frees the edges).
173  typedef std::vector<Edge*> EdgeList;
174 
// Flat sequence of 64-bit unsigned values; filled by the various
// append*/build* member functions declared in this class.
175  typedef std::vector<std::uint64_t> ConnectionTable;
176 
// Private processing steps of the calculator. Only the declarations are
// visible in this header; the notes below are derived from the signatures
// and names alone and must be confirmed against the implementation.

// Take the same argument types as the public calculate() method
// (molecular graph, and for init() also the numbering array).
177  void init(const MolecularGraph&, Util::STArray&);
178  void setup(const MolecularGraph&);
179 
// NOTE(review): the meaning of the std::size_t argument is not visible here
// (it is unnamed) - presumably a search depth or node index; confirm.
181  void canonicalize(std::size_t);
182 
183  void processNewSolution();
184 
// Returns an int rather than a bool - presumably a three-way comparison
// result (<0, 0, >0); TODO confirm.
185  int testNewSolution();
186 
// These three write into the ConnectionTable passed by reference. Only
// buildConnectionTable() is const, i.e. the two append*Configs() methods
// may modify calculator state.
187  void buildConnectionTable(ConnectionTable& ctab) const;
188  void appendAtomConfigs(ConnectionTable& ctab);
189  void appendBondConfigs(ConnectionTable& ctab);
190 
// Receives the numbering array by non-const reference, like calculate().
191  void establishCanonNumbering(Util::STArray&);
192 
// NOTE(review): which state is saved/restored is not visible in this header.
193  void saveState();
194  void restoreState();
195 
// Node/edge factory helpers. Their parameter lists match AtomNode::init()
// and Edge::init() respectively, so they presumably obtain an object and
// initialize it with the given arguments - TODO confirm.
// NOTE(review): the type name Calculator is not declared in the visible
// lines of this listing; presumably an alias for the enclosing class.
196  AtomNode* allocNode(Calculator* calculator, const Atom* atom, std::uint64_t label, std::size_t id);
197 
198  Edge* allocEdge(const Calculator* calculator, const Bond* bond, std::uint64_t label,
199  AtomNode* nbr_node, std::size_t id);
200 
// Graph node type used internally by the calculator; each instance refers to
// one Atom and keeps a list of Edge pointers. Only declarations are visible
// in this header, so behavioral notes below are derived from signatures and
// member names and are marked where they are assumptions.
201  class AtomNode
202  {
203 
204  public:
// Read-only iterator over the node's edge list.
205  typedef EdgeList::const_iterator EdgeIterator;
206 
207  AtomNode();
208 
// Two-phase initialization: the default constructor takes no arguments and
// the actual values are supplied here (parameters mirror the private
// members calculator, atom, label and id).
209  void init(Calculator* calculator, const Atom* atom, std::uint64_t label, std::size_t id);
210 
211  const Atom* getAtom() const;
212 
213  void addEdge(Edge* edge);
214 
215  std::uint64_t getLabel() const;
216 
// Note the differing argument types: setLabel() takes a 64-bit value while
// setNewLabel() takes a std::size_t, matching the members label/newLabel.
217  void setLabel(std::uint64_t label);
218  void setNewLabel(std::size_t label);
219 
// NOTE(review): presumably copies newLabel into label - confirm.
220  void updateLabel();
221 
222  std::size_t getID() const;
223 
// NOTE(review): presumably orders 'edges' using Edge::LessCmpFunc - confirm.
224  void sortEdges();
225 
226  std::size_t getNumEdges() const;
227 
228  EdgeIterator getEdgesBegin() const;
229  EdgeIterator getEdgesEnd() const;
230 
// Writers into a caller-supplied ConnectionTable. appendAtomConfigData() is
// the only non-const one, i.e. it may modify this node.
231  void appendConnectivityData(ConnectionTable& ctab) const;
232  void appendBondConfigData(ConnectionTable& ctab) const;
233  void appendAtomConfigData(ConnectionTable& ctab);
234 
// Non-const query; the partOfStereocenter/partOfStereocenterValid members
// suggest the result is computed once and cached - TODO confirm.
235  bool involvedInStereocenter();
236 
237  bool isEquivalent(const AtomNode* node) const;
238  bool isNonEquivalent(const AtomNode* node) const;
239 
240  void addToEquivalenceSet(const AtomNode* node);
241  void addToNonEquivalenceSet(const AtomNode* node);
242 
243  static bool terminalAndOnCommonNonStereoNode(const AtomNode* node1, const AtomNode* node2);
244 
// Binary predicate on node pointers; by its name a strict "less than"
// ordering (the ordering criterion is not visible in this header).
245  struct LessCmpFunc
246  {
247 
248  bool operator()(const AtomNode*, const AtomNode*) const;
249  };
250 
251  private:
252  bool initConfigurationData();
253 
// NOTE(review): the type name Calculator is not declared in the visible
// lines of this listing; presumably an alias for the enclosing class.
254  Calculator* calculator;
255  const Atom* atom;
256  std::uint64_t initialLabel;
257  std::uint64_t label;
258  std::size_t newLabel;
259  std::size_t id;
// Single bitset although both equivalence and non-equivalence sets are
// exposed above - how the two are encoded is not visible here.
260  Util::BitSet equivNodeMask;
261  EdgeList edges;
262  StereoDescriptor stereoDescr;
263  bool hasConfiguration;
264  bool configDataValid;
265  bool partOfStereocenter;
266  bool partOfStereocenterValid;
267  };
268 
// Graph edge type used internally by the calculator; each instance refers to
// one Bond and to the AtomNode on the other side (nbrNode). Only declarations
// are visible in this header, so behavioral notes below are derived from
// signatures and member names and are marked where they are assumptions.
269  class Edge
270  {
271 
272  public:
273  Edge();
274 
// Two-phase initialization: the default constructor takes no arguments and
// the actual values are supplied here (parameters mirror the private
// members calculator, bond, label, nbrNode and id).
275  void init(const Calculator* calculator, const Bond* bond, std::uint64_t label,
276  AtomNode* nbr_node, std::size_t id);
277 
// Writers into a caller-supplied ConnectionTable. appendConfigurationData()
// is non-const, i.e. it may modify this edge.
278  void appendBondData(ConnectionTable&) const;
279  void appendConfigurationData(const AtomNode* node, ConnectionTable& ctab);
280 
281  AtomNode* getNeighborNode() const;
282 
// Non-const query taking a node argument (presumably the node this edge
// starts from - TODO confirm).
283  bool representsStereoBond(const AtomNode* node);
284 
285  std::size_t getID() const;
286 
// Binary predicate on edge pointers; by its name a strict "less than"
// ordering (the ordering criterion is not visible in this header).
287  struct LessCmpFunc
288  {
289 
290  bool operator()(const Edge*, const Edge*) const;
291  };
292 
293  private:
294  bool initConfigurationData(const AtomNode* node);
295 
// NOTE(review): the type name Calculator is not declared in the visible
// lines of this listing; presumably an alias for the enclosing class.
// Unlike AtomNode, the edge holds a pointer to a const calculator.
296  const Calculator* calculator;
297  const Bond* bond;
298  AtomNode* nbrNode;
299  std::uint64_t label;
300  std::size_t id;
301  StereoDescriptor stereoDescr;
302  bool hasConfiguration;
303  bool configDataValid;
304  };
305 
// Pairs a fragment pointer with a pointer to a connection table; both are
// pointers to const and look non-owning (TODO confirm lifetime expectations).
306  typedef std::pair<const Fragment*, const ConnectionTable*> CanonComponentInfo;
307 
// Binary predicate comparing two CanonComponentInfo values. The comparison
// criterion is defined outside this header (presumably it orders components
// by their connection tables - TODO confirm).
308  struct ComponentCmpFunc
309  {
310 
311  bool operator()(const CanonComponentInfo&, const CanonComponentInfo&) const;
312  };
313 
// Container and cache types used by the data members declared below.
// A node pointer together with a 64-bit value (presumably the node's label
// at the time the state was recorded - TODO confirm).
314  typedef std::pair<AtomNode*, std::uint64_t> NodeLabelingState;
315  typedef std::vector<NodeLabelingState> NodeLabelingStack;
316  typedef std::vector<AtomNode*> NodeList;
317  typedef std::vector<ConnectionTable> ConnectionTableList;
318  typedef std::vector<CanonComponentInfo> CanonComponentList;
// Util::ObjectStack instantiations for the two nested graph element types.
319  typedef Util::ObjectStack<AtomNode> NodeCache;
320  typedef Util::ObjectStack<Edge> EdgeCache;
321 
// Calculator state. The roles noted below are read off the member names and
// types only; the code using them is not part of this header.

// Object stacks for AtomNode and Edge instances (presumably the source of the
// objects handed out by allocNode()/allocEdge() - TODO confirm).
322  NodeCache nodeCache;
323  EdgeCache edgeCache;
// Values accessed through set/getAtomPropertyFlags() and
// set/getBondPropertyFlags() - presumably; the accessors are not defined here.
324  unsigned int atomPropertyFlags;
325  unsigned int bondPropertyFlags;
326  HydrogenCountFunction hCountFunc;
327  bool foundStereogenicAtoms;
328  bool foundStereogenicBonds;
// Pointer to a const molecular graph (presumably the one passed to the
// current calculate() call; not owned - TODO confirm).
329  const MolecularGraph* molGraph;
330  NodeList allocNodes;
331  EdgeList allocEdges;
332  NodeList nodeList;
333  NodeList equivNodeStack;
334  NodeLabelingStack nodeLabelingStack;
335  ConnectionTableList compConnectionTables;
336  ConnectionTableList levelConnectionTables;
337  ConnectionTable testConnectionTable;
338  NodeList minNodeList;
339  CanonComponentList canonComponentList;
340  Util::BitSet visitedEdgeMask;
341  };
342  } // namespace Chem
343 } // namespace CDPL
344 
345 #endif // CDPL_CHEM_CANONICALNUMBERINGCALCULATOR_HPP
Definition of the class CDPL::Util::Array.
Definition of the type CDPL::Util::BitSet.
Definition of constants in namespace CDPL::Chem::BondPropertyFlag.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of constants in namespace CDPL::Chem::AtomPropertyFlag.
Definition of the class CDPL::Util::ObjectStack.
Definition of the type CDPL::Chem::StereoDescriptor.
Atom.
Definition: Atom.hpp:52
Bond.
Definition: Bond.hpp:50
CanonicalNumberingCalculator.
Definition: CanonicalNumberingCalculator.hpp:67
void setHydrogenCountFunction(const HydrogenCountFunction &func)
void setAtomPropertyFlags(unsigned int flags)
Allows specifying the set of atomic properties that has to be considered by the canonical numbering al...
const HydrogenCountFunction & getHydrogenCountFunction()
unsigned int getBondPropertyFlags() const
Returns the set of bond properties that gets considered by the canonical numbering algorithm.
unsigned int getAtomPropertyFlags() const
Returns the set of atomic properties that gets considered by the canonical numbering algorithm.
std::function< std::size_t(const Atom &, const MolecularGraph &)> HydrogenCountFunction
Definition: CanonicalNumberingCalculator.hpp:85
CanonicalNumberingCalculator & operator=(const CanonicalNumberingCalculator &)=delete
CanonicalNumberingCalculator()
Constructs the CanonicalNumberingCalculator instance.
CanonicalNumberingCalculator(const CanonicalNumberingCalculator &)=delete
CanonicalNumberingCalculator(const MolecularGraph &molgraph, Util::STArray &numbering)
Constructs the CanonicalNumberingCalculator instance and performs a canonical numbering of the atoms ...
void calculate(const MolecularGraph &molgraph, Util::STArray &numbering)
Performs a canonical numbering of the atoms in the molecular graph molgraph.
void setBondPropertyFlags(unsigned int flags)
Allows specifying the set of bond properties that has to be considered by the canonical numbering algo...
MolecularGraph.
Definition: MolecularGraph.hpp:52
A data structure for the storage and retrieval of stereochemical information about atoms and bonds.
Definition: StereoDescriptor.hpp:102
constexpr unsigned int FORMAL_CHARGE
Specifies the formal charge of an atom.
Definition: Chem/AtomPropertyFlag.hpp:73
constexpr unsigned int H_COUNT
Specifies the hydrogen count of an atom.
Definition: Chem/AtomPropertyFlag.hpp:78
constexpr unsigned int AROMATICITY
Specifies the membership of an atom in aromatic rings.
Definition: Chem/AtomPropertyFlag.hpp:93
constexpr unsigned int ISOTOPE
Specifies the isotopic mass of an atom.
Definition: Chem/AtomPropertyFlag.hpp:68
constexpr unsigned int CONFIGURATION
Specifies the configuration of a stereogenic atom.
Definition: Chem/AtomPropertyFlag.hpp:98
constexpr unsigned int TYPE
Specifies the generic type or element of an atom.
Definition: Chem/AtomPropertyFlag.hpp:63
constexpr unsigned int AROMATICITY
Specifies the membership of a bond in aromatic rings.
Definition: BondPropertyFlag.hpp:73
constexpr unsigned int ORDER
Specifies the order of a bond.
Definition: BondPropertyFlag.hpp:63
constexpr unsigned int CONFIGURATION
Specifies the steric configuration of a double bond.
Definition: BondPropertyFlag.hpp:78
CDPL_CHEM_API void canonicalize(MolecularGraph &molgraph, const AtomCompareFunction &func, bool atoms=true, bool atom_nbrs=true, bool bonds=true, bool bond_atoms=false)
Array< std::size_t > STArray
An array of unsigned integers of type std::size_t.
Definition: Array.hpp:567
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.
Definition: CanonicalNumberingCalculator.hpp:246
bool operator()(const AtomNode *, const AtomNode *) const
Definition: CanonicalNumberingCalculator.hpp:288
bool operator()(const Edge *, const Edge *) const