Chemical Data Processing Library C++ API - Version 1.1.0
MaxCommonBondSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * MaxCommonBondSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_MAXCOMMONBONDSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_MAXCOMMONBONDSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <set>
34 #include <cstddef>
35 #include <memory>
36 
37 #include <boost/iterator/indirect_iterator.hpp>
38 
39 #include "CDPL/Chem/APIPrefix.hpp"
42 #include "CDPL/Util/BitSet.hpp"
44 
45 
46 namespace CDPL
47 {
48 
49  namespace Chem
50  {
51 
52  class MolecularGraph;
53  class Atom;
54  class Bond;
55 
61  {
62 
63  typedef std::vector<AtomBondMapping*> ABMappingList;
64 
65  public:
66  typedef std::shared_ptr<MaxCommonBondSubstructureSearch> SharedPointer;
67 
71  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
72 
76  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
77 
82 
88 
95 
100  void setQuery(const MolecularGraph& query);
101 
115  bool mappingExists(const MolecularGraph& target);
116 
132  bool findMappings(const MolecularGraph& target);
133 
139  std::size_t getNumMappings() const;
140 
147  AtomBondMapping& getMapping(std::size_t idx);
148 
155  const AtomBondMapping& getMapping(std::size_t idx) const;
156 
162 
168 
174 
180 
186 
192 
198 
204 
216  void uniqueMappingsOnly(bool unique);
217 
223  bool uniqueMappingsOnly() const;
224 
235  void setMaxNumMappings(std::size_t max_num_mappings);
236 
242  std::size_t getMaxNumMappings() const;
243 
253  void setMinSubstructureSize(std::size_t min_size);
254 
260  std::size_t getMinSubstructureSize() const;
261 
262  private:
263  class AGNode;
264 
266 
268 
269  bool init(const MolecularGraph&);
270 
271  void initMatchExpressions();
272 
273  bool findEquivAtoms();
274 
275  bool atomsCompatible(const Bond&, const Bond&) const;
276 
277  const Atom* getCommonAtom(const Bond&, const Bond&) const;
278 
279  bool buildAssocGraph();
280 
281  bool findAssocGraphCliques(std::size_t);
282  bool isLegal(const AGNode*);
283 
284  void undoAtomMapping(std::size_t);
285 
286  bool mappingFound();
287 
288  bool hasPostMappingMatchExprs() const;
289  bool foundMappingMatches(const AtomBondMapping*) const;
290 
291  bool foundMappingUnique(bool);
292 
293  void clearMappings();
294 
295  AtomBondMapping* createAtomBondMapping(bool);
296  void freeAtomBondMapping();
297 
298  void freeAssocGraph();
299  void freeAtomBondMappings();
300 
301  class AGEdge;
302 
303  AGNode* allocAGNode(const Bond*, const Bond*);
304  AGEdge* allocAGEdge(const Atom*, const Atom*);
305 
306  typedef std::vector<const AGEdge*> AGraphEdgeList;
307 
308  class AGNode
309  {
310 
311  public:
312  void setQueryBond(const Bond*);
313  const Bond* getQueryBond() const;
314 
315  void setAssocBond(const Bond*);
316  const Bond* getAssocBond() const;
317 
318  void addEdge(const AGEdge*);
319 
320  bool isConnected(const AGNode*) const;
321  const AGEdge* findEdge(const AGNode*) const;
322 
323  void clear();
324 
325  void setIndex(std::size_t idx);
326 
327  private:
328  std::size_t index;
329  const Bond* queryBond;
330  const Bond* assocBond;
331  Util::BitSet connNodes;
332  AGraphEdgeList atomEdges;
333  };
334 
335  class AGEdge
336  {
337 
338  public:
339  void setQueryAtom(const Atom*);
340  const Atom* getQueryAtom() const;
341 
342  void setAssocAtom(const Atom*);
343  const Atom* getAssocAtom() const;
344 
345  void setNode1(const AGNode*);
346  void setNode2(const AGNode*);
347 
348  const AGNode* getNode1() const;
349  const AGNode* getNode2() const;
350 
351  const AGNode* getOther(const AGNode*) const;
352 
353  private:
354  const Atom* queryAtom;
355  const Atom* assocAtom;
356  const AGNode* node1;
357  const AGNode* node2;
358  };
359 
360  class ABMappingMask
361  {
362 
363  public:
364  void initQueryAtomMask(std::size_t);
365  void initTargetAtomMask(std::size_t);
366 
367  void initQueryBondMask(std::size_t);
368  void initTargetBondMask(std::size_t);
369 
370  void setQueryAtomBit(std::size_t);
371  void setTargetAtomBit(std::size_t);
372 
373  void resetQueryAtomBit(std::size_t);
374  void resetTargetAtomBit(std::size_t);
375 
376  bool testQueryAtomBit(std::size_t) const;
377  bool testTargetAtomBit(std::size_t) const;
378 
379  void setQueryBondBit(std::size_t);
380  void setTargetBondBit(std::size_t);
381 
382  void resetQueryAtomBits();
383  void resetTargetAtomBits();
384 
385  void resetBondBits();
386 
387  bool operator<(const ABMappingMask&) const;
388  bool operator>(const ABMappingMask&) const;
389 
390  private:
391  Util::BitSet queryAtomMask;
392  Util::BitSet targetAtomMask;
393  Util::BitSet queryBondMask;
394  Util::BitSet targetBondMask;
395  };
396 
397  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
398 
399  typedef std::vector<Util::BitSet> BitMatrix;
400  typedef std::vector<AGNode*> AGraphNodeList;
401  typedef std::vector<AGraphNodeList> AGraphNodeMatrix;
402  typedef std::set<ABMappingMask> UniqueMappingList;
403  typedef std::vector<const Atom*> AtomList;
404  typedef std::vector<const Bond*> BondList;
405  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable;
406  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable;
407  typedef Util::ObjectStack<AGNode> NodeCache;
408  typedef Util::ObjectStack<AGEdge> EdgeCache;
409  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
410 
411  const MolecularGraph* query;
412  const MolecularGraph* target;
413  BitMatrix atomEquivMatrix;
414  AGraphNodeMatrix nodeMatrix;
415  ABMappingList foundMappings;
416  UniqueMappingList uniqueMappings;
417  AGraphEdgeList cliqueEdges;
418  AGraphNodeList cliqueNodes;
419  ABMappingMask mappingMask;
420  AtomMatchExprTable atomMatchExprTable;
421  BondMatchExprTable bondMatchExprTable;
422  MolGraphMatchExprPtr molGraphMatchExpr;
423  AtomList postMappingMatchAtoms;
424  BondList postMappingMatchBonds;
425  NodeCache nodeCache;
426  EdgeCache edgeCache;
427  MappingCache mappingCache;
428  bool queryChanged;
429  bool initQueryData;
430  bool uniqueMatches;
431  bool saveMappings;
432  std::size_t numQueryAtoms;
433  std::size_t numQueryBonds;
434  std::size_t numTargetAtoms;
435  std::size_t numTargetBonds;
436  std::size_t maxBondSubstructureSize;
437  std::size_t currNumNullNodes;
438  std::size_t minNumNullNodes;
439  std::size_t maxNumMappings;
440  std::size_t minSubstructureSize;
441  std::size_t currNodeIdx;
442  };
443  } // namespace Chem
444 } // namespace CDPL
445 
446 #endif // CDPL_CHEM_MAXCOMMONBONDSUBSTRUCTURESEARCH_HPP
CDPL::Chem::MaxCommonBondSubstructureSearch::getMinSubstructureSize
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
CDPL::Chem::MaxCommonBondSubstructureSearch::uniqueMappingsOnly
void uniqueMappingsOnly(bool unique)
Allows to specify whether or not to store only unique atom/bond mappings.
ObjectStack.hpp
Definition of the class CDPL::Util::ObjectStack.
CDPL::Chem::MaxCommonBondSubstructureSearch
MaxCommonBondSubstructureSearch.
Definition: MaxCommonBondSubstructureSearch.hpp:61
CDPL::Chem::MaxCommonBondSubstructureSearch::MaxCommonBondSubstructureSearch
MaxCommonBondSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a MaxCommonBondSubstructureSearch instance for the specified query structu...
APIPrefix.hpp
Definition of the preprocessor macro CDPL_CHEM_API.
CDPL_CHEM_API
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Chem::MaxCommonBondSubstructureSearch::MaxCommonBondSubstructureSearch
MaxCommonBondSubstructureSearch()
Constructs and initializes a MaxCommonBondSubstructureSearch instance.
CDPL::Chem::MaxCommonBondSubstructureSearch::end
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::MaxCommonBondSubstructureSearch::mappingExists
bool mappingExists(const MolecularGraph &target)
Searches for a common substructure between the query and the specified target molecular graph.
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Chem::MaxCommonBondSubstructureSearch::getNumMappings
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructur...
CDPL::Chem::MaxCommonBondSubstructureSearch::MappingIterator
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonBondSubstructureSearch.hpp:71
CDPL::Chem::MaxCommonBondSubstructureSearch::getMapping
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Chem::MaxCommonBondSubstructureSearch::ConstMappingIterator
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonBondSubstructureSearch.hpp:76
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsBegin
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Util::operator<
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
CDPL::Chem::MaxCommonBondSubstructureSearch::end
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonBondSubstructureSearch::findMappings
bool findMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
CDPL::Chem::MatchExpression< MolecularGraph >::SharedPointer
std::shared_ptr< MatchExpression > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MatchExpression instances.
Definition: MatchExpression.hpp:81
AtomBondMapping.hpp
Definition of the class CDPL::Chem::AtomBondMapping.
CDPL::Chem::MaxCommonBondSubstructureSearch::setMaxNumMappings
void setMaxNumMappings(std::size_t max_num_mappings)
Allows to specify a limit on the number of stored atom/bond mappings.
CDPL::Chem::MaxCommonBondSubstructureSearch::begin
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonBondSubstructureSearch::setMinSubstructureSize
void setMinSubstructureSize(std::size_t min_size)
Allows to specify the minimum accepted common substructure size.
CDPL::Chem::MaxCommonBondSubstructureSearch::begin
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
MatchExpression.hpp
Definition of the class CDPL::Chem::MatchExpression.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsEnd
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonBondSubstructureSearch::uniqueMappingsOnly
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
CDPL::Chem::MaxCommonBondSubstructureSearch::~MaxCommonBondSubstructureSearch
~MaxCommonBondSubstructureSearch()
Destructor.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsEnd
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::AtomBondMapping
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
CDPL::Chem::MaxCommonBondSubstructureSearch::SharedPointer
std::shared_ptr< MaxCommonBondSubstructureSearch > SharedPointer
Definition: MaxCommonBondSubstructureSearch.hpp:66
CDPL::Util::operator>
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMaxNumMappings
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CDPL::Chem::MaxCommonBondSubstructureSearch::setQuery
void setQuery(const MolecularGraph &query)
Allows to specify a new query structure.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMapping
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsBegin
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.