Chemical Data Processing Library C++ API - Version 1.0.0
MaxCommonBondSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * MaxCommonBondSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_MAXCOMMONBONDSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_MAXCOMMONBONDSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <set>
34 #include <cstddef>
35 
36 #include <boost/iterator/indirect_iterator.hpp>
37 
38 #include "CDPL/Chem/APIPrefix.hpp"
41 #include "CDPL/Util/BitSet.hpp"
43 
44 
45 namespace CDPL
46 {
47 
48  namespace Chem
49  {
50 
51  class MolecularGraph;
52  class Atom;
53  class Bond;
54 
60  {
61 
62  typedef std::vector<AtomBondMapping*> ABMappingList;
63 
64  public:
68  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
69 
73  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
74 
79 
85 
92 
97  void setQuery(const MolecularGraph& query);
98 
112  bool mappingExists(const MolecularGraph& target);
113 
129  bool findMappings(const MolecularGraph& target);
130 
136  std::size_t getNumMappings() const;
137 
144  AtomBondMapping& getMapping(std::size_t idx);
145 
152  const AtomBondMapping& getMapping(std::size_t idx) const;
153 
159 
165 
171 
177 
183 
189 
195 
201 
213  void uniqueMappingsOnly(bool unique);
214 
220  bool uniqueMappingsOnly() const;
221 
232  void setMaxNumMappings(std::size_t max_num_mappings);
233 
239  std::size_t getMaxNumMappings() const;
240 
250  void setMinSubstructureSize(std::size_t min_size);
251 
257  std::size_t getMinSubstructureSize() const;
258 
259  private:
260  class AGNode;
261 
263 
265 
266  bool init(const MolecularGraph&);
267 
268  void initMatchExpressions();
269 
270  bool findEquivAtoms();
271 
272  bool atomsCompatible(const Bond&, const Bond&) const;
273 
274  const Atom* getCommonAtom(const Bond&, const Bond&) const;
275 
276  bool buildAssocGraph();
277 
278  bool findAssocGraphCliques(std::size_t);
279  bool isLegal(const AGNode*);
280 
281  void undoAtomMapping(std::size_t);
282 
283  bool mappingFound();
284 
285  bool hasPostMappingMatchExprs() const;
286  bool foundMappingMatches(const AtomBondMapping*) const;
287 
288  bool foundMappingUnique(bool);
289 
290  void clearMappings();
291 
292  AtomBondMapping* createAtomBondMapping(bool);
293  void freeAtomBondMapping();
294 
295  void freeAssocGraph();
296  void freeAtomBondMappings();
297 
298  class AGEdge;
299 
300  AGNode* allocAGNode(const Bond*, const Bond*);
301  AGEdge* allocAGEdge(const Atom*, const Atom*);
302 
303  typedef std::vector<const AGEdge*> AGraphEdgeList;
304 
305  class AGNode
306  {
307 
308  public:
309  void setQueryBond(const Bond*);
310  const Bond* getQueryBond() const;
311 
312  void setAssocBond(const Bond*);
313  const Bond* getAssocBond() const;
314 
315  void addEdge(const AGEdge*);
316 
317  bool isConnected(const AGNode*) const;
318  const AGEdge* findEdge(const AGNode*) const;
319 
320  void clear();
321 
322  void setIndex(std::size_t idx);
323 
324  private:
325  std::size_t index;
326  const Bond* queryBond;
327  const Bond* assocBond;
328  Util::BitSet connNodes;
329  AGraphEdgeList atomEdges;
330  };
331 
332  class AGEdge
333  {
334 
335  public:
336  void setQueryAtom(const Atom*);
337  const Atom* getQueryAtom() const;
338 
339  void setAssocAtom(const Atom*);
340  const Atom* getAssocAtom() const;
341 
342  void setNode1(const AGNode*);
343  void setNode2(const AGNode*);
344 
345  const AGNode* getNode1() const;
346  const AGNode* getNode2() const;
347 
348  const AGNode* getOther(const AGNode*) const;
349 
350  private:
351  const Atom* queryAtom;
352  const Atom* assocAtom;
353  const AGNode* node1;
354  const AGNode* node2;
355  };
356 
357  class ABMappingMask
358  {
359 
360  public:
361  void initQueryAtomMask(std::size_t);
362  void initTargetAtomMask(std::size_t);
363 
364  void initQueryBondMask(std::size_t);
365  void initTargetBondMask(std::size_t);
366 
367  void setQueryAtomBit(std::size_t);
368  void setTargetAtomBit(std::size_t);
369 
370  void resetQueryAtomBit(std::size_t);
371  void resetTargetAtomBit(std::size_t);
372 
373  bool testQueryAtomBit(std::size_t) const;
374  bool testTargetAtomBit(std::size_t) const;
375 
376  void setQueryBondBit(std::size_t);
377  void setTargetBondBit(std::size_t);
378 
379  void resetQueryAtomBits();
380  void resetTargetAtomBits();
381 
382  void resetBondBits();
383 
384  bool operator<(const ABMappingMask&) const;
385  bool operator>(const ABMappingMask&) const;
386 
387  private:
388  Util::BitSet queryAtomMask;
389  Util::BitSet targetAtomMask;
390  Util::BitSet queryBondMask;
391  Util::BitSet targetBondMask;
392  };
393 
394  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
395 
396  typedef std::vector<Util::BitSet> BitMatrix;
397  typedef std::vector<AGNode*> AGraphNodeList;
398  typedef std::vector<AGraphNodeList> AGraphNodeMatrix;
399  typedef std::set<ABMappingMask> UniqueMappingList;
400  typedef std::vector<const Atom*> AtomList;
401  typedef std::vector<const Bond*> BondList;
402  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable;
403  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable;
404  typedef Util::ObjectStack<AGNode> NodeCache;
405  typedef Util::ObjectStack<AGEdge> EdgeCache;
406  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
407 
408  const MolecularGraph* query;
409  const MolecularGraph* target;
410  BitMatrix atomEquivMatrix;
411  AGraphNodeMatrix nodeMatrix;
412  ABMappingList foundMappings;
413  UniqueMappingList uniqueMappings;
414  AGraphEdgeList cliqueEdges;
415  AGraphNodeList cliqueNodes;
416  ABMappingMask mappingMask;
417  AtomMatchExprTable atomMatchExprTable;
418  BondMatchExprTable bondMatchExprTable;
419  MolGraphMatchExprPtr molGraphMatchExpr;
420  AtomList postMappingMatchAtoms;
421  BondList postMappingMatchBonds;
422  NodeCache nodeCache;
423  EdgeCache edgeCache;
424  MappingCache mappingCache;
425  bool queryChanged;
426  bool initQueryData;
427  bool uniqueMatches;
428  bool saveMappings;
429  std::size_t numQueryAtoms;
430  std::size_t numQueryBonds;
431  std::size_t numTargetAtoms;
432  std::size_t numTargetBonds;
433  std::size_t maxBondSubstructureSize;
434  std::size_t currNumNullNodes;
435  std::size_t minNumNullNodes;
436  std::size_t maxNumMappings;
437  std::size_t minSubstructureSize;
438  std::size_t currNodeIdx;
439  };
440  } // namespace Chem
441 } // namespace CDPL
442 
443 #endif // CDPL_CHEM_MAXCOMMONBONDSUBSTRUCTURESEARCH_HPP
CDPL::Chem::MaxCommonBondSubstructureSearch::getMinSubstructureSize
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
CDPL::Chem::MaxCommonBondSubstructureSearch::uniqueMappingsOnly
void uniqueMappingsOnly(bool unique)
Allows to specify whether or not to store only unique atom/bond mappings.
ObjectStack.hpp
Definition of the class CDPL::Util::ObjectStack.
CDPL::Chem::MaxCommonBondSubstructureSearch
MaxCommonBondSubstructureSearch.
Definition: MaxCommonBondSubstructureSearch.hpp:60
CDPL::Chem::MaxCommonBondSubstructureSearch::MaxCommonBondSubstructureSearch
MaxCommonBondSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a MaxCommonBondSubstructureSearch instance for the specified query structu...
APIPrefix.hpp
Definition of the preprocessor macro CDPL_CHEM_API.
CDPL_CHEM_API
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Chem::MaxCommonBondSubstructureSearch::MaxCommonBondSubstructureSearch
MaxCommonBondSubstructureSearch()
Constructs and initializes a MaxCommonBondSubstructureSearch instance.
CDPL::Chem::MaxCommonBondSubstructureSearch::end
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::MaxCommonBondSubstructureSearch::mappingExists
bool mappingExists(const MolecularGraph &target)
Searches for a common substructure between the query and the specified target molecular graph.
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Chem::MaxCommonBondSubstructureSearch::getNumMappings
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructur...
CDPL::Chem::MaxCommonBondSubstructureSearch::MappingIterator
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonBondSubstructureSearch.hpp:68
CDPL::Chem::MaxCommonBondSubstructureSearch::getMapping
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Chem::MaxCommonBondSubstructureSearch::ConstMappingIterator
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonBondSubstructureSearch.hpp:73
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsBegin
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Util::operator<
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
CDPL::Chem::MaxCommonBondSubstructureSearch::end
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonBondSubstructureSearch::findMappings
bool findMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
CDPL::Chem::MatchExpression< MolecularGraph >::SharedPointer
std::shared_ptr< MatchExpression > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MatchExpression instances.
Definition: MatchExpression.hpp:81
AtomBondMapping.hpp
Definition of the class CDPL::Chem::AtomBondMapping.
CDPL::Chem::MaxCommonBondSubstructureSearch::setMaxNumMappings
void setMaxNumMappings(std::size_t max_num_mappings)
Allows to specify a limit on the number of stored atom/bond mappings.
CDPL::Chem::MaxCommonBondSubstructureSearch::begin
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonBondSubstructureSearch::setMinSubstructureSize
void setMinSubstructureSize(std::size_t min_size)
Allows to specify the minimum accepted common substructure size.
CDPL::Chem::MaxCommonBondSubstructureSearch::begin
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
MatchExpression.hpp
Definition of the class CDPL::Chem::MatchExpression.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsEnd
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonBondSubstructureSearch::uniqueMappingsOnly
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
CDPL::Chem::MaxCommonBondSubstructureSearch::~MaxCommonBondSubstructureSearch
~MaxCommonBondSubstructureSearch()
Destructor.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsEnd
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::AtomBondMapping
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
CDPL::Util::operator>
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMaxNumMappings
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CDPL::Chem::MaxCommonBondSubstructureSearch::setQuery
void setQuery(const MolecularGraph &query)
Allows to specify a new query structure.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMapping
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::MaxCommonBondSubstructureSearch::getMappingsBegin
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.