Chemical Data Processing Library C++ API - Version 1.1.0
MaxCommonAtomSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * MaxCommonAtomSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <set>
34 #include <cstddef>
35 #include <memory>
36 
37 #include <boost/iterator/indirect_iterator.hpp>
38 
39 #include "CDPL/Chem/APIPrefix.hpp"
42 #include "CDPL/Util/BitSet.hpp"
44 
45 
46 namespace CDPL
47 {
48 
49  namespace Chem
50  {
51 
52  class MolecularGraph;
53  class Atom;
54  class Bond;
55 
61  {
62 
// Container type holding raw pointers to AtomBondMapping objects; it is the type of
// the private foundMappings member and the base of the two iterator typedefs below.
63  typedef std::vector<AtomBondMapping*> ABMappingList;
64 
65  public:
// std::shared_ptr alias for MaxCommonAtomSubstructureSearch instances.
66  typedef std::shared_ptr<MaxCommonAtomSubstructureSearch> SharedPointer;
67 
// A mutable random access iterator used to iterate over the stored atom/bond mapping
// objects. boost::indirect_iterator adds one level of dereferencing, so the iterator
// yields AtomBondMapping references instead of the stored pointers.
71  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
72 
// A constant random access iterator used to iterate over the stored atom/bond mapping
// objects (yields const AtomBondMapping references).
76  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
77 
82 
88 
95 
// Specifies a new query structure.
// query: the molecular graph to use as the query for subsequent searches.
// NOTE(review): the class keeps a 'const MolecularGraph*' member named query, so the
// argument presumably has to outlive the searches - confirm against the implementation.
100  void setQuery(const MolecularGraph& query);
101 
// Searches for a common substructure between the query and the specified target
// molecular graph.
// NOTE(review): presumably returns true when such a substructure exists - the
// definition is not visible here.
115  bool mappingExists(const MolecularGraph& target);
116 
// Searches for all atom/bond mappings of query subgraphs to substructures of the
// specified target molecular graph.
// NOTE(review): meaning of the bool result is not visible here (presumably "at least
// one mapping was found") - confirm.
132  bool findAllMappings(const MolecularGraph& target);
133 
// Searches for atom/bond mappings of query subgraphs to substructures of the
// specified target molecular graph.
// NOTE(review): judging by the name (and the maxBondMappingsOnly member) this variant
// is restricted to mappings with a maximum number of bonds - confirm.
149  bool findMaxBondMappings(const MolecularGraph& target);
150 
// Returns the number of atom/bond mappings that were recorded in the last search.
156  std::size_t getNumMappings() const;
157 
// Returns a non-const reference to the stored atom/bond mapping object at index idx.
// NOTE(review): behaviour for an out-of-range idx is not visible in this header.
164  AtomBondMapping& getMapping(std::size_t idx);
165 
// Returns a const reference to the stored atom/bond mapping object at index idx.
// NOTE(review): behaviour for an out-of-range idx is not visible in this header.
172  const AtomBondMapping& getMapping(std::size_t idx) const;
173 
179 
185 
191 
197 
203 
209 
215 
221 
// Specifies whether or not to store only unique atom/bond mappings.
// unique: true to discard duplicate mappings, false to keep them.
233  void uniqueMappingsOnly(bool unique);
234 
// Tells whether duplicate atom/bond mappings are discarded.
240  bool uniqueMappingsOnly() const;
241 
// Specifies a limit on the number of stored atom/bond mappings.
// NOTE(review): whether a particular value (e.g. zero) means "no limit" is not
// visible in this header - confirm before relying on it.
252  void setMaxNumMappings(std::size_t max_num_mappings);
253 
// Returns the specified limit on the number of stored atom/bond mappings.
259  std::size_t getMaxNumMappings() const;
260 
// Specifies the minimum accepted common substructure size.
// NOTE(review): the unit of min_size (atoms vs. bonds) is not stated here; the class
// name suggests atoms - confirm.
270  void setMinSubstructureSize(std::size_t min_size);
271 
// Returns the minimum accepted common substructure size.
277  std::size_t getMinSubstructureSize() const;
278 
279  private:
// Private implementation helpers. Only the declarations are visible in this header,
// so the remarks below restate what the signatures show; anything about behaviour is
// an assumption derived from the names and is marked as such.

// Forward declaration; AGNode is defined further down in this class.
280  class AGNode;
281 
283 
285 
// NOTE(review): presumably prepares the search state for the given molecular graph
// and reports success via the bool result - confirm.
286  bool init(const MolecularGraph&);
287 
// NOTE(review): presumably fills the atom/bond/molecular-graph match expression
// members - confirm.
288  void initMatchExpressions();
289 
// NOTE(review): presumably builds the association graph out of AGNode/AGEdge objects.
290  bool buildAssocGraph();
291 
// NOTE(review): presumably the clique search on the association graph; the meaning of
// the unnamed std::size_t argument is not visible here.
292  bool findAssocGraphCliques(std::size_t);
// NOTE(review): presumably tests whether the given node may be used in the current
// state of the search - confirm.
293  bool isLegal(const AGNode*);
294 
// NOTE(review): presumably invoked when a mapping candidate has been found; meaning
// of the bool result not visible here.
295  bool mappingFound();
296 
// Const query functions related to "post mapping" match expressions.
// NOTE(review): exact semantics not visible in this header.
297  bool hasPostMappingMatchExprs() const;
298  bool foundMappingMatches(const AtomBondMapping*) const;
299 
// NOTE(review): presumably the uniqueness test behind uniqueMappingsOnly() - confirm.
300  bool foundMappingUnique();
301 
// NOTE(review): presumably discards the mappings of a previous search.
302  void clearMappings();
303 
// Release helpers (singular/plural AtomBondMapping variants and the association graph).
// NOTE(review): what exactly each one releases is not visible in this header.
304  void freeAtomBondMapping();
305  void freeAtomBondMappings();
306  void freeAssocGraph();
307 
// Returns a raw AtomBondMapping pointer.
// NOTE(review): ownership of the returned object is not visible here - confirm.
308  AtomBondMapping* createAtomBondMapping();
309 
// Forward declaration; AGEdge is defined further down in this class.
310  class AGEdge;
311 
// Return a node for a pair of atoms / an edge for a pair of bonds.
// NOTE(review): argument order (query first, then target?) and ownership of the
// returned pointers are not visible here - confirm.
312  AGNode* allocAGNode(const Atom*, const Atom*);
313  AGEdge* allocAGEdge(const Bond*, const Bond*);
314 
// List of pointers to const AGEdge objects; used by AGNode and for the cliqueEdges member.
315  typedef std::vector<const AGEdge*> AGraphEdgeList;
316 
// Private node type; allocAGNode() returns pointers to objects of this class.
// A node stores two atom pointers ("query" and "assoc"), an index, a bit set and a
// list of AGEdge pointers. The member functions are only declared here - their
// definitions are outside this header, so remarks on behaviour are assumptions.
317  class AGNode
318  {
319 
320  public:
// Setter/getter for the atom tagged "query" (presumably backed by queryAtom).
321  void setQueryAtom(const Atom*);
322  const Atom* getQueryAtom() const;
323 
// Setter/getter for the atom tagged "assoc" (presumably backed by assocAtom).
324  void setAssocAtom(const Atom*);
325  const Atom* getAssocAtom() const;
326 
// NOTE(review): presumably registers an edge incident to this node - confirm.
327  void addEdge(const AGEdge*);
328 
// Connectivity queries against another node.
// NOTE(review): presumably answered via connNodes / bondEdges; result of findEdge()
// when no edge exists is not visible here.
329  bool isConnected(const AGNode*) const;
330  const AGEdge* findEdge(const AGNode*) const;
331 
// NOTE(review): presumably resets the node for reuse - exact extent not visible here.
332  void clear();
333 
// Sets the node's index (presumably stored in the index member).
334  void setIndex(std::size_t idx);
335 
336  private:
337  std::size_t index; // node index, see setIndex()
338  const Atom* queryAtom; // non-owning pointer (const Atom*), see setQueryAtom()
339  const Atom* assocAtom; // non-owning pointer (const Atom*), see setAssocAtom()
340  Util::BitSet connNodes; // NOTE(review): presumably one bit per connected node index
341  AGraphEdgeList bondEdges; // edge pointers; NOTE(review): presumably filled by addEdge()
342  };
343 
// Private edge type; allocAGEdge() returns pointers to objects of this class.
// An edge stores two bond pointers ("query" and "assoc") and pointers to two AGNode
// end points. The member functions are only declared here - their definitions are
// outside this header, so remarks on behaviour are assumptions.
344  class AGEdge
345  {
346 
347  public:
// Setter/getter for the bond tagged "query" (presumably backed by queryBond).
348  void setQueryBond(const Bond*);
349  const Bond* getQueryBond() const;
350 
// Setter/getter for the bond tagged "assoc" (presumably backed by assocBond).
351  void setAssocBond(const Bond*);
352  const Bond* getAssocBond() const;
353 
// Setters for the two end nodes.
354  void setNode1(const AGNode*);
355  void setNode2(const AGNode*);
356 
// Getters for the two end nodes.
357  const AGNode* getNode1() const;
358  const AGNode* getNode2() const;
359 
// NOTE(review): presumably returns the end node that is not the one passed in;
// behaviour for a node that is neither end point is not visible here.
360  const AGNode* getOther(const AGNode*) const;
361 
362  private:
363  const Bond* queryBond; // pointer to const Bond, see setQueryBond()
364  const Bond* assocBond; // pointer to const Bond, see setAssocBond()
365  const AGNode* node1; // first end node, see setNode1()
366  const AGNode* node2; // second end node, see setNode2()
367  };
368 
// Private helper bundling four bit sets: query atoms, target atoms, query bonds and
// target bonds. It is the element type of UniqueMappingList (a std::set), which is
// why an ordering via operator< is declared. The member functions are only declared
// here - their definitions are outside this header.
369  class ABMappingMask
370  {
371 
372  public:
// Initialise the atom masks.
// NOTE(review): the std::size_t argument is presumably the number of bits (atom count).
373  void initQueryAtomMask(std::size_t);
374  void initTargetAtomMask(std::size_t);
375 
// Initialise the bond masks.
// NOTE(review): the std::size_t argument is presumably the number of bits (bond count).
376  void initQueryBondMask(std::size_t);
377  void initTargetBondMask(std::size_t);
378 
// Set a single bit in one of the atom masks.
// NOTE(review): the argument is presumably an atom index - confirm.
379  void setQueryAtomBit(std::size_t);
380  void setTargetAtomBit(std::size_t);
381 
// Set a single bit in one of the bond masks.
// NOTE(review): the argument is presumably a bond index - confirm.
382  void setQueryBondBit(std::size_t);
383  void setTargetBondBit(std::size_t);
384 
// NOTE(review): presumably clears all bits of the four masks - confirm.
385  void reset();
386 
// Ordering comparisons between two masks.
// NOTE(review): the comparison criterion is not visible in this header.
387  bool operator<(const ABMappingMask&) const;
388  bool operator>(const ABMappingMask&) const;
389 
390  private:
391  Util::BitSet queryAtomMask; // bit set for query atoms
392  Util::BitSet targetAtomMask; // bit set for target atoms
393  Util::BitSet queryBondMask; // bit set for query bonds
394  Util::BitSet targetBondMask; // bit set for target bonds
395  };
396 
// Private type aliases and data members. Only the declarations are visible in this
// header; remarks on how a member is used are assumptions derived from its name and
// type unless stated otherwise.

// Smart pointer (MatchExpression<...>::SharedPointer) to a match expression on molecular graphs.
397  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
398 
// Lists of AGNode pointers and a vector of such lists.
399  typedef std::vector<AGNode*> AGraphNodeList;
400  typedef std::vector<AGraphNodeList> AGraphNodeMatrix;
// Ordered set of mapping masks; std::set orders elements with operator<, which
// ABMappingMask declares.
401  typedef std::set<ABMappingMask> UniqueMappingList;
// Lists of pointers to const atoms / bonds.
402  typedef std::vector<const Atom*> AtomList;
403  typedef std::vector<const Bond*> BondList;
// Vectors of match expression smart pointers for atoms / bonds.
404  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable;
405  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable;
// Util::ObjectStack instantiations for nodes, edges and mappings.
// NOTE(review): presumably object pools backing allocAGNode(), allocAGEdge() and
// createAtomBondMapping() - confirm.
406  typedef Util::ObjectStack<AGNode> NodeCache;
407  typedef Util::ObjectStack<AGEdge> EdgeCache;
408  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
409 
// Raw pointers to the query and target molecular graphs (pointer-to-const).
410  const MolecularGraph* query;
411  const MolecularGraph* target;
412  AGraphNodeMatrix nodeMatrix; // vector of AGNode pointer lists
413  ABMappingList foundMappings; // AtomBondMapping pointers; the container the mapping iterators are defined over
414  UniqueMappingList uniqueMappings; // set of ABMappingMask objects
415  AGraphEdgeList cliqueEdges; // NOTE(review): presumably edges of the clique being built
416  AGraphNodeList cliqueNodes; // NOTE(review): presumably nodes of the clique being built
417  ABMappingMask mappingMask; // NOTE(review): presumably scratch mask for the uniqueness test
418  AtomMatchExprTable atomMatchExprTable; // match expression pointers for atoms
419  BondMatchExprTable bondMatchExprTable; // match expression pointers for bonds
420  MolGraphMatchExprPtr molGraphMatchExpr; // match expression pointer for the molecular graph
421  AtomList postMappingMatchAtoms; // see hasPostMappingMatchExprs()/foundMappingMatches() - usage not visible here
422  BondList postMappingMatchBonds; // see hasPostMappingMatchExprs()/foundMappingMatches() - usage not visible here
423  NodeCache nodeCache; // ObjectStack of AGNode
424  EdgeCache edgeCache; // ObjectStack of AGEdge
425  MappingCache mappingCache; // ObjectStack of AtomBondMapping
// State flags. NOTE(review): who sets/reads each flag is not visible in this header.
426  bool queryChanged;
427  bool initQueryData;
428  bool uniqueMatches; // NOTE(review): presumably the value behind uniqueMappingsOnly()
429  bool saveMappings;
430  bool maxBondMappingsOnly; // NOTE(review): presumably distinguishes findMaxBondMappings() from findAllMappings()
// Counters and limits.
431  std::size_t numQueryAtoms;
432  std::size_t numQueryBonds;
433  std::size_t numTargetAtoms;
434  std::size_t numTargetBonds;
435  std::size_t maxAtomSubstructureSize;
436  std::size_t maxBondSubstructureSize;
437  std::size_t currNumNullNodes;
438  std::size_t minNumNullNodes;
439  std::size_t maxNumMappings; // NOTE(review): presumably the value behind set/getMaxNumMappings()
440  std::size_t minSubstructureSize; // NOTE(review): presumably the value behind set/getMinSubstructureSize()
441  std::size_t currNodeIdx;
442  };
443  } // namespace Chem
444 } // namespace CDPL
445 
446 #endif // CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
CDPL::Chem::MaxCommonAtomSubstructureSearch
MaxCommonAtomSubstructureSearch.
Definition: MaxCommonAtomSubstructureSearch.hpp:61
CDPL::Chem::MaxCommonAtomSubstructureSearch::MappingIterator
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonAtomSubstructureSearch.hpp:71
ObjectStack.hpp
Definition of the class CDPL::Util::ObjectStack.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsBegin
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
APIPrefix.hpp
Definition of the preprocessor macro CDPL_CHEM_API.
CDPL::Chem::MaxCommonAtomSubstructureSearch::end
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMaxNumMappings
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CDPL_CHEM_API
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsEnd
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Chem::MaxCommonAtomSubstructureSearch::begin
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::MaxCommonAtomSubstructureSearch::SharedPointer
std::shared_ptr< MaxCommonAtomSubstructureSearch > SharedPointer
Definition: MaxCommonAtomSubstructureSearch.hpp:66
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Chem::MaxCommonAtomSubstructureSearch::mappingExists
bool mappingExists(const MolecularGraph &target)
Searches for a common substructure between the query and the specified target molecular graph.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsEnd
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::setMinSubstructureSize
void setMinSubstructureSize(std::size_t min_size)
Specifies the minimum accepted common substructure size.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Chem::MaxCommonAtomSubstructureSearch::begin
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::ConstMappingIterator
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonAtomSubstructureSearch.hpp:76
CDPL::Chem::MaxCommonAtomSubstructureSearch::getNumMappings
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructures.
CDPL::Util::operator<
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
CDPL::Chem::MaxCommonAtomSubstructureSearch::uniqueMappingsOnly
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
CDPL::Chem::MatchExpression< MolecularGraph >::SharedPointer
std::shared_ptr< MatchExpression > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MatchExpression instances.
Definition: MatchExpression.hpp:81
CDPL::Chem::MaxCommonAtomSubstructureSearch::setMaxNumMappings
void setMaxNumMappings(std::size_t max_num_mappings)
Specifies a limit on the number of stored atom/bond mappings.
CDPL::Chem::MaxCommonAtomSubstructureSearch::MaxCommonAtomSubstructureSearch
MaxCommonAtomSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a MaxCommonAtomSubstructureSearch instance for the specified query structure.
AtomBondMapping.hpp
Definition of the class CDPL::Chem::AtomBondMapping.
CDPL::Chem::MaxCommonAtomSubstructureSearch::findMaxBondMappings
bool findMaxBondMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
CDPL::Chem::MaxCommonAtomSubstructureSearch::end
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::MaxCommonAtomSubstructureSearch
MaxCommonAtomSubstructureSearch()
Constructs and initializes a MaxCommonAtomSubstructureSearch instance.
MatchExpression.hpp
Definition of the class CDPL::Chem::MatchExpression.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Chem::MaxCommonAtomSubstructureSearch::setQuery
void setQuery(const MolecularGraph &query)
Specifies a new query structure.
CDPL::Chem::MaxCommonAtomSubstructureSearch::findAllMappings
bool findAllMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
CDPL::Chem::MaxCommonAtomSubstructureSearch::uniqueMappingsOnly
void uniqueMappingsOnly(bool unique)
Specifies whether or not to store only unique atom/bond mappings.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMapping
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::AtomBondMapping
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsBegin
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMinSubstructureSize
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
CDPL::Util::operator>
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMapping
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::MaxCommonAtomSubstructureSearch::~MaxCommonAtomSubstructureSearch
~MaxCommonAtomSubstructureSearch()
Destructor.