Chemical Data Processing Library C++ API - Version 1.0.0
MaxCommonAtomSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * MaxCommonAtomSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <set>
34 #include <cstddef>
35 
36 #include <boost/iterator/indirect_iterator.hpp>
37 
38 #include "CDPL/Chem/APIPrefix.hpp"
41 #include "CDPL/Util/BitSet.hpp"
43 
44 
45 namespace CDPL
46 {
47 
48  namespace Chem
49  {
50 
51  class MolecularGraph;
52  class Atom;
53  class Bond;
54 
60  {
61 
/** List of pointers to AtomBondMapping objects; the iterator typedefs below are boost::indirect_iterator adaptors over this list, so they yield AtomBondMapping references rather than pointers. */
62  typedef std::vector<AtomBondMapping*> ABMappingList;
63 
64  public:
/** \brief A mutable random access iterator used to iterate over the stored atom/bond mapping objects. */
68  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
69 
/** \brief A constant random access iterator used to iterate over the stored atom/bond mapping objects. */
73  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
74 
79 
85 
92 
/**
 * \brief Specifies a new query structure.
 * \param query The new query molecular graph.
 */
97  void setQuery(const MolecularGraph& query);
98 
/**
 * \brief Searches for a common substructure between the query and the specified target molecular graph.
 * \param target The target molecular graph to search.
 * NOTE(review): the meaning of the bool result is not visible in this header;
 * presumably \c true when a common substructure was found - confirm.
 */
112  bool mappingExists(const MolecularGraph& target);
113 
/**
 * \brief Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molecular graph.
 * \param target The target molecular graph to search.
 * NOTE(review): the meaning of the bool result is not visible in this header - confirm.
 */
129  bool findAllMappings(const MolecularGraph& target);
130 
/**
 * \brief Searches for atom/bond mappings of query subgraphs to substructures of the specified target molecular graph.
 * \param target The target molecular graph to search.
 * NOTE(review): how this differs from findAllMappings() is not visible in this
 * header; the name suggests only mappings with a maximum number of bonds are
 * reported - confirm against the implementation.
 */
146  bool findMaxBondMappings(const MolecularGraph& target);
147 
/** \brief Returns the number of atom/bond mappings that were recorded in the last search for common substructures. */
153  std::size_t getNumMappings() const;
154 
/**
 * \brief Returns a non-const reference to the stored atom/bond mapping object at index \a idx.
 * \param idx The index of the mapping object.
 * NOTE(review): behaviour for an out-of-range \a idx is not visible here - confirm.
 */
161  AtomBondMapping& getMapping(std::size_t idx);
162 
/**
 * \brief Returns a const reference to the stored atom/bond mapping object at index \a idx.
 * \param idx The index of the mapping object.
 * NOTE(review): behaviour for an out-of-range \a idx is not visible here - confirm.
 */
169  const AtomBondMapping& getMapping(std::size_t idx) const;
170 
176 
182 
188 
194 
200 
206 
212 
218 
/**
 * \brief Specifies whether or not to store only unique atom/bond mappings.
 * \param unique \c true to store only unique mappings, \c false otherwise.
 */
230  void uniqueMappingsOnly(bool unique);
231 
/** \brief Tells whether duplicate atom/bond mappings are discarded. */
237  bool uniqueMappingsOnly() const;
238 
/**
 * \brief Specifies a limit on the number of stored atom/bond mappings.
 * \param max_num_mappings The maximum number of mappings to store.
 * NOTE(review): whether a particular value (e.g. zero) disables the limit is not visible here - confirm.
 */
249  void setMaxNumMappings(std::size_t max_num_mappings);
250 
/** \brief Returns the specified limit on the number of stored atom/bond mappings. */
256  std::size_t getMaxNumMappings() const;
257 
/**
 * \brief Specifies the minimum accepted common substructure size.
 * \param min_size The minimum accepted size.
 * NOTE(review): the unit of the size (atoms vs. bonds) is not stated here - confirm.
 */
267  void setMinSubstructureSize(std::size_t min_size);
268 
/** \brief Returns the minimum accepted common substructure size. */
274  std::size_t getMinSubstructureSize() const;
275 
276  private:
/* Forward declaration; AGNode is defined further down in this class. */
277  class AGNode;
278 
280 
282 
/*
 * Private helper declarations. Only the signatures are visible in this
 * header, so the notes below are inferred from the names and must be
 * confirmed against the implementation.
 */
283  bool init(const MolecularGraph&);
284 
285  void initMatchExpressions();
286 
/* presumably builds the association graph that the AGNode/AGEdge classes represent - confirm */
287  bool buildAssocGraph();
288 
/* presumably the clique search over the association graph; meaning of the std::size_t argument is not visible - confirm */
289  bool findAssocGraphCliques(std::size_t);
290  bool isLegal(const AGNode*);
291 
292  bool mappingFound();
293 
294  bool hasPostMappingMatchExprs() const;
295  bool foundMappingMatches(const AtomBondMapping*) const;
296 
297  bool foundMappingUnique();
298 
299  void clearMappings();
300 
/* NOTE(review): both a singular and a plural freeAtomBondMapping(s) helper are declared; their division of work is not visible here. */
301  void freeAtomBondMapping();
302  void freeAtomBondMappings();
303  void freeAssocGraph();
304 
305  AtomBondMapping* createAtomBondMapping();
306 
/* Forward declaration so that the declarations below and AGNode can refer to AGEdge before its definition. */
307  class AGEdge;
308 
/* Node/edge factories taking a pair of atom resp. bond pointers; which argument is the query and which the target item is not visible here - confirm. */
309  AGNode* allocAGNode(const Atom*, const Atom*);
310  AGEdge* allocAGEdge(const Bond*, const Bond*);
311 
/* List of pointers to const AGEdge objects. */
312  typedef std::vector<const AGEdge*> AGraphEdgeList;
313 
/*
 * Graph node type used internally by the search (presumably a node of the
 * association graph built by buildAssocGraph() - confirm). A node stores an
 * index, a query atom pointer, an "assoc" atom pointer, a bitset of
 * connected nodes and a list of edges.
 */
314  class AGNode
315  {
316 
317  public:
/* Accessors for the stored query atom pointer. */
318  void setQueryAtom(const Atom*);
319  const Atom* getQueryAtom() const;
320 
/* Accessors for the stored "assoc" atom pointer (presumably the associated target atom - confirm). */
321  void setAssocAtom(const Atom*);
322  const Atom* getAssocAtom() const;
323 
324  void addEdge(const AGEdge*);
325 
/* Connectivity queries against another node; presumably backed by connNodes and bondEdges - confirm. */
326  bool isConnected(const AGNode*) const;
327  const AGEdge* findEdge(const AGNode*) const;
328 
329  void clear();
330 
/* Only a setter is declared for the index; no matching getter is visible in this class. */
331  void setIndex(std::size_t idx);
332 
333  private:
334  std::size_t index;
335  const Atom* queryAtom;
336  const Atom* assocAtom;
/* Bitset of connected nodes; presumably indexed by node index - confirm. */
337  Util::BitSet connNodes;
338  AGraphEdgeList bondEdges;
339  };
340 
/*
 * Graph edge type used internally by the search (presumably an edge of the
 * association graph - confirm). An edge stores a query bond pointer, an
 * "assoc" bond pointer and pointers to its two end nodes.
 */
341  class AGEdge
342  {
343 
344  public:
/* Accessors for the stored query bond pointer. */
345  void setQueryBond(const Bond*);
346  const Bond* getQueryBond() const;
347 
/* Accessors for the stored "assoc" bond pointer (presumably the associated target bond - confirm). */
348  void setAssocBond(const Bond*);
349  const Bond* getAssocBond() const;
350 
/* Accessors for the two end nodes of the edge. */
351  void setNode1(const AGNode*);
352  void setNode2(const AGNode*);
353 
354  const AGNode* getNode1() const;
355  const AGNode* getNode2() const;
356 
/* presumably returns the end node that is not the given one; behaviour for a node that is not an end of this edge is not visible - confirm */
357  const AGNode* getOther(const AGNode*) const;
358 
359  private:
360  const Bond* queryBond;
361  const Bond* assocBond;
362  const AGNode* node1;
363  const AGNode* node2;
364  };
365 
/*
 * Bit-mask record of a mapping: one bitset each for query atoms, target
 * atoms, query bonds and target bonds. The class is used as the element type
 * of a std::set (UniqueMappingList), which orders its elements with
 * operator< by default.
 */
366  class ABMappingMask
367  {
368 
369  public:
/* Initializers for the four masks; the std::size_t argument is presumably the number of atoms/bonds (mask size) - confirm. */
370  void initQueryAtomMask(std::size_t);
371  void initTargetAtomMask(std::size_t);
372 
373  void initQueryBondMask(std::size_t);
374  void initTargetBondMask(std::size_t);
375 
/* Bit setters for the four masks; the std::size_t argument is presumably the atom/bond index - confirm. */
376  void setQueryAtomBit(std::size_t);
377  void setTargetAtomBit(std::size_t);
378 
379  void setQueryBondBit(std::size_t);
380  void setTargetBondBit(std::size_t);
381 
382  void reset();
383 
/* Ordering operators; the comparison criterion over the four masks is not visible in this header. */
384  bool operator<(const ABMappingMask&) const;
385  bool operator>(const ABMappingMask&) const;
386 
387  private:
388  Util::BitSet queryAtomMask;
389  Util::BitSet targetAtomMask;
390  Util::BitSet queryBondMask;
391  Util::BitSet targetBondMask;
392  };
393 
/* Shared pointer to a match expression that applies to a whole molecular graph. */
394  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
395 
/* Container typedefs used by the data members below. */
396  typedef std::vector<AGNode*> AGraphNodeList;
397  typedef std::vector<AGraphNodeList> AGraphNodeMatrix;
/* Set of mapping masks; presumably used to detect duplicate mappings - confirm. */
398  typedef std::set<ABMappingMask> UniqueMappingList;
399  typedef std::vector<const Atom*> AtomList;
400  typedef std::vector<const Bond*> BondList;
/* Tables of shared pointers to per-atom and per-bond match expressions. */
401  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable;
402  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable;
/* Util::ObjectStack instantiations; presumably object pools behind allocAGNode(), allocAGEdge() and createAtomBondMapping() - confirm. */
403  typedef Util::ObjectStack<AGNode> NodeCache;
404  typedef Util::ObjectStack<AGEdge> EdgeCache;
405  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
406 
/* Query and target graphs, held as raw pointers to const; ownership and lifetime requirements are not visible in this header. */
407  const MolecularGraph* query;
408  const MolecularGraph* target;
/* Working data of the search. */
409  AGraphNodeMatrix nodeMatrix;
410  ABMappingList foundMappings;
411  UniqueMappingList uniqueMappings;
412  AGraphEdgeList cliqueEdges;
413  AGraphNodeList cliqueNodes;
414  ABMappingMask mappingMask;
415  AtomMatchExprTable atomMatchExprTable;
416  BondMatchExprTable bondMatchExprTable;
417  MolGraphMatchExprPtr molGraphMatchExpr;
418  AtomList postMappingMatchAtoms;
419  BondList postMappingMatchBonds;
420  NodeCache nodeCache;
421  EdgeCache edgeCache;
422  MappingCache mappingCache;
/* Boolean state flags; uniqueMatches presumably backs uniqueMappingsOnly() - confirm. */
423  bool queryChanged;
424  bool initQueryData;
425  bool uniqueMatches;
426  bool saveMappings;
427  bool maxBondMappingsOnly;
/* Size counters and limits; maxNumMappings and minSubstructureSize presumably back the corresponding public setters/getters - confirm. */
428  std::size_t numQueryAtoms;
429  std::size_t numQueryBonds;
430  std::size_t numTargetAtoms;
431  std::size_t numTargetBonds;
432  std::size_t maxAtomSubstructureSize;
433  std::size_t maxBondSubstructureSize;
434  std::size_t currNumNullNodes;
435  std::size_t minNumNullNodes;
436  std::size_t maxNumMappings;
437  std::size_t minSubstructureSize;
438  std::size_t currNodeIdx;
439  };
440  } // namespace Chem
441 } // namespace CDPL
442 
443 #endif // CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
CDPL::Chem::MaxCommonAtomSubstructureSearch
MaxCommonAtomSubstructureSearch.
Definition: MaxCommonAtomSubstructureSearch.hpp:60
CDPL::Chem::MaxCommonAtomSubstructureSearch::MappingIterator
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonAtomSubstructureSearch.hpp:68
ObjectStack.hpp
Definition of the class CDPL::Util::ObjectStack.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsBegin
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
APIPrefix.hpp
Definition of the preprocessor macro CDPL_CHEM_API.
CDPL::Chem::MaxCommonAtomSubstructureSearch::end
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMaxNumMappings
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CDPL_CHEM_API
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsEnd
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Chem::MaxCommonAtomSubstructureSearch::begin
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Chem::MaxCommonAtomSubstructureSearch::mappingExists
bool mappingExists(const MolecularGraph &target)
Searches for a common substructure between the query and the specified target molecular graph.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsEnd
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::setMinSubstructureSize
void setMinSubstructureSize(std::size_t min_size)
Specifies the minimum accepted common substructure size.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Chem::MaxCommonAtomSubstructureSearch::begin
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::ConstMappingIterator
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonAtomSubstructureSearch.hpp:73
CDPL::Chem::MaxCommonAtomSubstructureSearch::getNumMappings
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructures.
CDPL::Util::operator<
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
CDPL::Chem::MaxCommonAtomSubstructureSearch::uniqueMappingsOnly
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
CDPL::Chem::MatchExpression< MolecularGraph >::SharedPointer
std::shared_ptr< MatchExpression > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MatchExpression instances.
Definition: MatchExpression.hpp:81
CDPL::Chem::MaxCommonAtomSubstructureSearch::setMaxNumMappings
void setMaxNumMappings(std::size_t max_num_mappings)
Specifies a limit on the number of stored atom/bond mappings.
CDPL::Chem::MaxCommonAtomSubstructureSearch::MaxCommonAtomSubstructureSearch
MaxCommonAtomSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a MaxCommonAtomSubstructureSearch instance for the specified query structure.
AtomBondMapping.hpp
Definition of the class CDPL::Chem::AtomBondMapping.
CDPL::Chem::MaxCommonAtomSubstructureSearch::findMaxBondMappings
bool findMaxBondMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
CDPL::Chem::MaxCommonAtomSubstructureSearch::end
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::MaxCommonAtomSubstructureSearch
MaxCommonAtomSubstructureSearch()
Constructs and initializes a MaxCommonAtomSubstructureSearch instance.
MatchExpression.hpp
Definition of the class CDPL::Chem::MatchExpression.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Chem::MaxCommonAtomSubstructureSearch::setQuery
void setQuery(const MolecularGraph &query)
Specifies a new query structure.
CDPL::Chem::MaxCommonAtomSubstructureSearch::findAllMappings
bool findAllMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molecular graph.
CDPL::Chem::MaxCommonAtomSubstructureSearch::uniqueMappingsOnly
void uniqueMappingsOnly(bool unique)
Specifies whether or not to store only unique atom/bond mappings.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMapping
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::AtomBondMapping
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMappingsBegin
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMinSubstructureSize
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
CDPL::Util::operator>
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
CDPL::Chem::MaxCommonAtomSubstructureSearch::getMapping
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::MaxCommonAtomSubstructureSearch::~MaxCommonAtomSubstructureSearch
~MaxCommonAtomSubstructureSearch()
Destructor.