Chemical Data Processing Library C++ API - Version 1.4.0
CommonConnectedSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * CommonConnectedSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_COMMONCONNECTEDSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_COMMONCONNECTEDSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <deque>
34 #include <set>
35 #include <cstddef>
36 #include <memory>
37 #include <functional>
38 
39 #include <boost/iterator/indirect_iterator.hpp>
40 
41 #include "CDPL/Chem/APIPrefix.hpp"
44 #include "CDPL/Util/BitSet.hpp"
46 
47 
48 namespace CDPL
49 {
50 
51  namespace Chem
52  {
53 
54  class MolecularGraph;
55  class Atom;
56  class Bond;
57 
74  {
75 
// Internal list type: raw pointers to AtomBondMapping objects. The public
// mapping iterator types below are defined over this container.
76  typedef std::vector<AtomBondMapping*> ABMappingList;
77 
// SharedPointer type of MatchExpression<MolecularGraph> (graph-level match expression).
78  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
81 
82  public:
// A reference-counted smart pointer for dynamically allocated
// CommonConnectedSubstructureSearch instances.
87  typedef std::shared_ptr<CommonConnectedSubstructureSearch> SharedPointer;
88 
// A mutable random access iterator used to iterate over the stored atom/bond
// mapping objects (dereferences the stored pointers via boost::indirect_iterator).
92  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
93 
// A constant random access iterator used to iterate over the stored atom/bond
// mapping objects.
97  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
98 
// Type of the functor used to retrieve the atom-level Chem::MatchExpression for a query atom.
// NOTE(review): AtomMatchExprPtr is not declared anywhere in this listing - confirm where it comes from.
102  typedef std::function<const AtomMatchExprPtr&(const Atom&)> AtomMatchExpressionFunction;
103 
// Type of the functor used to retrieve the bond-level Chem::MatchExpression for a query bond.
// NOTE(review): BondMatchExprPtr is not declared anywhere in this listing - confirm where it comes from.
107  typedef std::function<const BondMatchExprPtr&(const Bond&)> BondMatchExpressionFunction;
108 
// Type of the functor used to retrieve the graph-level Chem::MatchExpression
// for the query molecular graph.
112  typedef std::function<const MolGraphMatchExprPtr&(const MolecularGraph&)> MolecularGraphMatchExpressionFunction;
113 
118 
124 
126 
133 
135 
141 
147 
153 
// Specifies a new query structure.
// query: the molecular graph to use as query; taken by const reference.
158  void setQuery(const MolecularGraph& query);
159 
// Searches for a common connected substructure between the query and the
// given target molecular graph.
// NOTE(review): the return value presumably tells whether one was found - confirm.
173  bool mappingExists(const MolecularGraph& target);
174 
// Searches for all possible atom/bond mappings of connected query subgraphs
// to substructures of the given target.
// NOTE(review): the return value presumably tells whether any mapping was found - confirm.
189  bool findAllMappings(const MolecularGraph& target);
190 
// Searches for all maximum-sized atom/bond mappings of connected query
// subgraphs to substructures of the given target.
// NOTE(review): the return value presumably tells whether any mapping was found - confirm.
205  bool findMaxMappings(const MolecularGraph& target);
206 
// Returns the number of atom/bond mappings that were recorded in the last search.
212  std::size_t getNumMappings() const;
213 
// Returns a non-const reference to the stored atom/bond mapping object at index idx.
// NOTE(review): behaviour for an out-of-range idx is not visible here - confirm.
220  AtomBondMapping& getMapping(std::size_t idx);
221 
// Returns a const reference to the stored atom/bond mapping object at index idx.
228  const AtomBondMapping& getMapping(std::size_t idx) const;
229 
235 
241 
247 
253 
259 
265 
271 
277 
// Specifies whether or not to store only unique atom/bond mappings.
289  void uniqueMappingsOnly(bool unique);
290 
// Tells whether duplicate atom/bond mappings are discarded.
296  bool uniqueMappingsOnly() const;
297 
// Specifies a limit on the number of stored atom/bond mappings.
// NOTE(review): the meaning of special values (e.g. 0) is not visible here - confirm.
308  void setMaxNumMappings(std::size_t max_num_mappings);
309 
// Returns the specified limit on the number of stored atom/bond mappings.
315  std::size_t getMaxNumMappings() const;
316 
// Specifies the minimum accepted common substructure size.
// NOTE(review): the unit of the size (atoms vs. bonds) is not visible here - confirm.
326  void setMinSubstructureSize(std::size_t min_size);
327 
// Returns the minimum accepted common substructure size.
333  std::size_t getMinSubstructureSize() const;
334 
335  private:
// Private helper declarations. Only the signatures are part of this header;
// the notes below are reviewer assumptions derived from the names and must be
// confirmed against the implementation file.

// NOTE(review): presumably prepares the search state for the given molecular graph - confirm.
336  bool init(const MolecularGraph&);
337 
// NOTE(review): presumably fills atomMatchExprTable/bondMatchExprTable/molGraphMatchExpr - confirm.
338  void initMatchExpressions();
339 
// NOTE(review): presumably populate atomEquivMatrix and bondEquivMatrix - confirm.
340  bool findEquivAtoms();
341  bool findEquivBonds();
342 
// Three overloads taking zero, one and two std::size_t arguments.
// NOTE(review): the arguments look like atom indices - confirm.
343  bool mapAtoms();
344  bool mapAtoms(std::size_t);
345  bool mapAtoms(std::size_t, std::size_t);
346 
// Const member with two std::size_t reference parameters that can be written by the callee.
347  bool nextTargetAtom(std::size_t, std::size_t&, std::size_t&) const;
348 
349  bool mappingFound();
350 
// NOTE(review): "post mapping" presumably refers to match expressions evaluated
// on a complete mapping (see postMappingMatchAtoms/postMappingMatchBonds) - confirm.
351  bool hasPostMappingMatchExprs() const;
352  bool foundMappingMatches(const AtomBondMapping*) const;
353 
// NOTE(review): presumably the duplicate checks behind uniqueMappingsOnly() - confirm.
354  bool foundMappingUnique();
355  bool mappingAlreadySeen(const AtomBondMapping*) const;
356 
357  void clearMappings();
358 
// NOTE(review): ownership of the AtomBondMapping objects (foundMappings vs.
// mappingCache) is not visible in this header - confirm before changing these.
359  void freeAtomBondMappings();
360  void freeAtomBondMapping();
361 
// Returns a raw AtomBondMapping pointer; who releases it is not visible here.
362  AtomBondMapping* createAtomBondMapping();
363 
// Private helper class bundling four bit masks: one each for query atoms,
// target atoms, query bonds and target bonds. It declares operator< and
// operator>, and is used as the element type of the std::set typedef
// UniqueMappingList declared further down in the enclosing class.
// Only declarations are present in this header.
364  class ABMappingMask
365  {
366 
367  public:
// Initializers for the atom masks; the std::size_t argument is presumably
// the number of atoms (mask size) - TODO confirm.
368  void initQueryAtomMask(std::size_t);
369  void initTargetAtomMask(std::size_t);
370 
// Initializers for the bond masks; argument presumably the number of bonds - TODO confirm.
371  void initQueryBondMask(std::size_t);
372  void initTargetBondMask(std::size_t);
373 
// Set a single bit in the query/target atom mask; argument presumably the atom index - TODO confirm.
374  void setQueryAtomBit(std::size_t);
375  void setTargetAtomBit(std::size_t);
376 
// Counterparts of the setters above for the atom masks.
377  void resetQueryAtomBit(std::size_t);
378  void resetTargetAtomBit(std::size_t);
379 
// Only the target atom mask has a test accessor in this interface.
380  bool testTargetAtomBit(std::size_t) const;
381 
// Set a single bit in the query/target bond mask.
382  void setQueryBondBit(std::size_t);
383  void setTargetBondBit(std::size_t);
384 
// Bond masks have no per-bit reset; they are reset together.
385  void resetBondMasks();
386 
// Ordering operators; operator< is what std::set<ABMappingMask> relies on by default.
// NOTE(review): which masks take part in the comparison is not visible here - confirm.
387  bool operator<(const ABMappingMask&) const;
388  bool operator>(const ABMappingMask&) const;
389 
390  private:
391  Util::BitSet queryAtomMask;
392  Util::BitSet targetAtomMask;
393  Util::BitSet queryBondMask;
394  Util::BitSet targetBondMask;
395  };
396 
// Private container aliases. Note that AtomMappingTable and AtomList are the
// same underlying type (std::vector<const Atom*>), as are AtomIndexList and
// BondMappingStack (std::vector<std::size_t>); the distinct names only document intent.
397  typedef std::vector<Util::BitSet> BitMatrix;
398  typedef std::vector<const Atom*> AtomMappingTable;
399  typedef std::vector<std::size_t> AtomIndexList;
400  typedef std::vector<std::size_t> BondMappingStack;
401  typedef std::deque<std::size_t> AtomQueue;
// Ordered set of mapping masks; ordering comes from ABMappingMask::operator<.
402  typedef std::set<ABMappingMask> UniqueMappingList;
403  typedef std::vector<const Atom*> AtomList;
404  typedef std::vector<const Bond*> BondList;
405  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable;
406  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable;
407  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
408 
// Query and target graphs are held as raw pointers.
// NOTE(review): presumably non-owning, so the graphs must outlive the search - confirm.
409  const MolecularGraph* query;
410  const MolecularGraph* target;
// Functors of the public *MatchExpressionFunction types.
411  AtomMatchExpressionFunction atomMatchExprFunc;
412  BondMatchExpressionFunction bondMatchExprFunc;
413  MolecularGraphMatchExpressionFunction molGraphMatchExprFunc;
// One BitSet per row.
// NOTE(review): presumably rows are query atoms/bonds and bits are target atoms/bonds - confirm.
414  BitMatrix atomEquivMatrix;
415  BitMatrix bondEquivMatrix;
// Working state of the search; exact roles are defined by the implementation file.
416  AtomQueue termQueryAtoms;
417  AtomIndexList termTargetAtoms;
418  BondMappingStack bondMappingStack;
419  AtomMappingTable queryAtomMapping;
420  ABMappingMask mappingMask;
421  Util::BitSet hiddenQueryAtomMask;
422  Util::BitSet termQueryAtomMask;
423  Util::BitSet termTargetAtomMask;
// Container behind the public mapping iterator types and getMapping().
424  ABMappingList foundMappings;
425  UniqueMappingList uniqueMappings;
426  AtomMatchExprTable atomMatchExprTable;
427  BondMatchExprTable bondMatchExprTable;
428  MolGraphMatchExprPtr molGraphMatchExpr;
429  AtomList postMappingMatchAtoms;
430  BondList postMappingMatchBonds;
431  MappingCache mappingCache;
// Flags. NOTE(review): uniqueMatches presumably backs uniqueMappingsOnly() - confirm.
432  bool queryChanged;
433  bool initQueryData;
434  bool uniqueMatches;
435  bool saveMappings;
436  bool maxMappingsOnly;
// Cached sizes and limits. NOTE(review): maxNumMappings and minSubstructureSize
// presumably back the corresponding public setters/getters - confirm.
437  std::size_t numQueryAtoms;
438  std::size_t numQueryBonds;
439  std::size_t numTargetAtoms;
440  std::size_t numTargetBonds;
441  std::size_t numMappedAtoms;
442  std::size_t currMaxSubstructureSize;
443  std::size_t maxBondStackSize;
444  std::size_t maxNumMappings;
445  std::size_t minSubstructureSize;
446  };
447  } // namespace Chem
448 } // namespace CDPL
449 
450 #endif // CDPL_CHEM_COMMONCONNECTEDSUBSTRUCTURESEARCH_HPP
Definition of class CDPL::Chem::AtomBondMapping.
Declaration of type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of class CDPL::Chem::MatchExpression.
Definition of class CDPL::Util::ObjectStack.
Data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Abstract base class representing a chemical bond between two Chem::Atom instances.
Definition: Bond.hpp:54
Enumerates all maximal common connected substructures shared between a query and a target molecular g...
Definition: CommonConnectedSubstructureSearch.hpp:74
std::function< const MolGraphMatchExprPtr &(const MolecularGraph &)> MolecularGraphMatchExpressionFunction
Type of the functor used to retrieve the graph-level Chem::MatchExpression for the query molecular gr...
Definition: CommonConnectedSubstructureSearch.hpp:112
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructur...
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: CommonConnectedSubstructureSearch.hpp:92
std::function< const AtomMatchExprPtr &(const Atom &)> AtomMatchExpressionFunction
Type of the functor used to retrieve the atom-level Chem::MatchExpression for a query atom.
Definition: CommonConnectedSubstructureSearch.hpp:102
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
void setQuery(const MolecularGraph &query)
Specifies a new query structure.
void setMaxNumMappings(std::size_t max_num_mappings)
Specifies a limit on the number of stored atom/bond mappings.
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
bool mappingExists(const MolecularGraph &target)
Searches for a common connected substructure between the query and the specified target molecular gra...
void setAtomMatchExpressionFunction(const AtomMatchExpressionFunction &func)
Installs a function that resolves the atom-level Chem::MatchExpression for a query atom.
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: CommonConnectedSubstructureSearch.hpp:97
bool findMaxMappings(const MolecularGraph &target)
Searches for all maximum-sized atom/bond mappings of connected query subgraphs to substructures of th...
std::shared_ptr< CommonConnectedSubstructureSearch > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated CommonConnectedSubstructureSearch...
Definition: CommonConnectedSubstructureSearch.hpp:87
void setMolecularGraphMatchExpressionFunction(const MolecularGraphMatchExpressionFunction &func)
Installs a function that resolves the graph-level Chem::MatchExpression for the query molecular graph...
void setMinSubstructureSize(std::size_t min_size)
Specifies the minimum accepted common substructure size.
bool findAllMappings(const MolecularGraph &target)
Searches for all possible atom/bond mappings of connected query subgraphs to substructures of the spe...
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
CommonConnectedSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a CommonConnectedSubstructureSearch instance for the specified query struc...
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CommonConnectedSubstructureSearch(const CommonConnectedSubstructureSearch &)=delete
std::function< const BondMatchExprPtr &(const Bond &)> BondMatchExpressionFunction
Type of the functor used to retrieve the bond-level Chem::MatchExpression for a query bond.
Definition: CommonConnectedSubstructureSearch.hpp:107
CommonConnectedSubstructureSearch & operator=(const CommonConnectedSubstructureSearch &)=delete
void setBondMatchExpressionFunction(const BondMatchExpressionFunction &func)
Installs a function that resolves the bond-level Chem::MatchExpression for a query bond.
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
void uniqueMappingsOnly(bool unique)
Specifies whether or not to store only unique atom/bond mappings.
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CommonConnectedSubstructureSearch()
Constructs and initializes a CommonConnectedSubstructureSearch instance.
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
Generic boolean expression interface for the implementation of query/target object equivalence tests ...
Definition: MatchExpression.hpp:75
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
The namespace of the Chemical Data Processing Library.