Chemical Data Processing Library C++ API - Version 1.1.0
CommonConnectedSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * CommonConnectedSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_COMMONCONNECTEDSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_COMMONCONNECTEDSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <deque>
34 #include <set>
35 #include <cstddef>
36 #include <memory>
37 
38 #include <boost/iterator/indirect_iterator.hpp>
39 
40 #include "CDPL/Chem/APIPrefix.hpp"
43 #include "CDPL/Util/BitSet.hpp"
45 
46 
47 namespace CDPL
48 {
49 
50  namespace Chem
51  {
52 
53  class MolecularGraph;
54  class Atom;
55  class Bond;
56 
62  {
63 
64  typedef std::vector<AtomBondMapping*> ABMappingList; // private alias: list of raw AtomBondMapping pointers
65 
66  public:
67  typedef std::shared_ptr<CommonConnectedSubstructureSearch> SharedPointer; // shared-ownership handle type for instances of this class
68 
72  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator; // mutable iterator over the stored mappings; dereferences the stored pointer and yields AtomBondMapping&
73 
77  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator; // constant counterpart; yields const AtomBondMapping&
78 
83 
89 
96 
101  void setQuery(const MolecularGraph& query); // specifies a new query structure
102 
116  bool mappingExists(const MolecularGraph& target); // searches for a common connected substructure between the query and target
117 
132  bool findAllMappings(const MolecularGraph& target); // searches for all possible atom/bond mappings of connected query subgraphs to substructures of target
133 
148  bool findMaxMappings(const MolecularGraph& target); // like findAllMappings(), but searches for the maximum-sized mappings only
149 
155  std::size_t getNumMappings() const; // number of atom/bond mappings recorded in the last search
156 
163  AtomBondMapping& getMapping(std::size_t idx); // non-const reference to the stored mapping at index idx
164 
171  const AtomBondMapping& getMapping(std::size_t idx) const; // const reference to the stored mapping at index idx
172 
178 
184 
190 
196 
202 
208 
214 
220 
232  void uniqueMappingsOnly(bool unique); // specifies whether only unique atom/bond mappings are stored
233 
239  bool uniqueMappingsOnly() const; // tells whether duplicate atom/bond mappings are discarded
240 
251  void setMaxNumMappings(std::size_t max_num_mappings); // specifies a limit on the number of stored atom/bond mappings
252 
258  std::size_t getMaxNumMappings() const; // returns the specified limit on the number of stored mappings
259 
269  void setMinSubstructureSize(std::size_t min_size); // specifies the minimum accepted common substructure size
270 
276  std::size_t getMinSubstructureSize() const; // returns the minimum accepted common substructure size
277 
278  private:
280 
282 
283  bool init(const MolecularGraph&); // NOTE(review): private helpers from here on are only declared in this header; their behavior must be confirmed in the implementation file
284 
285  void initMatchExpressions();
286 
287  bool findEquivAtoms();
288  bool findEquivBonds();
289 
290  bool mapAtoms(); // three overloads taking zero, one and two std::size_t arguments
291  bool mapAtoms(std::size_t);
292  bool mapAtoms(std::size_t, std::size_t);
293 
294  bool nextTargetAtom(std::size_t, std::size_t&, std::size_t&) const; // the two non-const reference parameters can be written by the callee (presumably outputs - TODO confirm)
295 
296  bool mappingFound();
297 
298  bool hasPostMappingMatchExprs() const;
299  bool foundMappingMatches(const AtomBondMapping*) const;
300 
301  bool foundMappingUnique(); // non-const, unlike mappingAlreadySeen() below
302  bool mappingAlreadySeen(const AtomBondMapping*) const;
303 
304  void clearMappings();
305 
306  void freeAtomBondMappings();
307  void freeAtomBondMapping();
308 
309  AtomBondMapping* createAtomBondMapping(); // returns a raw pointer; NOTE(review): ownership is not visible here - presumably tied to the mappingCache member, confirm
310 
311  class ABMappingMask // private helper type bundling four Util::BitSet masks: query atoms, target atoms, query bonds, target bonds
312  {
313 
314  public:
315  void initQueryAtomMask(std::size_t); // NOTE(review): the std::size_t argument of the init*Mask() functions is presumably the mask size in bits - confirm
316  void initTargetAtomMask(std::size_t);
317 
318  void initQueryBondMask(std::size_t);
319  void initTargetBondMask(std::size_t);
320 
321  void setQueryAtomBit(std::size_t); // NOTE(review): the argument of the set/reset/test functions is presumably a bit (atom or bond) index - confirm
322  void setTargetAtomBit(std::size_t);
323 
324  void resetQueryAtomBit(std::size_t);
325  void resetTargetAtomBit(std::size_t);
326 
327  bool testTargetAtomBit(std::size_t) const; // the only test accessor declared; the other three masks have none
328 
329  void setQueryBondBit(std::size_t); // bond bits have no per-bit reset; see resetBondMasks()
330  void setTargetBondBit(std::size_t);
331 
332  void resetBondMasks(); // no atom-mask counterpart is declared
333 
334  bool operator<(const ABMappingMask&) const; // needed because ABMappingMask is the key type of std::set<ABMappingMask> (UniqueMappingList), which orders keys with operator< by default
335  bool operator>(const ABMappingMask&) const;
336 
337  private:
338  Util::BitSet queryAtomMask;
339  Util::BitSet targetAtomMask;
340  Util::BitSet queryBondMask;
341  Util::BitSet targetBondMask;
342  };
343 
344  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr; // shared pointer to a match expression on whole molecular graphs
345 
346  typedef std::vector<Util::BitSet> BitMatrix; // a vector of bitsets, i.e. one BitSet per row
347  typedef std::vector<const Atom*> AtomMappingTable; // same underlying type as AtomList below
348  typedef std::vector<std::size_t> AtomIndexList; // same underlying type as BondMappingStack below
349  typedef std::vector<std::size_t> BondMappingStack;
350  typedef std::deque<std::size_t> AtomQueue;
351  typedef std::set<ABMappingMask> UniqueMappingList; // despite the name this is an ordered set keyed by ABMappingMask (uses ABMappingMask::operator<)
352  typedef std::vector<const Atom*> AtomList;
353  typedef std::vector<const Bond*> BondList;
354  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable; // per-atom match expression pointers
355  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable; // per-bond match expression pointers
356  typedef Util::ObjectStack<AtomBondMapping> MappingCache; // Util::ObjectStack is declared in ObjectStack.hpp
357 
358  const MolecularGraph* query; // raw pointer; NOTE(review): presumably refers to the graph passed to setQuery() without owning it - confirm lifetime requirements
359  const MolecularGraph* target; // raw pointer; NOTE(review): presumably the graph passed to the most recent search call - confirm
360  BitMatrix atomEquivMatrix;
361  BitMatrix bondEquivMatrix;
362  AtomQueue termQueryAtoms;
363  AtomIndexList termTargetAtoms;
364  BondMappingStack bondMappingStack;
365  AtomMappingTable queryAtomMapping;
366  ABMappingMask mappingMask;
367  Util::BitSet hiddenQueryAtomMask;
368  Util::BitSet termQueryAtomMask;
369  Util::BitSet termTargetAtomMask;
370  ABMappingList foundMappings; // raw AtomBondMapping pointers; the container type that MappingIterator/ConstMappingIterator iterate over
371  UniqueMappingList uniqueMappings; // std::set of ABMappingMask keys
372  AtomMatchExprTable atomMatchExprTable;
373  BondMatchExprTable bondMatchExprTable;
374  MolGraphMatchExprPtr molGraphMatchExpr;
375  AtomList postMappingMatchAtoms;
376  BondList postMappingMatchBonds;
377  MappingCache mappingCache; // NOTE(review): presumably the pool behind createAtomBondMapping()/freeAtomBondMapping() - confirm
378  bool queryChanged;
379  bool initQueryData;
380  bool uniqueMatches; // NOTE(review): presumably the flag accessed via uniqueMappingsOnly() - confirm
381  bool saveMappings;
382  bool maxMappingsOnly;
383  std::size_t numQueryAtoms;
384  std::size_t numQueryBonds;
385  std::size_t numTargetAtoms;
386  std::size_t numTargetBonds;
387  std::size_t numMappedAtoms;
388  std::size_t currMaxSubstructureSize;
389  std::size_t maxBondStackSize;
390  std::size_t maxNumMappings; // NOTE(review): presumably the value set via setMaxNumMappings() - confirm
391  std::size_t minSubstructureSize; // NOTE(review): presumably the value set via setMinSubstructureSize() - confirm
392  };
393  } // namespace Chem
394 } // namespace CDPL
395 
396 #endif // CDPL_CHEM_COMMONCONNECTEDSUBSTRUCTURESEARCH_HPP
CDPL::Chem::CommonConnectedSubstructureSearch::getMapping
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::CommonConnectedSubstructureSearch::getNumMappings
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructures.
CDPL::Chem::CommonConnectedSubstructureSearch::begin
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
ObjectStack.hpp
Definition of the class CDPL::Util::ObjectStack.
CDPL::Chem::CommonConnectedSubstructureSearch::setMaxNumMappings
void setMaxNumMappings(std::size_t max_num_mappings)
Specifies a limit on the number of stored atom/bond mappings.
CDPL::Chem::CommonConnectedSubstructureSearch::uniqueMappingsOnly
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
APIPrefix.hpp
Definition of the preprocessor macro CDPL_CHEM_API.
CDPL_CHEM_API
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Chem::CommonConnectedSubstructureSearch::end
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::CommonConnectedSubstructureSearch::getMappingsBegin
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::CommonConnectedSubstructureSearch::ConstMappingIterator
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: CommonConnectedSubstructureSearch.hpp:77
CDPL::Chem::CommonConnectedSubstructureSearch::findMaxMappings
bool findMaxMappings(const MolecularGraph &target)
Searches for all maximum-sized atom/bond mappings of connected query subgraphs to substructures of the specified target molecular graph.
CDPL::Chem::CommonConnectedSubstructureSearch::getMapping
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::CommonConnectedSubstructureSearch::mappingExists
bool mappingExists(const MolecularGraph &target)
Searches for a common connected substructure between the query and the specified target molecular graph.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::CommonConnectedSubstructureSearch::~CommonConnectedSubstructureSearch
~CommonConnectedSubstructureSearch()
Destructor.
CDPL::Chem::CommonConnectedSubstructureSearch::getMappingsBegin
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::CommonConnectedSubstructureSearch::getMinSubstructureSize
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
CDPL::Chem::CommonConnectedSubstructureSearch
CommonConnectedSubstructureSearch.
Definition: CommonConnectedSubstructureSearch.hpp:62
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Util::operator<
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
CDPL::Chem::CommonConnectedSubstructureSearch::setQuery
void setQuery(const MolecularGraph &query)
Specifies a new query structure.
CDPL::Chem::CommonConnectedSubstructureSearch::getMappingsEnd
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
AtomBondMapping.hpp
Definition of the class CDPL::Chem::AtomBondMapping.
CDPL::Chem::CommonConnectedSubstructureSearch::CommonConnectedSubstructureSearch
CommonConnectedSubstructureSearch()
Constructs and initializes a CommonConnectedSubstructureSearch instance.
CDPL::Chem::CommonConnectedSubstructureSearch::end
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::MatchExpression
A generic boolean expression interface for the implementation of query/target object equivalence tests.
Definition: MatchExpression.hpp:75
CDPL::Chem::CommonConnectedSubstructureSearch::getMappingsEnd
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
MatchExpression.hpp
Definition of the class CDPL::Chem::MatchExpression.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Chem::CommonConnectedSubstructureSearch::setMinSubstructureSize
void setMinSubstructureSize(std::size_t min_size)
Specifies the minimum accepted common substructure size.
CDPL::Chem::CommonConnectedSubstructureSearch::getMaxNumMappings
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CDPL::Chem::CommonConnectedSubstructureSearch::SharedPointer
std::shared_ptr< CommonConnectedSubstructureSearch > SharedPointer
Definition: CommonConnectedSubstructureSearch.hpp:67
CDPL::Chem::AtomBondMapping
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
CDPL::Chem::CommonConnectedSubstructureSearch::uniqueMappingsOnly
void uniqueMappingsOnly(bool unique)
Specifies whether or not to store only unique atom/bond mappings.
CDPL::Chem::CommonConnectedSubstructureSearch::MappingIterator
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: CommonConnectedSubstructureSearch.hpp:72
CDPL::Chem::CommonConnectedSubstructureSearch::findAllMappings
bool findAllMappings(const MolecularGraph &target)
Searches for all possible atom/bond mappings of connected query subgraphs to substructures of the specified target molecular graph.
CDPL::Util::ObjectStack< AtomBondMapping >
CDPL::Util::operator>
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
CDPL::Chem::CommonConnectedSubstructureSearch::CommonConnectedSubstructureSearch
CommonConnectedSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a CommonConnectedSubstructureSearch instance for the specified query structure.
CDPL::Chem::CommonConnectedSubstructureSearch::begin
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.