Chemical Data Processing Library C++ API - Version 1.1.0
SubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * SubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <deque>
34 #include <set>
35 #include <cstddef>
36 #include <unordered_map>
37 #include <memory>
38 #include <functional>
39 
40 #include <boost/iterator/indirect_iterator.hpp>
41 
42 #include "CDPL/Chem/APIPrefix.hpp"
45 #include "CDPL/Util/BitSet.hpp"
47 
48 
49 namespace CDPL
50 {
51 
52  namespace Chem
53  {
54 
55  class MolecularGraph;
56  class Atom;
57  class Bond;
58 
64  {
65 
66  typedef std::vector<AtomBondMapping*> ABMappingList;
67 
68  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
71 
72  public:
73  typedef std::shared_ptr<SubstructureSearch> SharedPointer;
74 
78  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
79 
83  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
84 
85  typedef std::function<const AtomMatchExprPtr&(const Atom&)> AtomMatchExpressionFunction;
86  typedef std::function<const BondMatchExprPtr&(const Bond&)> BondMatchExpressionFunction;
87  typedef std::function<const MolGraphMatchExprPtr&(const MolecularGraph&)> MolecularGraphMatchExpressionFunction;
88 
93 
99 
106 
108 
110 
112 
117  void setQuery(const MolecularGraph& query);
118 
131  bool mappingExists(const MolecularGraph& target);
132 
147  bool findMappings(const MolecularGraph& target);
148 
149  void stopSearch();
150 
155  std::size_t getNumMappings() const;
156 
163  AtomBondMapping& getMapping(std::size_t idx);
164 
171  const AtomBondMapping& getMapping(std::size_t idx) const;
172 
178 
184 
190 
196 
202 
208 
214 
220 
232  void uniqueMappingsOnly(bool unique);
233 
239  bool uniqueMappingsOnly() const;
240 
251  void setMaxNumMappings(std::size_t max_num_mappings);
252 
258  std::size_t getMaxNumMappings() const;
259 
273  void addAtomMappingConstraint(std::size_t query_atom_idx, std::size_t target_atom_idx);
274 
280 
294  void addBondMappingConstraint(std::size_t query_bond_idx, std::size_t target_bond_idx);
295 
301 
302  private:
304 
305  SubstructureSearch& operator=(const SubstructureSearch&);
306 
307  bool init(const MolecularGraph&);
308 
309  void initMatchExpressions();
310 
311  bool findEquivAtoms();
312  bool findEquivBonds();
313 
314  bool mapAtoms();
315 
316  std::size_t nextQueryAtom() const;
317  bool nextTargetAtom(std::size_t, std::size_t&, std::size_t&) const;
318 
319  bool atomMappingAllowed(std::size_t, std::size_t) const;
320  bool checkAtomMappingConstraints(std::size_t, std::size_t) const;
321  bool checkBondMappingConstraints(std::size_t, std::size_t) const;
322 
323  bool mapBonds(std::size_t, std::size_t);
324  bool mapAtoms(std::size_t);
325  bool mapAtoms(std::size_t, std::size_t);
326 
327  bool mappingFound();
328 
329  bool hasPostMappingMatchExprs() const;
330  bool foundMappingMatches(const AtomBondMapping*) const;
331 
332  bool foundMappingUnique();
333 
334  void freeAtomBondMappings();
335  void freeAtomBondMapping();
336 
337  AtomBondMapping* createAtomBondMapping();
338 
// Private helper type that bundles two bit masks, one for atoms and one for
// bonds (both Util::BitSet). SubstructureSearch keeps a single instance as
// 'targetMappingMask' and a std::set of instances as 'uniqueMappings'
// (typedef UniqueMappingList); operator< below provides the ordering that
// std::set uses by default.
// NOTE(review): only declarations are visible in this header. The per-method
// notes below are inferred from the names and signatures and should be
// confirmed against the definitions.
339  class ABMappingMask
340  {
341 
342  public:
// Mask setup. NOTE(review): the argument is presumably the number of bits
// (atom or bond count) the respective mask has to hold - confirm.
343  void initAtomMask(std::size_t);
344  void initBondMask(std::size_t);
345 
// Single-bit modifiers for the atom mask. NOTE(review): the argument is
// presumably an atom index - confirm.
346  void setAtomBit(std::size_t);
347  void resetAtomBit(std::size_t);
348 
// Read-only query of a single atom mask bit (const member function).
349  bool testAtomBit(std::size_t) const;
350 
// The bond mask interface is asymmetric to the atom mask interface: bits can
// be set individually, but the only reset declared here takes no index, and
// no per-bit reset or test function is declared for bonds.
351  void setBondBit(std::size_t);
352  void resetBondMask();
353 
// Relational operators; operator< is what allows ABMappingMask to be stored
// in the std::set-based UniqueMappingList. NOTE(review): which mask takes
// precedence in the comparison is not visible from this header.
354  bool operator<(const ABMappingMask&) const;
355  bool operator>(const ABMappingMask&) const;
356 
357  private:
// Bit mask storage for atoms and bonds, respectively.
358  Util::BitSet atomMask;
359  Util::BitSet bondMask;
360  };
361 
362  typedef std::vector<Util::BitSet> BitMatrix;
363  typedef std::vector<const Atom*> AtomMappingTable;
364  typedef std::vector<const Bond*> BondMappingTable;
365  typedef std::deque<std::size_t> AtomQueue;
366  typedef std::set<ABMappingMask> UniqueMappingList;
367  typedef std::vector<const Atom*> AtomList;
368  typedef std::vector<const Bond*> BondList;
369  typedef std::vector<AtomMatchExprPtr> AtomMatchExprTable;
370  typedef std::vector<BondMatchExprPtr> BondMatchExprTable;
371  typedef std::unordered_multimap<std::size_t, std::size_t> MappingConstraintMap;
372  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
373 
374  const MolecularGraph* query;
375  const MolecularGraph* target;
376  AtomMatchExpressionFunction atomMatchExprFunc;
377  BondMatchExpressionFunction bondMatchExprFunc;
378  MolecularGraphMatchExpressionFunction molGraphMatchExprFunc;
379  BitMatrix atomEquivMatrix;
380  BitMatrix bondEquivMatrix;
381  MappingConstraintMap atomMappingConstrs;
382  MappingConstraintMap bondMappingConstrs;
383  AtomQueue termQueryAtoms;
384  AtomMappingTable queryAtomMapping;
385  BondMappingTable queryBondMapping;
386  Util::BitSet queryMappingMask;
387  ABMappingMask targetMappingMask;
388  ABMappingList foundMappings;
389  UniqueMappingList uniqueMappings;
390  AtomMatchExprTable atomMatchExprTable;
391  BondMatchExprTable bondMatchExprTable;
392  MolGraphMatchExprPtr molGraphMatchExpr;
393  AtomList postMappingMatchAtoms;
394  BondList postMappingMatchBonds;
395  MappingCache mappingCache;
396  bool queryChanged;
397  bool initQueryData;
398  bool uniqueMatches;
399  bool saveMappings;
400  bool exitSearch;
401  std::size_t numQueryAtoms;
402  std::size_t numQueryBonds;
403  std::size_t numTargetAtoms;
404  std::size_t numTargetBonds;
405  std::size_t numMappedAtoms;
406  std::size_t maxNumMappings;
407  };
408  } // namespace Chem
409 } // namespace CDPL
410 
411 #endif // CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
ObjectStack.hpp
Definition of the class CDPL::Util::ObjectStack.
CDPL::Chem::SubstructureSearch::setQuery
void setQuery(const MolecularGraph &query)
Allows specifying a new query structure.
CDPL::Chem::SubstructureSearch::end
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::SubstructureSearch
SubstructureSearch(const MolecularGraph &query)
Constructs and initializes a SubstructureSearch instance for the specified query structure.
APIPrefix.hpp
Definition of the preprocessor macro CDPL_CHEM_API.
CDPL::Chem::SubstructureSearch::getMappingsBegin
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::setBondMatchExpressionFunction
void setBondMatchExpressionFunction(const BondMatchExpressionFunction &func)
CDPL_CHEM_API
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Chem::SubstructureSearch::getMapping
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::SubstructureSearch
SubstructureSearch.
Definition: SubstructureSearch.hpp:64
CDPL::Chem::SubstructureSearch::SubstructureSearch
SubstructureSearch()
Constructs and initializes a SubstructureSearch instance.
CDPL::Chem::SubstructureSearch::getMaxNumMappings
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Chem::SubstructureSearch::clearAtomMappingConstraints
void clearAtomMappingConstraints()
Clears all previously defined query to target atom mapping constraints.
CDPL::Chem::SubstructureSearch::end
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::stopSearch
void stopSearch()
CDPL::Chem::SubstructureSearch::begin
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Chem::SubstructureSearch::addAtomMappingConstraint
void addAtomMappingConstraint(std::size_t query_atom_idx, std::size_t target_atom_idx)
Adds a constraint on the allowed mappings between query and target structure atoms.
CDPL::Chem::SubstructureSearch::MolecularGraphMatchExpressionFunction
std::function< const MolGraphMatchExprPtr &(const MolecularGraph &)> MolecularGraphMatchExpressionFunction
Definition: SubstructureSearch.hpp:87
CDPL::Chem::SubstructureSearch::begin
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::setMolecularGraphMatchExpressionFunction
void setMolecularGraphMatchExpressionFunction(const MolecularGraphMatchExpressionFunction &func)
CDPL::Util::operator<
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
CDPL::Chem::SubstructureSearch::uniqueMappingsOnly
void uniqueMappingsOnly(bool unique)
Allows specifying whether or not to store only unique atom/bond mappings.
CDPL::Chem::SubstructureSearch::~SubstructureSearch
~SubstructureSearch()
Destructor.
CDPL::Chem::SubstructureSearch::findMappings
bool findMappings(const MolecularGraph &target)
Searches for all possible atom/bond mappings of the query structure to substructures of the specified target molecular graph.
CDPL::Chem::SubstructureSearch::mappingExists
bool mappingExists(const MolecularGraph &target)
Tells whether the query structure matches a substructure of the specified target molecular graph.
AtomBondMapping.hpp
Definition of the class CDPL::Chem::AtomBondMapping.
CDPL::Chem::SubstructureSearch::ConstMappingIterator
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: SubstructureSearch.hpp:83
CDPL::Chem::SubstructureSearch::uniqueMappingsOnly
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
CDPL::Chem::MatchExpression
A generic boolean expression interface for the implementation of query/target object equivalence tests in molecular graph matching algorithms.
Definition: MatchExpression.hpp:75
MatchExpression.hpp
Definition of the class CDPL::Chem::MatchExpression.
CDPL::Chem::SubstructureSearch::addBondMappingConstraint
void addBondMappingConstraint(std::size_t query_bond_idx, std::size_t target_bond_idx)
Adds a constraint on the allowed mappings between query and target structure bonds.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Chem::SubstructureSearch::getMappingsBegin
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::getMappingsEnd
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::clearBondMappingConstraints
void clearBondMappingConstraints()
Clears all previously defined query to target bond mapping constraints.
CDPL::Chem::SubstructureSearch::getMapping
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
CDPL::Chem::SubstructureSearch::getMappingsEnd
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
CDPL::Chem::SubstructureSearch::setMaxNumMappings
void setMaxNumMappings(std::size_t max_num_mappings)
Allows specifying a limit on the number of stored atom/bond mappings.
CDPL::Chem::AtomBondMapping
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
CDPL::Chem::SubstructureSearch::MappingIterator
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: SubstructureSearch.hpp:78
CDPL::Util::ObjectStack< AtomBondMapping >
CDPL::Chem::SubstructureSearch::SharedPointer
std::shared_ptr< SubstructureSearch > SharedPointer
Definition: SubstructureSearch.hpp:73
CDPL::Util::operator>
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
CDPL::Chem::SubstructureSearch::AtomMatchExpressionFunction
std::function< const AtomMatchExprPtr &(const Atom &)> AtomMatchExpressionFunction
Definition: SubstructureSearch.hpp:85
CDPL::Chem::SubstructureSearch::BondMatchExpressionFunction
std::function< const BondMatchExprPtr &(const Bond &)> BondMatchExpressionFunction
Definition: SubstructureSearch.hpp:86
CDPL::Chem::SubstructureSearch::setAtomMatchExpressionFunction
void setAtomMatchExpressionFunction(const AtomMatchExpressionFunction &func)
CDPL::Chem::SubstructureSearch::getNumMappings
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last call to findMappings().