Chemical Data Processing Library C++ API - Version 1.4.0
SubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * SubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <deque>
34 #include <set>
35 #include <cstddef>
36 #include <unordered_map>
37 #include <memory>
38 #include <functional>
39 
40 #include <boost/iterator/indirect_iterator.hpp>
41 
42 #include "CDPL/Chem/APIPrefix.hpp"
45 #include "CDPL/Util/BitSet.hpp"
47 
48 
49 namespace CDPL
50 {
51 
52  namespace Chem
53  {
54 
55  class MolecularGraph;
56  class Atom;
57  class Bond;
58 
74  {
75 
76  typedef std::vector<AtomBondMapping*> ABMappingList;
77 
78  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
81 
82  public:
86  typedef std::shared_ptr<SubstructureSearch> SharedPointer;
87 
91  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
92 
96  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
97 
101  typedef std::function<const AtomMatchExprPtr&(const Atom&)> AtomMatchExpressionFunction;
102 
106  typedef std::function<const BondMatchExprPtr&(const Bond&)> BondMatchExpressionFunction;
107 
111  typedef std::function<const MolGraphMatchExprPtr&(const MolecularGraph&)> MolecularGraphMatchExpressionFunction;
112 
117 
123 
125 
132 
134 
140 
146 
152 
157  void setQuery(const MolecularGraph& query);
158 
171  bool mappingExists(const MolecularGraph& target);
172 
187  bool findMappings(const MolecularGraph& target);
188 
195  void stopSearch();
196 
201  std::size_t getNumMappings() const;
202 
209  AtomBondMapping& getMapping(std::size_t idx);
210 
217  const AtomBondMapping& getMapping(std::size_t idx) const;
218 
224 
230 
236 
242 
248 
254 
260 
266 
278  void uniqueMappingsOnly(bool unique);
279 
285  bool uniqueMappingsOnly() const;
286 
297  void setMaxNumMappings(std::size_t max_num_mappings);
298 
304  std::size_t getMaxNumMappings() const;
305 
319  void addAtomMappingConstraint(std::size_t query_atom_idx, std::size_t target_atom_idx);
320 
326 
340  void addBondMappingConstraint(std::size_t query_bond_idx, std::size_t target_bond_idx);
341 
347 
348  private:
349  bool init(const MolecularGraph&);
350 
351  void initMatchExpressions();
352 
353  bool findEquivAtoms();
354  bool findEquivBonds();
355 
356  bool mapAtoms();
357 
358  std::size_t nextQueryAtom() const;
359  bool nextTargetAtom(std::size_t, std::size_t&, std::size_t&) const;
360 
361  bool atomMappingAllowed(std::size_t, std::size_t) const;
362  bool checkAtomMappingConstraints(std::size_t, std::size_t) const;
363  bool checkBondMappingConstraints(std::size_t, std::size_t) const;
364 
365  bool mapBonds(std::size_t, std::size_t);
366  bool mapAtoms(std::size_t);
367  bool mapAtoms(std::size_t, std::size_t);
368 
369  bool mappingFound();
370 
371  bool hasPostMappingMatchExprs() const;
372  bool foundMappingMatches(const AtomBondMapping*) const;
373 
374  bool foundMappingUnique();
375 
376  void freeAtomBondMappings();
377  void freeAtomBondMapping();
378 
379  AtomBondMapping* createAtomBondMapping();
380 
381  class ABMappingMask
382  {
383 
384  public:
385  void initAtomMask(std::size_t);
386  void initBondMask(std::size_t);
387 
388  void setAtomBit(std::size_t);
389  void resetAtomBit(std::size_t);
390 
391  bool testAtomBit(std::size_t) const;
392 
393  void setBondBit(std::size_t);
394  void resetBondMask();
395 
396  bool operator<(const ABMappingMask&) const;
397  bool operator>(const ABMappingMask&) const;
398 
399  private:
400  Util::BitSet atomMask;
401  Util::BitSet bondMask;
402  };
403 
404  typedef std::vector<Util::BitSet> BitMatrix;
405  typedef std::vector<const Atom*> AtomMappingTable;
406  typedef std::vector<const Bond*> BondMappingTable;
407  typedef std::deque<std::size_t> AtomQueue;
408  typedef std::set<ABMappingMask> UniqueMappingList;
409  typedef std::vector<const Atom*> AtomList;
410  typedef std::vector<const Bond*> BondList;
411  typedef std::vector<AtomMatchExprPtr> AtomMatchExprTable;
412  typedef std::vector<BondMatchExprPtr> BondMatchExprTable;
413  typedef std::unordered_multimap<std::size_t, std::size_t> MappingConstraintMap;
414  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
415 
416  const MolecularGraph* query;
417  const MolecularGraph* target;
418  AtomMatchExpressionFunction atomMatchExprFunc;
419  BondMatchExpressionFunction bondMatchExprFunc;
420  MolecularGraphMatchExpressionFunction molGraphMatchExprFunc;
421  BitMatrix atomEquivMatrix;
422  BitMatrix bondEquivMatrix;
423  MappingConstraintMap atomMappingConstrs;
424  MappingConstraintMap bondMappingConstrs;
425  AtomQueue termQueryAtoms;
426  AtomMappingTable queryAtomMapping;
427  BondMappingTable queryBondMapping;
428  Util::BitSet queryMappingMask;
429  ABMappingMask targetMappingMask;
430  ABMappingList foundMappings;
431  UniqueMappingList uniqueMappings;
432  AtomMatchExprTable atomMatchExprTable;
433  BondMatchExprTable bondMatchExprTable;
434  MolGraphMatchExprPtr molGraphMatchExpr;
435  AtomList postMappingMatchAtoms;
436  BondList postMappingMatchBonds;
437  MappingCache mappingCache;
438  bool queryChanged;
439  bool initQueryData;
440  bool uniqueMatches;
441  bool saveMappings;
442  bool exitSearch;
443  std::size_t numQueryAtoms;
444  std::size_t numQueryBonds;
445  std::size_t numTargetAtoms;
446  std::size_t numTargetBonds;
447  std::size_t numMappedAtoms;
448  std::size_t maxNumMappings;
449  };
450  } // namespace Chem
451 } // namespace CDPL
452 
453 #endif // CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
Definition of class CDPL::Chem::AtomBondMapping.
Declaration of type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of class CDPL::Chem::MatchExpression.
Definition of class CDPL::Util::ObjectStack.
Data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Abstract base class representing a chemical bond between two Chem::Atom instances.
Definition: Bond.hpp:54
Generic boolean expression interface for the implementation of query/target object equivalence tests ...
Definition: MatchExpression.hpp:75
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
Subgraph-isomorphism search of a query molecular graph against a target molecular graph,...
Definition: SubstructureSearch.hpp:74
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
void clearAtomMappingConstraints()
Clears all previously defined query to target atom mapping constraints.
void clearBondMappingConstraints()
Clears all previously defined query to target bond mapping constraints.
void addBondMappingConstraint(std::size_t query_bond_idx, std::size_t target_bond_idx)
Adds a constraint on the allowed mappings between query and target structure bonds.
void stopSearch()
Aborts the currently running subgraph mapping search.
void setMolecularGraphMatchExpressionFunction(const MolecularGraphMatchExpressionFunction &func)
Installs a function that resolves the graph-level Chem::MatchExpression for the query molecular graph...
std::function< const BondMatchExprPtr &(const Bond &)> BondMatchExpressionFunction
Type of the functor used to retrieve the bond-level Chem::MatchExpression for a query bond.
Definition: SubstructureSearch.hpp:106
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
void uniqueMappingsOnly(bool unique)
Allows to specify whether or not to store only unique atom/bond mappings.
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
void addAtomMappingConstraint(std::size_t query_atom_idx, std::size_t target_atom_idx)
Adds a constraint on the allowed mappings between query and target structure atoms.
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last call to findMappings().
void setQuery(const MolecularGraph &query)
Allows to specify a new query structure.
void setMaxNumMappings(std::size_t max_num_mappings)
Allows to specify a limit on the number of stored atom/bond mappings.
bool mappingExists(const MolecularGraph &target)
Tells whether the query structure matches a substructure of the specified target molecular graph.
void setBondMatchExpressionFunction(const BondMatchExpressionFunction &func)
Installs a function that resolves the bond-level Chem::MatchExpression for a query bond.
std::shared_ptr< SubstructureSearch > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated SubstructureSearch instances.
Definition: SubstructureSearch.hpp:86
SubstructureSearch()
Constructs and initializes a SubstructureSearch instance.
bool findMappings(const MolecularGraph &target)
Searches for all possible atom/bond mappings of the query structure to substructures of the specified...
SubstructureSearch(const MolecularGraph &query)
Constructs and initializes a SubstructureSearch instance for the specified query structure.
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
std::function< const MolGraphMatchExprPtr &(const MolecularGraph &)> MolecularGraphMatchExpressionFunction
Type of the functor used to retrieve the graph-level Chem::MatchExpression for the query molecular gr...
Definition: SubstructureSearch.hpp:111
void setAtomMatchExpressionFunction(const AtomMatchExpressionFunction &func)
Installs a function that resolves the atom-level Chem::MatchExpression for a query atom.
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: SubstructureSearch.hpp:96
std::function< const AtomMatchExprPtr &(const Atom &)> AtomMatchExpressionFunction
Type of the functor used to retrieve the atom-level Chem::MatchExpression for a query atom.
Definition: SubstructureSearch.hpp:101
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: SubstructureSearch.hpp:91
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
SubstructureSearch & operator=(const SubstructureSearch &)=delete
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
SubstructureSearch(const SubstructureSearch &)=delete
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
The namespace of the Chemical Data Processing Library.