Chemical Data Processing Library C++ API - Version 1.2.0
SubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * SubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <deque>
34 #include <set>
35 #include <cstddef>
36 #include <unordered_map>
37 #include <memory>
38 #include <functional>
39 
40 #include <boost/iterator/indirect_iterator.hpp>
41 
42 #include "CDPL/Chem/APIPrefix.hpp"
45 #include "CDPL/Util/BitSet.hpp"
47 
48 
49 namespace CDPL
50 {
51 
52  namespace Chem
53  {
54 
55  class MolecularGraph;
56  class Atom;
57  class Bond;
58 
64  {
65 
66  typedef std::vector<AtomBondMapping*> ABMappingList;
67 
68  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
71 
72  public:
// A reference-counted (std::shared_ptr) smart pointer to a SubstructureSearch instance.
73  typedef std::shared_ptr<SubstructureSearch> SharedPointer;
74 
// A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
// The underlying container holds AtomBondMapping pointers (see ABMappingList);
// boost::indirect_iterator dereferences them so that *it yields an AtomBondMapping&.
78  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
79 
// A constant random access iterator used to iterate over the stored atom/bond mapping objects.
// Same adaptor as MappingIterator, but *it yields a const AtomBondMapping&.
83  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
84 
// Callable types used to look up match expressions: each takes the object in question
// (atom, bond or molecular graph) and returns a const reference to the corresponding
// match expression pointer type.
// NOTE(review): AtomMatchExprPtr/BondMatchExprPtr are not declared in the visible part
// of this listing - presumably MatchExpression<...>::SharedPointer typedefs like
// MolGraphMatchExprPtr; confirm against the full header.
85  typedef std::function<const AtomMatchExprPtr&(const Atom&)> AtomMatchExpressionFunction;
86  typedef std::function<const BondMatchExprPtr&(const Bond&)> BondMatchExpressionFunction;
87  typedef std::function<const MolGraphMatchExprPtr&(const MolecularGraph&)> MolecularGraphMatchExpressionFunction;
88 
93 
99 
101 
108 
110 
112 
114 
116 
// Specifies a new query structure.
// query: the molecular graph to be used as the query.
121  void setQuery(const MolecularGraph& query);
122 
// Tells whether the query structure matches a substructure of the specified
// target molecular graph.
135  bool mappingExists(const MolecularGraph& target);
136 
// Searches for all possible atom/bond mappings of the query structure to
// substructures of the specified target.
// NOTE(review): the meaning of the bool result is not visible in this listing -
// presumably true if at least one mapping was found; confirm.
151  bool findMappings(const MolecularGraph& target);
152 
// NOTE(review): undocumented in this listing; presumably requests termination of a
// running search (cf. the private exitSearch flag) - confirm against the implementation.
153  void stopSearch();
154 
// Returns the number of atom/bond mappings that were recorded in the last call
// to findMappings().
159  std::size_t getNumMappings() const;
160 
// Returns a non-const reference to the stored atom/bond mapping object at index idx.
167  AtomBondMapping& getMapping(std::size_t idx);
168 
// Returns a const reference to the stored atom/bond mapping object at index idx.
175  const AtomBondMapping& getMapping(std::size_t idx) const;
176 
182 
188 
194 
200 
206 
212 
218 
224 
// Specifies whether or not to store only unique atom/bond mappings.
236  void uniqueMappingsOnly(bool unique);
237 
// Tells whether duplicate atom/bond mappings are discarded.
243  bool uniqueMappingsOnly() const;
244 
// Specifies a limit on the number of stored atom/bond mappings.
255  void setMaxNumMappings(std::size_t max_num_mappings);
256 
// Returns the specified limit on the number of stored atom/bond mappings.
262  std::size_t getMaxNumMappings() const;
263 
// Adds a constraint on the allowed mappings between query and target structure atoms.
// query_atom_idx / target_atom_idx: indices of the query and target atom concerned.
277  void addAtomMappingConstraint(std::size_t query_atom_idx, std::size_t target_atom_idx);
278 
284 
// Adds a constraint on the allowed mappings between query and target structure bonds.
// query_bond_idx / target_bond_idx: indices of the query and target bond concerned.
298  void addBondMappingConstraint(std::size_t query_bond_idx, std::size_t target_bond_idx);
299 
305 
306  private:
307  bool init(const MolecularGraph&);
308 
309  void initMatchExpressions();
310 
311  bool findEquivAtoms();
312  bool findEquivBonds();
313 
314  bool mapAtoms();
315 
316  std::size_t nextQueryAtom() const;
317  bool nextTargetAtom(std::size_t, std::size_t&, std::size_t&) const;
318 
319  bool atomMappingAllowed(std::size_t, std::size_t) const;
320  bool checkAtomMappingConstraints(std::size_t, std::size_t) const;
321  bool checkBondMappingConstraints(std::size_t, std::size_t) const;
322 
323  bool mapBonds(std::size_t, std::size_t);
324  bool mapAtoms(std::size_t);
325  bool mapAtoms(std::size_t, std::size_t);
326 
327  bool mappingFound();
328 
329  bool hasPostMappingMatchExprs() const;
330  bool foundMappingMatches(const AtomBondMapping*) const;
331 
332  bool foundMappingUnique();
333 
334  void freeAtomBondMappings();
335  void freeAtomBondMapping();
336 
337  AtomBondMapping* createAtomBondMapping();
338 
// Private helper that bundles two bit sets, one for atoms and one for bonds.
// It is the element type of UniqueMappingList (std::set<ABMappingMask>) and the type
// of the targetMappingMask member.
// NOTE(review): presumably each bit marks a target atom/bond that takes part in a
// mapping - confirm against the implementation.
339  class ABMappingMask
340  {
341 
342  public:
// NOTE(review): the parameter is unnamed; presumably the number of bits (atoms/bonds)
// the respective mask has to hold - confirm.
343  void initAtomMask(std::size_t);
344  void initBondMask(std::size_t);
345 
// Atom bits can be set, cleared and queried individually.
346  void setAtomBit(std::size_t);
347  void resetAtomBit(std::size_t);
348 
349  bool testAtomBit(std::size_t) const;
350 
// Bond bits can be set individually; the only reset declared for the bond mask
// takes no index (no per-bit reset or test is declared for bonds).
351  void setBondBit(std::size_t);
352  void resetBondMask();
353 
// Ordering operators. std::set<ABMappingMask> with the default comparator relies on
// operator< to order and de-duplicate its elements.
354  bool operator<(const ABMappingMask&) const;
355  bool operator>(const ABMappingMask&) const;
356 
357  private:
358  Util::BitSet atomMask; // bit set addressed by the *AtomBit()/initAtomMask() methods
359  Util::BitSet bondMask; // bit set addressed by the *Bond*() methods
360  };
361 
// Container aliases used by the private data members of this class.
// A vector of bit sets; type of atomEquivMatrix and bondEquivMatrix.
362  typedef std::vector<Util::BitSet> BitMatrix;
// Vectors of non-owning const Atom/Bond pointers; types of queryAtomMapping and
// queryBondMapping.
363  typedef std::vector<const Atom*> AtomMappingTable;
364  typedef std::vector<const Bond*> BondMappingTable;
// Double-ended queue of indices; type of termQueryAtoms.
365  typedef std::deque<std::size_t> AtomQueue;
// Despite the name this is a std::set: elements are unique and ordered by
// ABMappingMask::operator<. Type of uniqueMappings.
366  typedef std::set<ABMappingMask> UniqueMappingList;
// Vectors of non-owning const Atom/Bond pointers; types of postMappingMatchAtoms and
// postMappingMatchBonds.
367  typedef std::vector<const Atom*> AtomList;
368  typedef std::vector<const Bond*> BondList;
// Vectors of match expression pointers; types of atomMatchExprTable and
// bondMatchExprTable.
369  typedef std::vector<AtomMatchExprPtr> AtomMatchExprTable;
370  typedef std::vector<BondMatchExprPtr> BondMatchExprTable;
// Multimap from index to index, so one key may be associated with several values.
// Type of atomMappingConstrs and bondMappingConstrs.
// NOTE(review): presumably keyed by query index with target indices as values - confirm.
371  typedef std::unordered_multimap<std::size_t, std::size_t> MappingConstraintMap;
// Type of mappingCache.
// NOTE(review): presumably a pool for reusing AtomBondMapping objects - see
// CDPL::Util::ObjectStack for the actual semantics.
372  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
373 
374  const MolecularGraph* query;
375  const MolecularGraph* target;
376  AtomMatchExpressionFunction atomMatchExprFunc;
377  BondMatchExpressionFunction bondMatchExprFunc;
378  MolecularGraphMatchExpressionFunction molGraphMatchExprFunc;
379  BitMatrix atomEquivMatrix;
380  BitMatrix bondEquivMatrix;
381  MappingConstraintMap atomMappingConstrs;
382  MappingConstraintMap bondMappingConstrs;
383  AtomQueue termQueryAtoms;
384  AtomMappingTable queryAtomMapping;
385  BondMappingTable queryBondMapping;
386  Util::BitSet queryMappingMask;
387  ABMappingMask targetMappingMask;
388  ABMappingList foundMappings;
389  UniqueMappingList uniqueMappings;
390  AtomMatchExprTable atomMatchExprTable;
391  BondMatchExprTable bondMatchExprTable;
392  MolGraphMatchExprPtr molGraphMatchExpr;
393  AtomList postMappingMatchAtoms;
394  BondList postMappingMatchBonds;
395  MappingCache mappingCache;
396  bool queryChanged;
397  bool initQueryData;
398  bool uniqueMatches;
399  bool saveMappings;
400  bool exitSearch;
401  std::size_t numQueryAtoms;
402  std::size_t numQueryBonds;
403  std::size_t numTargetAtoms;
404  std::size_t numTargetBonds;
405  std::size_t numMappedAtoms;
406  std::size_t maxNumMappings;
407  };
408  } // namespace Chem
409 } // namespace CDPL
410 
411 #endif // CDPL_CHEM_SUBSTRUCTURESEARCH_HPP
Definition of the class CDPL::Chem::AtomBondMapping.
Definition of the type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of the class CDPL::Chem::MatchExpression.
Definition of the class CDPL::Util::ObjectStack.
A data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
Atom.
Definition: Atom.hpp:52
Bond.
Definition: Bond.hpp:50
A generic boolean expression interface for the implementation of query/target object equivalence test...
Definition: MatchExpression.hpp:75
MolecularGraph.
Definition: MolecularGraph.hpp:52
SubstructureSearch.
Definition: SubstructureSearch.hpp:64
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
void clearAtomMappingConstraints()
Clears all previously defined query to target atom mapping constraints.
void clearBondMappingConstraints()
Clears all previously defined query to target bond mapping constraints.
void addBondMappingConstraint(std::size_t query_bond_idx, std::size_t target_bond_idx)
Adds a constraint on the allowed mappings between query and target structure bonds.
void setMolecularGraphMatchExpressionFunction(const MolecularGraphMatchExpressionFunction &func)
std::function< const BondMatchExprPtr &(const Bond &)> BondMatchExpressionFunction
Definition: SubstructureSearch.hpp:86
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
void uniqueMappingsOnly(bool unique)
Specifies whether or not to store only unique atom/bond mappings.
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
void addAtomMappingConstraint(std::size_t query_atom_idx, std::size_t target_atom_idx)
Adds a constraint on the allowed mappings between query and target structure atoms.
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last call to findMappings().
void setQuery(const MolecularGraph &query)
Specifies a new query structure.
void setMaxNumMappings(std::size_t max_num_mappings)
Specifies a limit on the number of stored atom/bond mappings.
bool mappingExists(const MolecularGraph &target)
Tells whether the query structure matches a substructure of the specified target molecular graph.
void setBondMatchExpressionFunction(const BondMatchExpressionFunction &func)
std::shared_ptr< SubstructureSearch > SharedPointer
Definition: SubstructureSearch.hpp:73
SubstructureSearch()
Constructs and initializes a SubstructureSearch instance.
bool findMappings(const MolecularGraph &target)
Searches for all possible atom/bond mappings of the query structure to substructures of the specified...
SubstructureSearch(const MolecularGraph &query)
Constructs and initializes a SubstructureSearch instance for the specified query structure.
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
std::function< const MolGraphMatchExprPtr &(const MolecularGraph &)> MolecularGraphMatchExpressionFunction
Definition: SubstructureSearch.hpp:87
void setAtomMatchExpressionFunction(const AtomMatchExpressionFunction &func)
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: SubstructureSearch.hpp:83
std::function< const AtomMatchExprPtr &(const Atom &)> AtomMatchExpressionFunction
Definition: SubstructureSearch.hpp:85
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: SubstructureSearch.hpp:78
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
SubstructureSearch & operator=(const SubstructureSearch &)=delete
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
SubstructureSearch(const SubstructureSearch &)=delete
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
The namespace of the Chemical Data Processing Library.