Chemical Data Processing Library C++ API - Version 1.4.0
SubstructureHistogramCalculator.hpp
Go to the documentation of this file.
1 /*
2  * SubstructureHistogramCalculator.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_SUBSTRUCTUREHISTOGRAMCALCULATOR_HPP
30 #define CDPL_CHEM_SUBSTRUCTUREHISTOGRAMCALCULATOR_HPP
31 
32 #include <cstddef>
33 #include <vector>
34 #include <map>
35 #include <utility>
36 #include <memory>
37 #include <functional>
38 
39 #include "CDPL/Chem/APIPrefix.hpp"
40 #include "CDPL/Chem/SubstructureSearch.hpp"
41 #include "CDPL/Chem/MolecularGraph.hpp"
42 #include "CDPL/Util/BitSet.hpp"
43 
44 
45 namespace CDPL
46 {
47 
48  namespace Chem
49  {
50 
62  {
63 
64  public:
65  class Pattern;
66 
67  private:
68  typedef std::vector<Pattern> PatternList;
69 
70  public:
72  typedef std::shared_ptr<SubstructureHistogramCalculator> SharedPointer;
73 
75  typedef PatternList::const_iterator ConstPatternIterator;
77  typedef PatternList::iterator PatternIterator;
78 
83  {
84 
85  public:
95  Pattern(const MolecularGraph::SharedPointer& molgraph, std::size_t id, std::size_t priority = 0,
96  bool all_matches = true, bool unique_matches = true);
97 
103 
108  std::size_t getID() const;
109 
114  std::size_t getPriority() const;
115 
120  bool processAllMatches() const;
121 
127 
128  private:
130  std::size_t id;
131  std::size_t priority;
132  bool allMatches;
133  bool uniqueMatches;
134  };
135 
140 
146 
155  void addPattern(const MolecularGraph::SharedPointer& molgraph, std::size_t id, std::size_t priority = 0,
156  bool all_matches = true, bool unique_matches = true);
157 
162  void addPattern(const Pattern& pattern);
163 
170  const Pattern& getPattern(std::size_t idx) const;
171 
177  void removePattern(std::size_t idx);
178 
185 
189  void clear();
190 
195  std::size_t getNumPatterns() const;
196 
202 
208 
214 
220 
226 
232 
238 
244 
255  template <typename T>
256  void calculate(const MolecularGraph& molgraph, T& histo);
257 
264 
265  private:
266  typedef std::function<void(std::size_t)> HistoUpdateFunction;
267 
268  template <typename T>
269  class HistoUpdateFunctor
270  {
271 
272  public:
273  HistoUpdateFunctor(T& histo):
274  histo(histo) {}
275 
276  void operator()(std::size_t id)
277  {
278  histo[id] += 1;
279  }
280 
281  private:
282  T& histo;
283  };
284 
285  void doCalculate(const MolecularGraph& molgraph, const HistoUpdateFunction& func);
286 
287  void init(const MolecularGraph& molgraph);
288 
289  void processPattern(const Pattern& ptn, const HistoUpdateFunction& func);
290  bool processMatch(const AtomBondMapping& mapping, const Pattern& ptn, const HistoUpdateFunction& func);
291 
292  typedef std::pair<Util::BitSet, Util::BitSet> AtomBondMask;
293  typedef std::map<std::size_t, AtomBondMask> PriorityToAtomBondMaskMap;
294 
295  const MolecularGraph* molGraph;
296  PatternList patterns;
297  SubstructureSearch subSearch;
298  PriorityToAtomBondMaskMap matchedSubstructMasks;
299  AtomBondMask testingAtomBondMask;
300  Util::BitSet tmpMask;
301  };
302  } // namespace Chem
303 } // namespace CDPL
304 
305 
306 template <typename T>
308 {
309  doCalculate(molgraph, HistoUpdateFunctor<T>(histo));
310 }
311 
312 #endif // CDPL_CHEM_SUBSTRUCTUREHISTOGRAMCALCULATOR_HPP
Declaration of type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of class CDPL::Chem::MolecularGraph.
Definition of class CDPL::Chem::SubstructureSearch.
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
std::shared_ptr< MolecularGraph > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MolecularGraph instances.
Definition: MolecularGraph.hpp:63
Holds a single SMARTS query pattern, its histogram ID, its priority and match-handling flags.
Definition: SubstructureHistogramCalculator.hpp:83
Pattern(const MolecularGraph::SharedPointer &molgraph, std::size_t id, std::size_t priority=0, bool all_matches=true, bool unique_matches=true)
Constructs a pattern from the query molecular graph molgraph.
bool processUniqueMatchesOnly() const
Tells whether only one of multiple equivalent mappings is processed per match.
const MolecularGraph::SharedPointer & getStructure() const
Returns the SMARTS query molecular graph of this pattern.
std::size_t getPriority() const
Returns the pattern priority.
bool processAllMatches() const
Tells whether all substructure matches are processed.
std::size_t getID() const
Returns the histogram-bin ID of this pattern.
Counts occurrences of registered SMARTS substructure queries in a molecular graph.
Definition: SubstructureHistogramCalculator.hpp:62
PatternIterator end()
Returns a mutable iterator pointing one past the last registered pattern (range-based for support).
PatternIterator begin()
Returns a mutable iterator pointing to the first registered pattern (range-based for support).
ConstPatternIterator begin() const
Returns a constant iterator pointing to the first registered pattern (range-based for support).
void removePattern(const PatternIterator &it)
Removes the registered pattern referenced by it.
SubstructureHistogramCalculator & operator=(const SubstructureHistogramCalculator &gen)
Replaces the state of this calculator by a copy of the state of gen.
ConstPatternIterator end() const
Returns a constant iterator pointing one past the last registered pattern (range-based for support).
ConstPatternIterator getPatternsEnd() const
Returns a constant iterator pointing one past the last registered pattern.
PatternList::iterator PatternIterator
A mutable iterator over the registered patterns.
Definition: SubstructureHistogramCalculator.hpp:77
PatternIterator getPatternsBegin()
Returns a mutable iterator pointing to the first registered pattern.
PatternIterator getPatternsEnd()
Returns a mutable iterator pointing one past the last registered pattern.
void addPattern(const MolecularGraph::SharedPointer &molgraph, std::size_t id, std::size_t priority=0, bool all_matches=true, bool unique_matches=true)
Registers a new pattern by its query molecular graph and per-pattern settings.
std::size_t getNumPatterns() const
Returns the number of registered patterns.
void removePattern(std::size_t idx)
Removes the registered pattern at index idx.
void addPattern(const Pattern &pattern)
Appends a copy of the pre-built pattern pattern.
PatternList::const_iterator ConstPatternIterator
A constant iterator over the registered patterns.
Definition: SubstructureHistogramCalculator.hpp:75
SubstructureHistogramCalculator()
Constructs an empty SubstructureHistogramCalculator instance.
void clear()
Removes all registered patterns.
SubstructureHistogramCalculator(const SubstructureHistogramCalculator &gen)
Constructs a copy of the SubstructureHistogramCalculator instance gen.
void calculate(const MolecularGraph &molgraph, T &histo)
Counts substructure occurrences in molgraph and writes the per-pattern hit counts to histo.
Definition: SubstructureHistogramCalculator.hpp:307
const Pattern & getPattern(std::size_t idx) const
Returns the registered pattern at index idx.
ConstPatternIterator getPatternsBegin() const
Returns a constant iterator pointing to the first registered pattern.
std::shared_ptr< SubstructureHistogramCalculator > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated SubstructureHistogramCalculator instances.
Definition: SubstructureHistogramCalculator.hpp:72
constexpr unsigned int T
Specifies Hydrogen (Tritium).
Definition: AtomType.hpp:67
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.