Chemical Data Processing Library C++ API - Version 1.4.0
FragmentGenerator.hpp
Go to the documentation of this file.
1 /*
2  * FragmentGenerator.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_FRAGMENTGENERATOR_HPP
30 #define CDPL_CHEM_FRAGMENTGENERATOR_HPP
31 
32 #include <cstddef>
33 #include <vector>
34 #include <memory>
35 #include <functional>
36 
37 #include "CDPL/Chem/APIPrefix.hpp"
41 #include "CDPL/Util/BitSet.hpp"
42 
43 
44 namespace CDPL
45 {
46 
47  namespace Chem
48  {
49 
62  {
63 
64  public:
66  typedef std::shared_ptr<FragmentGenerator> SharedPointer;
67 
69  typedef std::function<bool(const MolecularGraph&)> FragmentFilterFunction;
70 
75  {
76 
77  public:
83  FragmentationRule(const MolecularGraph::SharedPointer& match_ptn, unsigned int id);
84 
90 
96 
101  unsigned int getID() const;
102 
107  void setID(unsigned int id);
108 
109  private:
111  unsigned int id;
112  };
113 
121  {
122 
123  public:
129  ExcludePattern(const MolecularGraph::SharedPointer& match_ptn, unsigned int rule_id);
130 
136 
142 
148 
153  unsigned int getRuleID() const;
154 
159  void setRuleID(unsigned int id);
160 
165  bool isGeneric() const;
166 
171  void setGeneric(bool generic);
172 
173  private:
175  unsigned int ruleID;
176  bool generic;
177  };
178 
183  {
184 
185  public:
195  FragmentLink(std::size_t frag1_idx, std::size_t frag2_idx, const Bond& bond,
196  unsigned int rule_id, unsigned int atom1_label, unsigned int atom2_label);
197 
202  std::size_t getFragment1Index() const;
203 
208  std::size_t getFragment2Index() const;
209 
214  const Bond& getBond() const;
215 
220  unsigned int getRuleID() const;
221 
226  unsigned int getAtom1Label() const;
227 
232  unsigned int getAtom2Label() const;
233 
234  private:
235  std::size_t frag1Idx;
236  std::size_t frag2Idx;
237  const Bond* bond;
238  unsigned int ruleID;
239  unsigned int atom1Label;
240  unsigned int atom2Label;
241  };
242 
243  private:
244  typedef std::vector<FragmentationRule> FragmentationRuleList;
245  typedef std::vector<ExcludePattern> ExcludePatternList;
246  typedef std::vector<FragmentLink> FragmentLinkList;
247 
248  public:
250  typedef FragmentationRuleList::const_iterator ConstFragmentationRuleIterator;
252  typedef FragmentationRuleList::iterator FragmentationRuleIterator;
253 
255  typedef ExcludePatternList::const_iterator ConstExcludePatternIterator;
257  typedef ExcludePatternList::iterator ExcludePatternIterator;
258 
260  typedef FragmentLinkList::const_iterator ConstFragmentLinkIterator;
261 
266 
272 
276  virtual ~FragmentGenerator() {}
277 
284 
290  void addFragmentationRule(const MolecularGraph::SharedPointer& match_ptn, unsigned int rule_id);
291 
297 
304  const FragmentationRule& getFragmentationRule(std::size_t idx) const;
305 
313 
319 
325 
331 
337 
343  void removeFragmentationRule(std::size_t idx);
344 
349  std::size_t getNumFragmentationRules() const;
350 
355 
361  void addExcludePattern(const MolecularGraph::SharedPointer& match_ptn, unsigned int rule_id);
362 
368 
373  void addExcludePattern(const ExcludePattern& excl_ptn);
374 
381  const ExcludePattern& getExcludePattern(std::size_t idx) const;
382 
390 
396 
402 
408 
414 
420  void removeExcludePattern(std::size_t idx);
421 
426  std::size_t getNumExcludePatterns() const;
427 
432 
437  bool splitBondsIncluded() const;
438 
443  void includeSplitBonds(bool include);
444 
450 
457 
464  void generate(const MolecularGraph& molgraph, FragmentList& frag_list, bool append = false);
465 
470  std::size_t getNumFragmentLinks() const;
471 
478  const FragmentLink& getFragmentLink(std::size_t idx) const;
479 
485 
491 
492  private:
493  void init(const MolecularGraph& molgraph);
494 
495  void processFragRuleMatches(const MolecularGraph& molgraph, const FragmentationRule& rule);
496  void processExcludePatternMatches(const MolecularGraph& molgraph, const ExcludePattern& ptn);
497  void splitIntoFragments(const MolecularGraph& molgraph, FragmentList& frag_list, bool append);
498  void growFragment(const Chem::Atom& atom, const Chem::MolecularGraph& molgraph, Chem::Fragment& frag);
499 
500  std::size_t findContainingFragment(const Chem::Atom& atom, const FragmentList& frag_list,
501  std::size_t start_idx) const;
502 
503  struct SplitBondData // Plain record type; instances are stored in the splitBondData member via SplitBondDataArray.
504  {
505 
506  const Bond* bond; // Raw pointer to a Bond; presumably the bond selected for splitting (not owned) -- TODO confirm against the implementation.
507  unsigned int ruleID; // Presumably the ID of the fragmentation rule that matched the bond -- TODO confirm.
508  unsigned int atom1Label; // Label associated with the bond's first atom; FragmentLink has a field of the same name -- exact meaning to be confirmed.
509  unsigned int atom2Label; // Label associated with the bond's second atom; FragmentLink has a field of the same name -- exact meaning to be confirmed.
510  };
511 
512  typedef std::vector<SplitBondData> SplitBondDataArray;
513 
514  FragmentationRuleList fragRules;
515  ExcludePatternList exclPatterns;
516  FragmentLinkList fragLinks;
517  bool incSplitBonds;
518  FragmentFilterFunction fragFilterFunc;
519  SubstructureSearch subSearch;
520  Util::BitSet splitBondMask;
521  Util::BitSet visAtomMask;
522  SplitBondDataArray splitBondData;
523  };
524  } // namespace Chem
525 } // namespace CDPL
526 
527 #endif // CDPL_CHEM_FRAGMENTGENERATOR_HPP
Declaration of type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of class CDPL::Chem::FragmentList.
Definition of class CDPL::Chem::MolecularGraph.
Definition of class CDPL::Chem::SubstructureSearch.
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Abstract base class representing a chemical bond between two Chem::Atom instances.
Definition: Bond.hpp:54
A pattern overriding a fragmentation rule: bonds matching the pattern are not cleaved.
Definition: FragmentGenerator.hpp:121
ExcludePattern(const MolecularGraph::SharedPointer &match_ptn, unsigned int rule_id)
Constructs a rule-specific exclude pattern.
unsigned int getRuleID() const
Returns the rule ID this exclusion applies to (only meaningful when isGeneric() is false).
bool isGeneric() const
Tells whether this exclude pattern applies to all rules (generic) or only to a specific one.
void setMatchPattern(const MolecularGraph::SharedPointer &ptn)
Sets the SMARTS match pattern.
void setRuleID(unsigned int id)
Sets the rule ID this exclusion applies to.
void setGeneric(bool generic)
Sets whether this exclude pattern is generic.
ExcludePattern(const MolecularGraph::SharedPointer &match_ptn)
Constructs a generic exclude pattern (applies to all fragmentation rules).
const MolecularGraph::SharedPointer & getMatchPattern() const
Returns the SMARTS match pattern.
A single fragmentation rule, consisting of a SMARTS match pattern and a numeric rule ID.
Definition: FragmentGenerator.hpp:75
void setMatchPattern(const MolecularGraph::SharedPointer &ptn)
Sets the SMARTS match pattern of this rule.
unsigned int getID() const
Returns the rule identifier.
void setID(unsigned int id)
Sets the rule identifier.
const MolecularGraph::SharedPointer & getMatchPattern() const
Returns the SMARTS match pattern of this rule.
FragmentationRule(const MolecularGraph::SharedPointer &match_ptn, unsigned int id)
Constructs the fragmentation rule.
Generic rule-based molecule fragmentation engine that splits a molecular graph along bonds matching user-defined fragmentation rules.
Definition: FragmentGenerator.hpp:62
void removeFragmentationRule(std::size_t idx)
Removes the fragmentation rule at index idx.
void addExcludePattern(const MolecularGraph::SharedPointer &match_ptn, unsigned int rule_id)
Registers a rule-specific exclude pattern.
FragmentLinkList::const_iterator ConstFragmentLinkIterator
A constant iterator over the generated fragment links.
Definition: FragmentGenerator.hpp:260
ConstFragmentLinkIterator getFragmentLinksBegin() const
Returns a constant iterator pointing to the first fragment link.
ExcludePatternList::iterator ExcludePatternIterator
A mutable iterator over the registered exclude patterns.
Definition: FragmentGenerator.hpp:257
FragmentationRule & getFragmentationRule(std::size_t idx)
Returns the fragmentation rule at index idx.
bool splitBondsIncluded() const
Tells whether the split (cleaved) bonds are retained in the output fragments.
FragmentationRuleIterator getFragmentationRulesBegin()
Returns a mutable iterator pointing to the first registered fragmentation rule.
ConstFragmentationRuleIterator getFragmentationRulesBegin() const
Returns a constant iterator pointing to the first registered fragmentation rule.
void clearExcludePatterns()
Removes all registered exclude patterns.
FragmentationRuleIterator getFragmentationRulesEnd()
Returns a mutable iterator pointing one past the last registered fragmentation rule.
std::shared_ptr< FragmentGenerator > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated FragmentGenerator instances.
Definition: FragmentGenerator.hpp:66
const FragmentLink & getFragmentLink(std::size_t idx) const
Returns the fragment link at index idx.
ConstFragmentationRuleIterator getFragmentationRulesEnd() const
Returns a constant iterator pointing one past the last registered fragmentation rule.
const ExcludePattern & getExcludePattern(std::size_t idx) const
Returns the exclude pattern at index idx.
ConstExcludePatternIterator getExcludePatternsEnd() const
Returns a constant iterator pointing one past the last registered exclude pattern.
virtual ~FragmentGenerator()
Virtual destructor.
Definition: FragmentGenerator.hpp:276
ConstFragmentLinkIterator getFragmentLinksEnd() const
Returns a constant iterator pointing one past the last fragment link.
std::size_t getNumFragmentLinks() const
Returns the number of fragment links produced by the most recent generate() call.
FragmentationRuleList::const_iterator ConstFragmentationRuleIterator
A constant iterator over the registered fragmentation rules.
Definition: FragmentGenerator.hpp:250
const FragmentFilterFunction & getFragmentFilterFunction() const
Returns the predicate used to filter the generated fragments.
void setFragmentFilterFunction(const FragmentFilterFunction &func)
Sets the predicate used to filter the generated fragments (fragments for which the predicate returns ...
const FragmentationRule & getFragmentationRule(std::size_t idx) const
Returns the fragmentation rule at index idx.
std::size_t getNumExcludePatterns() const
Returns the number of registered exclude patterns.
std::size_t getNumFragmentationRules() const
Returns the number of registered fragmentation rules.
FragmentGenerator(const FragmentGenerator &gen)
Constructs a copy of the FragmentGenerator instance gen.
FragmentGenerator & operator=(const FragmentGenerator &gen)
Replaces the state of this generator by a copy of the state of gen.
ExcludePatternIterator getExcludePatternsBegin()
Returns a mutable iterator pointing to the first registered exclude pattern.
void addFragmentationRule(const MolecularGraph::SharedPointer &match_ptn, unsigned int rule_id)
Registers a new fragmentation rule by its SMARTS match pattern and rule ID.
ConstExcludePatternIterator getExcludePatternsBegin() const
Returns a constant iterator pointing to the first registered exclude pattern.
FragmentGenerator()
Constructs the FragmentGenerator instance.
void addExcludePattern(const ExcludePattern &excl_ptn)
Appends a copy of the pre-built exclude pattern excl_ptn.
ExcludePattern & getExcludePattern(std::size_t idx)
Returns the exclude pattern at index idx.
std::function< bool(const MolecularGraph &)> FragmentFilterFunction
Type of a predicate accepting/rejecting a generated fragment.
Definition: FragmentGenerator.hpp:69
void addExcludePattern(const MolecularGraph::SharedPointer &match_ptn)
Registers a generic exclude pattern (applies to all fragmentation rules).
void includeSplitBonds(bool include)
Specifies whether the split (cleaved) bonds shall be retained in the output fragments.
FragmentationRuleList::iterator FragmentationRuleIterator
A mutable iterator over the registered fragmentation rules.
Definition: FragmentGenerator.hpp:252
void addFragmentationRule(const FragmentationRule &rule)
Appends a copy of the pre-built fragmentation rule rule.
void clearFragmentationRules()
Removes all registered fragmentation rules.
void generate(const MolecularGraph &molgraph, FragmentList &frag_list, bool append=false)
Performs the fragmentation of molgraph and writes the resulting fragments to frag_list.
ExcludePatternIterator getExcludePatternsEnd()
Returns a mutable iterator pointing one past the last registered exclude pattern.
void removeExcludePattern(std::size_t idx)
Removes the exclude pattern at index idx.
ExcludePatternList::const_iterator ConstExcludePatternIterator
A constant iterator over the registered exclude patterns.
Definition: FragmentGenerator.hpp:255
Data type for the storage of Chem::Fragment objects.
Definition: FragmentList.hpp:49
Concrete Chem::MolecularGraph implementation that stores references to a selectable subset of atoms and bonds.
Definition: Fragment.hpp:57
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
std::shared_ptr< MolecularGraph > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MolecularGraph instances.
Definition: MolecularGraph.hpp:63
Subgraph-isomorphism search of a query molecular graph against a target molecular graph,...
Definition: SubstructureSearch.hpp:74
CDPL_CHEM_API void splitIntoFragments(const MolecularGraph &molgraph, FragmentList &frag_list, const Util::BitSet &split_bond_mask, bool append=false)
Splits molgraph into connected fragments by removing the bonds flagged in split_bond_mask.
boost::dynamic_bitset<> BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.