Chemical Data Processing Library C++ API - Version 1.4.0
MaxCommonAtomSubstructureSearch.hpp
Go to the documentation of this file.
1 /*
2  * MaxCommonAtomSubstructureSearch.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
30 #define CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
31 
32 #include <vector>
33 #include <set>
34 #include <cstddef>
35 #include <memory>
36 
37 #include <boost/iterator/indirect_iterator.hpp>
38 
39 #include "CDPL/Chem/APIPrefix.hpp"
42 #include "CDPL/Util/BitSet.hpp"
44 
45 
46 namespace CDPL
47 {
48 
49  namespace Chem
50  {
51 
52  class MolecularGraph;
53  class Atom;
54  class Bond;
55 
72  {
73 
74  typedef std::vector<AtomBondMapping*> ABMappingList;
75 
76  public:
81  typedef std::shared_ptr<MaxCommonAtomSubstructureSearch> SharedPointer;
82 
86  typedef boost::indirect_iterator<ABMappingList::iterator, AtomBondMapping> MappingIterator;
87 
91  typedef boost::indirect_iterator<ABMappingList::const_iterator, const AtomBondMapping> ConstMappingIterator;
92 
97 
103 
105 
112 
114 
119  void setQuery(const MolecularGraph& query);
120 
134  bool mappingExists(const MolecularGraph& target);
135 
151  bool findAllMappings(const MolecularGraph& target);
152 
168  bool findMaxBondMappings(const MolecularGraph& target);
169 
175  std::size_t getNumMappings() const;
176 
183  AtomBondMapping& getMapping(std::size_t idx);
184 
191  const AtomBondMapping& getMapping(std::size_t idx) const;
192 
198 
204 
210 
216 
222 
228 
234 
240 
252  void uniqueMappingsOnly(bool unique);
253 
259  bool uniqueMappingsOnly() const;
260 
271  void setMaxNumMappings(std::size_t max_num_mappings);
272 
278  std::size_t getMaxNumMappings() const;
279 
289  void setMinSubstructureSize(std::size_t min_size);
290 
296  std::size_t getMinSubstructureSize() const;
297 
298  private:
299  class AGNode;
300 
301  bool init(const MolecularGraph&);
302 
303  void initMatchExpressions();
304 
305  bool buildAssocGraph();
306 
307  bool findAssocGraphCliques(std::size_t);
308  bool isLegal(const AGNode*);
309 
310  bool mappingFound();
311 
312  bool hasPostMappingMatchExprs() const;
313  bool foundMappingMatches(const AtomBondMapping*) const;
314 
315  bool foundMappingUnique();
316 
317  void clearMappings();
318 
319  void freeAtomBondMapping();
320  void freeAtomBondMappings();
321  void freeAssocGraph();
322 
323  AtomBondMapping* createAtomBondMapping();
324 
325  class AGEdge;
326 
327  AGNode* allocAGNode(const Atom*, const Atom*);
328  AGEdge* allocAGEdge(const Bond*, const Bond*);
329 
330  typedef std::vector<const AGEdge*> AGraphEdgeList;
331 
332  class AGNode
333  {
334 
335  public:
336  void setQueryAtom(const Atom*);
337  const Atom* getQueryAtom() const;
338 
339  void setAssocAtom(const Atom*);
340  const Atom* getAssocAtom() const;
341 
342  void addEdge(const AGEdge*);
343 
344  bool isConnected(const AGNode*) const;
345  const AGEdge* findEdge(const AGNode*) const;
346 
347  void clear();
348 
349  void setIndex(std::size_t idx);
350 
351  private:
352  std::size_t index;
353  const Atom* queryAtom;
354  const Atom* assocAtom;
355  Util::BitSet connNodes;
356  AGraphEdgeList bondEdges;
357  };
358 
359  class AGEdge
360  {
361 
362  public:
363  void setQueryBond(const Bond*);
364  const Bond* getQueryBond() const;
365 
366  void setAssocBond(const Bond*);
367  const Bond* getAssocBond() const;
368 
369  void setNode1(const AGNode*);
370  void setNode2(const AGNode*);
371 
372  const AGNode* getNode1() const;
373  const AGNode* getNode2() const;
374 
375  const AGNode* getOther(const AGNode*) const;
376 
377  private:
378  const Bond* queryBond;
379  const Bond* assocBond;
380  const AGNode* node1;
381  const AGNode* node2;
382  };
383 
384  class ABMappingMask
385  {
386 
387  public:
388  void initQueryAtomMask(std::size_t);
389  void initTargetAtomMask(std::size_t);
390 
391  void initQueryBondMask(std::size_t);
392  void initTargetBondMask(std::size_t);
393 
394  void setQueryAtomBit(std::size_t);
395  void setTargetAtomBit(std::size_t);
396 
397  void setQueryBondBit(std::size_t);
398  void setTargetBondBit(std::size_t);
399 
400  void reset();
401 
402  bool operator<(const ABMappingMask&) const;
403  bool operator>(const ABMappingMask&) const;
404 
405  private:
406  Util::BitSet queryAtomMask;
407  Util::BitSet targetAtomMask;
408  Util::BitSet queryBondMask;
409  Util::BitSet targetBondMask;
410  };
411 
412  typedef MatchExpression<MolecularGraph>::SharedPointer MolGraphMatchExprPtr;
413 
414  typedef std::vector<AGNode*> AGraphNodeList;
415  typedef std::vector<AGraphNodeList> AGraphNodeMatrix;
416  typedef std::set<ABMappingMask> UniqueMappingList;
417  typedef std::vector<const Atom*> AtomList;
418  typedef std::vector<const Bond*> BondList;
419  typedef std::vector<MatchExpression<Atom, MolecularGraph>::SharedPointer> AtomMatchExprTable;
420  typedef std::vector<MatchExpression<Bond, MolecularGraph>::SharedPointer> BondMatchExprTable;
421  typedef Util::ObjectStack<AGNode> NodeCache;
422  typedef Util::ObjectStack<AGEdge> EdgeCache;
423  typedef Util::ObjectStack<AtomBondMapping> MappingCache;
424 
425  const MolecularGraph* query;
426  const MolecularGraph* target;
427  AGraphNodeMatrix nodeMatrix;
428  ABMappingList foundMappings;
429  UniqueMappingList uniqueMappings;
430  AGraphEdgeList cliqueEdges;
431  AGraphNodeList cliqueNodes;
432  ABMappingMask mappingMask;
433  AtomMatchExprTable atomMatchExprTable;
434  BondMatchExprTable bondMatchExprTable;
435  MolGraphMatchExprPtr molGraphMatchExpr;
436  AtomList postMappingMatchAtoms;
437  BondList postMappingMatchBonds;
438  NodeCache nodeCache;
439  EdgeCache edgeCache;
440  MappingCache mappingCache;
441  bool queryChanged;
442  bool initQueryData;
443  bool uniqueMatches;
444  bool saveMappings;
445  bool maxBondMappingsOnly;
446  std::size_t numQueryAtoms;
447  std::size_t numQueryBonds;
448  std::size_t numTargetAtoms;
449  std::size_t numTargetBonds;
450  std::size_t maxAtomSubstructureSize;
451  std::size_t maxBondSubstructureSize;
452  std::size_t currNumNullNodes;
453  std::size_t minNumNullNodes;
454  std::size_t maxNumMappings;
455  std::size_t minSubstructureSize;
456  std::size_t currNodeIdx;
457  };
458  } // namespace Chem
459 } // namespace CDPL
460 
461 #endif // CDPL_CHEM_MAXCOMMONATOMSUBSTRUCTURESEARCH_HPP
Definition of class CDPL::Chem::AtomBondMapping.
Declaration of type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of class CDPL::Chem::MatchExpression.
Definition of class CDPL::Util::ObjectStack.
Data structure for the common storage of related atom to atom and bond to bond mappings.
Definition: AtomBondMapping.hpp:55
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Abstract base class representing a chemical bond between two Chem::Atom instances.
Definition: Bond.hpp:54
std::shared_ptr< MatchExpression > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MatchExpression instances.
Definition: MatchExpression.hpp:81
Computes the maximum common atom substructure between a query and a target molecular graph by reducin...
Definition: MaxCommonAtomSubstructureSearch.hpp:72
MaxCommonAtomSubstructureSearch(const MaxCommonAtomSubstructureSearch &)=delete
AtomBondMapping & getMapping(std::size_t idx)
Returns a non-const reference to the stored atom/bond mapping object at index idx.
MaxCommonAtomSubstructureSearch(const MolecularGraph &query)
Constructs and initializes a MaxCommonAtomSubstructureSearch instance for the specified query structu...
bool mappingExists(const MolecularGraph &target)
Searches for a common substructure between the query and the specified target molecular graph.
MappingIterator getMappingsEnd()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
MappingIterator begin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
bool findAllMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
MappingIterator end()
Returns a mutable iterator pointing to the end of the stored atom/bond mapping objects.
const AtomBondMapping & getMapping(std::size_t idx) const
Returns a const reference to the stored atom/bond mapping object at index idx.
boost::indirect_iterator< ABMappingList::iterator, AtomBondMapping > MappingIterator
A mutable random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonAtomSubstructureSearch.hpp:86
std::size_t getMaxNumMappings() const
Returns the specified limit on the number of stored atom/bond mappings.
MaxCommonAtomSubstructureSearch()
Constructs and initializes a MaxCommonAtomSubstructureSearch instance.
bool uniqueMappingsOnly() const
Tells whether duplicate atom/bond mappings are discarded.
void setMinSubstructureSize(std::size_t min_size)
Allows to specify the minimum accepted common substructure size.
ConstMappingIterator getMappingsEnd() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
ConstMappingIterator begin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
std::size_t getNumMappings() const
Returns the number of atom/bond mappings that were recorded in the last search for common substructur...
bool findMaxBondMappings(const MolecularGraph &target)
Searches for all atom/bond mappings of query subgraphs to substructures of the specified target molec...
void setQuery(const MolecularGraph &query)
Allows to specify a new query structure.
ConstMappingIterator getMappingsBegin() const
Returns a constant iterator pointing to the beginning of the stored atom/bond mapping objects.
ConstMappingIterator end() const
Returns a constant iterator pointing to the end of the stored atom/bond mapping objects.
MaxCommonAtomSubstructureSearch & operator=(const MaxCommonAtomSubstructureSearch &)=delete
MappingIterator getMappingsBegin()
Returns a mutable iterator pointing to the beginning of the stored atom/bond mapping objects.
std::size_t getMinSubstructureSize() const
Returns the minimum accepted common substructure size.
void uniqueMappingsOnly(bool unique)
Allows to specify whether or not to store only unique atom/bond mappings.
boost::indirect_iterator< ABMappingList::const_iterator, const AtomBondMapping > ConstMappingIterator
A constant random access iterator used to iterate over the stored atom/bond mapping objects.
Definition: MaxCommonAtomSubstructureSearch.hpp:91
std::shared_ptr< MaxCommonAtomSubstructureSearch > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated MaxCommonAtomSubstructureSearch i...
Definition: MaxCommonAtomSubstructureSearch.hpp:81
void setMaxNumMappings(std::size_t max_num_mappings)
Allows to specify a limit on the number of stored atom/bond mappings.
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
bool operator<(const Array< ValueType > &array1, const Array< ValueType > &array2)
Less than comparison operator.
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
bool operator>(const Array< ValueType > &array1, const Array< ValueType > &array2)
Greater than comparison operator.
The namespace of the Chemical Data Processing Library.