Chemical Data Processing Library C++ API - Version 1.4.0
CircularFingerprintGenerator.hpp
Go to the documentation of this file.
1 /*
2  * CircularFingerprintGenerator.hpp
3  *
4  * Implementation of SciTegic style Circular Fingerprints
5  *
6  * This file is part of the Chemical Data Processing Toolkit
7  *
8  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this library; see the file COPYING. If not, write to
22  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
31 #ifndef CDPL_DESCR_CIRCULARFINGERPRINTGENERATOR_HPP
32 #define CDPL_DESCR_CIRCULARFINGERPRINTGENERATOR_HPP
33 
34 #include <cstddef>
35 #include <cstdint>
36 #include <vector>
37 #include <utility>
38 #include <functional>
39 
40 #include <boost/random/linear_congruential.hpp>
41 
42 #include "CDPL/Descr/APIPrefix.hpp"
45 #include "CDPL/Util/BitSet.hpp"
46 
47 
48 namespace CDPL
49 {
50 
51  namespace Chem
52  {
53 
54  class MolecularGraph;
55  class Atom;
56  class Bond;
57  class Fragment;
58  class FragmentList;
59  } // namespace Chem
60 
61  namespace Descr
62  {
63 
79  {
80 
81  public:
86  static constexpr unsigned int DEF_ATOM_PROPERTY_FLAGS =
90 
95  static constexpr unsigned int DEF_BOND_PROPERTY_FLAGS =
97 
102  {
103 
104  public:
// Constructs the atom identifier functor for the given set of atomic property
// flags (defaults to DEF_ATOM_PROPERTY_FLAGS); the value is stored in the
// private member 'flags'.
123  DefAtomIdentifierFunctor(unsigned int flags = DEF_ATOM_PROPERTY_FLAGS):
124  flags(flags) {}
125 
136  std::uint64_t operator()(const Chem::Atom& atom, const Chem::MolecularGraph& molgraph) const;
137 
138  private:
139  unsigned int flags;
140  };
141 
146  {
147 
148  public:
// Constructs the bond identifier functor for the given set of bond property
// flags (defaults to DEF_BOND_PROPERTY_FLAGS); the value is stored in the
// private member 'flags'.
162  DefBondIdentifierFunctor(unsigned int flags = DEF_BOND_PROPERTY_FLAGS):
163  flags(flags) {}
164 
174  std::uint64_t operator()(const Chem::Bond& bond) const;
175 
176  private:
177  unsigned int flags;
178  };
179 
188  typedef std::function<std::uint64_t(const Chem::Atom&, const Chem::MolecularGraph&)> AtomIdentifierFunction;
189 
198  typedef std::function<std::uint64_t(const Chem::Bond&)> BondIdentifierFunction;
199 
204 
211 
220 
229 
235  void setNumIterations(std::size_t num_iter);
236 
241  std::size_t getNumIterations() const;
242 
247  void includeHydrogens(bool include);
248 
253  bool hydrogensIncluded() const;
254 
259  void includeChirality(bool include);
260 
265  bool chiralityIncluded() const;
266 
271  void generate(const Chem::MolecularGraph& molgraph);
272 
280  void setFeatureBits(Util::BitSet& bs, bool reset = true) const;
281 
291  void setFeatureBits(std::size_t atom_idx, Util::BitSet& bs, bool reset = true) const;
292 
297  std::size_t getNumFeatures() const;
298 
304  std::uint64_t getFeatureIdentifier(std::size_t ftr_idx) const;
305 
315  const Util::BitSet& getFeatureSubstructure(std::size_t ftr_idx) const;
316 
323  void getFeatureSubstructure(std::size_t ftr_idx, Chem::Fragment& frag, bool clear = true) const;
324 
333  void getFeatureSubstructures(std::size_t bit_idx, std::size_t bs_size, Chem::FragmentList& frags, bool clear = true) const;
334 
335  private:
336  typedef std::pair<std::pair<std::uint64_t, std::uint64_t>, const Chem::Atom*> NeighborData;
337 
338  void init(const Chem::MolecularGraph& molgraph);
339 
340  void performIteration(std::size_t iter_num);
341 
342  unsigned int getStereoFlag(const Chem::Atom& ctr_atom) const;
343 
344  void bitSetToFragment(const Util::BitSet& ab_mask, Chem::Fragment& frag) const;
345 
346  static bool compareNeighborData(const NeighborData& nbr1, const NeighborData& nbr2);
347 
348  typedef std::pair<std::uint64_t, Util::BitSet> Feature;
349  typedef std::vector<Feature> FeatureArray;
350  typedef std::vector<const Feature*> FeaturePtrList;
351  typedef std::vector<std::uint64_t> UInt64Array;
352  typedef std::vector<NeighborData> NeighborDataList;
353 
354  const Chem::MolecularGraph* molGraph;
355  std::size_t numIterations;
356  AtomIdentifierFunction atomIdentifierFunc;
357  BondIdentifierFunction bondIdentifierFunc;
358  bool incHydrogens;
359  bool incChirality;
360  boost::rand48 randGenerator;
361  UInt64Array bondIdentifiers;
362  FeatureArray features;
363  FeaturePtrList outputFeatures;
364  UInt64Array idCalculationData;
365  NeighborDataList neighborData;
366  Util::BitSet duplicateMask;
367  };
368  } // namespace Descr
369 } // namespace CDPL
370 
371 #endif // CDPL_DESCR_CIRCULARFINGERPRINTGENERATOR_HPP
Declaration of type CDPL::Util::BitSet.
Definition of constants in namespace CDPL::Chem::BondPropertyFlag.
Definition of constants in namespace CDPL::Chem::AtomPropertyFlag.
Definition of the preprocessor macro CDPL_DESCR_API.
#define CDPL_DESCR_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Abstract base class representing a chemical bond between two Chem::Atom instances.
Definition: Bond.hpp:54
Data type for the storage of Chem::Fragment objects.
Definition: FragmentList.hpp:49
Concrete Chem::MolecularGraph implementation that stores references to a selectable subset of atoms a...
Definition: Fragment.hpp:57
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
The functor for the generation of ECFP atom identifiers.
Definition: CircularFingerprintGenerator.hpp:102
DefAtomIdentifierFunctor(unsigned int flags=DEF_ATOM_PROPERTY_FLAGS)
Constructs the atom identifier functor object for the specified set of atomic properties.
Definition: CircularFingerprintGenerator.hpp:123
std::uint64_t operator()(const Chem::Atom &atom, const Chem::MolecularGraph &molgraph) const
Generates an identifier for the argument atom.
The default functor for the generation of bond identifiers.
Definition: CircularFingerprintGenerator.hpp:146
DefBondIdentifierFunctor(unsigned int flags=DEF_BOND_PROPERTY_FLAGS)
Constructs the bond identifier functor object for the specified set of bond properties.
Definition: CircularFingerprintGenerator.hpp:162
std::uint64_t operator()(const Chem::Bond &bond) const
Generates an identifier for the argument bond.
Generation of atom-centered circular substructure fingerprints in the spirit of SciTegic's Extended C...
Definition: CircularFingerprintGenerator.hpp:79
void setNumIterations(std::size_t num_iter)
Specifies the desired number of feature substructure growing iterations.
std::uint64_t getFeatureIdentifier(std::size_t ftr_idx) const
Returns the identifier of the feature at index ftr_idx.
void getFeatureSubstructure(std::size_t ftr_idx, Chem::Fragment &frag, bool clear=true) const
Extracts the substructure covered by the feature at index ftr_idx into frag.
void setAtomIdentifierFunction(const AtomIdentifierFunction &func)
Specifies a customized function for the generation of initial atom identifiers.
std::function< std::uint64_t(const Chem::Bond &)> BondIdentifierFunction
Type of the generic functor class used to store user-defined functions or function objects for the ge...
Definition: CircularFingerprintGenerator.hpp:198
void includeHydrogens(bool include)
Specifies whether hydrogens shall be included in the generated fingerprint.
void generate(const Chem::MolecularGraph &molgraph)
Generates the atom-centered circular substructure fingerprint of the molecular graph molgraph.
void setFeatureBits(std::size_t atom_idx, Util::BitSet &bs, bool reset=true) const
Maps previously generated identifiers of structural features involving the atom specified by atom_idx...
std::size_t getNumFeatures() const
Returns the number of features generated by the most recent call to generate().
std::size_t getNumIterations() const
Returns the number of feature substructure growing iterations.
void setBondIdentifierFunction(const BondIdentifierFunction &func)
Specifies a customized function for the generation of initial bond identifiers.
void getFeatureSubstructures(std::size_t bit_idx, std::size_t bs_size, Chem::FragmentList &frags, bool clear=true) const
Extracts the substructures of every feature that, when folded into a bitset of size bs_size,...
void setFeatureBits(Util::BitSet &bs, bool reset=true) const
Maps previously generated feature identifiers to bit indices and sets the corresponding bits of bs.
CircularFingerprintGenerator()
Constructs the CircularFingerprintGenerator instance.
const Util::BitSet & getFeatureSubstructure(std::size_t ftr_idx) const
Returns the atom-bit mask describing the substructure covered by the feature at index ftr_idx.
std::function< std::uint64_t(const Chem::Atom &, const Chem::MolecularGraph &)> AtomIdentifierFunction
Type of the generic functor class used to store user-defined functions or function objects for the ge...
Definition: CircularFingerprintGenerator.hpp:188
bool chiralityIncluded() const
Tells whether atom chirality is considered during fingerprint generation.
bool hydrogensIncluded() const
Tells whether hydrogens are considered during fingerprint generation.
CircularFingerprintGenerator(const Chem::MolecularGraph &molgraph)
Constructs the CircularFingerprintGenerator instance and generates the atom-centered circular substru...
void includeChirality(bool include)
Specifies whether atom stereo configurations shall be incorporated into atom identifiers.
constexpr unsigned int FORMAL_CHARGE
Specifies the formal charge of an atom.
Definition: Chem/AtomPropertyFlag.hpp:73
constexpr unsigned int H_COUNT
Specifies the hydrogen count of an atom.
Definition: Chem/AtomPropertyFlag.hpp:78
constexpr unsigned int ISOTOPE
Specifies the isotopic mass of an atom.
Definition: Chem/AtomPropertyFlag.hpp:68
constexpr unsigned int TOPOLOGY
Specifies the ring/chain topology of an atom.
Definition: Chem/AtomPropertyFlag.hpp:88
constexpr unsigned int HEAVY_BOND_COUNT
Specifies the heavy bond count of an atom.
Definition: Chem/AtomPropertyFlag.hpp:108
constexpr unsigned int VALENCE
Specifies the valence of an atom.
Definition: Chem/AtomPropertyFlag.hpp:113
constexpr unsigned int TYPE
Specifies the generic type or element of an atom.
Definition: Chem/AtomPropertyFlag.hpp:63
constexpr unsigned int AROMATICITY
Specifies the membership of a bond in aromatic rings.
Definition: BondPropertyFlag.hpp:73
constexpr unsigned int ORDER
Specifies the order of a bond.
Definition: BondPropertyFlag.hpp:63
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.