Chemical Data Processing Library C++ API - Version 1.1.0
CircularFingerprintGenerator.hpp
Go to the documentation of this file.
1 /*
2  * CircularFingerprintGenerator.hpp
3  *
4  * Implementation of SciTegic style Circular Fingerprints
5  *
6  * This file is part of the Chemical Data Processing Toolkit
7  *
8  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this library; see the file COPYING. If not, write to
22  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
31 #ifndef CDPL_DESCR_CIRCULARFINGERPRINTGENERATOR_HPP
32 #define CDPL_DESCR_CIRCULARFINGERPRINTGENERATOR_HPP
33 
34 #include <cstddef>
35 #include <cstdint>
36 #include <vector>
37 #include <utility>
38 #include <functional>
39 
40 #include <boost/random/linear_congruential.hpp>
41 
42 #include "CDPL/Descr/APIPrefix.hpp"
45 #include "CDPL/Util/BitSet.hpp"
46 
47 
48 namespace CDPL
49 {
50 
51  namespace Chem
52  {
53 
54  class MolecularGraph;
55  class Atom;
56  class Bond;
57  class Fragment;
58  class FragmentList;
59  } // namespace Chem
60 
61  namespace Descr
62  {
63 
69  {
70 
71  public:
76  static constexpr unsigned int DEF_ATOM_PROPERTY_FLAGS =
80 
85  static constexpr unsigned int DEF_BOND_PROPERTY_FLAGS =
87 
92  {
93 
94  public:
113  DefAtomIdentifierFunctor(unsigned int flags = DEF_ATOM_PROPERTY_FLAGS):
114  flags(flags) {}
115 
127  std::uint64_t operator()(const Chem::Atom& atom, const Chem::MolecularGraph& molgraph) const;
128 
129  private:
130  unsigned int flags;
131  };
132 
137  {
138 
139  public:
153  DefBondIdentifierFunctor(unsigned int flags = DEF_BOND_PROPERTY_FLAGS):
154  flags(flags) {}
155 
165  std::uint64_t operator()(const Chem::Bond& bond) const;
166 
167  private:
168  unsigned int flags;
169  };
170 
179  typedef std::function<std::uint64_t(const Chem::Atom&, const Chem::MolecularGraph&)> AtomIdentifierFunction;
180 
189  typedef std::function<std::uint64_t(const Chem::Bond&)> BondIdentifierFunction;
190 
195 
202 
211 
220 
226  void setNumIterations(std::size_t num_iter);
227 
232  std::size_t getNumIterations() const;
233 
234  void includeHydrogens(bool include);
235 
236  bool hydrogensIncluded() const;
237 
238  void includeChirality(bool include);
239 
240  bool chiralityIncluded() const;
241 
246  void generate(const Chem::MolecularGraph& molgraph);
247 
255  void setFeatureBits(Util::BitSet& bs, bool reset = true) const;
256 
266  void setFeatureBits(std::size_t atom_idx, Util::BitSet& bs, bool reset = true) const;
267 
268  std::size_t getNumFeatures() const;
269 
270  std::uint64_t getFeatureIdentifier(std::size_t ftr_idx) const;
271 
272  const Util::BitSet& getFeatureSubstructure(std::size_t ftr_idx) const;
273 
274  void getFeatureSubstructure(std::size_t ftr_idx, Chem::Fragment& frag, bool clear = true) const;
275 
276  void getFeatureSubstructures(std::size_t bit_idx, std::size_t bs_size, Chem::FragmentList& frags, bool clear = true) const;
277 
278  private:
279  typedef std::pair<std::pair<std::uint64_t, std::uint64_t>, const Chem::Atom*> NeighborData;
280 
281  void init(const Chem::MolecularGraph& molgraph);
282 
283  void performIteration(std::size_t iter_num);
284 
285  unsigned int getStereoFlag(const Chem::Atom& ctr_atom) const;
286 
287  void bitSetToFragment(const Util::BitSet& ab_mask, Chem::Fragment& frag) const;
288 
289  static bool compareNeighborData(const NeighborData& nbr1, const NeighborData& nbr2);
290 
291  typedef std::pair<std::uint64_t, Util::BitSet> Feature;
292  typedef std::vector<Feature> FeatureArray;
293  typedef std::vector<const Feature*> FeaturePtrList;
294  typedef std::vector<std::uint64_t> UInt64Array;
295  typedef std::vector<NeighborData> NeighborDataList;
296 
297  const Chem::MolecularGraph* molGraph;
298  std::size_t numIterations;
299  AtomIdentifierFunction atomIdentifierFunc;
300  BondIdentifierFunction bondIdentifierFunc;
301  bool incHydrogens;
302  bool incChirality;
303  boost::rand48 randGenerator;
304  UInt64Array bondIdentifiers;
305  FeatureArray features;
306  FeaturePtrList outputFeatures;
307  UInt64Array idCalculationData;
308  NeighborDataList neighborData;
309  Util::BitSet duplicateMask;
310  };
311  } // namespace Descr
312 } // namespace CDPL
313 
314 #endif // CDPL_DESCR_CIRCULARFINGERPRINTGENERATOR_HPP
CDPL::Chem::AtomPropertyFlag::FORMAL_CHARGE
const unsigned int FORMAL_CHARGE
Specifies the formal charge of an atom.
Definition: Chem/AtomPropertyFlag.hpp:73
CDPL::Chem::BondPropertyFlag::AROMATICITY
const unsigned int AROMATICITY
Specifies the membership of a bond in aromatic rings.
Definition: BondPropertyFlag.hpp:73
CDPL::Descr::CircularFingerprintGenerator::CircularFingerprintGenerator
CircularFingerprintGenerator(const Chem::MolecularGraph &molgraph)
Constructs the CircularFingerprintGenerator instance and generates the atom-centered circular substru...
CDPL::Descr::CircularFingerprintGenerator::setAtomIdentifierFunction
void setAtomIdentifierFunction(const AtomIdentifierFunction &func)
Allows a customized function to be specified for the generation of initial atom identifiers.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Descr::CircularFingerprintGenerator::generate
void generate(const Chem::MolecularGraph &molgraph)
Generates the atom-centered circular substructure fingerprint of the molecular graph molgraph.
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Descr::CircularFingerprintGenerator::BondIdentifierFunction
std::function< std::uint64_t(const Chem::Bond &)> BondIdentifierFunction
Type of the generic functor class used to store user-defined functions or function objects for the ge...
Definition: CircularFingerprintGenerator.hpp:189
CDPL::Descr::CircularFingerprintGenerator::getFeatureSubstructure
const Util::BitSet & getFeatureSubstructure(std::size_t ftr_idx) const
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Chem::Fragment
Fragment.
Definition: Fragment.hpp:52
CDPL::Descr::CircularFingerprintGenerator::setFeatureBits
void setFeatureBits(Util::BitSet &bs, bool reset=true) const
Maps previously generated feature identifiers to bit indices and sets the corresponding bits of bs.
CDPL::Descr::CircularFingerprintGenerator::setBondIdentifierFunction
void setBondIdentifierFunction(const BondIdentifierFunction &func)
Allows a customized function to be specified for the generation of initial bond identifiers.
CDPL::Descr::CircularFingerprintGenerator::CircularFingerprintGenerator
CircularFingerprintGenerator()
Constructs the CircularFingerprintGenerator instance.
CDPL::Descr::CircularFingerprintGenerator::setNumIterations
void setNumIterations(std::size_t num_iter)
Allows the desired number of feature substructure growing iterations to be specified.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
CDPL::Descr::CircularFingerprintGenerator::DefAtomIdentifierFunctor::DefAtomIdentifierFunctor
DefAtomIdentifierFunctor(unsigned int flags=DEF_ATOM_PROPERTY_FLAGS)
Constructs the atom identifier functor object for the specified set of atomic properties.
Definition: CircularFingerprintGenerator.hpp:113
AtomPropertyFlag.hpp
Definition of constants in namespace CDPL::Chem::AtomPropertyFlag.
CDPL::Descr::CircularFingerprintGenerator::DefBondIdentifierFunctor::DefBondIdentifierFunctor
DefBondIdentifierFunctor(unsigned int flags=DEF_BOND_PROPERTY_FLAGS)
Constructs the bond identifier functor object for the specified set of bond properties.
Definition: CircularFingerprintGenerator.hpp:153
CDPL::Chem::AtomPropertyFlag::VALENCE
const unsigned int VALENCE
Specifies the valence of an atom.
Definition: Chem/AtomPropertyFlag.hpp:113
CDPL::Chem::BondPropertyFlag::ORDER
const unsigned int ORDER
Specifies the order of a bond.
Definition: BondPropertyFlag.hpp:63
CDPL::Descr::CircularFingerprintGenerator::getFeatureSubstructure
void getFeatureSubstructure(std::size_t ftr_idx, Chem::Fragment &frag, bool clear=true) const
CDPL::Descr::CircularFingerprintGenerator::getNumIterations
std::size_t getNumIterations() const
Returns the number of feature substructure growing iterations.
CDPL::Descr::CircularFingerprintGenerator::DefBondIdentifierFunctor::operator()
std::uint64_t operator()(const Chem::Bond &bond) const
Generates an identifier for the argument bond.
CDPL::Descr::CircularFingerprintGenerator::AtomIdentifierFunction
std::function< std::uint64_t(const Chem::Atom &, const Chem::MolecularGraph &)> AtomIdentifierFunction
Type of the generic functor class used to store user-defined functions or function objects for the ge...
Definition: CircularFingerprintGenerator.hpp:179
CDPL::Descr::CircularFingerprintGenerator::includeHydrogens
void includeHydrogens(bool include)
CDPL::Chem::AtomPropertyFlag::TYPE
const unsigned int TYPE
Specifies the generic type or element of an atom.
Definition: Chem/AtomPropertyFlag.hpp:63
CDPL::Descr::CircularFingerprintGenerator::getFeatureSubstructures
void getFeatureSubstructures(std::size_t bit_idx, std::size_t bs_size, Chem::FragmentList &frags, bool clear=true) const
CDPL::Chem::AtomPropertyFlag::ISOTOPE
const unsigned int ISOTOPE
Specifies the isotopic mass of an atom.
Definition: Chem/AtomPropertyFlag.hpp:68
CDPL::Chem::FragmentList
A data type for the storage of Chem::Fragment objects.
Definition: FragmentList.hpp:49
CDPL::Descr::CircularFingerprintGenerator::setFeatureBits
void setFeatureBits(std::size_t atom_idx, Util::BitSet &bs, bool reset=true) const
Maps previously generated identifiers of structural features involving the atom specified by atom_idx...
CDPL::Descr::CircularFingerprintGenerator::getFeatureIdentifier
std::uint64_t getFeatureIdentifier(std::size_t ftr_idx) const
CDPL::Chem::AtomPropertyFlag::TOPOLOGY
const unsigned int TOPOLOGY
Specifies the ring/chain topology of an atom.
Definition: Chem/AtomPropertyFlag.hpp:88
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Descr::CircularFingerprintGenerator::includeChirality
void includeChirality(bool include)
CDPL::Chem::AtomPropertyFlag::HEAVY_BOND_COUNT
const unsigned int HEAVY_BOND_COUNT
Specifies the heavy bond count of an atom.
Definition: Chem/AtomPropertyFlag.hpp:108
CDPL::Descr::CircularFingerprintGenerator::hydrogensIncluded
bool hydrogensIncluded() const
CDPL::Descr::CircularFingerprintGenerator::chiralityIncluded
bool chiralityIncluded() const
CDPL::Chem::AtomPropertyFlag::H_COUNT
const unsigned int H_COUNT
Specifies the hydrogen count of an atom.
Definition: Chem/AtomPropertyFlag.hpp:78
BondPropertyFlag.hpp
Definition of constants in namespace CDPL::Chem::BondPropertyFlag.
APIPrefix.hpp
Definition of the preprocessor macro CDPL_DESCR_API.
CDPL::Descr::CircularFingerprintGenerator
CircularFingerprintGenerator.
Definition: CircularFingerprintGenerator.hpp:69
CDPL_DESCR_API
#define CDPL_DESCR_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Descr::CircularFingerprintGenerator::DefBondIdentifierFunctor
The default functor for the generation of bond identifiers.
Definition: CircularFingerprintGenerator.hpp:137
CDPL::Descr::CircularFingerprintGenerator::DefAtomIdentifierFunctor::operator()
std::uint64_t operator()(const Chem::Atom &atom, const Chem::MolecularGraph &molgraph) const
Generates an identifier for the argument atom.
CDPL::Descr::CircularFingerprintGenerator::getNumFeatures
std::size_t getNumFeatures() const
CDPL::Descr::CircularFingerprintGenerator::DefAtomIdentifierFunctor
The functor for the generation of ECFP atom identifiers.
Definition: CircularFingerprintGenerator.hpp:92