Chemical Data Processing Library C++ API - Version 1.1.0
PathFingerprintGenerator.hpp
Go to the documentation of this file.
1 /*
2  * PathFingerprintGenerator.hpp
3  *
4  * Implementation of Daylight style Path Fingerprints
5  *
6  * This file is part of the Chemical Data Processing Toolkit
7  *
8  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this library; see the file COPYING. If not, write to
22  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
31 #ifndef CDPL_DESCR_PATHFINGERPRINTGENERATOR_HPP
32 #define CDPL_DESCR_PATHFINGERPRINTGENERATOR_HPP
33 
34 #include <cstddef>
35 #include <cstdint>
36 #include <vector>
37 #include <functional>
38 
39 #include <boost/random/linear_congruential.hpp>
40 
41 #include "CDPL/Descr/APIPrefix.hpp"
44 #include "CDPL/Util/BitSet.hpp"
45 
46 
47 namespace CDPL
48 {
49 
50  namespace Chem
51  {
52 
 // Forward declarations only: within this header these Chem types appear
 // solely behind references (const Chem::Atom&, const Chem::Bond&,
 // const Chem::MolecularGraph&) or a pointer (the molGraph member), so
 // their full definitions are not needed by the declarations visible here.
53  class MolecularGraph;
54  class Atom;
55  class Bond;
56  } // namespace Chem
57 
58  namespace Descr
59  {
60 
66  {
 // Body of CDPL::Descr::PathFingerprintGenerator; the page's cross-reference
 // index places the class definition at listing line 66. The file header
 // describes this file as an implementation of Daylight style path
 // fingerprints.
 // NOTE(review): the class-head line itself is absent from this listing (as
 // are several other lines, see the notes below) - consult the real header
 // before relying on anything that is not shown here.
67 
68  public:
 // Default atom property flag set; used as the default argument of the
 // DefAtomDescriptorFunctor constructor.
 // NOTE(review): the initializer expression is not visible in this listing.
 // The page's index references Chem::AtomPropertyFlag::TYPE, ISOTOPE,
 // FORMAL_CHARGE and AROMATICITY, which presumably are the flags combined
 // here - confirm against the real header.
73  static constexpr unsigned int DEF_ATOM_PROPERTY_FLAGS =
76 
 // Default bond property flag set; used as the default argument of the
 // DefBondDescriptorFunctor constructor.
 // NOTE(review): the initializer expression is not visible in this listing.
 // The page's index references Chem::BondPropertyFlag::ORDER, TOPOLOGY and
 // AROMATICITY, which presumably are the flags combined here - confirm.
81  static constexpr unsigned int DEF_BOND_PROPERTY_FLAGS =
83 
 // Body of the nested class DefAtomDescriptorFunctor (class-head line absent
 // from this listing): the default functor for the generation of atom
 // descriptors.
88  {
89 
90  public:
 // Constructs the functor for the given set of atom property flags and
 // stores them in the private 'flags' member.
 // The single-argument constructor is not declared 'explicit', so an
 // unsigned int converts implicitly to this functor type.
103  DefAtomDescriptorFunctor(unsigned int flags = DEF_ATOM_PROPERTY_FLAGS):
104  flags(flags) {}
105 
 // Generates a 64-bit descriptor for the argument atom. Only declared
 // here; how 'flags' influences the result is defined in the implementation
 // file, which is not visible here.
115  std::uint64_t operator()(const Chem::Atom& atom) const;
116 
117  private:
 // Atom property flags passed to the constructor.
118  unsigned int flags;
119  };
120 
 // Body of the nested class DefBondDescriptorFunctor (class-head line absent
 // from this listing): the default functor for the generation of bond
 // descriptors.
125  {
126 
127  public:
 // Constructs the functor for the given set of bond property flags and
 // stores them in the private 'flags' member.
 // The single-argument constructor is not declared 'explicit', so an
 // unsigned int converts implicitly to this functor type.
139  DefBondDescriptorFunctor(unsigned int flags = DEF_BOND_PROPERTY_FLAGS):
140  flags(flags) {}
141 
 // Generates a 64-bit descriptor for the argument bond. Only declared
 // here; the implementation is not visible in this header.
151  std::uint64_t operator()(const Chem::Bond& bond) const;
152 
153  private:
 // Bond property flags passed to the constructor.
154  unsigned int flags;
155  };
156 
 // std::function wrapper for callables that take a const Chem::Atom& and
 // return a std::uint64_t; type of the atomDescriptorFunc member below.
165  typedef std::function<std::uint64_t(const Chem::Atom&)> AtomDescriptorFunction;
166 
 // std::function wrapper for callables that take a const Chem::Bond& and
 // return a std::uint64_t; type of the bondDescriptorFunc member below.
175  typedef std::function<std::uint64_t(const Chem::Bond&)> BondDescriptorFunction;
176 
 // NOTE(review): listing lines 181, 189, 196 and 203 are blank here. The
 // page's index lists PathFingerprintGenerator(),
 // PathFingerprintGenerator(const Chem::MolecularGraph&, Util::BitSet&),
 // setAtomDescriptorFunction(const AtomDescriptorFunction&) and
 // setBondDescriptorFunction(const BondDescriptorFunction&) as members;
 // presumably their declarations belong on these lines - confirm against
 // the real header.
181 
189 
196 
203 
 // Specifies the minimum length a path must have to contribute to the
 // generated fingerprint.
 // NOTE(review): the unit of "length" (atoms vs. bonds) and any validation
 // against the maximum length are not visible in this header.
213  void setMinPathLength(std::size_t min_length);
214 
 // Returns the minimum length a path must have to contribute to the
 // generated fingerprint.
220  std::size_t getMinPathLength() const;
221 
 // Specifies the maximum considered path length.
231  void setMaxPathLength(std::size_t max_length);
232 
 // Returns the maximum considered path length.
238  std::size_t getMaxPathLength() const;
239 
 // Specifies the desired fingerprint size.
 // NOTE(review): presumably the size is a number of bits, judging by the
 // name - confirm.
245  void setNumBits(std::size_t num_bits);
246 
 // Returns the size of the generated fingerprints.
251  std::size_t getNumBits() const;
252 
 // Generates the fingerprint of the molecular graph 'molgraph'.
 // NOTE(review): 'fp' is taken by non-const reference and is presumably
 // the output bitset; whether it is resized or cleared first cannot be
 // determined from this header. The method is non-const, so a generator
 // instance is presumably not safe to share between threads - confirm.
258  void generate(const Chem::MolecularGraph& molgraph, Util::BitSet& fp);
259 
260  private:
 // Private helpers; only declared here, their behavior is defined in the
 // implementation file, which is not visible.
261  void calcFingerprint(const Chem::MolecularGraph&, Util::BitSet&);
262 
263  void growPath(const Chem::Atom&, Util::BitSet&);
264 
265  std::size_t calcBitIndex();
266 
 // Internal container aliases used for the data members below.
267  typedef std::vector<std::size_t> IndexList;
268  typedef std::vector<std::uint64_t> UInt64Array;
269 
 // Data members. Their exact roles are defined by the implementation file;
 // the names suggest settings (numBits, minPathLength, maxPathLength, the
 // two descriptor functions) plus working state reused while a fingerprint
 // is generated - TODO confirm.
 // molGraph is a raw, non-owning-looking pointer to a const graph; its
 // lifetime requirements are not visible here - verify against generate().
270  const Chem::MolecularGraph* molGraph;
271  std::size_t numBits;
272  std::size_t minPathLength;
273  std::size_t maxPathLength;
274  AtomDescriptorFunction atomDescriptorFunc;
275  BondDescriptorFunction bondDescriptorFunc;
276  Util::BitSet visBondMask;
277  UInt64Array atomDescriptors;
278  UInt64Array bondDescriptors;
279  IndexList atomPath;
280  IndexList bondPath;
281  UInt64Array fwdPathDescriptor;
282  UInt64Array revPathDescriptor;
 // Boost rand48 engine (from <boost/random/linear_congruential.hpp>);
 // presumably used by calcBitIndex() to map path descriptors to bit
 // positions - confirm in the implementation.
283  boost::rand48 randGenerator;
284  };
285  } // namespace Descr
286 } // namespace CDPL
287 
288 #endif // CDPL_DESCR_PATHFINGERPRINTGENERATOR_HPP
CDPL::Chem::AtomPropertyFlag::FORMAL_CHARGE
const unsigned int FORMAL_CHARGE
Specifies the formal charge of an atom.
Definition: Chem/AtomPropertyFlag.hpp:73
CDPL::Chem::BondPropertyFlag::AROMATICITY
const unsigned int AROMATICITY
Specifies the membership of a bond in aromatic rings.
Definition: BondPropertyFlag.hpp:73
CDPL::Descr::PathFingerprintGenerator::setMaxPathLength
void setMaxPathLength(std::size_t max_length)
Specifies the maximum considered path length.
CDPL::Descr::PathFingerprintGenerator::setAtomDescriptorFunction
void setAtomDescriptorFunction(const AtomDescriptorFunction &func)
Specifies a custom function for the generation of atom descriptors.
CDPL::Chem::Bond
Bond.
Definition: Bond.hpp:50
CDPL::Descr::PathFingerprintGenerator::getNumBits
std::size_t getNumBits() const
Returns the size of the generated fingerprints.
CDPL::Descr::PathFingerprintGenerator::setMinPathLength
void setMinPathLength(std::size_t min_length)
Specifies the minimum length a path must have to contribute to the generated fingerprint.
CDPL::Descr::PathFingerprintGenerator::getMaxPathLength
std::size_t getMaxPathLength() const
Returns the maximum considered path length.
CDPL::Descr::PathFingerprintGenerator::DefAtomDescriptorFunctor
The default functor for the generation of atom descriptors.
Definition: PathFingerprintGenerator.hpp:88
CDPL::Util::BitSet
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
CDPL::Chem::Atom
Atom.
Definition: Atom.hpp:52
CDPL::Descr::PathFingerprintGenerator::PathFingerprintGenerator
PathFingerprintGenerator(const Chem::MolecularGraph &molgraph, Util::BitSet &fp)
Constructs the PathFingerprintGenerator instance and generates the fingerprint of the molecular graph molgraph.
CDPL::Descr::PathFingerprintGenerator::setBondDescriptorFunction
void setBondDescriptorFunction(const BondDescriptorFunction &func)
Specifies a custom function for the generation of bond descriptors.
CDPL::Chem::MolecularGraph
MolecularGraph.
Definition: MolecularGraph.hpp:52
BitSet.hpp
Definition of the type CDPL::Util::BitSet.
AtomPropertyFlag.hpp
Definition of constants in namespace CDPL::Chem::AtomPropertyFlag.
CDPL::Descr::PathFingerprintGenerator::DefBondDescriptorFunctor
The default functor for the generation of bond descriptors.
Definition: PathFingerprintGenerator.hpp:125
CDPL::Chem::BondPropertyFlag::ORDER
const unsigned int ORDER
Specifies the order of a bond.
Definition: BondPropertyFlag.hpp:63
CDPL::Descr::PathFingerprintGenerator::DefBondDescriptorFunctor::DefBondDescriptorFunctor
DefBondDescriptorFunctor(unsigned int flags=DEF_BOND_PROPERTY_FLAGS)
Constructs the bond descriptor functor object for the specified set of bond properties.
Definition: PathFingerprintGenerator.hpp:139
CDPL::Chem::BondPropertyFlag::TOPOLOGY
const unsigned int TOPOLOGY
Specifies the ring/chain topology of a bond.
Definition: BondPropertyFlag.hpp:68
CDPL::Chem::AtomPropertyFlag::TYPE
const unsigned int TYPE
Specifies the generic type or element of an atom.
Definition: Chem/AtomPropertyFlag.hpp:63
CDPL::Chem::AtomPropertyFlag::ISOTOPE
const unsigned int ISOTOPE
Specifies the isotopic mass of an atom.
Definition: Chem/AtomPropertyFlag.hpp:68
CDPL::Descr::PathFingerprintGenerator::setNumBits
void setNumBits(std::size_t num_bits)
Specifies the desired fingerprint size.
CDPL
The namespace of the Chemical Data Processing Library.
CDPL::Descr::PathFingerprintGenerator::DefBondDescriptorFunctor::operator()
std::uint64_t operator()(const Chem::Bond &bond) const
Generates a descriptor for the argument bond.
CDPL::Descr::PathFingerprintGenerator::BondDescriptorFunction
std::function< std::uint64_t(const Chem::Bond &)> BondDescriptorFunction
Type of the generic functor class used to store user-defined functions or function objects for the generation of bond descriptors.
Definition: PathFingerprintGenerator.hpp:175
CDPL::Descr::PathFingerprintGenerator::DefAtomDescriptorFunctor::DefAtomDescriptorFunctor
DefAtomDescriptorFunctor(unsigned int flags=DEF_ATOM_PROPERTY_FLAGS)
Constructs the atom descriptor functor object for the specified set of atomic properties.
Definition: PathFingerprintGenerator.hpp:103
CDPL::Chem::AtomPropertyFlag::AROMATICITY
const unsigned int AROMATICITY
Specifies the membership of an atom in aromatic rings.
Definition: Chem/AtomPropertyFlag.hpp:93
CDPL::Descr::PathFingerprintGenerator::generate
void generate(const Chem::MolecularGraph &molgraph, Util::BitSet &fp)
Generates the fingerprint of the molecular graph molgraph.
CDPL::Descr::PathFingerprintGenerator
PathFingerprintGenerator.
Definition: PathFingerprintGenerator.hpp:66
BondPropertyFlag.hpp
Definition of constants in namespace CDPL::Chem::BondPropertyFlag.
APIPrefix.hpp
Definition of the preprocessor macro CDPL_DESCR_API.
CDPL_DESCR_API
#define CDPL_DESCR_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
CDPL::Descr::PathFingerprintGenerator::AtomDescriptorFunction
std::function< std::uint64_t(const Chem::Atom &)> AtomDescriptorFunction
Type of the generic functor class used to store user-defined functions or function objects for the generation of atom descriptors.
Definition: PathFingerprintGenerator.hpp:165
CDPL::Descr::PathFingerprintGenerator::DefAtomDescriptorFunctor::operator()
std::uint64_t operator()(const Chem::Atom &atom) const
Generates a descriptor for the argument atom.
CDPL::Descr::PathFingerprintGenerator::getMinPathLength
std::size_t getMinPathLength() const
Returns the minimum length a path must have to contribute to the generated fingerprint.
CDPL::Descr::PathFingerprintGenerator::PathFingerprintGenerator
PathFingerprintGenerator()
Constructs the PathFingerprintGenerator instance.