Loading [MathJax]/jax/output/SVG/config.js
Chemical Data Processing Library C++ API - Version 1.3.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BulkSimilarityCalculator.hpp
Go to the documentation of this file.
1 /*
2  * BulkSimilarityCalculator.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
30 #define CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
31 
32 #include <vector>
33 #include <cstddef>
34 #include <functional>
35 #include <memory>
36 #include <utility>
37 #include <algorithm>
38 
39 #include <boost/iterator/indirect_iterator.hpp>
40 
42 #include "CDPL/Base/Exceptions.hpp"
43 
44 
45 namespace CDPL
46 {
47 
48  namespace Descr
49  {
50 
54  template <typename DT = Util::BitSet, typename SVT = double>
56  {
57 
58  public:
59  typedef DT Descriptor;
60  typedef std::shared_ptr<DT> DescriptorPointer;
61  typedef SVT SimilarityValue;
62  typedef std::function<SVT(const DT&, const DT&)> SimilarityFunction;
63  typedef std::pair<std::size_t, SVT> Result;
64  typedef std::shared_ptr<BulkSimilarityCalculator> SharedPointer;
65 
66  private:
67  typedef std::vector<DescriptorPointer> DescriptorList;
68  typedef std::vector<Result> ResultList;
69 
70  public:
71  typedef boost::indirect_iterator<typename DescriptorList::const_iterator, DT>
73  typedef boost::indirect_iterator<typename DescriptorList::iterator, DT>
75 
76  typedef typename ResultList::const_iterator ConstResultIterator;
77 
79 
82 
83  template <typename SF>
84  BulkSimilarityCalculator(SF&& sim_func):
85  simFunc(std::forward<SF>(sim_func)) {}
86 
89 
91 
92  template <typename SF>
93  void setSimilarityFunction(SF&& func);
94 
95  void clear();
96 
99 
102 
103  std::size_t getNumDescriptors() const;
104 
105  void addDescriptor(const Descriptor& descr);
106  void addDescriptor(const DescriptorPointer& descr_ptr);
107 
108  bool containsDescriptor(const Descriptor& descr) const;
109 
110  const Descriptor& getDescriptor(std::size_t idx) const;
111 
112  void removeDescriptor(std::size_t idx);
114 
115  void calculate(const Descriptor& descr, bool sort = false, bool sort_desc = true);
116 
119 
120  ConstResultIterator begin() const;
121  ConstResultIterator end() const;
122 
123  const Result& getResult(std::size_t idx) const;
124 
125  const SimilarityValue& getSimilarity(std::size_t idx) const;
126  std::size_t getDescriptorIndex(std::size_t idx) const;
127 
128  private:
130  DescriptorList descriptors;
131  ResultList results;
132  };
133 
134  } // namespace Descr
135 } // namespace CDPL
136 
137 
138 // Implementation
139 
140 template <typename DT, typename SVT>
143 {
144  return simFunc;
145 }
146 
147 template <typename DT, typename SVT>
148 template <typename SF>
150 {
151  simFunc = std::forward<SF>(func);
152 }
153 
154 template <typename DT, typename SVT>
156 {
157  descriptors.clear();
158  results.clear();
159 }
160 
161 template <typename DT, typename SVT>
164 {
165  return descriptors.begin();
166 }
167 
168 template <typename DT, typename SVT>
171 {
172  return descriptors.end();
173 }
174 
175 template <typename DT, typename SVT>
178 {
179  return descriptors.begin();
180 }
181 
182 template <typename DT, typename SVT>
185 {
186  return descriptors.end();
187 }
188 
189 template <typename DT, typename SVT>
191 {
192  return descriptors.size();
193 }
194 
195 template <typename DT, typename SVT>
197 {
198  descriptors.emplace_back(DescriptorPointer(new Descriptor(descr)));
199 }
200 
201 template <typename DT, typename SVT>
203 {
204  descriptors.emplace_back(descr_ptr);
205 }
206 
207 template <typename DT, typename SVT>
209 {
210  for (auto& dp : descriptors)
211  if (*dp == descr)
212  return true;
213 
214  return false;
215 }
216 
217 template <typename DT, typename SVT>
220 {
221  if (idx >= descriptors.size())
222  throw Base::IndexError("BulkSimilarityCalculator: descriptor index out of bounds");
223 
224  return *descriptors[idx];
225 }
226 
227 template <typename DT, typename SVT>
229 {
230  if (idx >= descriptors.size())
231  throw Base::IndexError("BulkSimilarityCalculator: descriptor index out of bounds");
232 
233  descriptors.erase(descriptors.begin() + idx);
234 }
235 
236 template <typename DT, typename SVT>
239 {
240  auto& b_it = it.base();
241 
242  if ((b_it < descriptors.begin()) || (b_it >= descriptors.end()))
243  throw Base::RangeError("BulkSimilarityCalculator: descriptor iterator out of valid range");
244 
245  return descriptors.erase(b_it);
246 }
247 
248 template <typename DT, typename SVT>
249 void CDPL::Descr::BulkSimilarityCalculator<DT, SVT>::calculate(const Descriptor& descr, bool sort, bool sort_desc)
250 {
251  results.clear();
252  results.reserve(descriptors.size());
253 
254  for (std::size_t i = 0, num_descrs = descriptors.size(); i < num_descrs; i++)
255  results.emplace_back(i, simFunc(descr, *descriptors[i]));
256 
257  if (!sort)
258  return;
259 
260  if (sort_desc)
261  std::sort(results.begin(), results.end(),
262  [](const Result& r1, const Result& r2) {
263  return (r1.second > r2.second);
264  });
265  else
266  std::sort(results.begin(), results.end(),
267  [](const Result& r1, const Result& r2) {
268  return (r1.second < r2.second);
269  });
270 }
271 
272 template <typename DT, typename SVT>
275 {
276  return results.begin();
277 }
278 
279 template <typename DT, typename SVT>
282 {
283  return results.end();
284 }
285 
286 template <typename DT, typename SVT>
289 {
290  return results.begin();
291 }
292 
293 template <typename DT, typename SVT>
296 {
297  return results.end();
298 }
299 
300 template <typename DT, typename SVT>
303 {
304  if (idx >= results.size())
305  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
306 
307  return results[idx];
308 }
309 
310 template <typename DT, typename SVT>
313 {
314  if (idx >= results.size())
315  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
316 
317  return results[idx].second;
318 }
319 
320 template <typename DT, typename SVT>
322 {
323  if (idx >= results.size())
324  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
325 
326  return results[idx].first;
327 }
328 
329 #endif // CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
Definition of exception classes.
Definition of functor classes for the calculation of various similarity and distance measures.
Thrown to indicate that an index is out of range.
Definition: Base/Exceptions.hpp:152
Thrown to indicate that a value is out of range.
Definition: Base/Exceptions.hpp:114
Definition: BulkSimilarityCalculator.hpp:56
bool containsDescriptor(const Descriptor &descr) const
Definition: BulkSimilarityCalculator.hpp:208
std::shared_ptr< BulkSimilarityCalculator > SharedPointer
Definition: BulkSimilarityCalculator.hpp:64
DescriptorIterator getDescriptorsBegin()
Definition: BulkSimilarityCalculator.hpp:163
ConstResultIterator begin() const
Definition: BulkSimilarityCalculator.hpp:288
const SimilarityValue & getSimilarity(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:312
const SimilarityFunction & getSimilarityFunction() const
Definition: BulkSimilarityCalculator.hpp:142
void removeDescriptor(std::size_t idx)
Definition: BulkSimilarityCalculator.hpp:228
DT Descriptor
Definition: BulkSimilarityCalculator.hpp:59
std::function< SVT(const DT &, const DT &)> SimilarityFunction
Definition: BulkSimilarityCalculator.hpp:62
boost::indirect_iterator< typename DescriptorList::const_iterator, DT > ConstDescriptorIterator
Definition: BulkSimilarityCalculator.hpp:72
std::size_t getDescriptorIndex(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:321
const Result & getResult(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:302
std::pair< std::size_t, SVT > Result
Definition: BulkSimilarityCalculator.hpp:63
void setSimilarityFunction(SF &&func)
Definition: BulkSimilarityCalculator.hpp:149
std::shared_ptr< DT > DescriptorPointer
Definition: BulkSimilarityCalculator.hpp:60
const Descriptor & getDescriptor(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:219
ConstResultIterator getResultsBegin() const
Definition: BulkSimilarityCalculator.hpp:274
std::size_t getNumDescriptors() const
Definition: BulkSimilarityCalculator.hpp:190
BulkSimilarityCalculator & operator=(BulkSimilarityCalculator &&calc)=default
BulkSimilarityCalculator(const BulkSimilarityCalculator &calc)=default
DescriptorIterator getDescriptorsEnd()
Definition: BulkSimilarityCalculator.hpp:170
void addDescriptor(const Descriptor &descr)
Definition: BulkSimilarityCalculator.hpp:196
BulkSimilarityCalculator(SF &&sim_func)
Definition: BulkSimilarityCalculator.hpp:84
ResultList::const_iterator ConstResultIterator
Definition: BulkSimilarityCalculator.hpp:76
ConstResultIterator end() const
Definition: BulkSimilarityCalculator.hpp:295
BulkSimilarityCalculator & operator=(const BulkSimilarityCalculator &calc)=default
void clear()
Definition: BulkSimilarityCalculator.hpp:155
boost::indirect_iterator< typename DescriptorList::iterator, DT > DescriptorIterator
Definition: BulkSimilarityCalculator.hpp:74
ConstResultIterator getResultsEnd() const
Definition: BulkSimilarityCalculator.hpp:281
SVT SimilarityValue
Definition: BulkSimilarityCalculator.hpp:61
void calculate(const Descriptor &descr, bool sort=false, bool sort_desc=true)
Definition: BulkSimilarityCalculator.hpp:249
BulkSimilarityCalculator(BulkSimilarityCalculator &&calc)=default
Functor class for calculating Tanimoto Similarities [CITB] of bitsets and vectors.
Definition: SimilarityFunctors.hpp:46
The namespace of the Chemical Data Processing Library.