Loading [MathJax]/extensions/tex2jax.js
Chemical Data Processing Library C++ API - Version 1.2.3
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BulkSimilarityCalculator.hpp
Go to the documentation of this file.
1 /*
2  * BulkSimilarityCalculator.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
30 #define CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
31 
32 #include <vector>
33 #include <cstddef>
34 #include <functional>
35 #include <memory>
36 #include <utility>
37 #include <algorithm>
38 
39 #include <boost/iterator/indirect_iterator.hpp>
40 
42 #include "CDPL/Base/Exceptions.hpp"
43 
44 
45 namespace CDPL
46 {
47 
48  namespace Descr
49  {
50 
51  template <typename DT = Util::BitSet, typename SVT = double>
53  {
54 
55  public:
56  typedef DT Descriptor;
57  typedef std::shared_ptr<DT> DescriptorPointer;
58  typedef SVT SimilarityValue;
59  typedef std::function<SVT(const DT&, const DT&)> SimilarityFunction;
60  typedef std::pair<std::size_t, SVT> Result;
61  typedef std::shared_ptr<BulkSimilarityCalculator> SharedPointer;
62 
63  private:
64  typedef std::vector<DescriptorPointer> DescriptorList;
65  typedef std::vector<Result> ResultList;
66 
67  public:
68  typedef boost::indirect_iterator<typename DescriptorList::const_iterator, DT>
70  typedef boost::indirect_iterator<typename DescriptorList::iterator, DT>
72 
73  typedef typename ResultList::const_iterator ConstResultIterator;
74 
76 
79 
80  template <typename SF>
81  BulkSimilarityCalculator(SF&& sim_func):
82  simFunc(std::forward<SF>(sim_func)) {}
83 
86 
88 
89  template <typename SF>
90  void setSimilarityFunction(SF&& func);
91 
92  void clear();
93 
96 
99 
100  std::size_t getNumDescriptors() const;
101 
102  void addDescriptor(const Descriptor& descr);
103  void addDescriptor(const DescriptorPointer& descr_ptr);
104 
105  bool containsDescriptor(const Descriptor& descr) const;
106 
107  const Descriptor& getDescriptor(std::size_t idx) const;
108 
109  void removeDescriptor(std::size_t idx);
111 
112  void calculate(const Descriptor& descr, bool sort = false, bool sort_desc = true);
113 
116 
117  ConstResultIterator begin() const;
118  ConstResultIterator end() const;
119 
120  const Result& getResult(std::size_t idx) const;
121 
122  const SimilarityValue& getSimilarity(std::size_t idx) const;
123  std::size_t getDescriptorIndex(std::size_t idx) const;
124 
125  private:
127  DescriptorList descriptors;
128  ResultList results;
129  };
130 
131  } // namespace Descr
132 } // namespace CDPL
133 
134 
135 // Implementation
136 
137 template <typename DT, typename SVT>
140 {
141  return simFunc;
142 }
143 
144 template <typename DT, typename SVT>
145 template <typename SF>
147 {
148  simFunc = std::forward<SF>(func);
149 }
150 
151 template <typename DT, typename SVT>
153 {
154  descriptors.clear();
155  results.clear();
156 }
157 
158 template <typename DT, typename SVT>
161 {
162  return descriptors.begin();
163 }
164 
165 template <typename DT, typename SVT>
168 {
169  return descriptors.end();
170 }
171 
172 template <typename DT, typename SVT>
175 {
176  return descriptors.begin();
177 }
178 
179 template <typename DT, typename SVT>
182 {
183  return descriptors.end();
184 }
185 
186 template <typename DT, typename SVT>
188 {
189  return descriptors.size();
190 }
191 
192 template <typename DT, typename SVT>
194 {
195  descriptors.emplace_back(DescriptorPointer(new Descriptor(descr)));
196 }
197 
198 template <typename DT, typename SVT>
200 {
201  descriptors.emplace_back(descr_ptr);
202 }
203 
204 template <typename DT, typename SVT>
206 {
207  for (auto& dp : descriptors)
208  if (*dp == descr)
209  return true;
210 
211  return false;
212 }
213 
214 template <typename DT, typename SVT>
217 {
218  if (idx >= descriptors.size())
219  throw Base::IndexError("BulkSimilarityCalculator: descriptor index out of bounds");
220 
221  return *descriptors[idx];
222 }
223 
224 template <typename DT, typename SVT>
226 {
227  if (idx >= descriptors.size())
228  throw Base::IndexError("BulkSimilarityCalculator: descriptor index out of bounds");
229 
230  descriptors.erase(descriptors.begin() + idx);
231 }
232 
233 template <typename DT, typename SVT>
236 {
237  auto& b_it = it.base();
238 
239  if ((b_it < descriptors.begin()) || (b_it >= descriptors.end()))
240  throw Base::RangeError("BulkSimilarityCalculator: descriptor iterator out of valid range");
241 
242  return descriptors.erase(b_it);
243 }
244 
245 template <typename DT, typename SVT>
246 void CDPL::Descr::BulkSimilarityCalculator<DT, SVT>::calculate(const Descriptor& descr, bool sort, bool sort_desc)
247 {
248  results.clear();
249  results.reserve(descriptors.size());
250 
251  for (std::size_t i = 0, num_descrs = descriptors.size(); i < num_descrs; i++)
252  results.emplace_back(i, simFunc(descr, *descriptors[i]));
253 
254  if (!sort)
255  return;
256 
257  if (sort_desc)
258  std::sort(results.begin(), results.end(),
259  [](const Result& r1, const Result& r2) {
260  return (r1.second > r2.second);
261  });
262  else
263  std::sort(results.begin(), results.end(),
264  [](const Result& r1, const Result& r2) {
265  return (r1.second < r2.second);
266  });
267 }
268 
269 template <typename DT, typename SVT>
272 {
273  return results.begin();
274 }
275 
276 template <typename DT, typename SVT>
279 {
280  return results.end();
281 }
282 
283 template <typename DT, typename SVT>
286 {
287  return results.begin();
288 }
289 
290 template <typename DT, typename SVT>
293 {
294  return results.end();
295 }
296 
297 template <typename DT, typename SVT>
300 {
301  if (idx >= results.size())
302  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
303 
304  return results[idx];
305 }
306 
307 template <typename DT, typename SVT>
310 {
311  if (idx >= results.size())
312  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
313 
314  return results[idx].second;
315 }
316 
317 template <typename DT, typename SVT>
319 {
320  if (idx >= results.size())
321  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
322 
323  return results[idx].first;
324 }
325 
326 #endif // CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
Definition of exception classes.
Definition of functor classes for the calculation of various similarity and distance measures.
Thrown to indicate that an index is out of range.
Definition: Base/Exceptions.hpp:152
Thrown to indicate that a value is out of range.
Definition: Base/Exceptions.hpp:114
Definition: BulkSimilarityCalculator.hpp:53
bool containsDescriptor(const Descriptor &descr) const
Definition: BulkSimilarityCalculator.hpp:205
std::shared_ptr< BulkSimilarityCalculator > SharedPointer
Definition: BulkSimilarityCalculator.hpp:61
DescriptorIterator getDescriptorsBegin()
Definition: BulkSimilarityCalculator.hpp:160
ConstResultIterator begin() const
Definition: BulkSimilarityCalculator.hpp:285
const SimilarityValue & getSimilarity(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:309
const SimilarityFunction & getSimilarityFunction() const
Definition: BulkSimilarityCalculator.hpp:139
void removeDescriptor(std::size_t idx)
Definition: BulkSimilarityCalculator.hpp:225
DT Descriptor
Definition: BulkSimilarityCalculator.hpp:56
std::function< SVT(const DT &, const DT &)> SimilarityFunction
Definition: BulkSimilarityCalculator.hpp:59
boost::indirect_iterator< typename DescriptorList::const_iterator, DT > ConstDescriptorIterator
Definition: BulkSimilarityCalculator.hpp:69
std::size_t getDescriptorIndex(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:318
const Result & getResult(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:299
std::pair< std::size_t, SVT > Result
Definition: BulkSimilarityCalculator.hpp:60
void setSimilarityFunction(SF &&func)
Definition: BulkSimilarityCalculator.hpp:146
std::shared_ptr< DT > DescriptorPointer
Definition: BulkSimilarityCalculator.hpp:57
const Descriptor & getDescriptor(std::size_t idx) const
Definition: BulkSimilarityCalculator.hpp:216
ConstResultIterator getResultsBegin() const
Definition: BulkSimilarityCalculator.hpp:271
std::size_t getNumDescriptors() const
Definition: BulkSimilarityCalculator.hpp:187
BulkSimilarityCalculator & operator=(BulkSimilarityCalculator &&calc)=default
BulkSimilarityCalculator(const BulkSimilarityCalculator &calc)=default
DescriptorIterator getDescriptorsEnd()
Definition: BulkSimilarityCalculator.hpp:167
void addDescriptor(const Descriptor &descr)
Definition: BulkSimilarityCalculator.hpp:193
BulkSimilarityCalculator(SF &&sim_func)
Definition: BulkSimilarityCalculator.hpp:81
ResultList::const_iterator ConstResultIterator
Definition: BulkSimilarityCalculator.hpp:73
ConstResultIterator end() const
Definition: BulkSimilarityCalculator.hpp:292
BulkSimilarityCalculator & operator=(const BulkSimilarityCalculator &calc)=default
void clear()
Definition: BulkSimilarityCalculator.hpp:152
boost::indirect_iterator< typename DescriptorList::iterator, DT > DescriptorIterator
Definition: BulkSimilarityCalculator.hpp:71
ConstResultIterator getResultsEnd() const
Definition: BulkSimilarityCalculator.hpp:278
SVT SimilarityValue
Definition: BulkSimilarityCalculator.hpp:58
void calculate(const Descriptor &descr, bool sort=false, bool sort_desc=true)
Definition: BulkSimilarityCalculator.hpp:246
BulkSimilarityCalculator(BulkSimilarityCalculator &&calc)=default
Functor class for calculating Tanimoto Similarities [CITB] of bitsets and vectors.
Definition: SimilarityFunctors.hpp:45
The namespace of the Chemical Data Processing Library.