Chemical Data Processing Library C++ API - Version 1.4.0
BulkSimilarityCalculator.hpp
Go to the documentation of this file.
1 /*
2  * BulkSimilarityCalculator.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
30 #define CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
31 
32 #include <vector>
33 #include <cstddef>
34 #include <functional>
35 #include <memory>
36 #include <utility>
37 #include <algorithm>
38 
39 #include <boost/iterator/indirect_iterator.hpp>
40 
42 #include "CDPL/Base/Exceptions.hpp"
43 
44 
45 namespace CDPL
46 {
47 
48  namespace Descr
49  {
50 
64  template <typename DT = Util::BitSet, typename SVT = double>
66  {
67 
68  public:
72  typedef DT Descriptor;
73 
77  typedef std::shared_ptr<DT> DescriptorPointer;
78 
82  typedef SVT SimilarityValue;
83 
87  typedef std::function<SVT(const DT&, const DT&)> SimilarityFunction;
88 
92  typedef std::pair<std::size_t, SVT> Result;
93 
97  typedef std::shared_ptr<BulkSimilarityCalculator> SharedPointer;
98 
99  private:
100  typedef std::vector<DescriptorPointer> DescriptorList;
101  typedef std::vector<Result> ResultList;
102 
103  public:
107  typedef boost::indirect_iterator<typename DescriptorList::const_iterator, DT>
109 
113  typedef boost::indirect_iterator<typename DescriptorList::iterator, DT>
115 
119  typedef typename ResultList::const_iterator ConstResultIterator;
120 
126 
132 
138 
144  template <typename SF>
145  BulkSimilarityCalculator(SF&& sim_func):
146  simFunc(std::forward<SF>(sim_func)) {}
147 
154 
161 
167 
173  template <typename SF>
174  void setSimilarityFunction(SF&& func);
175 
179  void clear();
180 
186 
192 
198 
204 
209  std::size_t getNumDescriptors() const;
210 
215  void addDescriptor(const Descriptor& descr);
216 
221  void addDescriptor(const DescriptorPointer& descr_ptr);
222 
228  bool containsDescriptor(const Descriptor& descr) const;
229 
236  const Descriptor& getDescriptor(std::size_t idx) const;
237 
243  void removeDescriptor(std::size_t idx);
244 
253 
260  void calculate(const Descriptor& descr, bool sort = false, bool sort_desc = true);
261 
267 
273 
278  ConstResultIterator begin() const;
279 
284  ConstResultIterator end() const;
285 
292  const Result& getResult(std::size_t idx) const;
293 
300  const SimilarityValue& getSimilarity(std::size_t idx) const;
301 
308  std::size_t getDescriptorIndex(std::size_t idx) const;
309 
310  private:
312  DescriptorList descriptors;
313  ResultList results;
314  };
315 
316  } // namespace Descr
317 } // namespace CDPL
318 
319 
320 // Implementation
321 
322 template <typename DT, typename SVT>
325 {
326  return simFunc;
327 }
328 
329 template <typename DT, typename SVT>
330 template <typename SF>
332 {
333  simFunc = std::forward<SF>(func);
334 }
335 
336 template <typename DT, typename SVT>
338 {
339  descriptors.clear();
340  results.clear();
341 }
342 
343 template <typename DT, typename SVT>
346 {
347  return descriptors.begin();
348 }
349 
350 template <typename DT, typename SVT>
353 {
354  return descriptors.end();
355 }
356 
357 template <typename DT, typename SVT>
360 {
361  return descriptors.begin();
362 }
363 
364 template <typename DT, typename SVT>
367 {
368  return descriptors.end();
369 }
370 
371 template <typename DT, typename SVT>
373 {
374  return descriptors.size();
375 }
376 
377 template <typename DT, typename SVT>
379 {
380  descriptors.emplace_back(DescriptorPointer(new Descriptor(descr)));
381 }
382 
383 template <typename DT, typename SVT>
385 {
386  descriptors.emplace_back(descr_ptr);
387 }
388 
389 template <typename DT, typename SVT>
391 {
392  for (auto& dp : descriptors)
393  if (*dp == descr)
394  return true;
395 
396  return false;
397 }
398 
399 template <typename DT, typename SVT>
402 {
403  if (idx >= descriptors.size())
404  throw Base::IndexError("BulkSimilarityCalculator: descriptor index out of bounds");
405 
406  return *descriptors[idx];
407 }
408 
409 template <typename DT, typename SVT>
411 {
412  if (idx >= descriptors.size())
413  throw Base::IndexError("BulkSimilarityCalculator: descriptor index out of bounds");
414 
415  descriptors.erase(descriptors.begin() + idx);
416 }
417 
418 template <typename DT, typename SVT>
421 {
422  auto& b_it = it.base();
423 
424  if ((b_it < descriptors.begin()) || (b_it >= descriptors.end()))
425  throw Base::RangeError("BulkSimilarityCalculator: descriptor iterator out of valid range");
426 
427  return descriptors.erase(b_it);
428 }
429 
430 template <typename DT, typename SVT>
431 void CDPL::Descr::BulkSimilarityCalculator<DT, SVT>::calculate(const Descriptor& descr, bool sort, bool sort_desc)
432 {
433  results.clear();
434  results.reserve(descriptors.size());
435 
436  for (std::size_t i = 0, num_descrs = descriptors.size(); i < num_descrs; i++)
437  results.emplace_back(i, simFunc(descr, *descriptors[i]));
438 
439  if (!sort)
440  return;
441 
442  if (sort_desc)
443  std::sort(results.begin(), results.end(),
444  [](const Result& r1, const Result& r2) {
445  return (r1.second > r2.second);
446  });
447  else
448  std::sort(results.begin(), results.end(),
449  [](const Result& r1, const Result& r2) {
450  return (r1.second < r2.second);
451  });
452 }
453 
454 template <typename DT, typename SVT>
457 {
458  return results.begin();
459 }
460 
461 template <typename DT, typename SVT>
464 {
465  return results.end();
466 }
467 
468 template <typename DT, typename SVT>
471 {
472  return results.begin();
473 }
474 
475 template <typename DT, typename SVT>
478 {
479  return results.end();
480 }
481 
482 template <typename DT, typename SVT>
485 {
486  if (idx >= results.size())
487  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
488 
489  return results[idx];
490 }
491 
492 template <typename DT, typename SVT>
495 {
496  if (idx >= results.size())
497  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
498 
499  return results[idx].second;
500 }
501 
502 template <typename DT, typename SVT>
504 {
505  if (idx >= results.size())
506  throw Base::IndexError("BulkSimilarityCalculator: result index out of bounds");
507 
508  return results[idx].first;
509 }
510 
511 #endif // CDPL_DESCR_BULKSIMILARITYCALCULATOR_HPP
Definition of exception classes.
Definition of functor classes for the calculation of various similarity and distance measures.
Thrown to indicate that an index is out of range.
Definition: Base/Exceptions.hpp:152
Thrown to indicate that a value is out of range.
Definition: Base/Exceptions.hpp:114
Calculator that performs a bulk pairwise comparison of a single query descriptor against a stored set of target descriptors.
Definition: BulkSimilarityCalculator.hpp:66
bool containsDescriptor(const Descriptor &descr) const
Tells whether a descriptor that compares equal to descr is stored.
Definition: BulkSimilarityCalculator.hpp:390
std::shared_ptr< BulkSimilarityCalculator > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated BulkSimilarityCalculator instances.
Definition: BulkSimilarityCalculator.hpp:97
DescriptorIterator getDescriptorsBegin()
Returns a mutable iterator pointing to the first stored descriptor.
Definition: BulkSimilarityCalculator.hpp:345
ConstResultIterator begin() const
Returns a constant iterator pointing to the first result (range-based for support).
Definition: BulkSimilarityCalculator.hpp:470
const SimilarityValue & getSimilarity(std::size_t idx) const
Returns the similarity value of the result at the given index.
Definition: BulkSimilarityCalculator.hpp:494
const SimilarityFunction & getSimilarityFunction() const
Returns the currently configured similarity function.
Definition: BulkSimilarityCalculator.hpp:324
void removeDescriptor(std::size_t idx)
Removes the descriptor at index idx.
Definition: BulkSimilarityCalculator.hpp:410
DT Descriptor
The descriptor type.
Definition: BulkSimilarityCalculator.hpp:72
std::function< SVT(const DT &, const DT &)> SimilarityFunction
Type of the generic functor used to compute the similarity of two descriptors.
Definition: BulkSimilarityCalculator.hpp:87
boost::indirect_iterator< typename DescriptorList::const_iterator, DT > ConstDescriptorIterator
A constant iterator over the stored target descriptors.
Definition: BulkSimilarityCalculator.hpp:108
std::size_t getDescriptorIndex(std::size_t idx) const
Returns the index of the target descriptor referenced by the result at the given result index.
Definition: BulkSimilarityCalculator.hpp:503
const Result & getResult(std::size_t idx) const
Returns the result at the given index.
Definition: BulkSimilarityCalculator.hpp:484
std::pair< std::size_t, SVT > Result
A single calculation result: (target descriptor index, similarity value).
Definition: BulkSimilarityCalculator.hpp:92
void setSimilarityFunction(SF &&func)
Sets the similarity function.
Definition: BulkSimilarityCalculator.hpp:331
std::shared_ptr< DT > DescriptorPointer
A smart pointer to a stored descriptor.
Definition: BulkSimilarityCalculator.hpp:77
const Descriptor & getDescriptor(std::size_t idx) const
Returns the stored descriptor at index idx.
Definition: BulkSimilarityCalculator.hpp:401
ConstResultIterator getResultsBegin() const
Returns a constant iterator pointing to the first result of the last calculate() call.
Definition: BulkSimilarityCalculator.hpp:456
std::size_t getNumDescriptors() const
Returns the number of stored descriptors.
Definition: BulkSimilarityCalculator.hpp:372
BulkSimilarityCalculator & operator=(BulkSimilarityCalculator &&calc)=default
Move assignment operator.
BulkSimilarityCalculator(const BulkSimilarityCalculator &calc)=default
Copy constructor.
DescriptorIterator getDescriptorsEnd()
Returns a mutable iterator pointing one past the last stored descriptor.
Definition: BulkSimilarityCalculator.hpp:352
void addDescriptor(const Descriptor &descr)
Adds a copy of descr to the stored descriptor list.
Definition: BulkSimilarityCalculator.hpp:378
BulkSimilarityCalculator(SF &&sim_func)
Constructs the BulkSimilarityCalculator instance with the given similarity function.
Definition: BulkSimilarityCalculator.hpp:145
ResultList::const_iterator ConstResultIterator
A constant iterator over the calculation results.
Definition: BulkSimilarityCalculator.hpp:119
ConstResultIterator end() const
Returns a constant iterator pointing one past the last result (range-based for support).
Definition: BulkSimilarityCalculator.hpp:477
BulkSimilarityCalculator & operator=(const BulkSimilarityCalculator &calc)=default
Copy assignment operator.
void clear()
Removes all stored descriptors and calculation results.
Definition: BulkSimilarityCalculator.hpp:337
boost::indirect_iterator< typename DescriptorList::iterator, DT > DescriptorIterator
A mutable iterator over the stored target descriptors.
Definition: BulkSimilarityCalculator.hpp:114
ConstResultIterator getResultsEnd() const
Returns a constant iterator pointing one past the last result of the last calculate() call.
Definition: BulkSimilarityCalculator.hpp:463
SVT SimilarityValue
The similarity-value type.
Definition: BulkSimilarityCalculator.hpp:82
void calculate(const Descriptor &descr, bool sort=false, bool sort_desc=true)
Computes the similarity between the query descr and every stored descriptor.
Definition: BulkSimilarityCalculator.hpp:431
BulkSimilarityCalculator()=default
Constructs the BulkSimilarityCalculator instance with the default similarity function (TanimotoSimilarity).
BulkSimilarityCalculator(BulkSimilarityCalculator &&calc)=default
Move constructor.
Functor class for calculating Tanimoto Similarities [CITB] of bitsets and vectors.
Definition: SimilarityFunctors.hpp:46
The namespace of the Chemical Data Processing Library.