Chemical Data Processing Library C++ API - Version 1.2.3
SimilarityFunctions.hpp
Go to the documentation of this file.
1 /*
2  * SimilarityFunctions.hpp
3  *
4  * This file is part of the Chemical Data Processing Toolkit
5  *
6  * Copyright (C) 2003 Thomas Seidel <thomas.seidel@univie.ac.at>
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this library; see the file COPYING. If not, write to
20  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  */
23 
29 #ifndef CDPL_DESCR_SIMILARITYFUNCTIONS_HPP
30 #define CDPL_DESCR_SIMILARITYFUNCTIONS_HPP
31 
32 #include <cstddef>
33 #include <cmath>
34 
35 #include "CDPL/Descr/APIPrefix.hpp"
36 #include "CDPL/Util/BitSet.hpp"
38 
39 
40 namespace CDPL
41 {
42 
43  namespace Descr
44  {
45 
66 
80  template <typename V>
81  inline double calcTanimotoSimilarity(const V& v1, const V& v2);
82 
103 
117  template <typename V>
118  inline double calcCosineSimilarity(const V& v1, const V& v2);
119 
141 
163 
184 
209  CDPL_DESCR_API double calcTverskySimilarity(const Util::BitSet& bs1, const Util::BitSet& bs2, double a, double b);
210 
230  CDPL_DESCR_API std::size_t calcHammingDistance(const Util::BitSet& bs1, const Util::BitSet& bs2);
231 
245  template <typename V>
246  inline double calcManhattanDistance(const V& v1, const V& v2);
247 
268 
282  template <typename V>
283  inline double calcEuclideanDistance(const V& v1, const V& v2);
284 
285  } // namespace Descr
286 } // namespace CDPL
287 
288 
289 // Implementation
290 
291 template <typename V>
292 inline double CDPL::Descr::calcTanimotoSimilarity(const V& v1, const V& v2)
293 {
294  double ep12 = innerProd(v1, v2);
295 
296  return (ep12 / (innerProd(v1, v1) + innerProd(v2, v2) - ep12));
297 }
298 
299 template <typename V>
300 inline double CDPL::Descr::calcCosineSimilarity(const V& v1, const V& v2)
301 {
302  return angleCos(v1, v2, double(norm2(v1) * norm2(v2)));
303 }
304 
305 template <typename V>
306 inline double CDPL::Descr::calcManhattanDistance(const V& v1, const V& v2)
307 {
308  return norm1(v1 - v2);
309 }
310 
311 template <typename V>
312 inline double CDPL::Descr::calcEuclideanDistance(const V& v1, const V& v2)
313 {
314  auto dv = v1 - v2;
315 
316  return std::sqrt(double(innerProd(dv, dv)));
317 }
318 
319 #endif // CDPL_DESCR_SIMILARITYFUNCTIONS_HPP
Definition of the type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_DESCR_API.
#define CDPL_DESCR_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of various vector expression types and operations.
constexpr unsigned int V
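Specifies Vanadium. (This cross-reference is to the atom type constant V; it is unrelated to the template type parameter V used by the functions in SimilarityFunctions.hpp.)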
Definition: AtomType.hpp:177
CDPL_DESCR_API double calcManhattanSimilarity(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Manhattan Similarity [GSIM] of the bitsets bs1 and bs2.
CDPL_DESCR_API double calcCosineSimilarity(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Cosine Similarity [WCOS] of the bitsets bs1 and bs2.
CDPL_DESCR_API double calcTverskySimilarity(const Util::BitSet &bs1, const Util::BitSet &bs2, double a, double b)
Calculates the Tversky Similarity [GSIM] of the bitsets bs1 and bs2.
CDPL_DESCR_API double calcTanimotoSimilarity(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Tanimoto Similarity [CITB] of the bitsets bs1 and bs2.
CDPL_DESCR_API double calcDiceSimilarity(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Dice Similarity [GSIM] of the bitsets bs1 and bs2.
CDPL_DESCR_API double calcEuclideanSimilarity(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Euclidean Similarity [GSIM] of the bitsets bs1 and bs2.
CDPL_DESCR_API std::size_t calcHammingDistance(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Hamming Distance [WHAM, CITB] between the bitsets bs1 and bs2.
CDPL_DESCR_API double calcEuclideanDistance(const Util::BitSet &bs1, const Util::BitSet &bs2)
Calculates the Euclidean Distance [CITB] between the bitsets bs1 and bs2.
double calcManhattanDistance(const V &v1, const V &v2)
Calculates the Manhattan Distance [MADI] between the vectors v1 and v2.
Definition: SimilarityFunctions.hpp:306
VectorInnerProduct< E1, E2 >::ResultType innerProd(const VectorExpression< E1 > &e1, const VectorExpression< E2 > &e2)
Definition: VectorExpression.hpp:504
QuaternionNorm2< E >::ResultType norm2(const QuaternionExpression< E > &e)
Definition: QuaternionExpression.hpp:804
VectorAngleCosine< E1, E2, T >::ResultType angleCos(const VectorExpression< E1 > &e1, const VectorExpression< E2 > &e2, const T &sd, bool clamp=true)
Definition: VectorExpression.hpp:511
MatrixNorm1< E >::ResultType norm1(const MatrixExpression< E > &e)
Definition: MatrixExpression.hpp:903
boost::dynamic_bitset BitSet
A dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.