From d7b4841c42660c098ab860762fbce2e3706b44b0 Mon Sep 17 00:00:00 2001
From: NicklasXYZ <nsa200293@live.dk>
Date: Sun, 17 Mar 2024 00:03:50 +0100
Subject: [PATCH 1/4] Add set similarity measures

---
 src/gleam_community/maths/metrics.gleam       | 129 +++++++++++++++++-
 test/gleam_community/maths/metrics_test.gleam |  22 +++
 2 files changed, 147 insertions(+), 4 deletions(-)
diff --git a/src/gleam_community/maths/metrics.gleam b/src/gleam_community/maths/metrics.gleam
index e35e336..cd9bfa6 100644
--- a/src/gleam_community/maths/metrics.gleam
+++ b/src/gleam_community/maths/metrics.gleam
@@ -30,6 +30,9 @@
 ////   * [`manhatten_distance`](#float_manhatten_distance)
 ////   * [`minkowski_distance`](#minkowski_distance)
 ////   * [`euclidean_distance`](#euclidean_distance)
+////   * [`jaccard_index`](#jaccard_index)
+////   * [`sorensen_dice_coefficient`](#sorensen_dice_coefficient)
+////   * [`tversky_index`](#tversky_index)
 //// * **Basic statistical measures**
 ////   * [`mean`](#mean)
 ////   * [`median`](#median)
@@ -44,6 +47,7 @@ import gleam_community/maths/predicates
 import gleam_community/maths/conversion
 import gleam/list
 import gleam/pair
+import gleam/set
 import gleam/float
 
 /// <div style="text-align: right;">
@@ -292,7 +296,7 @@ pub fn euclidean_distance(
 }
 
 /// <div style="text-align: right;">
-///     <a href="https://github.com/nicklasxyz/gleam_stats/issues">
+///     <a href="https://github.com/gleam-community/maths/issues">
 ///         <small>Spot a typo? Open an issue!</small>
 ///     </a>
 /// </div>
@@ -347,7 +351,7 @@ pub fn mean(arr: List(Float)) -> Result(Float, String) {
 }
 
 /// <div style="text-align: right;">
-///     <a href="https://github.com/nicklasxyz/gleam_stats/issues">
+///     <a href="https://github.com/gleam-community/maths/issues">
 ///         <small>Spot a typo? Open an issue!</small>
 ///     </a>
 /// </div>
@@ -414,7 +418,7 @@ pub fn median(arr: List(Float)) -> Result(Float, String) {
 }
 
 /// <div style="text-align: right;">
-///     <a href="https://github.com/nicklasxyz/gleam_stats/issues">
+///     <a href="https://github.com/gleam-community/maths/issues">
 ///         <small>Spot a typo? Open an issue!</small>
 ///     </a>
 /// </div>
@@ -490,7 +494,7 @@ pub fn variance(arr: List(Float), ddof: Int) -> Result(Float, String) {
 }
 
 /// <div style="text-align: right;">
-///     <a href="https://github.com/nicklasxyz/gleam_stats/issues">
+///     <a href="https://github.com/gleam-community/maths/issues">
 ///         <small>Spot a typo? Open an issue!</small>
 ///     </a>
 /// </div>
@@ -555,3 +559,120 @@ pub fn standard_deviation(arr: List(Float), ddof: Int) -> Result(Float, String)
       }
   }
 }
+
+/// <div style="text-align: right;">
+///     <a href="https://github.com/gleam-community/maths/issues">
+///         <small>Spot a typo? Open an issue!</small>
+///     </a>
+/// </div>
+///
+/// <details>
+///     <summary>Example:</summary>
+///
+///     import gleeunit/should
+///     import gleam_community/maths/metrics
+///
+///     pub fn example () {
+///     }
+/// </details>
+///
+/// <div style="text-align: right;">
+///     <a href="#">
+///         <small>Back to top ↑</small>
+///     </a>
+/// </div>
+///
+pub fn jaccard_index(aset: set.Set(a), bset: set.Set(a)) -> Float {
+  let assert Ok(result) = tversky_index(aset, bset, 1.0, 1.0)
+  result
+}
+
+/// <div style="text-align: right;">
+///     <a href="https://github.com/gleam-community/maths/issues">
+///         <small>Spot a typo? Open an issue!</small>
+///     </a>
+/// </div>
+///
+/// <details>
+///     <summary>Example:</summary>
+///
+///     import gleeunit/should
+///     import gleam_community/maths/metrics
+///
+///     pub fn example () {
+///     }
+/// </details>
+///
+/// <div style="text-align: right;">
+///     <a href="#">
+///         <small>Back to top ↑</small>
+///     </a>
+/// </div>
+///
+pub fn sorensen_dice_coefficient(aset: set.Set(a), bset: set.Set(a)) -> Float {
+  let assert Ok(result) = tversky_index(aset, bset, 0.5, 0.5)
+  result
+}
+
+/// <div style="text-align: right;">
+///     <a href="https://github.com/gleam-community/maths/issues">
+///         <small>Spot a typo? Open an issue!</small>
+///     </a>
+/// </div>
+/// 
+/// The Tversky index is a generalization of the Sørensen–Dice coefficient and the Jaccard index. 
+/// 
+/// <details>
+///     <summary>Example:</summary>
+///
+///     import gleeunit/should
+///     import gleam_community/maths/metrics
+///
+///     pub fn example () {
+///     }
+/// </details>
+///
+/// <div style="text-align: right;">
+///     <a href="#">
+///         <small>Back to top ↑</small>
+///     </a>
+/// </div>
+///
+pub fn tversky_index(
+  aset: set.Set(a),
+  bset: set.Set(a),
+  alpha: Float,
+  beta: Float,
+) -> Result(Float, String) {
+  case alpha >=. 0.0, beta >=. 0.0 {
+    True, True -> {
+      let intersection: Float =
+        set.intersection(aset, bset)
+        |> set.size()
+        |> conversion.int_to_float()
+      let difference1: Float =
+        set.difference(aset, bset)
+        |> set.size()
+        |> conversion.int_to_float()
+      let difference2: Float =
+        set.difference(bset, aset)
+        |> set.size()
+        |> conversion.int_to_float()
+      intersection
+      /. { intersection +. alpha *. difference1 +. beta *. difference2 }
+      |> Ok
+    }
+    False, True -> {
+      "Invalid input argument: alpha < 0. Valid input is alpha >= 0."
+      |> Error
+    }
+    True, False -> {
+      "Invalid input argument: beta < 0. Valid input is beta >= 0."
+      |> Error
+    }
+    _, _ -> {
+      "Invalid input argument: alpha < 0 and beta < 0. Valid input is alpha >= 0 and beta >= 0."
+      |> Error
+    }
+  }
+}
diff --git a/test/gleam_community/maths/metrics_test.gleam b/test/gleam_community/maths/metrics_test.gleam
index 8e407e6..cbd8d5e 100644
--- a/test/gleam_community/maths/metrics_test.gleam
+++ b/test/gleam_community/maths/metrics_test.gleam
@@ -2,6 +2,7 @@ import gleam_community/maths/elementary
 import gleam_community/maths/metrics
 import gleam_community/maths/predicates
 import gleeunit/should
+import gleam/set
 
 pub fn float_list_norm_test() {
   let assert Ok(tol) = elementary.power(-10.0, -6.0)
@@ -212,3 +213,24 @@ pub fn example_standard_deviation_test() {
   |> metrics.standard_deviation(ddof)
   |> should.equal(Ok(1.0))
 }
+
+pub fn example_jaccard_index_test() {
+  metrics.jaccard_index(set.from_list([]), set.from_list([]))
+  |> should.equal(0.0)
+
+  let set_a: set.Set(Int) = set.from_list([0, 1, 2, 5, 6, 8, 9])
+  let set_b: set.Set(Int) = set.from_list([0, 2, 3, 4, 5, 7, 9])
+  metrics.jaccard_index(set_a, set_b)
+  |> should.equal(4.0 /. 10.0)
+
+  let set_c: set.Set(Int) = set.from_list([0, 1, 2, 3, 4, 5])
+  let set_d: set.Set(Int) = set.from_list([6, 7, 8, 9, 10])
+  metrics.jaccard_index(set_c, set_d)
+  |> should.equal(0.0 /. 11.0)
+
+  let set_e: set.Set(String) = set.from_list(["cat", "dog", "hippo", "monkey"])
+  let set_f: set.Set(String) =
+    set.from_list(["monkey", "rhino", "ostrich", "salmon"])
+  metrics.jaccard_index(set_e, set_f)
+  |> should.equal(1.0 /. 7.0)
+}

From c825bb522f6a3228107e254dd8149deb92c4d6f3 Mon Sep 17 00:00:00 2001
From: NicklasXYZ <nsa200293@live.dk>
Date: Sun, 17 Mar 2024 00:18:45 +0100
Subject: [PATCH 2/4] Add overlap coefficient

---
 src/gleam_community/maths/metrics.gleam       | 35 +++++++++++++++++++
 test/gleam_community/maths/metrics_test.gleam | 22 ++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/src/gleam_community/maths/metrics.gleam b/src/gleam_community/maths/metrics.gleam
index cd9bfa6..4c45285 100644
--- a/src/gleam_community/maths/metrics.gleam
+++ b/src/gleam_community/maths/metrics.gleam
@@ -49,6 +49,7 @@ import gleam/list
 import gleam/pair
 import gleam/set
 import gleam/float
+import gleam/int
 
 /// <div style="text-align: right;">
 ///     <a href="https://github.com/gleam-community/maths/issues">
@@ -676,3 +677,37 @@ pub fn tversky_index(
     }
   }
 }
+
+/// <div style="text-align: right;">
+///     <a href="https://github.com/gleam-community/maths/issues">
+///         <small>Spot a typo? Open an issue!</small>
+///     </a>
+/// </div>
+/// 
+/// 
+/// <details>
+///     <summary>Example:</summary>
+///
+///     import gleeunit/should
+///     import gleam_community/maths/metrics
+///
+///     pub fn example () {
+///     }
+/// </details>
+///
+/// <div style="text-align: right;">
+///     <a href="#">
+///         <small>Back to top ↑</small>
+///     </a>
+/// </div>
+///
+pub fn overlap_coefficient(aset: set.Set(a), bset: set.Set(a)) -> Float {
+  let intersection: Float =
+    set.intersection(aset, bset)
+    |> set.size()
+    |> conversion.int_to_float()
+  let minsize: Float =
+    piecewise.minimum(set.size(aset), set.size(bset), int.compare)
+    |> conversion.int_to_float()
+  intersection /. minsize
+}
diff --git a/test/gleam_community/maths/metrics_test.gleam b/test/gleam_community/maths/metrics_test.gleam
index cbd8d5e..2cb5ef1 100644
--- a/test/gleam_community/maths/metrics_test.gleam
+++ b/test/gleam_community/maths/metrics_test.gleam
@@ -234,3 +234,25 @@ pub fn example_jaccard_index_test() {
   metrics.jaccard_index(set_e, set_f)
   |> should.equal(1.0 /. 7.0)
 }
+
+pub fn example_overlap_coefficient_test() {
+  metrics.overlap_coefficient(set.from_list([]), set.from_list([]))
+  |> should.equal(0.0)
+
+  let set_a: set.Set(Int) = set.from_list([0, 1, 2, 5, 6, 8, 9])
+  let set_b: set.Set(Int) = set.from_list([0, 2, 3, 4, 5, 7, 9])
+  metrics.overlap_coefficient(set_a, set_b)
+  |> should.equal(4.0 /. 7.0)
+
+  let set_c: set.Set(Int) = set.from_list([0, 1, 2, 3, 4, 5])
+  let set_d: set.Set(Int) = set.from_list([6, 7, 8, 9, 10])
+  metrics.overlap_coefficient(set_c, set_d)
+  |> should.equal(0.0 /. 5.0)
+
+  let set_e: set.Set(String) =
+    set.from_list(["cat", "dog", "hippo", "monkey", "rhino"])
+  let set_f: set.Set(String) =
+    set.from_list(["monkey", "rhino", "ostrich", "salmon"])
+  metrics.overlap_coefficient(set_e, set_f)
+  |> should.equal(2.0 /. 4.0)
+}

From 24e496a4a844a3386b94f4a672340b44bdf1f617 Mon Sep 17 00:00:00 2001
From: NicklasXYZ <nsa200293@live.dk>
Date: Tue, 19 Mar 2024 15:04:44 +0100
Subject: [PATCH 3/4] Add new distance & similarity measures

---
 src/gleam_community/maths/metrics.gleam       | 204 ++++++++++++++++--
 test/gleam_community/maths/metrics_test.gleam |  55 ++++-
 2 files changed, 242 insertions(+), 17 deletions(-)

diff --git a/src/gleam_community/maths/metrics.gleam b/src/gleam_community/maths/metrics.gleam
index 4c45285..a4ee8ab 100644
--- a/src/gleam_community/maths/metrics.gleam
+++ b/src/gleam_community/maths/metrics.gleam
@@ -137,7 +137,7 @@ pub fn norm(arr: List(Float), p: Float) -> Float {
 ///       let assert Ok(tol) = elementary.power(-10.0, -6.0)
 ///     
 ///       // Empty lists returns 0.0
-///       metrics.float_manhatten_distance([], [])
+///       metrics.manhatten_distance([], [])
 ///       |> should.equal(Ok(0.0))
 ///     
 ///       // Differing lengths returns error
@@ -567,13 +567,36 @@ pub fn standard_deviation(arr: List(Float), ddof: Int) -> Result(Float, String)
 ///     </a>
 /// </div>
 ///
+/// The Jaccard index measures similarity between two sets of elements. Mathematically, the Jaccard index 
+/// is defined as:
+/// 
+/// \\[
+/// \text{JI}(X, Y) = \frac{|X \cap Y|}{|X \cup Y|} \in \left[0, 1\right]
+/// \\]
+/// 
+/// where:
+///
+/// - $$X$$ and $$Y$$ are two sets being compared,
+/// - $$|X \cap Y|$$ represents the size of the intersection of the two sets
+/// - $$|X \cup Y|$$ denotes the size of the union of the two sets
+/// 
+/// The value of the Jaccard index ranges from 0 to 1, where 0 indicates that the two sets share no elements
+/// and 1 indicates that the sets are identical. The Jaccard index is a special case of the 
+/// [Tversky index](#tversky_index) (with $$\alpha=\beta=1$$).
+/// 
 /// <details>
 ///     <summary>Example:</summary>
 ///
 ///     import gleeunit/should
 ///     import gleam_community/maths/metrics
+///     import gleam/set
 ///
 ///     pub fn example () {
+///       let xset: set.Set(String) = set.from_list(["cat", "dog", "hippo", "monkey"])
+///       let yset: set.Set(String) =
+///         set.from_list(["monkey", "rhino", "ostrich", "salmon"])
+///       metrics.jaccard_index(xset, yset)
+///       |> should.equal(1.0 /. 7.0)
 ///     }
 /// </details>
 ///
@@ -583,8 +606,8 @@ pub fn standard_deviation(arr: List(Float), ddof: Int) -> Result(Float, String)
 ///     </a>
 /// </div>
 ///
-pub fn jaccard_index(aset: set.Set(a), bset: set.Set(a)) -> Float {
-  let assert Ok(result) = tversky_index(aset, bset, 1.0, 1.0)
+pub fn jaccard_index(xset: set.Set(a), yset: set.Set(a)) -> Float {
+  let assert Ok(result) = tversky_index(xset, yset, 1.0, 1.0)
   result
 }
 
@@ -594,13 +617,36 @@ pub fn jaccard_index(aset: set.Set(a), bset: set.Set(a)) -> Float {
 ///     </a>
 /// </div>
 ///
+/// The Sørensen-Dice coefficient measures the similarity between two sets of elements. Mathematically, the 
+/// coefficient is defined as:
+/// 
+/// \\[
+/// \text{DSC}(X, Y) = \frac{2 \times |X \cap Y|}{|X| + |Y|} \in \left[0, 1\right]
+/// \\]
+/// 
+/// where:
+/// - $$X$$ and $$Y$$ are two sets being compared
+/// - $$|X \cap Y|$$ is the size of the intersection of the two sets (i.e., the number of elements common to both sets)
+/// - $$|X|$$ and $$|Y|$$ are the sizes of the sets $$X$$ and $$Y$$, respectively
+/// 
+/// The coefficient ranges from 0 to 1, where 0 indicates no similarity (the sets share no elements) and 1 
+/// indicates perfect similarity (the sets are identical). The higher the coefficient, the greater the similarity 
+/// between the two sets. The Sørensen-Dice coefficient is a special case of the 
+/// [Tversky index](#tversky_index) (with $$\alpha=\beta=0.5$$).
+/// 
 /// <details>
 ///     <summary>Example:</summary>
 ///
 ///     import gleeunit/should
 ///     import gleam_community/maths/metrics
+///     import gleam/set
 ///
 ///     pub fn example () {
+///       let xset: set.Set(String) = set.from_list(["cat", "dog", "hippo", "monkey"])
+///       let yset: set.Set(String) =
+///         set.from_list(["monkey", "rhino", "ostrich", "salmon", "spider"])
+///       metrics.sorensen_dice_coefficient(xset, yset)
+///       |> should.equal(2.0 *. 1.0 /. { 4.0 +. 5.0 })
 ///     }
 /// </details>
 ///
@@ -610,8 +656,8 @@ pub fn jaccard_index(aset: set.Set(a), bset: set.Set(a)) -> Float {
 ///     </a>
 /// </div>
 ///
-pub fn sorensen_dice_coefficient(aset: set.Set(a), bset: set.Set(a)) -> Float {
-  let assert Ok(result) = tversky_index(aset, bset, 0.5, 0.5)
+pub fn sorensen_dice_coefficient(xset: set.Set(a), yset: set.Set(a)) -> Float {
+  let assert Ok(result) = tversky_index(xset, yset, 0.5, 0.5)
   result
 }
 
@@ -621,15 +667,39 @@ pub fn sorensen_dice_coefficient(aset: set.Set(a), bset: set.Set(a)) -> Float {
 ///     </a>
 /// </div>
 /// 
-/// The Tversky index is a generalization of the Sørensen–Dice coefficient and the Jaccard index. 
+/// The Tversky index is a generalization of the Jaccard index and Sørensen-Dice coefficient, which adds 
+/// flexibility through two parameters, $$\alpha$$ and $$\beta$$, allowing for asymmetric similarity 
+/// measures between sets. The Tversky index is defined as:
 /// 
+/// \\[
+/// \text{TI}(X, Y) = \frac{|X \cap Y|}{|X \cap Y| + \alpha|X - Y| + \beta|Y - X|}
+/// \\]
+/// 
+/// where:
+/// 
+/// - $$X$$ and $$Y$$ are the sets being compared
+/// - $$|X - Y|$$ and $$|Y - X|$$ are the sizes of the relative complements of $$Y$$ in $$X$$ and $$X$$ in $$Y$$, respectively,
+/// - $$\alpha$$ and $$\beta$$ are parameters that weigh the relative importance of the elements unique to $$X$$ and $$Y$$
+/// 
+/// The Tversky index reduces to the Jaccard index when $$\alpha = \beta = 1$$ and to the Sørensen-Dice 
+/// coefficient when $$\alpha = \beta = 0.5$$. The index equals 0 when there is no intersection between the two 
+/// sets, indicating no similarity. Since $$\alpha$$ and $$\beta$$ must be non-negative, the denominator is never 
+/// smaller than the numerator, so the index always lies between 0 and 1, even when $$\alpha \neq \beta$$.
+///  
 /// <details>
 ///     <summary>Example:</summary>
 ///
 ///     import gleeunit/should
 ///     import gleam_community/maths/metrics
+///     import gleam/set
 ///
 ///     pub fn example () {
+///       let yset: set.Set(String) = set.from_list(["cat", "dog", "hippo", "monkey"])
+///       let xset: set.Set(String) =
+///         set.from_list(["monkey", "rhino", "ostrich", "salmon"])
+///       // Test Jaccard index (alpha = beta = 1)
+///       metrics.tversky_index(xset, yset, 1.0, 1.0)
+///       |> should.equal(Ok(1.0 /. 7.0))
 ///     }
 /// </details>
 ///
@@ -640,23 +710,23 @@ pub fn sorensen_dice_coefficient(aset: set.Set(a), bset: set.Set(a)) -> Float {
 /// </div>
 ///
 pub fn tversky_index(
-  aset: set.Set(a),
-  bset: set.Set(a),
+  xset: set.Set(a),
+  yset: set.Set(a),
   alpha: Float,
   beta: Float,
 ) -> Result(Float, String) {
   case alpha >=. 0.0, beta >=. 0.0 {
     True, True -> {
       let intersection: Float =
-        set.intersection(aset, bset)
+        set.intersection(xset, yset)
         |> set.size()
         |> conversion.int_to_float()
       let difference1: Float =
-        set.difference(aset, bset)
+        set.difference(xset, yset)
         |> set.size()
         |> conversion.int_to_float()
       let difference2: Float =
-        set.difference(bset, aset)
+        set.difference(yset, xset)
         |> set.size()
         |> conversion.int_to_float()
       intersection
@@ -684,14 +754,39 @@ pub fn tversky_index(
 ///     </a>
 /// </div>
 /// 
-/// 
+/// The Overlap coefficient, also known as the Szymkiewicz–Simpson coefficient, is a measure of 
+/// similarity between two sets that focuses on the size of the intersection relative to the 
+/// smaller of the two sets. It is defined mathematically as:
+///
+/// \\[
+/// \text{OC}(X, Y) = \frac{|X \cap Y|}{\min(|X|, |Y|)} \in \left[0, 1\right]
+/// \\]
+///
+/// where:
+///
+/// - $$X$$ and $$Y$$ are the sets being compared
+/// - $$|X \cap Y|$$ is the size of the intersection of the sets
+/// - $$\min(|X|, |Y|)$$ is the size of the smaller set among $$X$$ and $$Y$$
+///
+/// The coefficient ranges from 0 to 1, where 0 indicates no overlap and 1 indicates that the 
+/// smaller set is a subset of the larger set. This measure is especially useful in situations
+/// where the similarity in terms of the proportion of overlap is more relevant than the 
+/// difference in sizes between the two sets.
+///
 /// <details>
 ///     <summary>Example:</summary>
 ///
 ///     import gleeunit/should
 ///     import gleam_community/maths/metrics
+///     import gleam/set
 ///
 ///     pub fn example () {
+///       let set_a: set.Set(String) =
+///         set.from_list(["horse", "dog", "hippo", "monkey", "bird"])
+///       let set_b: set.Set(String) =
+///         set.from_list(["monkey", "bird", "ostrich", "salmon"])
+///       metrics.overlap_coefficient(set_a, set_b)
+///       |> should.equal(2.0 /. 4.0)
 ///     }
 /// </details>
 ///
@@ -701,13 +796,92 @@ pub fn tversky_index(
 ///     </a>
 /// </div>
 ///
-pub fn overlap_coefficient(aset: set.Set(a), bset: set.Set(a)) -> Float {
+pub fn overlap_coefficient(xset: set.Set(a), yset: set.Set(a)) -> Float {
   let intersection: Float =
-    set.intersection(aset, bset)
+    set.intersection(xset, yset)
     |> set.size()
     |> conversion.int_to_float()
   let minsize: Float =
-    piecewise.minimum(set.size(aset), set.size(bset), int.compare)
+    piecewise.minimum(set.size(xset), set.size(yset), int.compare)
     |> conversion.int_to_float()
   intersection /. minsize
 }
+
+/// <div style="text-align: right;">
+///     <a href="https://github.com/gleam-community/maths/issues">
+///         <small>Spot a typo? Open an issue!</small>
+///     </a>
+/// </div>
+/// 
+/// Calculate the cosine similarity between two lists (representing vectors):
+///
+/// \\[
+/// \frac{\sum_{i=1}^n x_i \cdot y_i}{\left(\sum_{i=1}^n x_i^2\right)^{\frac{1}{2}} \cdot \left(\sum_{i=1}^n y_i^2\right)^{\frac{1}{2}}}
+/// \\]
+///
+/// In the formula, $n$ is the length of the two lists and $x_i, y_i$ are the values in the respective input lists indexed by $i$. The numerator
+/// represents the dot product of the two vectors, while the denominator is the product of the magnitudes (Euclidean norms) of the two vectors. 
+/// The cosine similarity provides a value between -1 and 1, where 1 means the vectors are in the same direction, -1 means they are in exactly 
+/// opposite directions, and 0 indicates orthogonality. 
+/// 
+/// <details>
+///     <summary>Example:</summary>
+///
+///     import gleeunit/should
+///     import gleam_community/maths/metrics
+///
+///     pub fn example () {
+///       // Two orthogonal vectors
+///       metrics.cosine_similarity([-1.0, 1.0, 0.0], [1.0, 1.0, -1.0])
+///       |> should.equal(Ok(0.0))
+///     
+///       // Two identical (parallel) vectors
+///       metrics.cosine_similarity([1.0, 2.0, 3.0], [1.0, 2.0, 3.0])
+///       |> should.equal(Ok(1.0))
+///     
+///       // Two parallel, but oppositely oriented vectors
+///       metrics.cosine_similarity([-1.0, -2.0, -3.0], [1.0, 2.0, 3.0])
+///       |> should.equal(Ok(-1.0))
+///     }
+/// </details>
+///
+/// <div style="text-align: right;">
+///     <a href="#">
+///         <small>Back to top ↑</small>
+///     </a>
+/// </div>
+///
+pub fn cosine_similarity(
+  xarr: List(Float),
+  yarr: List(Float),
+) -> Result(Float, String) {
+  let xlen: Int = list.length(xarr)
+  let ylen: Int = list.length(yarr)
+  case xarr, yarr {
+    [], _ ->
+      "Invalid input argument: The list xarr is empty."
+      |> Error
+    _, [] ->
+      "Invalid input argument: The list yarr is empty."
+      |> Error
+    _, _ -> {
+      case xlen == ylen {
+        False ->
+          "Invalid input argument: length(xarr) != length(yarr). Valid input is when length(xarr) == length(yarr)."
+          |> Error
+        True -> {
+          list.fold(
+            list.zip(xarr, yarr),
+            0.0,
+            fn(acc: Float, a: #(Float, Float)) -> Float {
+              let result: Float = pair.first(a) *. pair.second(a)
+              result +. acc
+            },
+          )
+          /. { norm(xarr, 2.0) *. norm(yarr, 2.0) }
+          |> Ok
+        }
+      }
+    }
+  }
+}
diff --git a/test/gleam_community/maths/metrics_test.gleam b/test/gleam_community/maths/metrics_test.gleam
index 2cb5ef1..e2f7307 100644
--- a/test/gleam_community/maths/metrics_test.gleam
+++ b/test/gleam_community/maths/metrics_test.gleam
@@ -235,6 +235,27 @@ pub fn example_jaccard_index_test() {
   |> should.equal(1.0 /. 7.0)
 }
 
+pub fn example_sorensen_dice_coefficient_test() {
+  metrics.sorensen_dice_coefficient(set.from_list([]), set.from_list([]))
+  |> should.equal(0.0)
+
+  let set_a: set.Set(Int) = set.from_list([0, 1, 2, 5, 6, 8, 9])
+  let set_b: set.Set(Int) = set.from_list([0, 2, 3, 4, 5, 7, 9])
+  metrics.sorensen_dice_coefficient(set_a, set_b)
+  |> should.equal(2.0 *. 4.0 /. { 7.0 +. 7.0 })
+
+  let set_c: set.Set(Int) = set.from_list([0, 1, 2, 3, 4, 5])
+  let set_d: set.Set(Int) = set.from_list([6, 7, 8, 9, 10])
+  metrics.sorensen_dice_coefficient(set_c, set_d)
+  |> should.equal(2.0 *. 0.0 /. { 6.0 +. 5.0 })
+
+  let set_e: set.Set(String) = set.from_list(["cat", "dog", "hippo", "monkey"])
+  let set_f: set.Set(String) =
+    set.from_list(["monkey", "rhino", "ostrich", "salmon", "spider"])
+  metrics.sorensen_dice_coefficient(set_e, set_f)
+  |> should.equal(2.0 *. 1.0 /. { 4.0 +. 5.0 })
+}
+
 pub fn example_overlap_coefficient_test() {
   metrics.overlap_coefficient(set.from_list([]), set.from_list([]))
   |> should.equal(0.0)
@@ -250,9 +271,39 @@ pub fn example_overlap_coefficient_test() {
   |> should.equal(0.0 /. 5.0)
 
   let set_e: set.Set(String) =
-    set.from_list(["cat", "dog", "hippo", "monkey", "rhino"])
+    set.from_list(["horse", "dog", "hippo", "monkey", "bird"])
   let set_f: set.Set(String) =
-    set.from_list(["monkey", "rhino", "ostrich", "salmon"])
+    set.from_list(["monkey", "bird", "ostrich", "salmon"])
   metrics.overlap_coefficient(set_e, set_f)
   |> should.equal(2.0 /. 4.0)
 }
+
+pub fn example_cosine_similarity_test() {
+  // Empty lists returns an error
+  metrics.cosine_similarity([], [])
+  |> should.be_error()
+
+  // One empty list returns an error
+  metrics.cosine_similarity([1.0, 2.0, 3.0], [])
+  |> should.be_error()
+
+  // One empty list returns an error
+  metrics.cosine_similarity([], [1.0, 2.0, 3.0])
+  |> should.be_error()
+
+  // Differently sized lists return an error
+  metrics.cosine_similarity([1.0, 2.0], [1.0, 2.0, 3.0, 4.0])
+  |> should.be_error()
+
+  // Two orthogonal vectors (represented by lists)
+  metrics.cosine_similarity([-1.0, 1.0, 0.0], [1.0, 1.0, -1.0])
+  |> should.equal(Ok(0.0))
+
+  // Two identical (parallel) vectors (represented by lists)
+  metrics.cosine_similarity([1.0, 2.0, 3.0], [1.0, 2.0, 3.0])
+  |> should.equal(Ok(1.0))
+
+  // Two parallel, but oppositely oriented vectors (represented by lists)
+  metrics.cosine_similarity([-1.0, -2.0, -3.0], [1.0, 2.0, 3.0])
+  |> should.equal(Ok(-1.0))
+}

From fc4a4a8b0994534b0533c162efc0028a6f63da10 Mon Sep 17 00:00:00 2001
From: NicklasXYZ <nsa200293@live.dk>
Date: Tue, 19 Mar 2024 15:18:07 +0100
Subject: [PATCH 4/4] Fix typos

---
 src/gleam_community/maths/metrics.gleam | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/gleam_community/maths/metrics.gleam b/src/gleam_community/maths/metrics.gleam
index a4ee8ab..772b80d 100644
--- a/src/gleam_community/maths/metrics.gleam
+++ b/src/gleam_community/maths/metrics.gleam
@@ -25,14 +25,17 @@
 //// 
 //// Metrics: A module offering functions for calculating distances and other types of metrics.
 //// 
-//// * **Distances**
+//// * **Distance measures**
 ////   * [`norm`](#norm)
-////   * [`manhatten_distance`](#float_manhatten_distance)
+////   * [`manhatten_distance`](#manhatten_distance)
 ////   * [`minkowski_distance`](#minkowski_distance)
 ////   * [`euclidean_distance`](#euclidean_distance)
+////   * [`cosine_similarity`](#cosine_similarity)
+//// * **Set & string similarity measures**
 ////   * [`jaccard_index`](#jaccard_index)
 ////   * [`sorensen_dice_coefficient`](#sorensen_dice_coefficient)
 ////   * [`tversky_index`](#tversky_index)
+////   * [`overlap_coefficient`](#overlap_coefficient)
 //// * **Basic statistical measures**
 ////   * [`mean`](#mean)
 ////   * [`median`](#median)
@@ -571,7 +574,7 @@ pub fn standard_deviation(arr: List(Float), ddof: Int) -> Result(Float, String)
 /// is defined as:
 /// 
 /// \\[
-/// \text{JI}(X, Y) = \frac{|X \cap Y|}{|X \cup Y|} \in \left[0, 1\right]
+/// \frac{|X \cap Y|}{|X \cup Y|} \\; \in \\; \left[0, 1\right]
 /// \\]
 /// 
 /// where:
@@ -621,7 +624,7 @@ pub fn jaccard_index(xset: set.Set(a), yset: set.Set(a)) -> Float {
 /// coefficient is defined as:
 /// 
 /// \\[
-/// \text{DSC}(X, Y) = \frac{2 \times |X \cap Y|}{|X| + |Y|} \in \left[0, 1\right]
+/// \frac{2 |X \cap Y|}{|X| + |Y|} \\; \in \\; \left[0, 1\right]
 /// \\]
 /// 
 /// where:
@@ -672,7 +675,7 @@ pub fn sorensen_dice_coefficient(xset: set.Set(a), yset: set.Set(a)) -> Float {
 /// measures between sets. The Tversky index is defined as:
 /// 
 /// \\[
-/// \text{TI}(X, Y) = \frac{|X \cap Y|}{|X \cap Y| + \alpha|X - Y| + \beta|Y - X|}
+/// \frac{|X \cap Y|}{|X \cap Y| + \alpha|X - Y| + \beta|Y - X|}
 /// \\]
 /// 
 /// where:
@@ -759,7 +762,7 @@ pub fn tversky_index(
 /// smaller of the two sets. It is defined mathematically as:
 ///
 /// \\[
-/// \text{OC}(X, Y) = \frac{|X \cap Y|}{\min(|X|, |Y|)} \in \left[0, 1\right]
+/// \frac{|X \cap Y|}{\min(|X|, |Y|)} \\; \in \\; \left[0, 1\right]
 /// \\]
 ///
 /// where:
@@ -816,13 +819,13 @@ pub fn overlap_coefficient(xset: set.Set(a), yset: set.Set(a)) -> Float {
 /// Calculate the cosine similarity between two lists (representing vectors):
 ///
 /// \\[
-/// \frac{\sum_{i=1}^n x_i \cdot y_i}{\left(\sum_{i=1}^n x_i^2\right)^{\frac{1}{2}} \cdot \left(\sum_{i=1}^n y_i^2\right)^{\frac{1}{2}}}
+/// \frac{\sum_{i=1}^n x_i \cdot y_i}{\left(\sum_{i=1}^n x_i^2\right)^{\frac{1}{2}} \cdot \left(\sum_{i=1}^n y_i^2\right)^{\frac{1}{2}}} \\; \in \\; \left[-1, 1\right]
 /// \\]
 ///
-/// In the formula, $n$ is the length of the two lists and $x_i, y_i$ are the values in the respective input lists indexed by $i$. The numerator
-/// represents the dot product of the two vectors, while the denominator is the product of the magnitudes (Euclidean norms) of the two vectors. 
-/// The cosine similarity provides a value between -1 and 1, where 1 means the vectors are in the same direction, -1 means they are in exactly 
-/// opposite directions, and 0 indicates orthogonality. 
+/// In the formula, $$n$$ is the length of the two lists and $$x_i$$, $$y_i$$ are the values in the respective input lists indexed by $$i$$. 
+/// The numerator represents the dot product of the two vectors, while the denominator is the product of the magnitudes (Euclidean norms) of 
+/// the two vectors. The cosine similarity provides a value between -1 and 1, where 1 means the vectors are in the same direction, -1 means 
+/// they are in exactly opposite directions, and 0 indicates orthogonality. 
 /// 
 /// <details>
 ///     <summary>Example:</summary>