Skip to content

Commit

Permalink
Merge branch 'master' into feature/string-similarity/OptimalStringAli…
Browse files Browse the repository at this point in the history
…gnment
  • Loading branch information
Kalkwst committed Aug 21, 2024
2 parents 0477aec + 351b95b commit 466bbb8
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 0 deletions.
116 changes: 116 additions & 0 deletions Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using Algorithms.Strings.Similarity;
using NUnit.Framework;

namespace Algorithms.Tests.Strings.Similarity;

/// <summary>
/// Unit tests for <see cref="DamerauLevenshteinDistance.Calculate"/>.
/// Every test feeds one pair of strings to the algorithm and checks the returned edit distance.
/// </summary>
[TestFixture]
public class DamerauLevenshteinDistanceTests
{
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero() =>
        AssertDistance(
            "test",
            "test",
            0,
            "Identical strings should have a Damerau-Levenshtein distance of 0.");

    [Test]
    public void Calculate_CompletelyDifferentStrings_ReturnsLengthOfLongestString() =>
        AssertDistance(
            "abc",
            "xyz",
            3,
            "Completely different strings should have a Damerau-Levenshtein distance equal to the length of the longest string.");

    [Test]
    public void Calculate_OneEmptyString_ReturnsLengthOfOtherString() =>
        AssertDistance(
            "test",
            "",
            4,
            "One empty string should have a Damerau-Levenshtein distance equal to the length of the other string.");

    [Test]
    public void Calculate_BothEmptyStrings_ReturnsZero() =>
        AssertDistance(
            "",
            "",
            0,
            "Both empty strings should have a Damerau-Levenshtein distance of 0.");

    [Test]
    public void Calculate_DifferentLengths_ReturnsCorrectValue() =>
        AssertDistance(
            "short",
            "longer",
            6,
            "Strings of different lengths should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_SpecialCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "hello!",
            "hello?",
            1,
            "Strings with special characters should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_DifferentCases_ReturnsCorrectValue() =>
        AssertDistance(
            "Hello",
            "hello",
            1,
            "Strings with different cases should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_CommonPrefixes_ReturnsCorrectValue() =>
        AssertDistance(
            "prefix",
            "pre",
            3,
            "Strings with common prefixes should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_CommonSuffixes_ReturnsCorrectValue() =>
        AssertDistance(
            "suffix",
            "fix",
            3,
            "Strings with common suffixes should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_Transpositions_ReturnsCorrectValue() =>
        AssertDistance(
            "abcd",
            "acbd",
            1,
            "Strings with transpositions should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_RepeatedCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "aaa",
            "aaaaa",
            2,
            "Strings with repeated characters should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_UnicodeCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "こんにちは",
            "こんばんは",
            2,
            "Strings with Unicode characters should return the correct Damerau-Levenshtein distance.");

    /// <summary>
    /// Runs the algorithm on the given pair and asserts the resulting distance.
    /// </summary>
    /// <param name="left">First argument passed to <c>Calculate</c>.</param>
    /// <param name="right">Second argument passed to <c>Calculate</c>.</param>
    /// <param name="expected">The distance the algorithm is expected to return.</param>
    /// <param name="failureMessage">Message reported by NUnit when the assertion fails.</param>
    private static void AssertDistance(string left, string right, int expected, string failureMessage)
    {
        var actual = DamerauLevenshteinDistance.Calculate(left, right);

        Assert.That(actual, Is.EqualTo(expected), failureMessage);
    }
}
104 changes: 104 additions & 0 deletions Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using System;
using System.Collections.Generic;

namespace Algorithms.Strings.Similarity;

/// <summary>
/// Computes the Damerau-Levenshtein edit distance between two strings.
/// </summary>
public static class DamerauLevenshteinDistance
{
    /// <summary>
    /// Calculates the Damerau-Levenshtein distance between two strings.
    /// The distance is the minimum number of single-character insertions, deletions,
    /// substitutions and transpositions of two adjacent characters needed to turn
    /// <paramref name="left"/> into <paramref name="right"/>.
    /// </summary>
    /// <remarks>
    /// This is the unrestricted variant: characters may be inserted between, or removed from
    /// between, a transposed pair (for example "ca" -> "abc" costs 2: transpose to "ac", then
    /// insert 'b'). The restricted "optimal string alignment" variant reports 3 for that pair.
    /// Characters are compared with <c>==</c> on <see cref="char"/> values, so the comparison is
    /// case-sensitive and works on UTF-16 code units.
    /// </remarks>
    /// <param name="left">The first string.</param>
    /// <param name="right">The second string.</param>
    /// <returns>The Damerau-Levenshtein distance between the two strings.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
    /// </exception>
    public static int Calculate(string left, string right)
    {
        if (left is null)
        {
            throw new ArgumentNullException(nameof(left));
        }

        if (right is null)
        {
            throw new ArgumentNullException(nameof(right));
        }

        var leftSize = left.Length;
        var rightSize = right.Length;

        // distances[i, j] holds the distance between the first i characters of left
        // and the first j characters of right.
        var distances = InitializeDistanceArray(leftSize, rightSize);

        // For each character, the 1-based position of its most recent occurrence in the
        // part of left that has been fully processed. A missing entry means "not seen yet".
        var lastRowOfCharacter = new Dictionary<char, int>();

        for (var i = 1; i <= leftSize; i++)
        {
            // 1-based position in right of the most recent character equal to left[i - 1]
            // found so far in this row; 0 means "no match yet".
            var lastMatchingColumn = 0;

            for (var j = 1; j <= rightSize; j++)
            {
                // TryGetValue leaves transpositionRow at 0 when the character has not been seen.
                lastRowOfCharacter.TryGetValue(right[j - 1], out var transpositionRow);

                // Must be read before the match check below updates it for the current column.
                var transpositionColumn = lastMatchingColumn;

                var cost = 1;
                if (left[i - 1] == right[j - 1])
                {
                    cost = 0;
                    lastMatchingColumn = j;
                }

                var best = Math.Min(
                    Math.Min(
                        distances[i - 1, j] + 1, // delete left[i - 1]
                        distances[i, j - 1] + 1), // insert right[j - 1]
                    distances[i - 1, j - 1] + cost); // substitute, or keep when the characters match

                // A transposition is possible when right[j - 1] occurred earlier in left
                // (at transpositionRow) and left[i - 1] occurred earlier in right
                // (at transpositionColumn). Its price is one swap plus removing the
                // characters between the pair in left and inserting those between it in right.
                if (transpositionRow > 0 && transpositionColumn > 0)
                {
                    var transposition = distances[transpositionRow - 1, transpositionColumn - 1]
                        + (i - transpositionRow - 1)
                        + 1
                        + (j - transpositionColumn - 1);

                    best = Math.Min(best, transposition);
                }

                distances[i, j] = best;
            }

            // Only record the row once it is complete, so lookups made while processing
            // row i always refer to strictly earlier positions of left.
            lastRowOfCharacter[left[i - 1]] = i;
        }

        return distances[leftSize, rightSize];
    }

    /// <summary>
    /// Creates the distance matrix used by <see cref="Calculate"/> and fills in its borders.
    /// </summary>
    /// <remarks>
    /// The matrix has one more row than the left string has characters and one more column
    /// than the right string has characters. Column 0 holds the distance from each prefix of
    /// the left string to the empty string; row 0 holds the distance from the empty string to
    /// each prefix of the right string. All other cells start at 0.
    /// </remarks>
    /// <param name="leftSize">The size of the left string.</param>
    /// <param name="rightSize">The size of the right string.</param>
    /// <returns>A matrix of distances with its first row and first column initialized.</returns>
    private static int[,] InitializeDistanceArray(int leftSize, int rightSize)
    {
        var matrix = new int[leftSize + 1, rightSize + 1];

        // First column: turning a left prefix of length row into "" takes row deletions.
        for (var row = 1; row <= leftSize; row++)
        {
            matrix[row, 0] = row;
        }

        // First row: turning "" into a right prefix of length column takes column insertions.
        for (var column = 1; column <= rightSize; column++)
        {
            matrix[0, column] = column;
        }

        return matrix;
    }
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ find more than one implementation for the same objective but using different alg
* [String](./Algorithms/Strings)
* [Similarity](./Algorithms/Strings/Similarity/)
* [Cosine Similarity](./Algorithms/Strings/Similarity/CosineSimilarity.cs)
* [Damerau-Levenshtein Distance](./Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs)
* [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
* [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
* [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)
Expand Down

0 comments on commit 466bbb8

Please sign in to comment.