-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into feature/string-similarity/OptimalStringAlignment
- Loading branch information
Showing
3 changed files
with
221 additions
and
0 deletions.
There are no files selected for viewing
116 changes: 116 additions & 0 deletions
116
Algorithms.Tests/Strings/Similarity/DamerauLevenshteinDistanceTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
using Algorithms.Strings.Similarity; | ||
using NUnit.Framework; | ||
|
||
namespace Algorithms.Tests.Strings.Similarity; | ||
|
||
/// <summary>
/// Unit tests for <see cref="DamerauLevenshteinDistance.Calculate"/>.
/// Each test feeds one pair of strings to the calculator and checks the returned distance.
/// </summary>
[TestFixture]
public class DamerauLevenshteinDistanceTests
{
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero() =>
        AssertDistance(
            "test",
            "test",
            0,
            "Identical strings should have a Damerau-Levenshtein distance of 0.");

    [Test]
    public void Calculate_CompletelyDifferentStrings_ReturnsLengthOfLongestString() =>
        AssertDistance(
            "abc",
            "xyz",
            3,
            "Completely different strings should have a Damerau-Levenshtein distance equal to the length of the longest string.");

    [Test]
    public void Calculate_OneEmptyString_ReturnsLengthOfOtherString() =>
        AssertDistance(
            "test",
            "",
            4,
            "One empty string should have a Damerau-Levenshtein distance equal to the length of the other string.");

    [Test]
    public void Calculate_BothEmptyStrings_ReturnsZero() =>
        AssertDistance(
            "",
            "",
            0,
            "Both empty strings should have a Damerau-Levenshtein distance of 0.");

    [Test]
    public void Calculate_DifferentLengths_ReturnsCorrectValue() =>
        AssertDistance(
            "short",
            "longer",
            6,
            "Strings of different lengths should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_SpecialCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "hello!",
            "hello?",
            1,
            "Strings with special characters should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_DifferentCases_ReturnsCorrectValue() =>
        AssertDistance(
            "Hello",
            "hello",
            1,
            "Strings with different cases should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_CommonPrefixes_ReturnsCorrectValue() =>
        AssertDistance(
            "prefix",
            "pre",
            3,
            "Strings with common prefixes should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_CommonSuffixes_ReturnsCorrectValue() =>
        AssertDistance(
            "suffix",
            "fix",
            3,
            "Strings with common suffixes should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_Transpositions_ReturnsCorrectValue() =>
        AssertDistance(
            "abcd",
            "acbd",
            1,
            "Strings with transpositions should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_RepeatedCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "aaa",
            "aaaaa",
            2,
            "Strings with repeated characters should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_UnicodeCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "こんにちは",
            "こんばんは",
            2,
            "Strings with Unicode characters should return the correct Damerau-Levenshtein distance.");

    /// <summary>
    /// Runs the calculator on the given pair and asserts the resulting distance.
    /// </summary>
    /// <param name="first">String passed as the first argument.</param>
    /// <param name="second">String passed as the second argument.</param>
    /// <param name="expected">Distance the calculator is expected to return.</param>
    /// <param name="failureMessage">Message reported by NUnit when the assertion fails.</param>
    private static void AssertDistance(string first, string second, int expected, string failureMessage)
    {
        var actual = DamerauLevenshteinDistance.Calculate(first, second);

        Assert.That(actual, Is.EqualTo(expected), failureMessage);
    }
}
104 changes: 104 additions & 0 deletions
104
Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
using System; | ||
|
||
namespace Algorithms.Strings.Similarity; | ||
|
||
/// <summary>
/// Edit distance between two strings that counts insertions, deletions, substitutions
/// and transpositions of two adjacent characters, each as a single operation.
/// </summary>
/// <remarks>
/// The recurrence implemented here only looks at the two most recent characters of each
/// string when considering a transposition, i.e. it is the restricted variant usually
/// called the optimal string alignment distance. A transposed pair cannot be edited
/// again afterwards, so for example "ca" -> "abc" yields 3.
/// Characters are compared with <c>==</c> on <see cref="char"/>, so the comparison is
/// case-sensitive and works on individual UTF-16 code units.
/// </remarks>
public static class DamerauLevenshteinDistance
{
    /// <summary>
    /// Calculates the distance between two strings as the minimum number of
    /// insertions, deletions, substitutions and adjacent transpositions needed to
    /// turn <paramref name="left"/> into <paramref name="right"/>.
    /// </summary>
    /// <param name="left">The first string.</param>
    /// <param name="right">The second string.</param>
    /// <returns>The distance between the two strings; 0 when they are equal.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
    /// </exception>
    public static int Calculate(string left, string right)
    {
        // Fail with a descriptive exception instead of a NullReferenceException on .Length.
        if (left is null)
        {
            throw new ArgumentNullException(nameof(left));
        }

        if (right is null)
        {
            throw new ArgumentNullException(nameof(right));
        }

        var leftSize = left.Length;
        var rightSize = right.Length;

        // distances[i, j] holds the distance between the first i characters of left
        // and the first j characters of right.
        var distances = InitializeDistanceArray(leftSize, rightSize);

        for (var i = 1; i <= leftSize; i++)
        {
            for (var j = 1; j <= rightSize; j++)
            {
                // Matching characters can be carried over for free; otherwise a
                // substitution costs one operation.
                var cost = left[i - 1] == right[j - 1] ? 0 : 1;

                var deletion = distances[i - 1, j] + 1; // drop left[i - 1]
                var insertion = distances[i, j - 1] + 1; // add right[j - 1]
                var substitution = distances[i - 1, j - 1] + cost; // replace or keep

                distances[i, j] = Math.Min(Math.Min(deletion, insertion), substitution);

                // Transposition: the last two characters of the left prefix are the
                // last two characters of the right prefix in swapped order.
                if (i > 1 && j > 1 && left[i - 1] == right[j - 2] && left[i - 2] == right[j - 1])
                {
                    // When cost is 0 all four characters involved are equal, so the
                    // diagonal steps already bound this cell by distances[i - 2, j - 2]
                    // and the Math.Min below leaves it unchanged; when cost is 1 this
                    // charges exactly one operation for the swap.
                    distances[i, j] = Math.Min(
                        distances[i, j],
                        distances[i - 2, j - 2] + cost);
                }
            }
        }

        // The bottom-right cell covers both complete strings.
        return distances[leftSize, rightSize];
    }

    /// <summary>
    /// Creates the distance matrix and fills in the cells that compare a prefix
    /// with the empty string.
    /// </summary>
    /// <remarks>
    /// The matrix has <c>leftSize + 1</c> rows and <c>rightSize + 1</c> columns.
    /// Column 0 (right prefix is empty) holds <c>i</c> in row <c>i</c>, and row 0
    /// (left prefix is empty) holds <c>j</c> in column <c>j</c>. All other cells
    /// start at 0 and are filled in by <see cref="Calculate"/>.
    /// </remarks>
    /// <param name="leftSize">The size of the left string.</param>
    /// <param name="rightSize">The size of the right string.</param>
    /// <returns>A matrix of distances with its first row and first column initialized.</returns>
    private static int[,] InitializeDistanceArray(int leftSize, int rightSize)
    {
        var matrix = new int[leftSize + 1, rightSize + 1];

        // First column: removing all i characters of the left prefix.
        for (var i = 1; i <= leftSize; i++)
        {
            matrix[i, 0] = i;
        }

        // First row: adding all j characters of the right prefix.
        for (var j = 1; j <= rightSize; j++)
        {
            matrix[0, j] = j;
        }

        return matrix;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters