Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Optimal String Alignment (OSA) Distance Algorithm #464

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using Algorithms.Strings.Similarity;
using FluentAssertions;
using NUnit.Framework;
using System;

namespace Algorithms.Tests.Strings.Similarity
{
/// <summary>
/// Unit tests for <see cref="OptimalStringAlignment.Calculate"/> covering the trivial cases
/// (identical and empty inputs) and each individual edit operation.
/// </summary>
[TestFixture]
public class OptimalStringAlignmentTests
{
    // Base word shared by most scenarios; each test compares it against a one-edit variation.
    private const string Word = "example";

    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero()
    {
        const double expected = 0.0;

        var actual = OptimalStringAlignment.Calculate(Word, Word);

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_FirstStringEmpty_ReturnsLengthOfSecondString()
    {
        var expected = Word.Length;

        var actual = OptimalStringAlignment.Calculate("", Word);

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_SecondStringEmpty_ReturnsLengthOfFirstString()
    {
        var expected = Word.Length;

        var actual = OptimalStringAlignment.Calculate(Word, "");

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_BothStringsEmpty_ReturnsZero()
    {
        const double expected = 0.0;

        var actual = OptimalStringAlignment.Calculate("", "");

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_OneInsertion_ReturnsOne()
    {
        const double expected = 1.0;

        // A trailing 's' is appended to the base word.
        var actual = OptimalStringAlignment.Calculate(Word, "examples");

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_OneDeletion_ReturnsOne()
    {
        const double expected = 1.0;

        // The trailing 's' is removed to reach the base word.
        var actual = OptimalStringAlignment.Calculate("examples", Word);

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_OneSubstitution_ReturnsOne()
    {
        const double expected = 1.0;

        // 'a' is replaced by 'b'.
        var actual = OptimalStringAlignment.Calculate(Word, "exbmple");

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_OneTransposition_ReturnsOne()
    {
        const double expected = 1.0;

        // The adjacent characters 'a' and 'm' are swapped.
        var actual = OptimalStringAlignment.Calculate(Word, "exmaple");

        actual.Should().Be(expected);
    }

    [Test]
    public void Calculate_MultipleOperations_ReturnsCorrectDistance()
    {
        const double expected = 3.0;

        // Two substitutions ('k' -> 's', 'e' -> 'i') and one insertion ('g').
        var actual = OptimalStringAlignment.Calculate("kitten", "sitting");

        actual.Should().Be(expected);
    }
}
}
157 changes: 157 additions & 0 deletions Algorithms/Strings/Similarity/OptimalStringAlignment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
using System;

namespace Algorithms.Strings.Similarity
{
/// <summary>
/// Provides methods to calculate the Optimal String Alignment (OSA) distance between two strings.
///
/// The Optimal String Alignment distance, also known as the restricted Damerau-Levenshtein distance,
/// is a string metric used to measure the difference between two sequences. It is similar to the
/// Levenshtein distance, but it also counts a transposition (swapping of two adjacent characters)
/// as a single operation. This makes it particularly useful when adjacent characters are commonly
/// transposed, such as in typographical errors.
///
/// The OSA distance between two strings is the minimum number of operations required to
/// transform one string into the other, where the operations are:
///
/// 1. Insertion: Adding a single character.
/// 2. Deletion: Removing a single character.
/// 3. Substitution: Replacing one character with another.
/// 4. Transposition: Swapping two adjacent characters (this is what distinguishes OSA from the
///    traditional Levenshtein distance).
///
/// OSA computes this minimum under the restriction that no substring is edited more than once.
/// This is the main difference between OSA and the unrestricted Damerau-Levenshtein distance.
/// For example, the OSA distance between "CA" and "ABC" is 3, because the transposed pair may not
/// be edited again by inserting a character between its two characters.
/// </summary>
/// <remarks>
/// The algorithm runs in O(n * m) time and uses O(n * m) memory, where n and m are the lengths of
/// the two input strings. Characters are compared as individual <see cref="char"/> values.
/// </remarks>
/// <example>
/// <code>
/// double distance = OptimalStringAlignment.Calculate("example", "exmaple");
/// Console.WriteLine(distance); // Output: 1
/// </code>
/// The strings "example" and "exmaple" differ by one transposition of adjacent characters
/// ('a' and 'm'), so the OSA distance is 1.
///
/// <code>
/// double distance = OptimalStringAlignment.Calculate("kitten", "sitting");
/// Console.WriteLine(distance); // Output: 3
/// </code>
/// The strings "kitten" and "sitting" have three differences (substitutions 'k' to 's' and
/// 'e' to 'i', and insertion of 'g'), resulting in an OSA distance of 3.
/// </example>
public static class OptimalStringAlignment
{
    /// <summary>
    /// Calculates the Optimal String Alignment distance between two strings.
    /// </summary>
    /// <param name="firstString">The first string.</param>
    /// <param name="secondString">The second string.</param>
    /// <returns>
    /// The Optimal String Alignment distance between the two strings. The value is always a
    /// non-negative whole number; it is returned as <see cref="double"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown when either of the input strings is null.</exception>
    public static double Calculate(string firstString, string secondString)
    {
        // Validate the arguments themselves. Passing nameof(...) here would check a
        // string literal that can never be null, so the guard would never fire.
        ArgumentNullException.ThrowIfNull(firstString);
        ArgumentNullException.ThrowIfNull(secondString);

        if (firstString == secondString)
        {
            return 0.0;
        }

        // Transforming to or from an empty string takes one insertion/deletion per character.
        if (firstString.Length == 0)
        {
            return secondString.Length;
        }

        if (secondString.Length == 0)
        {
            return firstString.Length;
        }

        var distanceMatrix = GenerateDistanceMatrix(firstString.Length, secondString.Length);
        distanceMatrix = CalculateDistance(firstString, secondString, distanceMatrix);

        return distanceMatrix[firstString.Length, secondString.Length];
    }

    /// <summary>
    /// Generates the initial distance matrix for the given lengths of the two strings.
    /// Cell [i, j] will eventually hold the distance between the first i characters of the
    /// first string and the first j characters of the second string.
    /// </summary>
    /// <param name="firstLength">The length of the first string.</param>
    /// <param name="secondLength">The length of the second string.</param>
    /// <returns>The matrix with its first row and first column initialized.</returns>
    private static int[,] GenerateDistanceMatrix(int firstLength, int secondLength)
    {
        // Only indices 0..firstLength and 0..secondLength are ever addressed,
        // so one extra row and column (for the empty prefix) is sufficient.
        var distanceMatrix = new int[firstLength + 1, secondLength + 1];

        // Distance from a prefix of the first string to the empty string: i deletions.
        for (var i = 0; i <= firstLength; i++)
        {
            distanceMatrix[i, 0] = i;
        }

        // Distance from the empty string to a prefix of the second string: j insertions.
        for (var j = 0; j <= secondLength; j++)
        {
            distanceMatrix[0, j] = j;
        }

        return distanceMatrix;
    }

    /// <summary>
    /// Fills the distance matrix for the given strings using the Optimal String Alignment recurrence.
    /// </summary>
    /// <param name="firstString">The first string.</param>
    /// <param name="secondString">The second string.</param>
    /// <param name="distanceMatrix">The initial distance matrix; it is updated in place.</param>
    /// <returns>The same matrix instance, with every cell calculated.</returns>
    private static int[,] CalculateDistance(string firstString, string secondString, int[,] distanceMatrix)
    {
        for (var i = 1; i <= firstString.Length; i++)
        {
            for (var j = 1; j <= secondString.Length; j++)
            {
                // Matching characters can be aligned for free; otherwise a substitution is needed.
                var cost = firstString[i - 1] == secondString[j - 1] ? 0 : 1;

                distanceMatrix[i, j] = Minimum(
                    distanceMatrix[i - 1, j - 1] + cost, // substitution (or match)
                    distanceMatrix[i, j - 1] + 1, // insertion
                    distanceMatrix[i - 1, j] + 1); // deletion

                // The last two characters of both prefixes are the same pair in swapped order.
                var isTransposition = i > 1 && j > 1
                    && firstString[i - 1] == secondString[j - 2]
                    && firstString[i - 2] == secondString[j - 1];

                if (isTransposition)
                {
                    // Swapping two adjacent characters always counts as exactly one edit.
                    distanceMatrix[i, j] = Math.Min(
                        distanceMatrix[i, j],
                        distanceMatrix[i - 2, j - 2] + 1); // transposition
                }
            }
        }

        return distanceMatrix;
    }

    /// <summary>
    /// Returns the minimum of three integers.
    /// </summary>
    /// <param name="a">The first integer.</param>
    /// <param name="b">The second integer.</param>
    /// <param name="c">The third integer.</param>
    /// <returns>The minimum of the three integers.</returns>
    private static int Minimum(int a, int b, int c)
    {
        return Math.Min(a, Math.Min(b, c));
    }
}
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ find more than one implementation for the same objective but using different alg
* [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
* [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
* [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)
* [Optimal String Alignment](./Algorithms/Strings/Similarity/OptimalStringAlignment.cs)
* [Pattern Matching](./Algorithms/Strings/PatternMatching/)
* [Bitap Pattern Matching](./Algorithms/Strings/PatternMatching/Bitap.cs)
* [Naive String Search](./Algorithms/Strings/PatternMatching/NaiveStringSearch.cs)
Expand Down
Loading