Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of Damerau-Levenshtein Distance Algorithm #460

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using Algorithms.Strings.Similarity;
using NUnit.Framework;

namespace Algorithms.Tests.Strings.Similarity;

/// <summary>
/// Unit tests for <see cref="DamerauLevenshteinDistance.Calculate"/>.
/// Every test feeds one pair of strings to the algorithm and checks the returned distance.
/// </summary>
[TestFixture]
public class DamerauLevenshteinDistanceTests
{
    [Test]
    public void Calculate_IdenticalStrings_ReturnsZero() =>
        AssertDistance(
            "test",
            "test",
            0,
            "Identical strings should have a Damerau-Levenshtein distance of 0.");

    [Test]
    public void Calculate_CompletelyDifferentStrings_ReturnsLengthOfLongestString() =>
        AssertDistance(
            "abc",
            "xyz",
            3,
            "Completely different strings should have a Damerau-Levenshtein distance equal to the length of the longest string.");

    [Test]
    public void Calculate_OneEmptyString_ReturnsLengthOfOtherString() =>
        AssertDistance(
            "test",
            "",
            4,
            "One empty string should have a Damerau-Levenshtein distance equal to the length of the other string.");

    [Test]
    public void Calculate_BothEmptyStrings_ReturnsZero() =>
        AssertDistance(
            "",
            "",
            0,
            "Both empty strings should have a Damerau-Levenshtein distance of 0.");

    [Test]
    public void Calculate_DifferentLengths_ReturnsCorrectValue() =>
        AssertDistance(
            "short",
            "longer",
            6,
            "Strings of different lengths should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_SpecialCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "hello!",
            "hello?",
            1,
            "Strings with special characters should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_DifferentCases_ReturnsCorrectValue() =>
        AssertDistance(
            "Hello",
            "hello",
            1,
            "Strings with different cases should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_CommonPrefixes_ReturnsCorrectValue() =>
        AssertDistance(
            "prefix",
            "pre",
            3,
            "Strings with common prefixes should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_CommonSuffixes_ReturnsCorrectValue() =>
        AssertDistance(
            "suffix",
            "fix",
            3,
            "Strings with common suffixes should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_Transpositions_ReturnsCorrectValue() =>
        AssertDistance(
            "abcd",
            "acbd",
            1,
            "Strings with transpositions should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_RepeatedCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "aaa",
            "aaaaa",
            2,
            "Strings with repeated characters should return the correct Damerau-Levenshtein distance.");

    [Test]
    public void Calculate_UnicodeCharacters_ReturnsCorrectValue() =>
        AssertDistance(
            "こんにちは",
            "こんばんは",
            2,
            "Strings with Unicode characters should return the correct Damerau-Levenshtein distance.");

    /// <summary>
    /// Runs the algorithm on the given pair and asserts the returned distance.
    /// </summary>
    /// <param name="left">First argument passed to <c>Calculate</c>.</param>
    /// <param name="right">Second argument passed to <c>Calculate</c>.</param>
    /// <param name="expected">Distance the algorithm is expected to return.</param>
    /// <param name="message">Message reported when the assertion fails.</param>
    private static void AssertDistance(string left, string right, int expected, string message)
    {
        // Act
        var distance = DamerauLevenshteinDistance.Calculate(left, right);

        // Assert
        Assert.That(distance, Is.EqualTo(expected), message);
    }
}
104 changes: 104 additions & 0 deletions Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using System;

namespace Algorithms.Strings.Similarity;

/// <summary>
/// Edit distance between two strings that allows insertion, deletion, substitution
/// and the swap of two adjacent characters.
/// </summary>
/// <remarks>
/// The recurrence implemented here is the optimal string alignment (restricted
/// Damerau-Levenshtein) form: a transposition is only recognised for two directly
/// adjacent characters, and the swapped pair is not edited again afterwards.
/// For example, "CA" and "ABC" are at distance 3 with this recurrence, whereas the
/// unrestricted Damerau-Levenshtein definition allows 2.
/// Characters are compared one <see cref="char"/> at a time and case-sensitively.
/// </remarks>
public static class DamerauLevenshteinDistance
{
    /// <summary>
    /// Calculates the distance between two strings as the smallest number of
    /// insertions, deletions, substitutions and adjacent transpositions found by
    /// the dynamic-programming recurrence described on the class.
    /// </summary>
    /// <param name="left">The first string.</param>
    /// <param name="right">The second string.</param>
    /// <returns>
    /// The distance between the two strings; 0 when they are equal, and the length
    /// of the other string when one of them is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="left"/> or <paramref name="right"/> is <c>null</c>.
    /// </exception>
    public static int Calculate(string left, string right)
    {
        // Fail with a descriptive exception instead of a NullReferenceException
        // raised by the first property access below.
        if (left is null)
        {
            throw new ArgumentNullException(nameof(left));
        }

        if (right is null)
        {
            throw new ArgumentNullException(nameof(right));
        }

        var leftSize = left.Length;
        var rightSize = right.Length;

        // distances[i, j] holds the distance between the first i characters of
        // left and the first j characters of right.
        var distances = InitializeDistanceArray(leftSize, rightSize);

        for (var i = 1; i <= leftSize; i++)
        {
            for (var j = 1; j <= rightSize; j++)
            {
                // Matching characters can be kept for free; otherwise a
                // substitution costs one edit.
                var cost = left[i - 1] == right[j - 1] ? 0 : 1;

                var deletion = distances[i - 1, j] + 1; // drop left[i - 1]
                var insertion = distances[i, j - 1] + 1; // add right[j - 1]
                var substitution = distances[i - 1, j - 1] + cost; // keep or replace

                var best = Math.Min(Math.Min(deletion, insertion), substitution);

                // The last two characters of the left prefix are the last two
                // characters of the right prefix in reverse order, so swapping
                // them is a candidate as well.
                if (i > 1 && j > 1 && left[i - 1] == right[j - 2] && left[i - 2] == right[j - 1])
                {
                    // A swap is a single edit. When all four characters are equal
                    // the diagonal candidate above is already at least as good,
                    // so charging 1 here never changes the result.
                    best = Math.Min(best, distances[i - 2, j - 2] + 1);
                }

                distances[i, j] = best;
            }
        }

        // The bottom-right cell covers both complete strings.
        return distances[leftSize, rightSize];
    }

    /// <summary>
    /// Creates the distance matrix and fills in its borders.
    /// The matrix has one more row than the left string has characters and one
    /// more column than the right string has characters. Cell [i, 0] is set to i
    /// and cell [0, j] is set to j: turning a prefix into the empty string, or the
    /// empty string into a prefix, takes one edit per character. All other cells
    /// are left at 0 and are filled in by <see cref="Calculate"/>.
    /// </summary>
    /// <param name="leftSize">The size of the left string.</param>
    /// <param name="rightSize">The size of the right string.</param>
    /// <returns>A matrix of distances with initialized first row and first column.</returns>
    private static int[,] InitializeDistanceArray(int leftSize, int rightSize)
    {
        var matrix = new int[leftSize + 1, rightSize + 1];

        // First column: the right string is empty, so every character of the
        // left prefix has to be deleted.
        for (var row = 1; row <= leftSize; row++)
        {
            matrix[row, 0] = row;
        }

        // First row: the left string is empty, so every character of the
        // right prefix has to be inserted.
        for (var column = 1; column <= rightSize; column++)
        {
            matrix[0, column] = column;
        }

        return matrix;
    }
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ find more than one implementation for the same objective but using different alg
* [String](./Algorithms/Strings)
* [Similarity](./Algorithms/Strings/Similarity/)
* [Cosine Similarity](./Algorithms/Strings/Similarity/CosineSimilarity.cs)
* [Damerau-Levenshtein Distance](./Algorithms/Strings/Similarity/DamerauLevenshteinDistance.cs)
* [Hamming Distance](./Algorithms/Strings/Similarity/HammingDistance.cs)
* [Jaro Similarity](./Algorithms/Strings/Similarity/JaroSimilarity.cs)
* [Jaro-Winkler Distance](./Algorithms/Strings/Similarity/JaroWinklerDistance.cs)
Expand Down
Loading