|
| 1 | +/* |
| 2 | +https://www.geeksforgeeks.org/edit-distance-dp-5/ |
| 3 | +https://www.techiedelight.com/levenshtein-distance-edit-distance-problem/ |
| 4 | +*/ |
| 5 | + |
| 6 | +/* |
| 7 | +The problem is to find the number of minimum edits required to transform one string into another. |
| 8 | +Edit Distance is a way of quantifying how dissimilar two strings are to one another by counting minimum number of operations required to transform one string into another. |
| 9 | +
|
| 10 | +What are the subproblems? |
| 11 | +The idea is to process all characters one by one, starting from either the left or the right end of both strings. |
| 12 | +Let us traverse from right corner, there are two possibilities for every pair of character being traversed. |
| 13 | +
|
| 14 | +n: Length of X (first string) |
| 15 | +m: Length of Y (second string) |
| 16 | +
|
| 17 | +1. If (X[n] == Y[m]) we don't have to do anything as they are the same. Ignore the last characters and recur for X[1...n-1] and Y[1...m-1] to find their edit-distance |
| 18 | +2. Else (the last characters are not the same), consider all three operations on the last character of the first string, i.e. X[n]: recursively compute the minimum cost for each operation and take the minimum of the three values. |
| 19 | + Insert: Recur for n and m-1 |
| 20 | + Remove: Recur for n-1 and m |
| 21 | + Replace: Recur for n-1 and m-1 |
| 22 | +
|
| 23 | +*/ |
| 24 | + |
| 25 | +//RECURSIVE APPROACH |
| 26 | +public int editDist(String X, String Y, int n, int m){ |
| 27 | + //If first string is empty we need to insert all remaining characters from second string |
| 28 | + if(n==0) return m; |
| 29 | + |
| 30 | + //If second string is empty we need to insert all remaining characters from first string |
| 31 | + else if(m==0) return n; |
| 32 | + |
| 33 | + else if(X.charAt(n-1) == Y.charAt(m-1)) return editDist(X, Y, n-1, m-1); |
| 34 | + |
| 35 | + else{ |
| 36 | + return (1 + Math.min( //one added as a cost of operation |
| 37 | + editDist(X, Y, n-1, m-1), //replacement of X[n] to Y[m] |
| 38 | + editDist(X, Y, n, m-1), //insertion of Y[m] in X |
| 39 | + editDist(X, Y, n-1, m) //deletion of X[n] from X |
| 40 | + )); |
| 41 | + } |
| 42 | +} |
| 43 | + |
| 44 | +/* |
| 45 | +Time Complexity |
| 46 | +Worst Case - O(3^(n+m)) |
| 47 | +The worst case arises when none of the characters of the two strings match, so all three operations (insertion, deletion and replacement) have to be explored at every step. |
| 48 | +Each recursive call reduces n+m by at least one and branches three ways, where n and m are the lengths of the first and second strings. |
| 49 | +*/ |
| 50 | + |
| 51 | + |
| 52 | +/* |
| 53 | +DYNAMIC PROGRAMMING - BOTTOM UP APPROACH |
| 54 | +The problem can be divided into subproblems and the solution to the main problem can be computed by solving the subproblems ==> Optimal Substructure Property |
| 55 | +The subproblems are overlapping which can be seen by drawing the recursion call tree ==> Overlapping Subproblems Property |
| 56 | +
|
| 57 | +Hence the problem can be solved using DP. |
| 58 | +*/ |
| 59 | + |
| 60 | +public int editDist(String X, String Y){ |
| 61 | + int n = X.length(); |
| 62 | + int m = Y.length(); |
| 63 | + |
| 64 | + int dist[][] = new int[n+1][m+1]; |
| 65 | + |
| 66 | + for(int i=0 ; i<=n ; i++){ |
| 67 | + for(int j=0 ; j<=m ; j++){ |
| 68 | + |
| 69 | + //If the first string is empty, then we need to insert all characters of the second string into the first string |
| 70 | + if(i==0) dist[i][j] = j; //Min Operations = j (add j characters) |
| 71 | + |
| 72 | + //If the second string is empty, then we need to delete all characters from first string |
| 73 | + else if(j==0) dist[i][j] = i; //Min Operations = i (remove i characters) |
| 74 | + |
| 75 | + /* |
| 76 | + If last characters are the same, no need for any operation. The edit-distance will remain the same as it was |
| 77 | + before considering X[i] and Y[j]. |
| 78 | + */ |
| 79 | + else if( X.charAt(i-1) == Y.charAt(j-1) ) dist[i][j] = dist[i-1][j-1]; |
| 80 | + |
| 81 | + /* |
| 82 | + last characters of the strings do not match. |
| 83 | + We need to explore all three operations and choose the one which results in minimum cost.*/ |
| 84 | + else { |
| 85 | + dist[i][j] = 1 + Math.min( // 1 is added as the cost of the chosen operation |
| 86 | + dist[i-1][j], // delete X[i] from X |
| 87 | + Math.min(dist[i][j-1], //insert Y[j] in X |
| 88 | + dist[i-1][j-1]) //replace X[i] with Y[j] |
| 89 | + ) |
| 90 | + } |
| 91 | + } |
| 92 | + } |
| 93 | + |
| 94 | + return dist[n][m]; |
| 95 | + |
| 96 | +} |
| 97 | +/* |
| 98 | +Time Complexity - O(n*m) |
| 99 | +We solve each subproblem once, number of subproblems = n*m |
| 100 | +Space Complexity - O(n*m) |
| 101 | +*/ |
| 102 | + |
| 103 | +/* |
| 104 | +DYNAMIC PROGRAMMING - BOTTOM UP APPROACH - SPACE OPTIMIZED |
| 105 | +We need the values from only the previous row, so we maintain a table with just two rows, i.e. of size 2*(n+1). |
| 106 | +*/ |
| 107 | + |
| 108 | +public int editDist(String X, String Y){ |
| 109 | + int n = X.length(); |
| 110 | + int m = Y.length(); |
| 111 | + |
| 112 | + int dist[][] = new int[2][n+1]; |
| 113 | + |
| 114 | + //when second string is empty ==> delete all characters from first string |
| 115 | + for(int i=0;i<=n;i++) dist[0][i] = i; |
| 116 | + |
| 117 | + for(int i=1;i<=m;i++){ |
| 118 | + for(int j=0;j<=n;j++){ |
| 119 | + if(j==0) dist[i%2][j] = i; |
| 120 | + else if(X.charAt(i-1) == Y.charAt(j-1)) dist[i % 2][j] = dist[(i-1) % 2][j-1]; |
| 121 | + else{ |
| 122 | + |
| 123 | + dist[i%2][j] = 1 + Math.min( |
| 124 | + dist[(i-1) % 2][j-1], //replacement |
| 125 | + Math.min( dist[(i-1) % 2][j], //insertion |
| 126 | + dist[i % 2][j-1]) //deletion |
| 127 | + ); |
| 128 | + } |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + return dist[m%2][n]; |
| 133 | +} |
| 134 | + |
| 135 | +/* |
| 136 | +Time Complexity - O(n*m) |
| 137 | +Space Complexity - O(2*n) |
| 138 | +where n = length of string1 |
| 139 | +*/ |
| 140 | + |
| 141 | + |
0 commit comments