1
+ IF OBJECT_ID (N ' LongestCommonSubsequence' ) IS NOT NULL
2
+ DROP FUNCTION LongestCommonSubsequence
3
+ GO
4
+
5
+ Create FUNCTION LongestCommonSubsequence
6
+ /* *
7
+ summary: >
8
+ The longest common subsequence (LCS) problem is the problem of finding the
9
+ longest subsequence common to all sequences in two sequences. It differs
10
+ from problems of finding common substrings: unlike substrings, subsequences
11
+ are not required to occupy consecutive positions within the original
12
+ sequences. For example, the sequences "1234" and "1224533324" have an LCS
13
+ of "1234":
14
+ Author: Phil Factor
15
+ Revision: 1.0
16
+ date: 05 Dec 2014
17
+ example:
18
+ code: |
19
+ Select dbo.LongestCommonSubsequence ('1234', '1224533324')
20
+ Select dbo.LongestCommonSubsequence ('thisisatest', 'testing123testing')
21
+ Select dbo.LongestCommonSubsequence ( 'XMJYAUZ', 'MZJAWXU')
22
+ Select dbo.LongestCommonSubsequence ( 'beginning-middle-ending',
23
+ 'beginning-diddle-dum-ending')
24
+ returns: >
25
+ the longest common subsequence as a string
26
+ **/
27
+ (
28
+ @firstString Varchar (max ),
29
+ @SecondString Varchar (max )
30
+ )
31
+ RETURNS varchar (max )
32
+ as begin
33
+
34
+ Declare @Array Varchar (MAX )
35
+ Declare @ArrayMax int
36
+ Declare @west char (1 )
37
+ Declare @Lines Varchar (max )
38
+ Declare @ii int , @jj int , @iiMax int , @jjMax int , @index int
39
+
40
+ Select @iiMax= len (@FirstString), -- the length of the first string
41
+ @jjMax= len (@SecondString), -- the length of the second string
42
+ @index= @jjMax+ 1 -- where the first new item (matrix has an extra row & column)
43
+ Select @ArrayMax= (@iiMax)* (@jjMax)-- total length of the array iterated
44
+ Select @Array= replicate (char (0 ), @jjMax+ 1 ) -- add in the first nought-filled row
45
+
46
+ Select
47
+ @Index= @Index+ case when (number - 1 ) % @jjMax = 0 then 2 else 1 end , -- current index
48
+ @west= case when (number - 1 ) % @jjMax = 0 then Char (0 ) else substring (@Array,@index- 1 ,1 ) end ,
49
+ -- as we add an extra column in, this is a dangerous operation otherwise
50
+ @Array= @Array -- we add a character to the string array, Char(0)--char
51
+ + case when (number - 1 ) % @jjMax = 0 then Char (0 ) else ' ' end -- begin new row always zero
52
+ + case -- check first to see if there was a match. If so, take north west number incremented
53
+ when substring (@firstString,((Number - 1 )/ @jjMax)+ 1 ,1 )
54
+ = substring (@SecondString, ((number - 1 )% @jjMax)+ 1 ,1 )
55
+ -- remember to set the appropriate collation for this comparison!
56
+ then Char (Ascii (substring (@Array,@index- @jjmax- 2 ,1 ))+ 1 )-- increment the NW value
57
+ when Ascii (substring (@Array,@index- @jjmax- 1 ,1 ))> ascii (@west) -- compare west to north
58
+ then substring (@Array,@index- @jjmax- 1 ,1 ) -- and take the larger
59
+ else @west-- number to west if larger
60
+ end
61
+ from numbers where number <= @ArrayMax
62
+
63
+ -- Now all we need to do is to backtrack through the matrix to find the best solution
64
+ Declare @commonString Varchar (max ), @X_Y int
65
+ Select @CommonString = ' ' -- this contains the string
66
+ Select @ii= @iimax+ 1 ,@jj= @jjmax+ 1
67
+ while (@ii> 1 and @jj> 1 )
68
+ begin
69
+ Select @X_Y = ((@ii- 1 )* (@jjMax+ 1 ))+ @jj -- the current matrix location
70
+ if (substring (@firststring,@ii- 1 ,1 ) = substring (@Secondstring,@jj- 1 ,1 ))
71
+ BEGIN -- if there is a match, add the character (we'll reverse it later)
72
+ Select @CommonString= @CommonString+ substring (@firststring,@ii- 1 ,1 )
73
+ select @jj= @jj- 1 , @ii= @ii- 1 -- go north-west
74
+ end
75
+ else if ascii (Substring (@Array,@X_Y,1 )) = ascii (Substring (@Array, @X_Y- @jjMax- 1 , 1 ))
76
+ select @ii= @ii- 1
77
+ ELSE if ascii (Substring (@Array,@X_Y,1 )) = ascii (Substring (@Array, @X_Y- 1 , 1 ))
78
+ select @jj= @jj- 1
79
+ else break
80
+ if @@error > 0 break
81
+ end
82
+ return Reverse (@CommonString)
83
+ end
84
+ go
85
+
86
+ Declare @timing datetime Select @Timing= GetDate ()
87
+
88
+ if dbo .LongestCommonSubsequence (' 1234' , ' 1224533324' )<> ' 1234'
89
+ raiserror (' test 1 failed' ,16 ,1 )
90
+ if dbo .LongestCommonSubsequence (' thisisatest' , ' testing123testing' )<> ' tsitest'
91
+ raiserror (' test 2 failed' ,16 ,1 )
92
+ if dbo .LongestCommonSubsequence (' XMJYAUZ' , ' MZJAWXU' )<> ' MJAU'
93
+ raiserror (' test 3 failed' ,16 ,1 )
94
+ if dbo .LongestCommonSubsequence (' yab' , ' xabyrbyab' )<> ' yab'
95
+ raiserror (' test 4 failed' ,16 ,1 )
96
+ if dbo .LongestCommonSubsequence (' beginning-middle-ending' ,' beginning-diddle-dum-ending' )
97
+ <> ' beginning-iddle-ending' raiserror (' test 5 failed' ,16 ,1 )
98
+
99
+ select datediff (millisecond ,@timing,GetDate ()) as milliseconds
0 commit comments