@@ -1361,39 +1361,159 @@ diff_match_patch.prototype.diff_toDelta = function(diffs) {
13611361 var text = [ ] ;
13621362 var lastEnd ;
13631363 for ( var x = 0 ; x < diffs . length ; x ++ ) {
1364-
13651364 var thisDiff = diffs [ x ] ;
13661365 var thisTop = thisDiff [ 1 ] [ 0 ] ;
13671366 var thisEnd = thisDiff [ 1 ] [ thisDiff [ 1 ] . length - 1 ] ;
13681367
1368+ if ( 0 === thisDiff [ 1 ] . length ) {
1369+ continue ;
1370+ }
1371+
1372+ // trap a trailing high-surrogate so we can
1373+ // distribute it to the successive edits
13691374 if ( thisEnd && this . isHighSurrogate ( thisEnd ) ) {
1375+ lastEnd = thisEnd ;
13701376 thisDiff [ 1 ] = thisDiff [ 1 ] . slice ( 0 , - 1 ) ;
13711377 }
13721378
13731379 if ( lastEnd && thisTop && this . isHighSurrogate ( lastEnd ) && this . isLowSurrogate ( thisTop ) ) {
13741380 thisDiff [ 1 ] = lastEnd + thisDiff [ 1 ] ;
13751381 }
13761382
1377- lastEnd = thisEnd ;
1378- if ( 0 === thisDiff [ 1 ] . length ) {
1383+ if ( 0 === thisDiff [ 1 ] . length ) {
13791384 continue ;
13801385 }
13811386
1382- switch ( diffs [ x ] [ 0 ] ) {
1387+ switch ( thisDiff [ 0 ] ) {
13831388 case DIFF_INSERT :
1384- text [ x ] = '+' + encodeURI ( diffs [ x ] [ 1 ] ) ;
1389+ text . push ( '+' + encodeURI ( thisDiff [ 1 ] ) ) ;
13851390 break ;
13861391 case DIFF_DELETE :
1387- text [ x ] = '-' + diffs [ x ] [ 1 ] . length ;
1392+ text . push ( '-' + thisDiff [ 1 ] . length ) ;
13881393 break ;
13891394 case DIFF_EQUAL :
1390- text [ x ] = '=' + diffs [ x ] [ 1 ] . length ;
1395+ text . push ( '=' + thisDiff [ 1 ] . length ) ;
13911396 break ;
13921397 }
13931398 }
13941399 return text . join ( '\t' ) . replace ( / % 2 0 / g, ' ' ) ;
13951400} ;
13961401
/**
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * @param {string} c Exactly one character: '0'-'9', 'a'-'f' or 'A'-'F'.
 * @return {number} Integer in the range 0..15.
 * @throws {Error} 'Invalid hex-code' for any other input, including
 *     `undefined` (which is what indexing past the end of a string yields).
 */
diff_match_patch.prototype.digit16 = function(c) {
  // Indices 0-15 hold the digits with lower-case letters; indices 16-21 hold
  // the upper-case letters, which map back onto 10-15.
  var HEX_DIGITS = '0123456789abcdefABCDEF';

  // Require a one-character primitive string so that '' (which indexOf would
  // "find" at 0) and multi-character strings such as '10' are rejected.
  var index = (typeof c === 'string' && c.length === 1) ? HEX_DIGITS.indexOf(c) : -1;
  if (index < 0) {
    throw new Error('Invalid hex-code');
  }

  return index < 16 ? index : index - 6;
};
1423+
/**
 * Decode a URI-encoded string, tolerating encoded surrogate halves.
 *
 * The native decodeURI() is tried first. Only when it throws does a lenient
 * hand-written UTF-8 percent-decoder run. That decoder accepts three-byte
 * sequences that encode a lone UTF-16 surrogate, because not every library
 * and version emits well-formed UTF-8 in toDelta and such deltas should not
 * crash diff_fromDelta.
 *
 * NOTE: the two paths are not perfectly equivalent. Native decodeURI() leaves
 * escapes of reserved characters (e.g. '%3A', '%23') untouched, whereas the
 * fallback decodes every single-byte escape.
 *
 * @example decodeURI('abcd%F0%9F%85%B0') = 'abcd\ud83c\udd70'  (native path)
 * @example decodeURI('abcd%3A %ED%A0%BC') = 'abcd: \ud83c'     (fallback path)
 *
 * @cite @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
 *
 * @param {string} text Input string encoded by encodeURI() or equivalent.
 * @return {string} The decoded string; may contain unpaired surrogates.
 * @throws {URIError} When the fallback meets a malformed multi-byte sequence.
 * @throws {Error} 'Invalid hex-code' (from digit16) when a '%' is not
 *     followed by two hexadecimal digits.
 */
diff_match_patch.prototype.decodeURI = function(text) {
  try {
    return decodeURI(text);
  } catch (e) {
    var i = 0;
    var decoded = '';

    while (i < text.length) {
      if (text[i] !== '%') {
        decoded += text[i++];
        continue;
      }

      // Start of a percent-sequence: the first byte decides its length.
      var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]);
      if ((byte1 & 0x80) === 0) {
        // 0xxxxxxx: single byte (ASCII).
        decoded += String.fromCharCode(byte1);
        i += 3;
        continue;
      }

      if ('%' !== text[i + 3]) {
        throw new URIError('URI malformed');
      }

      var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]);
      if ((byte2 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte2 = byte2 & 0x3F;
      if ((byte1 & 0xE0) === 0xC0) {
        // 110xxxxx 10xxxxxx: two-byte sequence.
        decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2);
        i += 6;
        continue;
      }

      if ('%' !== text[i + 6]) {
        throw new URIError('URI malformed');
      }

      var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]);
      if ((byte3 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte3 = byte3 & 0x3F;
      if ((byte1 & 0xF0) === 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence. Unpaired
        // surrogates are deliberately accepted here.
        decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3);
        i += 9;
        continue;
      }

      if ('%' !== text[i + 9]) {
        throw new URIError('URI malformed');
      }

      var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]);
      if ((byte4 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte4 = byte4 & 0x3F;
      if ((byte1 & 0xF8) === 0xF0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence.
        var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
          // UTF-16 encodes (codePoint - 0x10000) as two 10-bit halves.
          // Masking with 0xFFFF instead would drop the plane bits and
          // mis-decode every code point at or above U+20000.
          var offset = codePoint - 0x10000;
          decoded += String.fromCharCode(0xD800 | (offset >>> 10));
          decoded += String.fromCharCode(0xDC00 | (offset & 0x3FF));
          i += 12;
          continue;
        }
      }

      throw new URIError('URI malformed');
    }

    return decoded;
  }
};
13971517
13981518/**
13991519 * Given the original text1, and an encoded string which describes the
@@ -1416,7 +1536,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) {
14161536 case '+' :
14171537 try {
14181538 diffs [ diffsLength ++ ] =
1419- new diff_match_patch . Diff ( DIFF_INSERT , decodeURI ( param ) ) ;
1539+ new diff_match_patch . Diff ( DIFF_INSERT , this . decodeURI ( param ) ) ;
14201540 } catch ( ex ) {
14211541 // Malformed URI sequence.
14221542 throw new Error ( 'Illegal escape in diff_fromDelta: ' + param ) ;
0 commit comments