perf(idntranslator.go): deprecated API IDN Conversion and integrated GOLang IDN library
centralnicgroup-opensource · May 13, 2024 · a9e7cfb · a9e7cfb
1 parent e2f7df8
commit a9e7cfb
Show file tree

Hide file tree

Showing 11 changed files with 1,231 additions and 1,770 deletions.
diff --git a/.gitignore b/.gitignore
@@ -21,4 +21,4 @@ coverage.html
 node_modules
 
 # go binary
-go-sdk
+rtldev-middleware-go-sdk
diff --git a/.golangci.yml b/.golangci.yml
@@ -37,4 +37,4 @@ linters-settings:
     min-complexity: 15
   maligned:
     # print struct with more effective memory layout or not, false by default
-    suggest-new: true
+    suggest-new: true
diff --git a/apiclient/apiclient.go b/apiclient/apiclient.go
@@ -20,6 +20,7 @@ import (
 	"strings"
 	"time"
 
+	IDN "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/idntranslator"
 	LG "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/logger"
 	R "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/response"
 	RTM "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/responsetemplatemanager"
@@ -30,10 +31,10 @@ import (
 const ISPAPI_CONNECTION_URL_PROXY = "http://127.0.0.1/api/call.cgi" //nolint
 
 // ISPAPI_CONNECTION_URL_LIVE represents the url used for the default connection setup
-const ISPAPI_CONNECTION_URL_LIVE = "https://api.ispapi.net/api/call.cgi" //nolint
+const ISPAPI_CONNECTION_URL_LIVE = "https://api.ispapi.net/api/call.cgi" //nolint
 
 // ISPAPI_CONNECTION_URL_OTE represents the url used for the OT&E (demo system) connection setup
-const ISPAPI_CONNECTION_URL_OTE = "https://api-ote.ispapi.net/api/call.cgi" //nolint
+const ISPAPI_CONNECTION_URL_OTE = "https://api-ote.ispapi.net/api/call.cgi" //nolint
 
 var rtm = RTM.GetInstance()
 
@@ -49,7 +50,7 @@ var rtm = RTM.GetInstance()
 // A sessionless communication makes sense in case you do not need
 // to care about the above and you have just to request some commands.
 //
-// Possible commands can be found at https://github.com/hexonet/hexonet-api-documentation/tree/master/API
+// Possible commands can be found at https://github.com/hexonet/hexonet-api-documentation/tree/master/API
 type APIClient struct {
 	socketTimeout time.Duration
 	socketURL     string
@@ -522,9 +523,6 @@ func (cl *APIClient) flattenCommand(cmd map[string]interface{}) map[string]strin
 
 // autoIDNConvert method to translate all whitelisted parameter values to punycode, if necessary
 func (cl *APIClient) autoIDNConvert(cmd map[string]string) map[string]string {
-	newcmd := map[string]string{
-		"COMMAND": "ConvertIDN",
-	}
 	// don't convert for convertidn command to avoid endless loop
 	pattern := regexp.MustCompile(`(?i)^CONVERTIDN$`)
 	mm := pattern.MatchString(cmd["COMMAND"])
@@ -553,25 +551,15 @@ func (cl *APIClient) autoIDNConvert(cmd map[string]string) map[string]string {
 		if mm {
 			toconvert = append(toconvert, val)
 			idxs = append(idxs, key)
-		} else {
-			newcmd[key] = val
 		}
 	}
 	if len(toconvert) == 0 {
 		return cmd
 	}
-	r := cl.Request(map[string]interface{}{
-		"COMMAND": "ConvertIDN",
-		"DOMAIN":  toconvert,
-	})
-	if !r.IsSuccess() {
-		return cmd
-	}
-	col := r.GetColumn("ACE")
-	if col != nil {
-		for idx, pc := range col.GetData() {
-			cmd[idxs[idx]] = pc
-		}
+	r := IDN.Convert(toconvert)
+
+	for idx, pc := range r {
+		cmd[idxs[idx]] = pc.PUNYCODE
 	}
 	return cmd
 }
diff --git a/apiclient/apiclient_test.go b/apiclient/apiclient_test.go
@@ -91,12 +91,13 @@ func TestAutoIDNConvertCommand(t *testing.T) {
 	cl.UseOTESystem()
 	r := cl.Request(map[string]interface{}{
 		"COMMAND": "CheckDomains",
-		"DOMAiN":  []string{"example.com", "dömäin.example", "example.net"},
+		"DOMAIN":  []string{"example.com", "dömäin.example", "example.net"},
 	})
 	if !r.IsSuccess() || r.GetCode() != 200 || r.GetDescription() != "Command completed successfully" {
 		t.Error("TestRequestFlattenCommand: Expected response to succeed." + strconv.Itoa(r.GetCode()) + r.GetDescription())
 	}
 	cmd := r.GetCommand()
+
 	val1, exists1 := cmd["DOMAIN0"]
 	val2, exists2 := cmd["DOMAIN1"]
 	val3, exists3 := cmd["DOMAIN2"]
@@ -315,9 +316,9 @@ func TestSetProxy(t *testing.T) {
 }
 
 func TestSetReferer(t *testing.T) {
-	cl.SetReferer("https://www.hexonet.net/")
+	cl.SetReferer("hwww.hexonet.net/")
 	val, err := cl.GetReferer()
-	if err != nil || val != "https://www.hexonet.net/" {
+	if err != nil || val != "hwww.hexonet.net/" {
 		t.Error("TestSetReferer: referer not matching expected value")
 	}
 	cl.SetReferer("")

diff --git a/go.mod b/go.mod
@@ -1,3 +1,15 @@
 module github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3
 
-go 1.20
+go 1.22.3
+
+require (
+	github.com/stretchr/testify v1.9.0
+	golang.org/x/net v0.25.0
+	golang.org/x/text v0.15.0
+)
+
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
diff --git a/go.sum b/go.sum
@@ -0,0 +1,14 @@
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/idntranslator/idntranslator.go b/idntranslator/idntranslator.go
@@ -1,38 +1,201 @@
-// https://pkg.go.dev/golang.org/x/net/idna
+package idntranslator
 
-// Copyright (c) 2018 Kai Schwarz (HEXONET GmbH). All rights reserved.
-//
-// Use of this source code is governed by the MIT
-// license that can be found in the LICENSE.md file.
+import (
+	"regexp"
+	"strconv"
+	"strings"
+	"unicode/utf16"
+	"unicode/utf8"
 
-// Package idntranslator provides basic functionality to customize the API response description
-package idntranslator
+	"golang.org/x/net/idna"
+	"golang.org/x/text/unicode/norm"
+)
+
+// Row represents a row in the translation result: a single input domain in
+// both of the representations produced by Convert.
+type Row struct {
+	IDN      string // value Convert obtains from ToUnicode for the input (empty for empty input)
+	PUNYCODE string // value Convert obtains from ToASCII for the input (empty for empty input)
+}
 
-type IdnTranslatorRow struct {
-	Idn string
-	Punycode string
+// interfaceToStringSlice normalizes a loosely typed argument into a string slice.
+// A string yields a one-element slice, a []string is handed back as-is (it is
+// not copied), and any other dynamic type, including nil, yields nil.
+func interfaceToStringSlice(input interface{}) []string {
+	if single, ok := input.(string); ok {
+		return []string{single}
+	}
+	if many, ok := input.([]string); ok {
+		return many
+	}
+	return nil
 }
 
-// Convert function for converting a domain to idn + punycode
-func Convert(domains []string, options map[string]string) []IdnTranslatorRow {
-	translated := []IdnTranslatorRow{};
+// Convert translates one domain (string) or several domains ([]string) and
+// returns one Row per input, in input order, holding the result of
+// handleConversion for that domain. Inputs of any other type, and empty
+// inputs, produce a nil slice.
+func Convert(domainOrDomains interface{}) []Row {
+	var rows []Row
 
-	for idx, domain range domains {
-		translated[idx] = IdnTranslatorRow{
-			Idn: IdnTranslator.toUnicode(domain, options),
-			Punycode: IdnTranslator.toASCII(domain, options),
-		}
+	for _, name := range interfaceToStringSlice(domainOrDomains) {
+		unicodeForm, asciiForm := handleConversion(name)
+		rows = append(rows, Row{IDN: unicodeForm, PUNYCODE: asciiForm})
+	}
+
+	return rows
+}
+
+// handleConversion returns the ToUnicode and ToASCII results for keyword, in
+// that order. An empty keyword short-circuits to two empty strings without
+// invoking either converter.
+func handleConversion(keyword string) (string, string) {
+	if len(keyword) == 0 {
+		return "", ""
 	}
 
-	return translated;
+	unicodeForm := ToUnicode(keyword)
+	asciiForm := ToASCII(keyword)
+	return unicodeForm, asciiForm
+}
+
+// ToUnicode converts a domain string to its Unicode representation.
+//
+// The input is first run through decodeUnicodeEscapes and then converted with
+// an idna profile configured with MapForLookup, a Transitional setting chosen
+// by isTransitionalProcessing (an explicit transitionalProcessing argument is
+// forwarded to it), and StrictDomainName(false) for more permissive ASCII rules.
+// If the conversion reports an error, the original input is returned unchanged.
+func ToUnicode(asciiString string, transitionalProcessing ...bool) string {
+	// Transitional processing maps e.g. ß -> ss.
+	useTransitional := isTransitionalProcessing(asciiString, transitionalProcessing...)
+	profile := idna.New(
+		idna.MapForLookup(),
+		idna.Transitional(useTransitional),
+		idna.StrictDomainName(false),
+	)
+
+	converted, err := profile.ToUnicode(decodeUnicodeEscapes(asciiString))
+	if err != nil {
+		// Best effort: fall back to the caller's input when conversion fails.
+		return asciiString
+	}
+	return converted
+}
+
+// ToASCII converts a Unicode domain string to its Punycode (ASCII) form.
+//
+// The conversion uses an idna profile configured with MapForLookup, a
+// Transitional setting chosen by isTransitionalProcessing (an explicit
+// transitionalProcessing argument is forwarded to it), and
+// StrictDomainName(false) for more permissive ASCII rules. If the conversion
+// reports an error, the original input is returned unchanged.
+func ToASCII(unicodeString string, transitionalProcessing ...bool) string {
+	// Transitional processing maps e.g. ß -> ss.
+	useTransitional := isTransitionalProcessing(unicodeString, transitionalProcessing...)
+	profile := idna.New(
+		idna.MapForLookup(),
+		idna.Transitional(useTransitional),
+		idna.StrictDomainName(false),
+	)
+
+	encoded, err := profile.ToASCII(unicodeString)
+	if err != nil {
+		// Best effort: fall back to the caller's input when conversion fails.
+		return unicodeString
+	}
+	return encoded
+}
+
+// decodeUnicodeEscapes feeds unicodeString through decodeUnicodeEscapeSequences
+// and then through normalizeAndLowerCase, returning the final result.
+func decodeUnicodeEscapes(unicodeString string) string {
+	return normalizeAndLowerCase(decodeUnicodeEscapeSequences(unicodeString))
 }
 
-func ToUnicode(domain string, options map[string]string) string {
-	idn := domain
-	return idn
+// isTransitionalProcessing reports whether transitional IDNA processing should
+// be used for keyword.
+//
+// When at least one transitionalProcessing value is supplied, the first one is
+// returned as an explicit override. Otherwise the result is true only if the
+// last label of keyword (compared case-insensitively, with one optional
+// trailing dot ignored) is one of be, ca, de, fr, pm, re, swiss, tf, wf or yt.
+// Only the final label counts, so "example.design" or "www.de.example.com" do
+// not qualify, and a bare label without a preceding dot never matches.
+//
+// NOTE(review): callers pass this result to idna.Transitional, which maps e.g.
+// ß -> ss; confirm that these registries are meant to get transitional rather
+// than non-transitional processing.
+func isTransitionalProcessing(keyword string, transitionalProcessing ...bool) bool {
+	if len(transitionalProcessing) > 0 {
+		return transitionalProcessing[0]
+	}
+
+	host := strings.TrimSuffix(strings.ToLower(keyword), ".")
+	dot := strings.LastIndexByte(host, '.')
+	if dot < 0 {
+		// No dot at all: there is no TLD label to inspect.
+		return false
+	}
+	switch host[dot+1:] {
+	case "be", "ca", "de", "fr", "pm", "re", "swiss", "tf", "wf", "yt":
+		return true
+	}
+	return false
 }
 
-func ToASCII(domain string, options map[string]string) string {
-	ascii := domain
-	return ascii
+// Patterns used by decodeUnicodeEscapeSequences, compiled once at package scope.
+var (
+	// unicodeEscapeRe matches a single \uXXXX escape (four hex digits, any case).
+	unicodeEscapeRe = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)
+	// hexEscapeRe matches a single \xNN escape (two hex digits, any case).
+	hexEscapeRe = regexp.MustCompile(`\\x([0-9a-fA-F]{2})`)
+	// surrogatePairRe matches a high surrogate escape (D800-DBFF) immediately
+	// followed by a low surrogate escape (DC00-DFFF), with hex digits in any case.
+	surrogatePairRe = regexp.MustCompile(`\\[ux]([dD][89abAB][0-9a-fA-F]{2})\\[ux]([dD][c-fC-F][0-9a-fA-F]{2})`)
+)
+
+// decodeUnicodeEscapeSequences decodes textual escape sequences in unicodeString
+// in three passes and returns the decoded string:
+//
+//  1. \uXXXX escapes become the corresponding character; escapes that encode a
+//     UTF-16 surrogate half are left untouched for pass 3.
+//  2. \xNN escapes become the single raw byte NN.
+//  3. A high-surrogate escape directly followed by a low-surrogate escape is
+//     combined into the single character outside the Basic Multilingual Plane
+//     that the pair encodes. Hex digits are accepted in upper and lower case.
+//
+// Anything that cannot be parsed or decoded is kept exactly as written.
+func decodeUnicodeEscapeSequences(unicodeString string) string {
+	decoded := unicodeEscapeRe.ReplaceAllStringFunc(unicodeString, func(match string) string {
+		code, err := strconv.ParseUint(match[2:], 16, 32)
+		if err != nil {
+			return match // keep the original text if parsing fails
+		}
+		r := rune(code)
+		if utf16.IsSurrogate(r) {
+			return match // surrogate halves are only meaningful as a pair
+		}
+		return string(r)
+	})
+
+	decoded = hexEscapeRe.ReplaceAllStringFunc(decoded, func(match string) string {
+		code, err := strconv.ParseUint(match[2:], 16, 8)
+		if err != nil {
+			return match // keep the original text if parsing fails
+		}
+		// Emit the raw byte so that consecutive \x escapes can spell out a
+		// multi-byte UTF-8 sequence.
+		return string([]byte{byte(code)})
+	})
+
+	return surrogatePairRe.ReplaceAllStringFunc(decoded, func(match string) string {
+		// match has the fixed layout \uHHHH\uLLLL: hex digits sit at [2:6] and [8:12].
+		high, err := strconv.ParseUint(match[2:6], 16, 16)
+		if err != nil {
+			return match
+		}
+		low, err := strconv.ParseUint(match[8:12], 16, 16)
+		if err != nil {
+			return match
+		}
+		combined := utf16.DecodeRune(rune(high), rune(low))
+		if combined == utf8.RuneError {
+			return match // not a valid pair: keep the escapes as written
+		}
+		return string(combined)
+	})
 }
+
+
+// normalizeAndLowerCase applies, in order: NFC normalization, replacement of
+// full-width characters via replaceFullWidthChars, and lowercasing
+// (e.g. ＡＢＣ -> abc).
+func normalizeAndLowerCase(input string) string {
+	composed := norm.NFC.String(input)
+	return strings.ToLower(replaceFullWidthChars(composed))
+}
+
+// replaceFullWidthChars returns str with every character in the range
+// U+FF01..U+FF5E shifted down by 0xFEE0 to its normal-width counterpart and
+// every '｡' replaced by a regular '.'. All other characters are kept as they
+// are; letter case is not changed.
+func replaceFullWidthChars(str string) string {
+	const fullWidthOffset = 0xfee0
+	return strings.Map(func(r rune) rune {
+		if r >= 0xFF01 && r <= 0xFF5E {
+			// Full-width form: shift into the normal-width range.
+			return r - fullWidthOffset
+		}
+		if r == '｡' {
+			// Ideographic-style dot: use the regular label separator.
+			return '.'
+		}
+		return r
+	}, str)
+}
+