Skip to content

Commit

Permalink
perf(idntranslator.go): deprecated API IDN Conversion and integrated …
Browse files Browse the repository at this point in the history
…GOLang IDN library
  • Loading branch information
AsifNawaz-cnic committed May 13, 2024
1 parent e2f7df8 commit a9e7cfb
Show file tree
Hide file tree
Showing 11 changed files with 1,231 additions and 1,770 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ coverage.html
node_modules

# go binary
go-sdk
rtldev-middleware-go-sdk
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ linters-settings:
min-complexity: 15
maligned:
# print struct with more effective memory layout or not, false by default
suggest-new: true
suggest-new: true
28 changes: 8 additions & 20 deletions apiclient/apiclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"strings"
"time"

IDN "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/idntranslator"
LG "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/logger"
R "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/response"
RTM "github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3/responsetemplatemanager"
Expand All @@ -30,10 +31,10 @@ import (
const ISPAPI_CONNECTION_URL_PROXY = "http://127.0.0.1/api/call.cgi" //nolint

// ISPAPI_CONNECTION_URL_LIVE represents the url used for the default connection setup
const ISPAPI_CONNECTION_URL_LIVE = "https://api.ispapi.net/api/call.cgi" //nolint
const ISPAPI_CONNECTION_URL_LIVE = "https://api.ispapi.net/api/call.cgi" //nolint

// ISPAPI_CONNECTION_URL_OTE represents the url used for the OT&E (demo system) connection setup
const ISPAPI_CONNECTION_URL_OTE = "https://api-ote.ispapi.net/api/call.cgi" //nolint
const ISPAPI_CONNECTION_URL_OTE = "https://api-ote.ispapi.net/api/call.cgi" //nolint

var rtm = RTM.GetInstance()

Expand All @@ -49,7 +50,7 @@ var rtm = RTM.GetInstance()
// A sessionless communication makes sense in case you do not need
// to care about the above and you have just to request some commands.
//
// Possible commands can be found at https://github.com/hexonet/hexonet-api-documentation/tree/master/API
// Possible commands can be found at https://github.com/hexonet/hexonet-api-documentation/tree/master/API
type APIClient struct {
socketTimeout time.Duration
socketURL string
Expand Down Expand Up @@ -522,9 +523,6 @@ func (cl *APIClient) flattenCommand(cmd map[string]interface{}) map[string]strin

// autoIDNConvert method to translate all whitelisted parameter values to punycode, if necessary
func (cl *APIClient) autoIDNConvert(cmd map[string]string) map[string]string {
newcmd := map[string]string{
"COMMAND": "ConvertIDN",
}
// don't convert for convertidn command to avoid endless loop
pattern := regexp.MustCompile(`(?i)^CONVERTIDN$`)
mm := pattern.MatchString(cmd["COMMAND"])
Expand Down Expand Up @@ -553,25 +551,15 @@ func (cl *APIClient) autoIDNConvert(cmd map[string]string) map[string]string {
if mm {
toconvert = append(toconvert, val)
idxs = append(idxs, key)
} else {
newcmd[key] = val
}
}
if len(toconvert) == 0 {
return cmd
}
r := cl.Request(map[string]interface{}{
"COMMAND": "ConvertIDN",
"DOMAIN": toconvert,
})
if !r.IsSuccess() {
return cmd
}
col := r.GetColumn("ACE")
if col != nil {
for idx, pc := range col.GetData() {
cmd[idxs[idx]] = pc
}
r := IDN.Convert(toconvert)

for idx, pc := range r {
cmd[idxs[idx]] = pc.PUNYCODE
}
return cmd
}
7 changes: 4 additions & 3 deletions apiclient/apiclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,13 @@ func TestAutoIDNConvertCommand(t *testing.T) {
cl.UseOTESystem()
r := cl.Request(map[string]interface{}{
"COMMAND": "CheckDomains",
"DOMAiN": []string{"example.com", "dömäin.example", "example.net"},
"DOMAIN": []string{"example.com", "dömäin.example", "example.net"},
})
if !r.IsSuccess() || r.GetCode() != 200 || r.GetDescription() != "Command completed successfully" {
t.Error("TestRequestFlattenCommand: Expected response to succeed." + strconv.Itoa(r.GetCode()) + r.GetDescription())
}
cmd := r.GetCommand()

val1, exists1 := cmd["DOMAIN0"]
val2, exists2 := cmd["DOMAIN1"]
val3, exists3 := cmd["DOMAIN2"]
Expand Down Expand Up @@ -315,9 +316,9 @@ func TestSetProxy(t *testing.T) {
}

func TestSetReferer(t *testing.T) {
cl.SetReferer("https://www.hexonet.net/")
cl.SetReferer("hwww.hexonet.net/")
val, err := cl.GetReferer()
if err != nil || val != "https://www.hexonet.net/" {
if err != nil || val != "hwww.hexonet.net/" {
t.Error("TestSetReferer: referer not matching expected value")
}
cl.SetReferer("")
Expand Down
14 changes: 13 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
module github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3

go 1.20
go 1.22.3

require (
github.com/stretchr/testify v1.9.0
golang.org/x/net v0.25.0
golang.org/x/text v0.15.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
14 changes: 14 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
213 changes: 188 additions & 25 deletions idntranslator/idntranslator.go
Original file line number Diff line number Diff line change
@@ -1,38 +1,201 @@
// https://pkg.go.dev/golang.org/x/net/idna
package idntranslator

// Copyright (c) 2018 Kai Schwarz (HEXONET GmbH). All rights reserved.
//
// Use of this source code is governed by the MIT
// license that can be found in the LICENSE.md file.
import (
"regexp"
"strconv"
"strings"
"unicode/utf16"
"unicode/utf8"

// Package idntranslator provides basic functionality to customize the API response description
package idntranslator
"golang.org/x/net/idna"
"golang.org/x/text/unicode/norm"
)

// Row represents a row in the translation result.
// Convert appends one Row for every domain it processes.
type Row struct {
// IDN is the Unicode form of the domain, as produced by ToUnicode.
IDN string
// PUNYCODE is the ASCII (Punycode) form of the domain, as produced by ToASCII.
PUNYCODE string
}

type IdnTranslatorRow struct {
Idn string
Punycode string
// interfaceToStringSlice normalizes input into a []string: a lone string is
// wrapped in a one-element slice, a []string is handed back as is, and a value
// of any other type (including nil) yields nil.
func interfaceToStringSlice(input interface{}) []string {
	if single, ok := input.(string); ok {
		return []string{single}
	}
	if many, ok := input.([]string); ok {
		return many
	}
	return nil
}

// Convert function for converting a domain to idn + punycode
func Convert(domains []string, options map[string]string) []IdnTranslatorRow {
translated := []IdnTranslatorRow{};
// Convert converts a domain string or a slice of domain strings between Unicode and Punycode formats.
func Convert(domainOrDomains interface{}) []Row {
domains := interfaceToStringSlice(domainOrDomains)

for idx, domain range domains {
translated[idx] = IdnTranslatorRow{
Idn: IdnTranslator.toUnicode(domain, options),
Punycode: IdnTranslator.toASCII(domain, options),
}
var translated []Row

for _, domain := range domains {
idn, punycode := handleConversion(domain)
translated = append(translated, Row{IDN: idn, PUNYCODE: punycode})
}

return translated
}

// handleConversion handles conversion of a keyword between Unicode and Punycode formats.
func handleConversion(keyword string) (string, string) {
if keyword == "" {
return "", ""
}

return translated;
return ToUnicode(keyword), ToASCII(keyword)
}

// ToUnicode returns the Unicode (IDN) form of a domain string.
//
// The input is first run through decodeUnicodeEscapes; the result is handed to
// an idna profile built with MapForLookup, a transitional flag obtained from
// isTransitionalProcessing (any transitionalProcessing argument is forwarded to
// that helper) and StrictDomainName disabled. When the idna conversion reports
// an error, the caller's original string is returned unchanged.
func ToUnicode(asciiString string, transitionalProcessing ...bool) string {
	prepared := decodeUnicodeEscapes(asciiString)

	// The transitional decision is taken on the raw input, not the decoded form.
	// Transitional processing maps ß -> ss.
	useTransitional := isTransitionalProcessing(asciiString, transitionalProcessing...)

	// StrictDomainName(false) selects the more permissive ASCII rules.
	profile := idna.New(
		idna.MapForLookup(),
		idna.Transitional(useTransitional),
		idna.StrictDomainName(false),
	)

	// Convert the prepared string to its Unicode form using the profile above.
	if converted, err := profile.ToUnicode(prepared); err == nil {
		return converted
	}
	// Conversion failed: fall back to the original string.
	return asciiString
}

// ToASCII converts a Unicode string to Punycode format.
func ToASCII(unicodeString string, transitionalProcessing ...bool) string {
// Define the IDNA options
opts := idna.New(
idna.MapForLookup(),
idna.Transitional(isTransitionalProcessing(unicodeString, transitionalProcessing...)), // Map ß -> ss
idna.StrictDomainName(false)) // Set more permissive ASCII rules.

// Convert the Unicode string to Punycode using the specified options
punycode, err := opts.ToASCII(unicodeString)
if err != nil {
// Handle the error appropriately
return unicodeString // Return the original string if conversion fails
}

return punycode
}

// DecodeUnicodeEscapes decodes Unicode escape sequences in a string, normalizes it, and converts it to lowercase.
func decodeUnicodeEscapes(unicodeString string) string {
decoded := decodeUnicodeEscapeSequences(unicodeString)
normalized := normalizeAndLowerCase(decoded)
return normalized
}

func ToUnicode(domain string, options map[string]string) string {
idn := domain
return idn
// transitionalTLDPattern matches a (lowercased) domain whose final label is one
// of the TLDs handled with transitional processing. A single trailing root dot
// is tolerated. The pattern is anchored to the end of the string so that labels
// that merely start with one of these TLDs (".design", ".career") or TLD-like
// labels in the middle of a name ("example.de.com") do not match. It is
// compiled once at package scope instead of on every call.
var transitionalTLDPattern = regexp.MustCompile(
	`\.(` + strings.Join([]string{"be", "ca", "de", "fr", "pm", "re", "swiss", "tf", "wf", "yt"}, "|") + `)\.?$`,
)

// isTransitionalProcessing reports whether transitional processing should be
// used for keyword.
//
// If at least one transitionalProcessing value is supplied, the first one is
// returned as an explicit override. Otherwise the result is true only when the
// lowercased keyword ends in one of the TLDs listed in transitionalTLDPattern.
func isTransitionalProcessing(keyword string, transitionalProcessing ...bool) bool {
	if len(transitionalProcessing) > 0 {
		return transitionalProcessing[0]
	}
	return transitionalTLDPattern.MatchString(strings.ToLower(keyword))
}

func ToASCII(domain string, options map[string]string) string {
ascii := domain
return ascii
// unicodeEscapePattern matches a single \uXXXX escape sequence.
var unicodeEscapePattern = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)

// hexEscapePattern matches a single \xXX escape sequence.
var hexEscapePattern = regexp.MustCompile(`\\x([0-9a-fA-F]{2})`)

// surrogatePairPattern matches a high-surrogate escape (D800-DBFF) immediately
// followed by a low-surrogate escape (DC00-DFFF). Hex digits are accepted in
// either case so that \uD83D\uDE00 is recognized just like \ud83d\ude00.
var surrogatePairPattern = regexp.MustCompile(`\\[ux]([dD][89abAB][0-9a-fA-F]{2})\\[ux]([dD][c-fC-F][0-9a-fA-F]{2})`)

// decodeUnicodeEscapeSequences decodes textual escape sequences in a string.
//
// Three passes are applied in order:
//  1. every \uXXXX escape that is not a UTF-16 surrogate is replaced by its rune;
//  2. every \xXX escape is replaced by the single byte it denotes;
//  3. every escaped UTF-16 surrogate pair is combined into one rune.
//
// Escapes that cannot be decoded (for example a lone surrogate) are left in
// the output exactly as written.
func decodeUnicodeEscapeSequences(unicodeString string) string {
	decoded := unicodeEscapePattern.ReplaceAllStringFunc(unicodeString, func(match string) string {
		// match has the form \uXXXX; skip the two-character prefix.
		code, err := strconv.ParseUint(match[2:], 16, 32)
		if err != nil {
			return match // Keep the original text if parsing fails.
		}
		r := rune(code)
		if utf16.IsSurrogate(r) {
			return match // Surrogates are only meaningful in pairs; handled below.
		}
		return string(r)
	})

	decoded = hexEscapePattern.ReplaceAllStringFunc(decoded, func(match string) string {
		// match has the form \xXX; skip the two-character prefix.
		code, err := strconv.ParseUint(match[2:], 16, 8)
		if err != nil {
			return match // Keep the original text if parsing fails.
		}
		// Emit the raw byte; consecutive \xXX escapes can thus spell out a
		// multi-byte UTF-8 sequence.
		return string([]byte{byte(code)})
	})

	// Surrogate pairs encode code points outside the Basic Multilingual Plane
	// as two consecutive 16-bit units, written as two adjacent escapes.
	return surrogatePairPattern.ReplaceAllStringFunc(decoded, func(match string) string {
		// The match is 12 characters long: bytes 2-5 hold the high surrogate's
		// hex digits and bytes 8-11 hold the low surrogate's.
		high, err := strconv.ParseUint(match[2:6], 16, 32)
		if err != nil {
			return match
		}
		low, err := strconv.ParseUint(match[8:12], 16, 32)
		if err != nil {
			return match
		}
		combined := utf16.DecodeRune(rune(uint16(high)), rune(uint16(low)))
		if combined == utf8.RuneError {
			// Not a valid pair: keep the original escapes.
			return match
		}
		return string(combined)
	})
}


// normalizeAndLowerCase brings input into NFC normalization form, passes the
// result through replaceFullWidthChars (so full-width forms become their
// normal-width counterparts) and finally lowercases it with strings.ToLower.
func normalizeAndLowerCase(input string) string {
	composed := norm.NFC.String(input)
	return strings.ToLower(replaceFullWidthChars(composed))
}

// replaceFullWidthChars returns a copy of str in which every rune in the
// full-width range U+FF01..U+FF5E is shifted down by 0xFEE0 to its
// normal-width counterpart and the ideographic full stop '。' is replaced by
// a regular '.'. All other runes are copied through unchanged.
func replaceFullWidthChars(str string) string {
	const (
		firstFullWidth  = 0xFF01
		lastFullWidth   = 0xFF5E
		fullWidthOffset = 0xfee0
	)
	return strings.Map(func(r rune) rune {
		if r >= firstFullWidth && r <= lastFullWidth {
			// Full-width form: map onto the matching normal-width character.
			return r - fullWidthOffset
		}
		if r == '。' {
			// Full-width dot character: use the regular dot instead.
			return '.'
		}
		return r
	}, str)
}

Loading

0 comments on commit a9e7cfb

Please sign in to comment.