-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf(idntranslator.go): deprecated API IDN Conversion and integrated GOLang IDN library
- Loading branch information
1 parent
e2f7df8
commit a9e7cfb
Showing
11 changed files
with
1,231 additions
and
1,770 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,4 +21,4 @@ coverage.html | |
node_modules | ||
|
||
# go binary | ||
go-sdk | ||
rtldev-middleware-go-sdk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,15 @@ | ||
module github.com/centralnicgroup-opensource/rtldev-middleware-go-sdk/v3 | ||
|
||
go 1.20 | ||
go 1.22.3 | ||
|
||
require ( | ||
github.com/stretchr/testify v1.9.0 | ||
golang.org/x/net v0.25.0 | ||
golang.org/x/text v0.15.0 | ||
) | ||
|
||
require ( | ||
github.com/davecgh/go-spew v1.1.1 // indirect | ||
github.com/pmezard/go-difflib v1.0.0 // indirect | ||
gopkg.in/yaml.v3 v3.0.1 // indirect | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= | ||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= | ||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= | ||
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= | ||
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= | ||
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= | ||
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= | ||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | ||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= | ||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,201 @@ | ||
// https://pkg.go.dev/golang.org/x/net/idna | ||
package idntranslator | ||
|
||
// Copyright (c) 2018 Kai Schwarz (HEXONET GmbH). All rights reserved. | ||
// | ||
// Use of this source code is governed by the MIT | ||
// license that can be found in the LICENSE.md file. | ||
import ( | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
"unicode/utf16" | ||
"unicode/utf8" | ||
|
||
// Package idntranslator provides basic functionality to customize the API response description | ||
package idntranslator | ||
"golang.org/x/net/idna" | ||
"golang.org/x/text/unicode/norm" | ||
) | ||
|
||
// Row is a single entry of a translation result. It carries the same domain
// name twice: IDN holds the Unicode spelling and PUNYCODE holds the
// ASCII-compatible (Punycode) spelling.
type Row struct {
	IDN, PUNYCODE string
}
|
||
type IdnTranslatorRow struct { | ||
Idn string | ||
Punycode string | ||
// interfaceToStringSlice normalises its argument to a []string.
//
// A string becomes a one-element slice, a []string is handed back as-is
// (not copied), and a value of any other dynamic type (including a nil
// interface) yields nil.
func interfaceToStringSlice(input interface{}) []string {
	if single, ok := input.(string); ok {
		return []string{single}
	}
	if many, ok := input.([]string); ok {
		return many
	}
	return nil
}
|
||
// Convert function for converting a domain to idn + punycode | ||
func Convert(domains []string, options map[string]string) []IdnTranslatorRow { | ||
translated := []IdnTranslatorRow{}; | ||
// Convert converts a domain string or a slice of domain strings between Unicode and Punycode formats. | ||
func Convert(domainOrDomains interface{}) []Row { | ||
domains := interfaceToStringSlice(domainOrDomains) | ||
|
||
for idx, domain range domains { | ||
translated[idx] = IdnTranslatorRow{ | ||
Idn: IdnTranslator.toUnicode(domain, options), | ||
Punycode: IdnTranslator.toASCII(domain, options), | ||
} | ||
var translated []Row | ||
|
||
for _, domain := range domains { | ||
idn, punycode := handleConversion(domain) | ||
translated = append(translated, Row{IDN: idn, PUNYCODE: punycode}) | ||
} | ||
|
||
return translated | ||
} | ||
|
||
// handleConversion handles conversion of a keyword between Unicode and Punycode formats. | ||
func handleConversion(keyword string) (string, string) { | ||
if keyword == "" { | ||
return "", "" | ||
} | ||
|
||
return translated; | ||
return ToUnicode(keyword), ToASCII(keyword) | ||
} | ||
|
||
// ToUnicode converts a domain string to Unicode format. | ||
func ToUnicode(asciiString string, transitionalProcessing ...bool) string { | ||
decodedKeyword := decodeUnicodeEscapes(asciiString) | ||
// Define the IDNA options | ||
opts := idna.New( | ||
idna.MapForLookup(), | ||
idna.Transitional(isTransitionalProcessing(asciiString, transitionalProcessing...)), // Map ß -> ss | ||
idna.StrictDomainName(false)) // Set more permissive ASCII rules. | ||
|
||
// Convert the Unicode string to Punycode using the specified options | ||
unicode, err := opts.ToUnicode(decodedKeyword) | ||
if err != nil { | ||
// Handle the error appropriately | ||
return asciiString // Return the original string if conversion fails | ||
} | ||
return unicode | ||
} | ||
|
||
// ToASCII converts a Unicode string to Punycode format. | ||
func ToASCII(unicodeString string, transitionalProcessing ...bool) string { | ||
// Define the IDNA options | ||
opts := idna.New( | ||
idna.MapForLookup(), | ||
idna.Transitional(isTransitionalProcessing(unicodeString, transitionalProcessing...)), // Map ß -> ss | ||
idna.StrictDomainName(false)) // Set more permissive ASCII rules. | ||
|
||
// Convert the Unicode string to Punycode using the specified options | ||
punycode, err := opts.ToASCII(unicodeString) | ||
if err != nil { | ||
// Handle the error appropriately | ||
return unicodeString // Return the original string if conversion fails | ||
} | ||
|
||
return punycode | ||
} | ||
|
||
// DecodeUnicodeEscapes decodes Unicode escape sequences in a string, normalizes it, and converts it to lowercase. | ||
func decodeUnicodeEscapes(unicodeString string) string { | ||
decoded := decodeUnicodeEscapeSequences(unicodeString) | ||
normalized := normalizeAndLowerCase(decoded) | ||
return normalized | ||
} | ||
|
||
func ToUnicode(domain string, options map[string]string) string { | ||
idn := domain | ||
return idn | ||
// isTransitionalProcessing reports whether transitional processing should be
// enabled for keyword.
//
// When the caller supplies transitionalProcessing, its first value is returned
// as-is. Otherwise the result is true only if the top-level domain of keyword
// (the last dot-separated label, ignoring one trailing root dot and letter
// case) is one of be, ca, de, fr, pm, re, swiss, tf, wf or yt. A keyword
// without any dot never matches.
//
// Only the final label is inspected, so names such as "example.dev" or
// "shop.decor.com" are not mistaken for ".de" domains.
//
// NOTE(review): the previous doc comment called this list "non-transitional"
// while the code treats a match as "use transitional processing". Confirm
// with the registries' IDN policies which polarity is intended.
func isTransitionalProcessing(keyword string, transitionalProcessing ...bool) bool {
	if len(transitionalProcessing) > 0 {
		return transitionalProcessing[0]
	}

	name := strings.TrimSuffix(strings.ToLower(keyword), ".")
	dot := strings.LastIndexByte(name, '.')
	if dot < 0 {
		// A bare label has no TLD part to inspect.
		return false
	}

	switch name[dot+1:] {
	case "be", "ca", "de", "fr", "pm", "re", "swiss", "tf", "wf", "yt":
		return true
	default:
		return false
	}
}
|
||
func ToASCII(domain string, options map[string]string) string { | ||
ascii := domain | ||
return ascii | ||
// Patterns used by decodeUnicodeEscapeSequences. They are compiled once at
// package initialisation instead of on every call.
var (
	// unicodeEscapeRe matches a single \uXXXX escape.
	unicodeEscapeRe = regexp.MustCompile(`\\u([0-9a-fA-F]{4})`)
	// hexEscapeRe matches a single \xXX byte escape.
	hexEscapeRe = regexp.MustCompile(`\\x([0-9a-fA-F]{2})`)
	// surrogatePairRe matches a high-surrogate escape (D800-DBFF) directly
	// followed by a low-surrogate escape (DC00-DFFF), in either letter case.
	surrogatePairRe = regexp.MustCompile(`\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})`)
)

// decodeUnicodeEscapeSequences expands textual escape sequences found in
// unicodeString and returns the decoded string.
//
// Three passes are applied in order:
//  1. every \uXXXX escape that is not a UTF-16 surrogate is replaced by the
//     code point it names;
//  2. every \xXX escape is replaced by the single byte it names, so a run such
//     as \xc3\xbc yields the UTF-8 encoding of one character;
//  3. every \uXXXX\uXXXX high/low surrogate pair is combined into the one
//     supplementary-plane character it encodes. The hex digits may be upper or
//     lower case.
//
// Escapes that cannot be decoded (for example a lone surrogate) are left in
// the output exactly as written.
func decodeUnicodeEscapeSequences(unicodeString string) string {
	decoded := unicodeEscapeRe.ReplaceAllStringFunc(unicodeString, func(match string) string {
		code, err := strconv.ParseUint(match[2:], 16, 32)
		if err != nil {
			return match // not expected: the pattern only admits hex digits
		}
		r := rune(code)
		if utf16.IsSurrogate(r) {
			return match // half of a pair; handled in the third pass
		}
		return string(r)
	})

	decoded = hexEscapeRe.ReplaceAllStringFunc(decoded, func(match string) string {
		code, err := strconv.ParseUint(match[2:], 16, 8)
		if err != nil {
			return match // not expected: the pattern only admits hex digits
		}
		// Emit the raw byte so that consecutive escapes can form a UTF-8 sequence.
		return string([]byte{byte(code)})
	})

	decoded = surrogatePairRe.ReplaceAllStringFunc(decoded, func(match string) string {
		// match has the fixed layout \uHHHH\uLLLL: high digits at [2:6], low at [8:12].
		high, err := strconv.ParseUint(match[2:6], 16, 32)
		if err != nil {
			return match
		}
		low, err := strconv.ParseUint(match[8:12], 16, 32)
		if err != nil {
			return match
		}
		combined := utf16.DecodeRune(rune(uint16(high)), rune(uint16(low)))
		if combined == utf8.RuneError {
			return match // not a valid pair; keep the original text
		}
		return string(combined)
	})

	return decoded
}
|
||
|
||
// normalizeAndLowerCase normalizes the string using NFC normalization form and converts it to lowercase. | ||
func normalizeAndLowerCase(input string) string { | ||
// Normalize the string using NFC normalization form | ||
normalized := norm.NFC.String(input) | ||
|
||
// replace full width characters with normalized e.g. ABC -> abc | ||
normalized = replaceFullWidthChars(normalized) | ||
|
||
// Convert to lowercase | ||
lowercase := strings.ToLower(normalized) | ||
|
||
return lowercase | ||
} | ||
|
||
// replaceFullWidthChars returns str with every full-width form in the range
// U+FF01..U+FF5E shifted down to its normal-width counterpart and every
// ideographic full stop '。' turned into a regular '.'. All other characters
// are copied through unchanged.
func replaceFullWidthChars(str string) string {
	const (
		fullWidthFirst = 0xFF01
		fullWidthLast  = 0xFF5E
		// Distance between a full-width form and its normal-width counterpart.
		widthShift = 0xfee0
	)

	narrow := func(r rune) rune {
		if r == '。' {
			return '.'
		}
		if r >= fullWidthFirst && r <= fullWidthLast {
			return r - widthShift
		}
		return r
	}

	return strings.Map(narrow, str)
}
|
Oops, something went wrong.