-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathu5xml_test.go
102 lines (93 loc) · 3.37 KB
/
u5xml_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package u8xml
import (
"bytes"
"errors"
"io"
"os"
"testing"
"github.com/stretchr/testify/assert"
)
// TestDetectEncoding is a table-driven check of DetectEncoding. Each case
// feeds a raw byte slice and asserts both the reported encoding name and the
// reported BOM length.
func TestDetectEncoding(t *testing.T) {
	tests := []struct {
		name   string // subtest name
		input  []byte // raw bytes handed to DetectEncoding
		expect string // expected encoding name
		bomLen int    // expected BOM length in bytes
	}{
		{"UTF-8 with BOM", []byte{0xEF, 0xBB, 0xBF, 't', 'e', 's', 't'}, "UTF-8", 3},
		{"UTF-16BE with BOM", []byte{0xFE, 0xFF, 0, 't', 0, 'e', 0, 's', 0, 't'}, "UTF-16BE", 2},
		{"UTF-16LE with BOM", []byte{0xFF, 0xFE, 't', 0, 'e', 0, 's', 0, 't', 0}, "UTF-16LE", 2},
		// UTF-32 uses exactly four bytes per code unit; the payload after the
		// BOM spells "test" in the matching byte order.
		{"UTF-32BE with BOM", []byte{0, 0, 0xFE, 0xFF, 0, 0, 0, 't', 0, 0, 0, 'e', 0, 0, 0, 's', 0, 0, 0, 't'}, "UTF-32BE", 4},
		{"UTF-32LE with BOM", []byte{0xFF, 0xFE, 0, 0, 't', 0, 0, 0, 'e', 0, 0, 0, 's', 0, 0, 0, 't', 0, 0, 0}, "UTF-32LE", 4},
		{"UTF-8 without BOM", []byte{'t', 'e', 's', 't'}, "UTF-8", 0},
		{`XML declaration with "ISO-8859-1" encoding attribute`, []byte(`<?xml encoding="ISO-8859-1"?>`), "ISO-8859-1", 0},
		{`XML declaration with 'ISO-8859-1' encoding attribute`, []byte(`<?xml encoding='ISO-8859-1'?>`), "ISO-8859-1", 0},
		// The cases below expect "UTF-8" with no BOM: nothing usable can be
		// read from the declaration (or there is none).
		{"XML declaration without encoding attribute", []byte(`<?xml?>`), "UTF-8", 0},
		{"XML declaration with unclosed encoding attribute", []byte(`<?xml encoding="ISO-8859-1`), "UTF-8", 0},
		{"Too small buffer", []byte(`A`), "UTF-8", 0},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			enc, bomLen := DetectEncoding(tt.input)
			assert.Equal(t, tt.expect, enc)
			assert.Equal(t, tt.bomLen, bomLen)
		})
	}
}
// TestNewReader is a table-driven check of NewReader. For each case it wraps
// the input string in a reader, passes it to NewReader and then either
// compares the returned error message (when err is set) or reads the result
// to the end and compares it with expect.
func TestNewReader(t *testing.T) {
	tests := []struct {
		name   string // subtest name
		input  string // raw input bytes, written as a string literal
		expect string // expected text read back; not checked when err is set
		err    error  // expected NewReader error, nil when none is expected
	}{
		{"UTF-8 with BOM", "\xEF\xBB\xBFtest", "test", nil},
		{"UTF-8 without BOM and without XML declaration", "test", "test", nil},
		{"UTF-16LE", "\xFF\xFE\x74\x00\x65\x00\x73\x00\x74\x00", "test", nil},
		{"Windows-1251", "<?xml encoding=\"Windows-1251\"?>\xC1\xF3\xEB\xE3\xE0\xEA\xEE\xE2", "<?xml encoding=\"Windows-1251\"?>Булгаков", nil},
		{"IANA Unsupported encoding", "<?xml encoding=\"Windows-1\"?>\xC1\xF3\xEB\xE3\xE0\xEA\xEE\xE2", "<?xml encoding=\"Windows-1\"?>Булгаков", errors.New("ianaindex: invalid encoding name")},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			reader, err := NewReader(bytes.NewReader([]byte(tt.input)))
			if tt.err != nil {
				// Compare by message so the check does not depend on the
				// concrete error type matching errors.New's internal struct.
				assert.EqualError(t, err, tt.err.Error())
				return
			}
			// Stop here on failure: reading from the returned reader after an
			// error could dereference a nil value and panic the test run.
			if !assert.NoError(t, err) {
				return
			}
			b, err := io.ReadAll(reader)
			if !assert.NoError(t, err) {
				return
			}
			assert.Equal(t, tt.expect, string(b))
		})
	}
}
// person is the target value the decoder tests unmarshal XML into.
type person struct {
	// Name is filled from the <Name> child element.
	Name string `xml:"Name"`
	// Age is filled from the <Age> child element.
	Age int `xml:"Age"`
}
// TestNewDecoder is a table-driven check of NewDecoder against sample files
// under test-samples/. Each file is read into memory, decoded into a person,
// and the decoded Name is compared with the expected string.
func TestNewDecoder(t *testing.T) {
	tests := []struct {
		name   string // subtest name
		file   string // path of the sample XML file, relative to the package directory
		expect string // expected value of person.Name after decoding
	}{
		{"iso-8859-1", "test-samples/iso-8859-1.xml", "Gabriel García Márquez ISO-8859-1"},
		{"iso-8859-2", "test-samples/iso-8859-2.xml", "Ľudovít Štúr ISO-8859-2"},
		{"windows-1251", "test-samples/windows-1251.xml", "Михаил Афанасьевич Булгаков Windows-1251"},
		{"utf-16", "test-samples/utf-16.xml", "Ľudovít Štúr utf-16"},
		{"utf-16le", "test-samples/utf-16le.xml", "Ľudovít Štúr utf-16le"},
		{"utf-16be", "test-samples/utf-16be.xml", "Ľudovít Štúr utf-16be"},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data, err := os.ReadFile(tt.file)
			// Without the sample there is nothing meaningful to decode; stop
			// so the real cause is the only failure reported.
			if !assert.NoError(t, err) {
				return
			}
			d := NewDecoder(bytes.NewReader(data))
			var p person
			if !assert.NoError(t, d.Decode(&p)) {
				return
			}
			assert.Equal(t, tt.expect, p.Name)
		})
	}
}