forked from loicalleyne/bodkin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
types.go
210 lines (206 loc) · 6.33 KB
/
types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
package bodkin
import (
"encoding/json"
"fmt"
"slices"
"time"
"github.com/apache/arrow-go/v18/arrow"
)
// goType2Arrow maps a Go value to the Arrow DataType used to represent it and
// records the corresponding arrow.Type ID in f.arrowType.
//
// Parameters:
//   - f:  the field being typed; f.arrowType is always set, and f.err is set
//     when no type can be determined. f.owner supplies the inference options
//     consulted for string values.
//   - gt: the sample value whose dynamic type drives the mapping.
//
// Returns the Arrow DataType for gt. When the type cannot be determined (nil,
// an empty []any, complex numbers, or any other unsupported type) f.arrowType
// is set to arrow.NULL, f.err wraps ErrUndefinedFieldType, and the Binary type
// is returned as a placeholder.
func goType2Arrow(f *fieldPos, gt any) arrow.DataType {
	// set records the type ID on the field and hands back the DataType.
	set := func(id arrow.Type, dt arrow.DataType) arrow.DataType {
		f.arrowType = id
		return dt
	}
	// undefined flags the field as having no determinable type. The sentinel is
	// wrapped with %w so callers can match it with errors.Is; the rendered
	// message is identical to formatting it with %v.
	undefined := func() arrow.DataType {
		f.arrowType = arrow.NULL
		f.err = fmt.Errorf("%w : %v", ErrUndefinedFieldType, f.namePath())
		return arrow.BinaryTypes.Binary
	}

	switch t := gt.(type) {
	case []any:
		// An empty slice carries no element to inspect; indexing t[0] would
		// panic, so treat it like any other undeterminable value.
		if len(t) == 0 {
			return undefined()
		}
		// The slice is typed after its first element only.
		return goType2Arrow(f, t[0])
	case json.Number:
		// Anything that does not parse as an int64 (fractions, exponents,
		// out-of-range integers) is treated as a float64.
		if _, err := t.Int64(); err == nil {
			return set(arrow.INT64, arrow.PrimitiveTypes.Int64)
		}
		return set(arrow.FLOAT64, arrow.PrimitiveTypes.Float64)
	case time.Time:
		return set(arrow.TIMESTAMP, arrow.FixedWidthTypes.Timestamp_us)
	// int is either 32 or 64 bits wide; always stored as 64-bit.
	case int:
		return set(arrow.INT64, arrow.PrimitiveTypes.Int64)
	// the set of all signed 8-bit integers (-128 to 127)
	case int8:
		return set(arrow.INT8, arrow.PrimitiveTypes.Int8)
	// the set of all signed 16-bit integers (-32768 to 32767)
	case int16:
		return set(arrow.INT16, arrow.PrimitiveTypes.Int16)
	// the set of all signed 32-bit integers (-2147483648 to 2147483647)
	case int32:
		return set(arrow.INT32, arrow.PrimitiveTypes.Int32)
	// the set of all signed 64-bit integers (-9223372036854775808 to 9223372036854775807)
	case int64:
		return set(arrow.INT64, arrow.PrimitiveTypes.Int64)
	// uint is either 32 or 64 bits wide; always stored as 64-bit.
	case uint:
		return set(arrow.UINT64, arrow.PrimitiveTypes.Uint64)
	// the set of all unsigned 8-bit integers (0 to 255)
	case uint8:
		return set(arrow.UINT8, arrow.PrimitiveTypes.Uint8)
	// the set of all unsigned 16-bit integers (0 to 65535)
	case uint16:
		return set(arrow.UINT16, arrow.PrimitiveTypes.Uint16)
	// the set of all unsigned 32-bit integers (0 to 4294967295)
	case uint32:
		return set(arrow.UINT32, arrow.PrimitiveTypes.Uint32)
	// the set of all unsigned 64-bit integers (0 to 18446744073709551615)
	case uint64:
		return set(arrow.UINT64, arrow.PrimitiveTypes.Uint64)
	// the set of all IEEE-754 32-bit floating-point numbers
	case float32:
		return set(arrow.FLOAT32, arrow.PrimitiveTypes.Float32)
	// the set of all IEEE-754 64-bit floating-point numbers
	case float64:
		return set(arrow.FLOAT64, arrow.PrimitiveTypes.Float64)
	case bool:
		return set(arrow.BOOL, arrow.FixedWidthTypes.Boolean)
	case string:
		// Temporal inference runs first: timestamp patterns, then date, then
		// time of day. The first match wins.
		if f.owner.inferTimeUnits {
			for _, r := range timestampMatchers {
				if r.MatchString(t) {
					return set(arrow.TIMESTAMP, arrow.FixedWidthTypes.Timestamp_us)
				}
			}
			if dateMatcher.MatchString(t) {
				return set(arrow.DATE32, arrow.FixedWidthTypes.Date32)
			}
			if timeMatcher.MatchString(t) {
				return set(arrow.TIME64, arrow.FixedWidthTypes.Time64ns)
			}
		}
		// Unless quoted values must stay strings, try to read the text as a
		// bool, then an integer, then a float.
		if !f.owner.quotedValuesAreStrings {
			if slices.Contains(boolMatcher, t) {
				return set(arrow.BOOL, arrow.FixedWidthTypes.Boolean)
			}
			if integerMatcher.MatchString(t) {
				return set(arrow.INT64, arrow.PrimitiveTypes.Int64)
			}
			if floatMatcher.MatchString(t) {
				return set(arrow.FLOAT64, arrow.PrimitiveTypes.Float64)
			}
		}
		return set(arrow.STRING, arrow.BinaryTypes.String)
	case []byte:
		return set(arrow.BINARY, arrow.BinaryTypes.Binary)
	// complex numbers with float32 / float64 real and imaginary parts
	case complex64, complex128:
		// TO-DO: complex values are not supported yet.
		return undefined()
	case nil:
		return undefined()
	default:
		// Catch-all for exotic unsupported types - ie. input field is a func
		return undefined()
	}
}
// arrowTypeID2Type converts an arrow.Type ID into a concrete Arrow DataType.
//
// Scalar IDs map to fixed DataType values. For arrow.STRUCT and arrow.LIST the
// result is a struct type assembled from the field of every entry in
// f.children. Any ID not listed yields a nil DataType.
func arrowTypeID2Type(f *fieldPos, t arrow.Type) arrow.DataType {
	switch t {
	// BOOL is a 1 bit, LSB bit-packed ordering
	case arrow.BOOL:
		return arrow.FixedWidthTypes.Boolean

	// Signed integers, 8 to 64 bits wide.
	case arrow.INT8:
		return arrow.PrimitiveTypes.Int8
	case arrow.INT16:
		return arrow.PrimitiveTypes.Int16
	case arrow.INT32:
		return arrow.PrimitiveTypes.Int32
	case arrow.INT64:
		return arrow.PrimitiveTypes.Int64

	// Unsigned integers, 8 to 64 bits wide.
	case arrow.UINT8:
		return arrow.PrimitiveTypes.Uint8
	case arrow.UINT16:
		return arrow.PrimitiveTypes.Uint16
	case arrow.UINT32:
		return arrow.PrimitiveTypes.Uint32
	case arrow.UINT64:
		return arrow.PrimitiveTypes.Uint64

	// IEEE-754 floating-point numbers, 32 and 64 bits wide.
	case arrow.FLOAT32:
		return arrow.PrimitiveTypes.Float32
	case arrow.FLOAT64:
		return arrow.PrimitiveTypes.Float64

	// TIMESTAMP is an exact timestamp encoded with int64 since UNIX epoch;
	// the microsecond-unit type is used here.
	case arrow.TIMESTAMP:
		return arrow.FixedWidthTypes.Timestamp_us
	// DATE32 is int32 days since the UNIX epoch
	case arrow.DATE32:
		return arrow.FixedWidthTypes.Date32
	// TIME64 is a signed 64-bit integer since midnight; the nanosecond-unit
	// type is used here.
	case arrow.TIME64:
		return arrow.FixedWidthTypes.Time64ns

	// STRING is a UTF8 variable-length string
	case arrow.STRING:
		return arrow.BinaryTypes.String

	// BINARY is a variable-length byte type (no guarantee of UTF8-ness).
	// NULL has no physical storage and is given the Binary type as well.
	case arrow.BINARY, arrow.NULL:
		return arrow.BinaryTypes.Binary

	// NOTE(review): LIST yields the same struct type as STRUCT rather than a
	// list type - presumably a caller wraps it in a list; confirm.
	case arrow.STRUCT, arrow.LIST:
		var childFields []arrow.Field
		for _, child := range f.children {
			childFields = append(childFields, child.field)
		}
		return arrow.StructOf(childFields...)
	}
	// Unrecognised type ID: no DataType.
	return nil
}