Skip to content

Commit

Permalink
Typecast array datatype to string in all the migrations (#623)
Browse files: browse the repository at this point in the history
* Typecast array datatype to string in all the migrations

* added comment

* fixing integration test

* addressing comment
  • Loading branch information
shreyakhajanchi committed Aug 9, 2023
1 parent 8ae82ae commit 9faa823
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 22 deletions.
2 changes: 1 addition & 1 deletion internal/reports/report_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ var IssueDB = map[internal.SchemaIssue]struct {
internal.RowLimitExceeded: {Brief: "Non key columns exceed the spanner limit of 1600 MB. Please modify the column sizes", severity: errors},
internal.ShardIdColumnAdded: {Brief: "column was added because this is a sharded migration and this column cannot be dropped", severity: note},
internal.ShardIdColumnPrimaryKey: {Brief: "column is not a part of primary key. You may go to the Primary Key tab and add this column as a part of Primary Key", severity: suggestion},
internal.ArrayTypeNotSupported: {Brief: "Array datatype is not supported in minimal downtime migration", severity: warning},
internal.ArrayTypeNotSupported: {Brief: "Array datatype migration is not fully supported. Please validate data after data migration", severity: warning},
}

type severity int
Expand Down
7 changes: 3 additions & 4 deletions sources/mysql/mysqldump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"testing"
"time"

"cloud.google.com/go/spanner"
"github.com/GoogleCloudPlatform/spanner-migration-tool/internal"
"github.com/GoogleCloudPlatform/spanner-migration-tool/sources/common"
"github.com/GoogleCloudPlatform/spanner-migration-tool/spanner/ddl"
Expand Down Expand Up @@ -78,7 +77,7 @@ func TestProcessMySQLDump_SingleCol(t *testing.T) {
ty string
expected ddl.ColumnDef
}{
{"set('a','b','c')", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: "STRING", Len: 9223372036854775807, IsArray: true}, NotNull: false, Comment: ""}},
{"set('a','b','c')", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: "STRING", Len: 9223372036854775807, IsArray: false}, NotNull: false, Comment: ""}},
{"text NOT NULL", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength}, NotNull: true}},
}

Expand Down Expand Up @@ -821,15 +820,15 @@ func TestProcessMySQLDump_DataError(t *testing.T) {
table: "test", cols: []string{"a", "b", "c", "d", "e", "f", "synth_id"},
vals: []interface{}{int64(7), float64(42.1), true,
getDate("2019-10-29"), []byte{0x89, 0x50},
[]spanner.NullString{{StringVal: "42", Valid: true}, {StringVal: "6", Valid: true}},
"42,6",
fmt.Sprintf("%d", bitReverse(0))}},
spannerData{table: "test", cols: []string{"a", "synth_id"}, vals: []interface{}{int64(7), fmt.Sprintf("%d", bitReverse(1))}},
spannerData{table: "test", cols: []string{"b", "synth_id"}, vals: []interface{}{float64(42.1), fmt.Sprintf("%d", bitReverse(2))}},
spannerData{table: "test", cols: []string{"c", "synth_id"}, vals: []interface{}{true, fmt.Sprintf("%d", bitReverse(3))}},
spannerData{table: "test", cols: []string{"d", "synth_id"}, vals: []interface{}{getDate("2019-10-29"), fmt.Sprintf("%d", bitReverse(4))}},
spannerData{table: "test", cols: []string{"e", "synth_id"}, vals: []interface{}{[]byte{0x89, 0x50}, fmt.Sprintf("%d", bitReverse(5))}},
spannerData{table: "test", cols: []string{"f", "synth_id"},
vals: []interface{}{[]spanner.NullString{{StringVal: "42", Valid: true}, {StringVal: "6", Valid: true}}, fmt.Sprintf("%d", bitReverse(6))}},
vals: []interface{}{"42,6", fmt.Sprintf("%d", bitReverse(6))}},
},
},
}
Expand Down
4 changes: 3 additions & 1 deletion sources/mysql/toddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ func (tdi ToDdlImpl) ToSpannerType(conv *internal.Conv, spType string, srcType s
if len(srcType.ArrayBounds) > 1 {
ty = ddl.Type{Name: ddl.String, Len: ddl.MaxLength}
issues = append(issues, internal.MultiDimensionalArray)
} else if len(srcType.ArrayBounds) == 1 {
ty = ddl.Type{Name: ddl.String, Len: ddl.MaxLength}
issues = append(issues, internal.ArrayTypeNotSupported)
}
ty.IsArray = len(srcType.ArrayBounds) == 1
if conv.SpDialect == constants.DIALECT_POSTGRESQL {
ty = common.ToPGDialectType(ty)
}
Expand Down
4 changes: 2 additions & 2 deletions sources/postgres/infoschema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ func TestProcessSchema(t *testing.T) {
ColIds: []string{"id", "aint", "atext", "b", "bs", "by", "c", "c_8", "d", "f8", "f4", "i8", "i4", "i2", "num", "s", "ts", "tz", "txt", "vc", "vc6"},
ColDefs: map[string]ddl.ColumnDef{
"id": ddl.ColumnDef{Name: "id", T: ddl.Type{Name: ddl.Int64}, NotNull: true},
"aint": ddl.ColumnDef{Name: "aint", T: ddl.Type{Name: ddl.Int64, IsArray: true}},
"atext": ddl.ColumnDef{Name: "atext", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: true}},
"aint": ddl.ColumnDef{Name: "aint", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: false}},
"atext": ddl.ColumnDef{Name: "atext", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: false}},
"b": ddl.ColumnDef{Name: "b", T: ddl.Type{Name: ddl.Bool}},
"bs": ddl.ColumnDef{Name: "bs", T: ddl.Type{Name: ddl.Int64}, NotNull: true},
"by": ddl.ColumnDef{Name: "by", T: ddl.Type{Name: ddl.Bytes, Len: ddl.MaxLength}},
Expand Down
14 changes: 8 additions & 6 deletions sources/postgres/pgdump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ func TestProcessPgDump(t *testing.T) {
}{
{"text", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength}}},
{"text NOT NULL", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength}, NotNull: true}},
{"text array[4]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: true}}},
{"text[4]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: true}}},
{"text[]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: true}}},
{"text array[4]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: false}}},
{"text[4]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: false}}},
{"text[]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength, IsArray: false}}},
{"text[][]", ddl.ColumnDef{Name: "a", T: ddl.Type{Name: ddl.String, Len: ddl.MaxLength}}}, // Unrecognized array type mapped to string.
}
for _, tc := range singleColTests {
Expand Down Expand Up @@ -699,7 +699,7 @@ COPY test (id, a, b, c, d, e, f, g) FROM stdin;
table: "test", cols: []string{"int8", "float8", "bool", "timestamp", "date", "bytea", "arr", "synth_id"},
vals: []interface{}{int64(7), float64(42.1), true, getTime(t, "2019-10-29T05:30:00Z"),
getDate("2019-10-29"), []byte{0x0, 0x1, 0xbe, 0xef},
[]spanner.NullInt64{{Int64: 42, Valid: true}, {Int64: 6, Valid: true}},
"{42,6}",
fmt.Sprintf("%d", bitReverse(0))}},
spannerData{table: "test", cols: []string{"int8", "synth_id"}, vals: []interface{}{int64(7), fmt.Sprintf("%d", bitReverse(1))}},
spannerData{table: "test", cols: []string{"float8", "synth_id"}, vals: []interface{}{float64(42.1), fmt.Sprintf("%d", bitReverse(2))}},
Expand All @@ -708,14 +708,16 @@ COPY test (id, a, b, c, d, e, f, g) FROM stdin;
spannerData{table: "test", cols: []string{"date", "synth_id"}, vals: []interface{}{getDate("2019-10-29"), fmt.Sprintf("%d", bitReverse(5))}},
spannerData{table: "test", cols: []string{"bytea", "synth_id"}, vals: []interface{}{[]byte{0x0, 0x1, 0xbe, 0xef}, fmt.Sprintf("%d", bitReverse(6))}},
spannerData{table: "test", cols: []string{"arr", "synth_id"},
vals: []interface{}{[]spanner.NullInt64{{Int64: 42, Valid: true}, {Int64: 6, Valid: true}}, fmt.Sprintf("%d", bitReverse(7))}},
vals: []interface{}{"{42,6}", fmt.Sprintf("%d", bitReverse(7))}},
spannerData{table: "test", cols: []string{"arr", "synth_id"},
vals: []interface{}{"{42, 6}", fmt.Sprintf("%d", bitReverse(8))}},
},
},
}
for _, tc := range dataErrorTests {
conv, rows := runProcessPgDump(tc.input)
assert.Equal(t, tc.expectedData, rows, tc.name+": Data rows did not match")
assert.Equal(t, conv.BadRows(), int64(7), tc.name+": Error count did not match")
assert.Equal(t, conv.BadRows(), int64(6), tc.name+": Error count did not match")
}
}

Expand Down
6 changes: 5 additions & 1 deletion sources/postgres/toddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ func (tdi ToDdlImpl) ToSpannerType(conv *internal.Conv, spType string, srcType s
if len(srcType.ArrayBounds) > 1 {
ty = ddl.Type{Name: ddl.String, Len: ddl.MaxLength}
issues = append(issues, internal.MultiDimensionalArray)
} else if len(srcType.ArrayBounds) == 1 {
// This check has been added because we don't support Array<primitive type> to string conversions
// and Array datatype is currently not supported in datastream.
ty = ddl.Type{Name: ddl.String, Len: ddl.MaxLength}
issues = append(issues, internal.ArrayTypeNotSupported)
}
ty.IsArray = len(srcType.ArrayBounds) == 1
if conv.SpDialect == constants.DIALECT_POSTGRESQL {
ty = common.ToPGDialectType(ty)
}
Expand Down
2 changes: 1 addition & 1 deletion test_data/postgres_structured_report.json
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@
"warningType":"Warnings",
"warningList":[
"Column 'synth_id' was added because table 'no_pk' didn't have a primary key. Spanner requires a primary key for every table",
"Table 'no_pk': Column a, Array datatype is not supported in minimal downtime migration",
"Table 'no_pk': Column a, Array datatype migration is not fully supported. Please validate data after data migration",
"Table no_pk: Some columns will consume more storage in Spanner e.g. for column 'b', source DB type int4 is mapped to Spanner data type int64"
]
}
Expand Down
4 changes: 2 additions & 2 deletions test_data/postgres_text_report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ Data conversion: POOR (60% of 5000 rows written to Spanner).
Warnings
1) Column 'synth_id' was added because table 'no_pk' didn't have a primary key.
Spanner requires a primary key for every table.
2) Table 'no_pk': Column a, Array datatype is not supported in minimal downtime
migration.
2) Table 'no_pk': Column a, Array datatype migration is not fully supported.
Please validate data after data migration.
3) Table no_pk: Some columns will consume more storage in Spanner e.g. for column
'b', source DB type int4 is mapped to Spanner data type int64.

Expand Down
8 changes: 4 additions & 4 deletions testing/postgres/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,8 @@ func checkCoreTypes(ctx context.Context, t *testing.T, client *spanner.Client) {
}

func checkArrays(ctx context.Context, t *testing.T, client *spanner.Client) {
var ints []int64
var strs []string
var ints string
var strs string
iter := client.Single().Read(ctx, "test3", spanner.Key{1}, []string{"a", "b"})
defer iter.Stop()
for {
Expand All @@ -316,10 +316,10 @@ func checkArrays(ctx context.Context, t *testing.T, client *spanner.Client) {
t.Fatal(err)
}
}
if got, want := ints, []int64{1, 2, 3}; !reflect.DeepEqual(got, want) {
if got, want := ints, "{1,2,3}"; !reflect.DeepEqual(got, want) {
t.Fatalf("integer array is not correct: got %v, want %v", got, want)
}
if got, want := strs, []string{"1", "nice", "foo"}; !reflect.DeepEqual(got, want) {
if got, want := strs, "{1,nice,foo}"; !reflect.DeepEqual(got, want) {
t.Fatalf("string array is not correct: got %v, want %v", got, want)
}
}
Expand Down

0 comments on commit 9faa823

Please sign in to comment.