Support FLOAT32 type in Spanner Migration Tool.
This changes the existing behavior of widening a source float32 type to float64 in Spanner: the tool now maps the source float32 type to Spanner's new FLOAT32 type instead.
We still allow float32 to be manually mapped to FLOAT64 if the customer intends to do that.
arawind committed Jul 19, 2024
1 parent c68b36b commit f8f14f9
Showing 41 changed files with 460 additions and 218 deletions.
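In effect, the default conversion for 32-bit source floats changes as sketched below. This is a minimal illustration rather than the tool's actual code; it assumes the `ddl` package identifiers used throughout this diff, and the authoritative change is in the `sources/mysql/toddl.go` hunk further down.

```go
// Sketch: a 32-bit source float (MySQL FLOAT, PostgreSQL REAL, SQL Server REAL,
// Oracle BINARY_FLOAT) now defaults to Spanner's FLOAT32; widening to FLOAT64
// remains available as a manual override.
func floatTarget(override string) ddl.Type {
	if override == ddl.Float64 {
		return ddl.Type{Name: ddl.Float64} // manual widening, reported as a Widened issue
	}
	return ddl.Type{Name: ddl.Float32} // new default: no widening
}
```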
2 changes: 1 addition & 1 deletion docs/data-types/mysql.md
@@ -38,7 +38,7 @@ The Spanner migration tool maps MySQL types to Spanner types as follows:
| `DECIMAL`, `NUMERIC` | `NUMERIC` | potential changes of precision |
| `DOUBLE` | `FLOAT64` | |
| `ENUM` | `STRING(MAX)` | |
-| `FLOAT` | `FLOAT64` | changes in storage size |
+| `FLOAT` | `FLOAT32` | |
| `INTEGER`, `MEDIUMINT`,<br/>`TINYINT`, `SMALLINT` | `INT64` | changes in storage size |
| `JSON` | `JSON` | |
| `SET` | `ARRAY<STRING>` | SET only supports string values |
2 changes: 1 addition & 1 deletion docs/data-types/oracle.md
@@ -32,7 +32,7 @@ In Oracle DB, USER is the account name, SCHEMA is the set of objects owned by th
| ---------------------- | ------------ |
| NUMBER (* , 0) | INT64 |
| FLOAT | FLOAT64 |
-| BINARY_FLOAT | FLOAT64 |
+| BINARY_FLOAT | FLOAT32 |
| BINARY_DOUBLE | FLOAT64 |
| NUMBER (* , >0) | NUMERIC |
| CHAR | STRING(1) |
4 changes: 2 additions & 2 deletions docs/data-types/postgres.md
@@ -36,7 +36,7 @@ The Spanner migration tool maps PostgreSQL types to Spanner types as follows:
| `DOUBLE PRECISION` | `FLOAT64` | |
| `INTEGER` | `INT64` | changes in storage size |
| `NUMERIC` | `NUMERIC` | potential changes of precision |
-| `REAL` | `FLOAT64` | changes in storage size |
+| `REAL` | `FLOAT32` | |
| `SERIAL` | `INT64` | dropped autoincrement functionality, changes in storage size |
| `SMALLINT` | `INT64` | changes in storage size |
| `TEXT` | `STRING(MAX)` | |
@@ -109,7 +109,7 @@ arrays.
## Arrays

Spanner does not support multi-dimensional arrays. So while `TEXT[4]` maps to
-`ARRAY<STRING(MAX)>` and `REAL ARRAY` maps to `ARRAY<FLOAT64>`, `TEXT[][]` maps
+`ARRAY<STRING(MAX)>` and `REAL ARRAY` maps to `ARRAY<FLOAT32>`, `TEXT[][]` maps
to `STRING(MAX)`.
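For instance, in terms of the `ddl.Type` struct used elsewhere in this commit, the resulting types look roughly like this (a sketch, not the tool's exact construction; assumes the `ddl` package is imported):

```go
var (
	// One-dimensional arrays keep their element type: REAL ARRAY → ARRAY<FLOAT32>.
	realArr = ddl.Type{Name: ddl.Float32, IsArray: true}
	// Multi-dimensional arrays are flattened to a single string: TEXT[][] → STRING(MAX).
	text2d = ddl.Type{Name: ddl.String, Len: ddl.MaxLength}
)
```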

Also note that PostgreSQL supports array limits, but the PostgreSQL
14 changes: 7 additions & 7 deletions docs/data-types/sqlserver.md
@@ -31,7 +31,7 @@ Spanner migration tool makes some assumptions while performing data type convers
| ROWVERSION | INT64 |
| BIT | BOOL |
| FLOAT | FLOAT64 |
-| REAL | FLOAT64 |
+| REAL | FLOAT32 |
| NUMERIC | NUMERIC |
| DECIMAL | NUMERIC |
| MONEY | NUMERIC |
@@ -60,13 +60,13 @@ Spanner migration tool makes some assumptions while performing data type convers

## Spatial datatypes

SQL Server supports `SPATIAL GEOGRAPHY` and `SPATIAL GEOMETRY` datatypes; however, Spanner
does not support spatial data types.
These datatypes are currently mapped to the standard `STRING` Spanner datatype.

## TIMESTAMP

The `TIMESTAMP` datatype (deprecated in the newer versions of SQL Server)
was used for Row versioning. Hence, it is mapped to INT64 to keep it consistent
with the `ROWVERSION` data type.

@@ -83,10 +83,10 @@ primary keys for all tables, but does not enforce this. When converting a table
without a primary key:

- Spanner migration tool will check for `UNIQUE` constraints on the table. If found, it
will automatically pick any one of the unique constraints and convert it to a
primary key.
- If no `UNIQUE` constraints are present, Spanner migration tool will create a new primary
key column of type INT64. By default, the name of the new column is `synth_id`.
- If there is already a column with that name, then a variation is used to avoid collisions, as sketched below.
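One way to picture that collision handling (a hypothetical helper, not the tool's actual implementation):

```go
package main

import "fmt"

// synthKeyName varies the synthetic key name until it no longer collides
// with an existing column. Hypothetical sketch only; the real tool may use
// a different variation scheme.
func synthKeyName(existing map[string]bool) string {
	name := "synth_id"
	for i := 0; existing[name]; i++ {
		name = fmt.Sprintf("synth_id%d", i)
	}
	return name
}

func main() {
	cols := map[string]bool{"id": true, "synth_id": true}
	fmt.Println(synthKeyName(cols)) // prints "synth_id0"
}
```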

## NOT NULL Constraints
@@ -118,6 +118,6 @@ maps `UNIQUE` constraint into `UNIQUE` secondary index.
## Other SQL Server features

SQL Server has many other features we haven't discussed, including functions,
sequences, procedures, triggers and views, which are currently not supported in Spanner.
The tool does not support these and the relevant schema info is ignored during schema
conversion.
1 change: 1 addition & 0 deletions sources/common/utils.go
@@ -232,6 +232,7 @@ func ToPGDialectType(standardType ddl.Type) ddl.Type {
var DATATYPE_TO_STORAGE_SIZE = map[string]int{
ddl.Bool: 1,
ddl.Date: 4,
+ddl.Float32: 4,
ddl.Float64: 8,
ddl.Int64: 8,
ddl.JSON: ddl.StringMaxLength,
28 changes: 28 additions & 0 deletions sources/csv/data.go
@@ -379,6 +379,24 @@ func convArray(spannerType ddl.Type, val string) (interface{}, error) {
r = append(r, spanner.NullDate{Date: date, Valid: true})
}
return r, nil
+case ddl.Float32:
+var r []spanner.NullFloat32
+for _, s := range a {
+if s == "NULL" {
+r = append(r, spanner.NullFloat32{Valid: false})
+continue
+}
+s, err := processQuote(s)
+if err != nil {
+return []spanner.NullFloat32{}, err
+}
+f, err := convFloat32(s)
+if err != nil {
+return []spanner.NullFloat32{}, err
+}
+r = append(r, spanner.NullFloat32{Float32: f, Valid: true})
+}
+return r, nil
case ddl.Float64:
var r []spanner.NullFloat64
for _, s := range a {
@@ -477,6 +495,8 @@ func convScalar(conv *internal.Conv, spannerType ddl.Type, val string) (interfac
return convBytes(val)
case ddl.Date:
return convDate(val)
+case ddl.Float32:
+return convFloat32(val)
case ddl.Float64:
return convFloat64(val)
case ddl.Int64:
@@ -519,6 +539,14 @@ func convDate(val string) (civil.Date, error) {
return d, err
}

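+// Parsing with bitSize 32 makes values outside the float32 range fail here
+// with a range error instead of overflowing silently when narrowed to float32.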
+func convFloat32(val string) (float32, error) {
+f, err := strconv.ParseFloat(val, 32)
+if err != nil {
+return float32(f), fmt.Errorf("can't convert to float32: %w", err)
+}
+return float32(f), err
+}

func convFloat64(val string) (float64, error) {
f, err := strconv.ParseFloat(val, 64)
if err != nil {
4 changes: 3 additions & 1 deletion sources/csv/data_test.go
@@ -201,6 +201,7 @@ func TestConvertData(t *testing.T) {
{"bool", ddl.Type{Name: ddl.Bool}, "true", true},
{"bytes", ddl.Type{Name: ddl.Bytes, Len: ddl.MaxLength}, string([]byte{137, 80}), []byte{0x89, 0x50}},
{"date", ddl.Type{Name: ddl.Date}, "2019-10-29", getDate("2019-10-29")},
+{"float32", ddl.Type{Name: ddl.Float32}, "3.14", float32(3.14)},
{"float64", ddl.Type{Name: ddl.Float64}, "42.6", float64(42.6)},
{"int64", ddl.Type{Name: ddl.Int64}, "42", int64(42)},
{"numeric", ddl.Type{Name: ddl.Numeric}, "42.6", *big.NewRat(426, 10)},
@@ -209,7 +210,8 @@
{"json", ddl.Type{Name: ddl.JSON}, "{\"key1\": \"value1\"}", "{\"key1\": \"value1\"}"},
{"int_array", ddl.Type{Name: ddl.Int64, IsArray: true}, "{1,2,NULL}", []spanner.NullInt64{{Int64: int64(1), Valid: true}, {Int64: int64(2), Valid: true}, {Valid: false}}},
{"string_array", ddl.Type{Name: ddl.String, IsArray: true}, "[ab,cd]", []spanner.NullString{{StringVal: "ab", Valid: true}, {StringVal: "cd", Valid: true}}},
-{"float_array", ddl.Type{Name: ddl.Float64, IsArray: true}, "{1.3,2.5}", []spanner.NullFloat64{{Float64: float64(1.3), Valid: true}, {Float64: float64(2.5), Valid: true}}},
+{"float32_array", ddl.Type{Name: ddl.Float32, IsArray: true}, "{1.3,2.5}", []spanner.NullFloat32{{Float32: float32(1.3), Valid: true}, {Float32: float32(2.5), Valid: true}}},
+{"float64_array", ddl.Type{Name: ddl.Float64, IsArray: true}, "{1.3,2.5}", []spanner.NullFloat64{{Float64: float64(1.3), Valid: true}, {Float64: float64(2.5), Valid: true}}},
{"numeric_array", ddl.Type{Name: ddl.Numeric, IsArray: true}, "[1.7]", []spanner.NullNumeric{{Numeric: *big.NewRat(17, 10), Valid: true}}},
}
tableName := "testtable"
2 changes: 2 additions & 0 deletions sources/csv/toddl.go
@@ -36,6 +36,8 @@ func ToSpannerType(columnType string) (ddl.Type, error) {
return ddl.Type{}, fmt.Errorf("%v is not a valid Spanner column type", columnType)
case ty == "DATE":
return ddl.Type{Name: ddl.Date}, nil
+case ty == "FLOAT32":
+return ddl.Type{Name: ddl.Float32}, nil
case ty == "FLOAT64":
return ddl.Type{Name: ddl.Float64}, nil
case ty == "INT64":
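A quick usage sketch of the exported CSV mapping above (the `csv` package qualifier is assumed):

```go
ty, err := csv.ToSpannerType("FLOAT32")
// expected: ty == ddl.Type{Name: ddl.Float32}, err == nil
```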
3 changes: 2 additions & 1 deletion sources/csv/toddl_test.go
@@ -34,7 +34,8 @@ func TestToSpannerType(t *testing.T) {
{"bool", "BOOL", ddl.Type{Name: ddl.Bool}},
{"bytes", "BYTES", ddl.Type{Name: ddl.Bytes, Len: ddl.MaxLength}},
{"date", "DATE", ddl.Type{Name: ddl.Date}},
-{"float", "FLOAT64", ddl.Type{Name: ddl.Float64}},
+{"float32", "FLOAT32", ddl.Type{Name: ddl.Float32}},
+{"float64", "FLOAT64", ddl.Type{Name: ddl.Float64}},
{"int", "INT64", ddl.Type{Name: ddl.Int64}},
{"numeric", "NUMERIC", ddl.Type{Name: ddl.Numeric}},
{"string", "STRING", ddl.Type{Name: ddl.String, Len: ddl.MaxLength}},
10 changes: 10 additions & 0 deletions sources/mysql/data.go
@@ -120,6 +120,8 @@ func convScalar(conv *internal.Conv, spannerType ddl.Type, srcTypeName string, T
return convBytes(val)
case ddl.Date:
return convDate(val)
+case ddl.Float32:
+return convFloat32(val)
case ddl.Float64:
return convFloat64(val)
case ddl.Int64:
@@ -184,6 +186,14 @@ func convDate(val string) (civil.Date, error) {
return d, err
}

+func convFloat32(val string) (float32, error) {
+f, err := strconv.ParseFloat(val, 32)
+if err != nil {
+return float32(f), fmt.Errorf("can't convert to float32: %w", err)
+}
+return float32(f), err
+}

func convFloat64(val string) (float64, error) {
f, err := strconv.ParseFloat(val, 64)
if err != nil {
19 changes: 11 additions & 8 deletions sources/mysql/mysqldump_test.go
@@ -46,7 +46,7 @@ func TestProcessMySQLDump_Scalar(t *testing.T) {
{"date", ddl.Type{Name: ddl.Date}},
{"decimal(4,10)", ddl.Type{Name: ddl.Numeric}},
{"double(4,10)", ddl.Type{Name: ddl.Float64}},
-{"float(4,10)", ddl.Type{Name: ddl.Float64}},
+{"float(4,10)", ddl.Type{Name: ddl.Float32}},
{"integer", ddl.Type{Name: ddl.Int64}},
{"mediumint", ddl.Type{Name: ddl.Int64}},
{"int", ddl.Type{Name: ddl.Int64}},
@@ -736,7 +736,7 @@ func TestProcessMySQLDump_MultiCol(t *testing.T) {
INSERT INTO test (id, a, b, c) VALUES (1,'2019-10-29',4.444,5.44444);
`,
expectedData: []spannerData{
-spannerData{table: "test", cols: []string{"id", "a", "b", "c"}, vals: []interface{}{int64(1), getDate("2019-10-29"), float64(4.444), big.NewRat(136111, 25000)}}},
+spannerData{table: "test", cols: []string{"id", "a", "b", "c"}, vals: []interface{}{int64(1), getDate("2019-10-29"), float32(4.444), big.NewRat(136111, 25000)}}},
},
{
name: "Data conversion: smallint, mediumint, bigint, double",
@@ -804,8 +804,8 @@ func TestProcessMySQLDump_DataError(t *testing.T) {
{
// Test bad data for each scalar type (except text, which accepts all values) and an array type.
name: "Data conversion errors",
-input: "CREATE TABLE test (a int, b float, c bool, d date, e blob, f set('42','6'), g bit);\n" +
-`INSERT INTO test (a, b, c, d, e, f, g) VALUES (7,42.1,1,'2019-10-29',_binary '` + string([]byte{137, 80}) + `','42,6', 0);` + // Baseline (good)
+input: "CREATE TABLE test (a int, b double, c bool, d date, e blob, f set('42','6'), g bit, h float);\n" +
+`INSERT INTO test (a, b, c, d, e, f, g, h) VALUES (7,42.1,1,'2019-10-29',_binary '` + string([]byte{137, 80}) + `','42,6', 0, 3.14);` + // Baseline (good)
"INSERT INTO test (a, b, c, d, e, f, g) VALUES (7,NULL,NULL,NULL,NULL,NULL, NULL);\n" + // Good
"INSERT INTO test (a, b, c, d, e, f) VALUES (7.1,NULL,NULL,NULL,NULL,NULL);\n" + // Error
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,42.1,NULL,NULL,NULL,NULL);\n" + // Good
@@ -816,13 +816,15 @@
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,'2019-10-42',NULL,NULL);\n" + // Error
`INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,_binary '` + string([]byte{137, 80}) + `',NULL);` + // Good
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,NULL,'42,6');\n" + // Good
-"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,NULL,42,6);\n", // Error
+"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,NULL,42,6);\n" + // Error
+"INSERT INTO test (a, b, c, d, e, f, h) VALUES (NULL,NULL,NULL,NULL,NULL,NULL, 3.14);\n" + // Good
+"INSERT INTO test (a, b, c, d, e, f, h) VALUES (NULL,NULL,NULL,NULL,NULL,NULL, '3-14');\n", // Error
expectedData: []spannerData{
spannerData{
-table: "test", cols: []string{"a", "b", "c", "d", "e", "f", "g", "synth_id"},
+table: "test", cols: []string{"a", "b", "c", "d", "e", "f", "g", "h", "synth_id"},
vals: []interface{}{int64(7), float64(42.1), true,
getDate("2019-10-29"), []byte{0x89, 0x50},
-"42,6", false,
+"42,6", false, float32(3.14),
fmt.Sprintf("%d", bitReverse(0))}},
spannerData{table: "test", cols: []string{"a", "synth_id"}, vals: []interface{}{int64(7), fmt.Sprintf("%d", bitReverse(1))}},
spannerData{table: "test", cols: []string{"b", "synth_id"}, vals: []interface{}{float64(42.1), fmt.Sprintf("%d", bitReverse(2))}},
Expand All @@ -831,13 +833,14 @@ func TestProcessMySQLDump_DataError(t *testing.T) {
spannerData{table: "test", cols: []string{"e", "synth_id"}, vals: []interface{}{[]byte{0x89, 0x50}, fmt.Sprintf("%d", bitReverse(5))}},
spannerData{table: "test", cols: []string{"f", "synth_id"},
vals: []interface{}{"42,6", fmt.Sprintf("%d", bitReverse(6))}},
+spannerData{table: "test", cols: []string{"h", "synth_id"}, vals: []interface{}{float32(3.14), fmt.Sprintf("%d", bitReverse(7))}},
},
},
}
for _, tc := range dataErrorTests {
conv, rows := runProcessMySQLDump(tc.input)
assert.Equal(t, tc.expectedData, rows, tc.name+": Data rows did not match")
-assert.Equal(t, conv.BadRows(), int64(5), tc.name+": Error count did not match")
+assert.Equal(t, conv.BadRows(), int64(6), tc.name+": Error count did not match")
}
}

5 changes: 3 additions & 2 deletions sources/mysql/report_test.go
@@ -34,8 +34,8 @@ import (
func TestReport(t *testing.T) {
s := `
CREATE TABLE bad_schema (
a float,
b integer NOT NULL);
a integer,
b integer NOT NULL);
CREATE TABLE default_value (
a text,
b bigint DEFAULT 42,
@@ -44,6 +44,7 @@ func TestReport(t *testing.T) {
CREATE TABLE excellent_schema (
a text,
b bigint,
+c float,
PRIMARY KEY (a)
);
CREATE TABLE foreign_key (
4 changes: 3 additions & 1 deletion sources/mysql/toddl.go
@@ -112,8 +112,10 @@ func toSpannerTypeInternal(srcType schema.Type, spType string) (ddl.Type, []inte
switch spType {
case ddl.String:
return ddl.Type{Name: ddl.String, Len: ddl.MaxLength}, []internal.SchemaIssue{internal.Widened}
-default:
+case ddl.Float64:
return ddl.Type{Name: ddl.Float64}, []internal.SchemaIssue{internal.Widened}
+default:
+return ddl.Type{Name: ddl.Float32}, nil
}
case "numeric", "decimal":
switch spType {
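Given the `toSpannerTypeInternal` signature in the hunk header above, the new behavior is roughly as follows (an illustrative sketch; the exact `schema.Type` fields are assumptions):

```go
// Default: a MySQL float column now converts to FLOAT32 with no schema issues.
ty, issues := toSpannerTypeInternal(schema.Type{Name: "float"}, "")
// ty == ddl.Type{Name: ddl.Float32}, issues == nil

// Manual override: widening to FLOAT64 is still allowed and flagged as Widened.
ty, issues = toSpannerTypeInternal(schema.Type{Name: "float"}, ddl.Float64)
// ty == ddl.Type{Name: ddl.Float64}, issues == []internal.SchemaIssue{internal.Widened}
```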
