Support FLOAT32 type in Spanner Migration Tool. #872

Merged · 1 commit · Jul 25, 2024
2 changes: 1 addition & 1 deletion docs/data-types/mysql.md
@@ -38,7 +38,7 @@ The Spanner migration tool maps MySQL types to Spanner types as follows:
| `DECIMAL`, `NUMERIC` | `NUMERIC` | potential changes of precision |
| `DOUBLE` | `FLOAT64` | |
| `ENUM` | `STRING(MAX)` | |
| `FLOAT` | `FLOAT64` | changes in storage size |
| `FLOAT` | `FLOAT32` | |
| `INTEGER`, `MEDIUMINT`,<br/>`TINYINT`, `SMALLINT` | `INT64` | changes in storage size |
| `JSON` | `JSON` | |
| `SET` | `ARRAY<STRING>` | SET only supports string values |
2 changes: 1 addition & 1 deletion docs/data-types/oracle.md
@@ -32,7 +32,7 @@ In Oracle DB, USER is the account name, SCHEMA is the set of objects owned by th
| ---------------------- | ------------ |
| NUMBER (* , 0) | INT64 |
| FLOAT | FLOAT64 |
| BINARY_FLOAT | FLOAT64 |
| BINARY_FLOAT | FLOAT32 |
| BINARY_DOUBLE | FLOAT64 |
| NUMBER (* , >0) | NUMERIC |
| CHAR | STRING(1) |
4 changes: 2 additions & 2 deletions docs/data-types/postgres.md
@@ -36,7 +36,7 @@ The Spanner migration tool maps PostgreSQL types to Spanner types as follows:
| `DOUBLE PRECISION` | `FLOAT64` | |
| `INTEGER` | `INT64` | changes in storage size |
| `NUMERIC` | `NUMERIC` | potential changes of precision |
| `REAL` | `FLOAT64` | changes in storage size |
| `REAL` | `FLOAT32` | |
| `SERIAL` | `INT64` | dropped autoincrement functionality, changes in storage size |
| `SMALLINT` | `INT64` | changes in storage size |
| `TEXT` | `STRING(MAX)` | |
@@ -109,7 +109,7 @@ arrays.
## Arrays

Spanner does not support multi-dimensional arrays. So while `TEXT[4]` maps to
`ARRAY<STRING(MAX)>` and `REAL ARRAY` maps to `ARRAY<FLOAT64>`, `TEXT[][]` maps
`ARRAY<STRING(MAX)>` and `REAL ARRAY` maps to `ARRAY<FLOAT32>`, `TEXT[][]` maps
to `STRING(MAX)`.

Also note that PostgreSQL supports array limits, but the PostgreSQL
14 changes: 7 additions & 7 deletions docs/data-types/sqlserver.md
@@ -31,7 +31,7 @@ Spanner migration tool makes some assumptions while performing data type convers
| ROWVERSION | INT64 |
| BIT | BOOL |
| FLOAT | FLOAT64 |
| REAL | FLOAT64 |
| REAL | FLOAT32 |
| NUMERIC | NUMERIC |
| DECIMAL | NUMERIC |
| MONEY | NUMERIC |
@@ -60,13 +60,13 @@ Spanner migration tool makes some assumptions while performing data type convers

## Spatial datatypes

SQL Server supports `SPATIAL GEOGRAPHY` and `SPATIAL GEOMETRY` datatypes; however, Spanner
does not support spatial data types.
These datatypes are currently mapped to the standard `STRING` Spanner datatype.

## TIMESTAMP

The `TIMESTAMP` datatype (deprecated in newer versions of SQL Server)
was used for row versioning. Hence, it is mapped to INT64 to keep it consistent
with the `ROWVERSION` data type.

@@ -83,10 +83,10 @@ primary keys for all tables, but does not enforce this. When converting a table
without a primary key:

- Spanner migration tool will check for `UNIQUE` constraints on the table. If found, it
will automatically pick any one of the unique constraints and convert it to a
primary key.
- If no `UNIQUE` constraints are present, Spanner migration tool will create a new primary
key column of type INT64. By default, the name of the new column is `synth_id`.
- If there is already a column with that name, then a variation is used to avoid collisions.
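The fallback logic above can be sketched in Go. This is illustrative only: `pickPrimaryKey` and its collision-avoidance naming scheme are assumptions for the sketch, not the tool's actual identifiers or behavior.

```go
package main

import "fmt"

// pickPrimaryKey sketches the fallback described above: promote an existing
// UNIQUE constraint if one exists, otherwise synthesize an INT64 column named
// synth_id, varying the name if it would collide with an existing column.
func pickPrimaryKey(uniqueConstraints [][]string, existingCols map[string]bool) []string {
	if len(uniqueConstraints) > 0 {
		// Any one of the unique constraints can serve as the primary key.
		return uniqueConstraints[0]
	}
	name := "synth_id"
	for i := 0; existingCols[name]; i++ {
		name = fmt.Sprintf("synth_id%d", i)
	}
	return []string{name}
}

func main() {
	// A table that already has a synth_id column gets a varied name.
	fmt.Println(pickPrimaryKey(nil, map[string]bool{"synth_id": true}))
}
```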

## NOT NULL Constraints
@@ -118,6 +118,6 @@ maps `UNIQUE` constraint into `UNIQUE` secondary index.
## Other SQL Server features

SQL Server has many other features we haven't discussed, including functions,
sequences, procedures, triggers, and views, which are currently not supported in Spanner.
The tool does not support these and the relevant schema info is ignored during schema
conversion.
1 change: 1 addition & 0 deletions sources/common/utils.go
@@ -232,6 +232,7 @@ func ToPGDialectType(standardType ddl.Type) ddl.Type {
var DATATYPE_TO_STORAGE_SIZE = map[string]int{
ddl.Bool: 1,
ddl.Date: 4,
ddl.Float32: 4,
ddl.Float64: 8,
ddl.Int64: 8,
ddl.JSON: ddl.StringMaxLength,
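The map above records a per-type storage size in bytes, with the new `Float32` entry at 4 bytes (half of `Float64`). A minimal sketch of how such a map could be used to estimate a row's width; `rowSize` is a hypothetical helper, not part of the tool:

```go
package main

import "fmt"

// sizes mirrors a subset of DATATYPE_TO_STORAGE_SIZE from the diff above
// (values in bytes, taken from the visible entries).
var sizes = map[string]int{
	"BOOL":    1,
	"DATE":    4,
	"FLOAT32": 4,
	"FLOAT64": 8,
	"INT64":   8,
}

// rowSize sums the storage size of each column's type.
func rowSize(cols []string) int {
	total := 0
	for _, c := range cols {
		total += sizes[c]
	}
	return total
}

func main() {
	fmt.Println(rowSize([]string{"INT64", "FLOAT32", "DATE"})) // 16
}
```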
28 changes: 28 additions & 0 deletions sources/csv/data.go
@@ -379,6 +379,24 @@ func convArray(spannerType ddl.Type, val string) (interface{}, error) {
r = append(r, spanner.NullDate{Date: date, Valid: true})
}
return r, nil
case ddl.Float32:
var r []spanner.NullFloat32
for _, s := range a {
if s == "NULL" {
r = append(r, spanner.NullFloat32{Valid: false})
continue
}
s, err := processQuote(s)
if err != nil {
return []spanner.NullFloat32{}, err
}
f, err := convFloat32(s)
if err != nil {
return []spanner.NullFloat32{}, err
}
r = append(r, spanner.NullFloat32{Float32: f, Valid: true})
}
return r, nil
case ddl.Float64:
var r []spanner.NullFloat64
for _, s := range a {
@@ -477,6 +495,8 @@ func convScalar(conv *internal.Conv, spannerType ddl.Type, val string) (interfac
return convBytes(val)
case ddl.Date:
return convDate(val)
case ddl.Float32:
return convFloat32(val)
case ddl.Float64:
return convFloat64(val)
case ddl.Int64:
@@ -519,6 +539,14 @@ func convDate(val string) (civil.Date, error) {
return d, err
}

func convFloat32(val string) (float32, error) {
f, err := strconv.ParseFloat(val, 32)
if err != nil {
return float32(f), fmt.Errorf("can't convert to float32: %w", err)
}
return float32(f), err
}

func convFloat64(val string) (float64, error) {
f, err := strconv.ParseFloat(val, 64)
if err != nil {
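The `convFloat32` helper added above parses with `strconv.ParseFloat(val, 32)`, which guarantees the returned `float64` is exactly representable as a `float32`, so the final conversion loses no additional precision. A standalone sketch of the same helper with example inputs:

```go
package main

import (
	"fmt"
	"strconv"
)

// convFloat32 mirrors the helper in the diff: bitSize 32 makes ParseFloat
// round to the nearest float32 value, so float32(f) changes nothing further.
func convFloat32(val string) (float32, error) {
	f, err := strconv.ParseFloat(val, 32)
	if err != nil {
		return float32(f), fmt.Errorf("can't convert to float32: %w", err)
	}
	return float32(f), nil
}

func main() {
	f, err := convFloat32("3.14")
	fmt.Println(f, err) // 3.14 <nil>
	_, err = convFloat32("3-14")
	fmt.Println(err != nil) // true
}
```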
4 changes: 3 additions & 1 deletion sources/csv/data_test.go
@@ -201,6 +201,7 @@ func TestConvertData(t *testing.T) {
{"bool", ddl.Type{Name: ddl.Bool}, "true", true},
{"bytes", ddl.Type{Name: ddl.Bytes, Len: ddl.MaxLength}, string([]byte{137, 80}), []byte{0x89, 0x50}},
{"date", ddl.Type{Name: ddl.Date}, "2019-10-29", getDate("2019-10-29")},
{"float32", ddl.Type{Name: ddl.Float32}, "3.14", float32(3.14)},
{"float64", ddl.Type{Name: ddl.Float64}, "42.6", float64(42.6)},
{"int64", ddl.Type{Name: ddl.Int64}, "42", int64(42)},
{"numeric", ddl.Type{Name: ddl.Numeric}, "42.6", *big.NewRat(426, 10)},
@@ -209,7 +210,8 @@
{"json", ddl.Type{Name: ddl.JSON}, "{\"key1\": \"value1\"}", "{\"key1\": \"value1\"}"},
{"int_array", ddl.Type{Name: ddl.Int64, IsArray: true}, "{1,2,NULL}", []spanner.NullInt64{{Int64: int64(1), Valid: true}, {Int64: int64(2), Valid: true}, {Valid: false}}},
{"string_array", ddl.Type{Name: ddl.String, IsArray: true}, "[ab,cd]", []spanner.NullString{{StringVal: "ab", Valid: true}, {StringVal: "cd", Valid: true}}},
{"float_array", ddl.Type{Name: ddl.Float64, IsArray: true}, "{1.3,2.5}", []spanner.NullFloat64{{Float64: float64(1.3), Valid: true}, {Float64: float64(2.5), Valid: true}}},
{"float32_array", ddl.Type{Name: ddl.Float32, IsArray: true}, "{1.3,2.5}", []spanner.NullFloat32{{Float32: float32(1.3), Valid: true}, {Float32: float32(2.5), Valid: true}}},
{"float64_array", ddl.Type{Name: ddl.Float64, IsArray: true}, "{1.3,2.5}", []spanner.NullFloat64{{Float64: float64(1.3), Valid: true}, {Float64: float64(2.5), Valid: true}}},
{"numeric_array", ddl.Type{Name: ddl.Numeric, IsArray: true}, "[1.7]", []spanner.NullNumeric{{Numeric: *big.NewRat(17, 10), Valid: true}}},
}
tableName := "testtable"
2 changes: 2 additions & 0 deletions sources/csv/toddl.go
@@ -36,6 +36,8 @@ func ToSpannerType(columnType string) (ddl.Type, error) {
return ddl.Type{}, fmt.Errorf("%v is not a valid Spanner column type", columnType)
case ty == "DATE":
return ddl.Type{Name: ddl.Date}, nil
case ty == "FLOAT32":
return ddl.Type{Name: ddl.Float32}, nil
case ty == "FLOAT64":
return ddl.Type{Name: ddl.Float64}, nil
case ty == "INT64":
3 changes: 2 additions & 1 deletion sources/csv/toddl_test.go
@@ -34,7 +34,8 @@ func TestToSpannerType(t *testing.T) {
{"bool", "BOOL", ddl.Type{Name: ddl.Bool}},
{"bytes", "BYTES", ddl.Type{Name: ddl.Bytes, Len: ddl.MaxLength}},
{"date", "DATE", ddl.Type{Name: ddl.Date}},
{"float", "FLOAT64", ddl.Type{Name: ddl.Float64}},
{"float32", "FLOAT32", ddl.Type{Name: ddl.Float32}},
{"float64", "FLOAT64", ddl.Type{Name: ddl.Float64}},
{"int", "INT64", ddl.Type{Name: ddl.Int64}},
{"numeric", "NUMERIC", ddl.Type{Name: ddl.Numeric}},
{"string", "STRING", ddl.Type{Name: ddl.String, Len: ddl.MaxLength}},
10 changes: 10 additions & 0 deletions sources/mysql/data.go
@@ -120,6 +120,8 @@ func convScalar(conv *internal.Conv, spannerType ddl.Type, srcTypeName string, T
return convBytes(val)
case ddl.Date:
return convDate(val)
case ddl.Float32:
return convFloat32(val)
case ddl.Float64:
return convFloat64(val)
case ddl.Int64:
@@ -184,6 +186,14 @@ func convDate(val string) (civil.Date, error) {
return d, err
}

func convFloat32(val string) (float32, error) {
f, err := strconv.ParseFloat(val, 32)
if err != nil {
return float32(f), fmt.Errorf("can't convert to float32: %w", err)
}
return float32(f), err
}

func convFloat64(val string) (float64, error) {
f, err := strconv.ParseFloat(val, 64)
if err != nil {
19 changes: 11 additions & 8 deletions sources/mysql/mysqldump_test.go
@@ -46,7 +46,7 @@ func TestProcessMySQLDump_Scalar(t *testing.T) {
{"date", ddl.Type{Name: ddl.Date}},
{"decimal(4,10)", ddl.Type{Name: ddl.Numeric}},
{"double(4,10)", ddl.Type{Name: ddl.Float64}},
{"float(4,10)", ddl.Type{Name: ddl.Float64}},
{"float(4,10)", ddl.Type{Name: ddl.Float32}},
{"integer", ddl.Type{Name: ddl.Int64}},
{"mediumint", ddl.Type{Name: ddl.Int64}},
{"int", ddl.Type{Name: ddl.Int64}},
@@ -736,7 +736,7 @@ func TestProcessMySQLDump_MultiCol(t *testing.T) {
INSERT INTO test (id, a, b, c) VALUES (1,'2019-10-29',4.444,5.44444);
`,
expectedData: []spannerData{
spannerData{table: "test", cols: []string{"id", "a", "b", "c"}, vals: []interface{}{int64(1), getDate("2019-10-29"), float64(4.444), big.NewRat(136111, 25000)}}},
spannerData{table: "test", cols: []string{"id", "a", "b", "c"}, vals: []interface{}{int64(1), getDate("2019-10-29"), float32(4.444), big.NewRat(136111, 25000)}}},
},
{
name: "Data conversion: smallint, mediumint, bigint, double",
@@ -804,8 +804,8 @@ func TestProcessMySQLDump_DataError(t *testing.T) {
{
// Test bad data for each scalar type (except text, which accepts all values) and an array type.
name: "Data conversion errors",
input: "CREATE TABLE test (a int, b float, c bool, d date, e blob, f set('42','6'), g bit);\n" +
`INSERT INTO test (a, b, c, d, e, f, g) VALUES (7,42.1,1,'2019-10-29',_binary '` + string([]byte{137, 80}) + `','42,6', 0);` + // Baseline (good)
input: "CREATE TABLE test (a int, b double, c bool, d date, e blob, f set('42','6'), g bit, h float);\n" +
`INSERT INTO test (a, b, c, d, e, f, g, h) VALUES (7,42.1,1,'2019-10-29',_binary '` + string([]byte{137, 80}) + `','42,6', 0, 3.14);` + // Baseline (good)
"INSERT INTO test (a, b, c, d, e, f, g) VALUES (7,NULL,NULL,NULL,NULL,NULL, NULL);\n" + // Good
"INSERT INTO test (a, b, c, d, e, f) VALUES (7.1,NULL,NULL,NULL,NULL,NULL);\n" + // Error
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,42.1,NULL,NULL,NULL,NULL);\n" + // Good
@@ -816,13 +816,15 @@
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,'2019-10-42',NULL,NULL);\n" + // Error
`INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,_binary '` + string([]byte{137, 80}) + `',NULL);` + // Good
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,NULL,'42,6');\n" + // Good
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,NULL,42,6);\n", // Error
"INSERT INTO test (a, b, c, d, e, f) VALUES (NULL,NULL,NULL,NULL,NULL,42,6);\n" + // Error
"INSERT INTO test (a, b, c, d, e, f, h) VALUES (NULL,NULL,NULL,NULL,NULL,NULL, 3.14);\n" + // Good
"INSERT INTO test (a, b, c, d, e, f, h) VALUES (NULL,NULL,NULL,NULL,NULL,NULL, '3-14');\n", // Error
expectedData: []spannerData{
spannerData{
table: "test", cols: []string{"a", "b", "c", "d", "e", "f", "g", "synth_id"},
table: "test", cols: []string{"a", "b", "c", "d", "e", "f", "g", "h", "synth_id"},
vals: []interface{}{int64(7), float64(42.1), true,
getDate("2019-10-29"), []byte{0x89, 0x50},
"42,6", false,
"42,6", false, float32(3.14),
fmt.Sprintf("%d", bitReverse(0))}},
spannerData{table: "test", cols: []string{"a", "synth_id"}, vals: []interface{}{int64(7), fmt.Sprintf("%d", bitReverse(1))}},
spannerData{table: "test", cols: []string{"b", "synth_id"}, vals: []interface{}{float64(42.1), fmt.Sprintf("%d", bitReverse(2))}},
@@ -831,13 +833,14 @@
spannerData{table: "test", cols: []string{"e", "synth_id"}, vals: []interface{}{[]byte{0x89, 0x50}, fmt.Sprintf("%d", bitReverse(5))}},
spannerData{table: "test", cols: []string{"f", "synth_id"},
vals: []interface{}{"42,6", fmt.Sprintf("%d", bitReverse(6))}},
spannerData{table: "test", cols: []string{"h", "synth_id"}, vals: []interface{}{float32(3.14), fmt.Sprintf("%d", bitReverse(7))}},
},
},
}
for _, tc := range dataErrorTests {
conv, rows := runProcessMySQLDump(tc.input)
assert.Equal(t, tc.expectedData, rows, tc.name+": Data rows did not match")
assert.Equal(t, conv.BadRows(), int64(5), tc.name+": Error count did not match")
assert.Equal(t, conv.BadRows(), int64(6), tc.name+": Error count did not match")
}
}

5 changes: 3 additions & 2 deletions sources/mysql/report_test.go
@@ -34,8 +34,8 @@ import (
func TestReport(t *testing.T) {
s := `
CREATE TABLE bad_schema (
a float,
b integer NOT NULL);
a integer,
b integer NOT NULL);
CREATE TABLE default_value (
a text,
b bigint DEFAULT 42,
@@ -44,6 +44,7 @@
CREATE TABLE excellent_schema (
a text,
b bigint,
c float,
PRIMARY KEY (a)
);
CREATE TABLE foreign_key (
4 changes: 3 additions & 1 deletion sources/mysql/toddl.go
@@ -112,8 +112,10 @@ func toSpannerTypeInternal(srcType schema.Type, spType string) (ddl.Type, []inte
switch spType {
case ddl.String:
return ddl.Type{Name: ddl.String, Len: ddl.MaxLength}, []internal.SchemaIssue{internal.Widened}
default:
case ddl.Float64:
return ddl.Type{Name: ddl.Float64}, []internal.SchemaIssue{internal.Widened}
default:
return ddl.Type{Name: ddl.Float32}, nil
}
case "numeric", "decimal":
switch spType {
Expand Down
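The switch above changes the default Spanner target for a MySQL `float` to `FLOAT32`, while still honoring a user-requested override to `STRING` or `FLOAT64` (both flagged as widened). A simplified sketch of that dispatch, with schema-issue tracking reduced to the return value; `mysqlFloatToSpanner` is an illustrative name, not the real function:

```go
package main

import "fmt"

// mysqlFloatToSpanner sketches the float dispatch from toSpannerTypeInternal:
// spType is the user-specified target type ("" means no override).
func mysqlFloatToSpanner(spType string) string {
	switch spType {
	case "STRING":
		return "STRING(MAX)" // widened
	case "FLOAT64":
		return "FLOAT64" // widened
	default:
		return "FLOAT32" // new default mapping after this PR
	}
}

func main() {
	fmt.Println(mysqlFloatToSpanner(""))        // FLOAT32
	fmt.Println(mysqlFloatToSpanner("FLOAT64")) // FLOAT64
}
```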