Skip to content

feat(taps,targets): Support the x-singer.decimal JSON Schema extension #2786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
de53f85
refactor: Added a class method to instantiate `JSONSchemaToSQL` from …
edgarrmondragon Nov 28, 2024
42a702d
refactor: Added a class method to instantiate `JSONSchemaToSQL` from …
edgarrmondragon Nov 28, 2024
4301625
Confirm that we're not making any breaking changes
edgarrmondragon Nov 30, 2024
6fc27d5
refactor: Added a class method to instantiate `JSONSchemaToSQL` from …
edgarrmondragon Nov 28, 2024
8e95d23
feat(taps,targets): Support the `singer.decimal` JSON Schema extension
edgarrmondragon Nov 30, 2024
71985ad
test: Test with JSONSchemaToSQL
edgarrmondragon Jan 29, 2025
6f1a9dd
Default to not using singer.decimal
edgarrmondragon Jan 29, 2025
50d2c19
Update `versionchanged`
edgarrmondragon Jan 29, 2025
b0b807d
Make mypy happy
edgarrmondragon Jan 29, 2025
7a386c9
Merge branch 'main' into edgarrmondragon/refactor/jsonschematosql-fro…
edgarrmondragon Jan 30, 2025
9df828b
Merge branch 'edgarrmondragon/refactor/jsonschematosql-fromconfig' in…
edgarrmondragon Jan 30, 2025
6ec48ae
Merge branch 'edgarrmondragon/refactor/jsonschematosql-fromconfig' in…
edgarrmondragon Jan 30, 2025
d410295
Merge branch 'main' into edgarrmondragon/refactor/jsonschematosql-fro…
edgarrmondragon Feb 24, 2025
b9f8ba6
Merge branch 'edgarrmondragon/refactor/jsonschematosql-fromconfig' in…
edgarrmondragon Feb 24, 2025
a9714a4
Merge branch 'main' into edgarrmondragon/feat/singer-decimal
edgarrmondragon Feb 24, 2025
3e1abe9
Update tests/core/test_connector_sql.py
edgarrmondragon Feb 24, 2025
c50dad1
Merge branch 'main' into edgarrmondragon/feat/singer-decimal
edgarrmondragon Mar 3, 2025
d072f7a
Merge branch 'main' into edgarrmondragon/feat/singer-decimal
edgarrmondragon Mar 7, 2025
b9c9de0
Use `x-singer.decimal` to make it clear it is an extension
edgarrmondragon Mar 7, 2025
955e432
Test partial singer decimal payloads
edgarrmondragon Mar 7, 2025
0c6bc87
Append built-in setting
edgarrmondragon Mar 7, 2025
8c1b7ef
Make mypy happy
edgarrmondragon Mar 7, 2025
d710ac4
Documentation
edgarrmondragon Mar 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/guides/sql-tap.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,10 @@ class ConfiguredSQLToJSONSchema(SQLToJSONSchema):
```

Then, you can use your custom type mapping in your connector as in the previous example.


### SQL tap support for Singer Decimal string format

Starting from version `0.45.0`, the Meltano Singer SDK supports the `x-singer.decimal` format for strings. You can configure the tap to use it with the `use_singer_decimal` setting. SQL Targets that support the `x-singer.decimal` format will create an appropriate numeric column in the target database.

Read more about target support for `x-singer.decimal` in the [SQL target guide](./sql-target.md#sql-target-support-for-singer-decimal-string-format).
6 changes: 6 additions & 0 deletions docs/guides/sql-target.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,9 @@ plugins:
}
```
````

### SQL target support for Singer Decimal string format

Starting from version `0.45.0`, the Meltano Singer SDK supports the `x-singer.decimal` format for strings. If the source tap is configured to use this format, the SDK will automatically convert the string to a `DECIMAL` type in the target database.

Read more about tap support for `x-singer.decimal` in the [SQL tap guide](./sql-tap.md#sql-tap-support-for-singer-decimal-string-format).
33 changes: 30 additions & 3 deletions singer_sdk/connectors/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,21 @@ class SQLToJSONSchema:
.. versionchanged:: 0.43.0
Added the :meth:`singer_sdk.connectors.sql.SQLToJSONSchema.from_config` class
method.
.. versionchanged:: 0.45.0
Added support for the `use_singer_decimal` option.
"""

def __init__(self, *, use_singer_decimal: bool = False) -> None:
    """Set up the SQL to JSON Schema converter.

    Args:
        use_singer_decimal: When true, numbers are represented as `string`
            with the `x-singer.decimal` format rather than as `number`.
    """
    self.use_singer_decimal: bool = use_singer_decimal

@classmethod
def from_config(cls: type[SQLToJSONSchema], config: dict) -> SQLToJSONSchema: # noqa: ARG003
def from_config(cls: type[SQLToJSONSchema], config: dict) -> SQLToJSONSchema:
"""Create a new instance from a configuration dictionary.

Override this to instantiate this converter with values from the tap's
Expand All @@ -148,11 +159,13 @@ def from_config(cls, config):

Args:
config: The configuration dictionary. Its optional `use_singer_decimal`
key controls whether numbers are represented as `string` with the
`x-singer.decimal` format instead of as `number` (defaults to `False`).

Returns:
A new instance of the class.
"""
return cls()
return cls(use_singer_decimal=config.get("use_singer_decimal", False))

@functools.singledispatchmethod
def to_jsonschema(self, column_type: sa.types.TypeEngine) -> dict: # noqa: ARG002, D102, PLR6301
Expand Down Expand Up @@ -195,12 +208,14 @@ def integer_to_jsonschema(self, column_type: sa.types.Integer) -> dict: # noqa:
return th.IntegerType.type_dict # type: ignore[no-any-return]

@to_jsonschema.register
def float_to_jsonschema(self, column_type: sa.types.Numeric) -> dict: # noqa: ARG002, PLR6301
def float_to_jsonschema(self, column_type: sa.types.Numeric) -> dict:  # noqa: ARG002
    """Describe a generic number column as a JSON Schema type.

    The Singer decimal string schema is returned when ``use_singer_decimal``
    is enabled on this converter; otherwise the plain number schema is used.

    Args:
        column_type (:column_type:`Numeric`): The column type.
    """
    helper = th.SingerDecimalType if self.use_singer_decimal else th.NumberType
    return helper.type_dict  # type: ignore[no-any-return]

@to_jsonschema.register
Expand Down Expand Up @@ -278,6 +293,7 @@ def __init__(self, *, max_varchar_length: int | None = None) -> None:
"hostname": lambda _: sa.types.VARCHAR(253), # RFC 1035
"ipv4": lambda _: sa.types.VARCHAR(15),
"ipv6": lambda _: sa.types.VARCHAR(45),
"x-singer.decimal": self._handle_singer_decimal,
}

self._sql_datatype_mapping: dict[str, JSONtoSQLHandler] = {}
Expand Down Expand Up @@ -331,6 +347,17 @@ def _invoke_handler( # noqa: PLR6301
return handler() # type: ignore[no-any-return]
return handler(schema)

def _handle_singer_decimal(self, schema: dict) -> sa.types.TypeEngine:  # noqa: PLR6301
    """Map an x-singer.decimal string schema to a SQL decimal type.

    Args:
        schema: The JSON Schema object. Its optional ``precision`` and
            ``scale`` keys are forwarded to the SQL type; a missing key is
            passed along as ``None``.

    Returns:
        The appropriate SQLAlchemy type.
    """
    precision = schema.get("precision")
    scale = schema.get("scale")
    return sa.types.DECIMAL(precision, scale)

@property
def fallback_type(self) -> type[sa.types.TypeEngine]:
"""Return the fallback type.
Expand Down
12 changes: 12 additions & 0 deletions singer_sdk/helpers/capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,18 @@
),
),
).to_dict()
# Built-in SQL tap setting: opt in to emitting decimals as strings with the
# `x-singer.decimal` format in the discovered schema.
SQL_TAP_USE_SINGER_DECIMAL = PropertiesList(
    Property(
        "use_singer_decimal",
        BooleanType(),
        title="Use Singer Decimal",
        description=(
            "Whether to use strings with `x-singer.decimal` format for "
            "decimals in the discovered schema. "
            "This is useful to avoid precision loss when working with large numbers."
        ),
    ),
).to_dict()
TARGET_SCHEMA_CONFIG = PropertiesList(
Property(
"default_target_schema",
Expand Down
11 changes: 11 additions & 0 deletions singer_sdk/tap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from singer_sdk.helpers._util import dump_json, read_json_file
from singer_sdk.helpers.capabilities import (
BATCH_CONFIG,
SQL_TAP_USE_SINGER_DECIMAL,
CapabilitiesEnum,
PluginCapabilities,
TapCapabilities,
Expand Down Expand Up @@ -675,6 +676,16 @@ def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
self._catalog_dict: dict | None = None
super().__init__(*args, **kwargs)

@classmethod
def append_builtin_config(cls: type[SQLTap], config_jsonschema: dict) -> None:
    """Append built-in config to `config_jsonschema` if not already set.

    The ``SQL_TAP_USE_SINGER_DECIMAL`` settings are merged in first, then the
    parent class is asked to append its own built-in settings.

    Args:
        config_jsonschema: The config JSON Schema dictionary to extend.
            Nothing is returned, so it is presumably updated in place.
    """
    merge_missing_config_jsonschema(SQL_TAP_USE_SINGER_DECIMAL, config_jsonschema)
    super().append_builtin_config(config_jsonschema)

@property
def tap_connector(self) -> SQLConnector:
"""The connector object.
Expand Down
6 changes: 6 additions & 0 deletions singer_sdk/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,12 @@ class RegexType(StringType):
string_format = "regex"


class SingerDecimalType(StringType):
    """Decimal value serialized as a string with the `x-singer.decimal` format.

    This format is a Singer extension and not part of the JSON Schema
    specification; the ``x-`` prefix marks it as such.
    """

    # Must match the format name that SQL targets look up when choosing a
    # DECIMAL column type.
    string_format = "x-singer.decimal"


class BooleanType(JSONTypeHelper[bool]):
"""Boolean type.

Expand Down
58 changes: 56 additions & 2 deletions tests/core/test_connector_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,14 @@ def my_type_to_jsonschema(self, column_type) -> dict: # noqa: ARG002
assert m.to_jsonschema(sa.types.BOOLEAN()) == {"type": ["boolean"]}


def test_numeric_to_singer_decimal():
    """NUMERIC columns become `x-singer.decimal` strings when the option is on."""
    converter = SQLToJSONSchema(use_singer_decimal=True)
    # The format name carries the `x-` extension prefix, as documented.
    assert converter.to_jsonschema(sa.types.NUMERIC()) == {
        "type": ["string"],
        "format": "x-singer.decimal",
    }


class TestJSONSchemaToSQL: # noqa: PLR0904
@pytest.fixture
def json_schema_to_sql(self) -> JSONSchemaToSQL:
Expand Down Expand Up @@ -682,7 +690,7 @@ def test_unknown_format(self, json_schema_to_sql: JSONSchemaToSQL):
assert isinstance(result, sa.types.VARCHAR)

def test_custom_fallback(self):
json_schema_to_sql = JSONSchemaToSQL()
json_schema_to_sql = JSONSchemaToSQL(max_varchar_length=None)
json_schema_to_sql.fallback_type = sa.types.CHAR
jsonschema_type = {"cannot": "compute"}
result = json_schema_to_sql.to_sql_type(jsonschema_type)
Expand All @@ -696,7 +704,7 @@ def handle_raw_string(self, schema):

return super().handle_raw_string(schema)

json_schema_to_sql = CustomJSONSchemaToSQL()
json_schema_to_sql = CustomJSONSchemaToSQL(max_varchar_length=None)

vanilla = {"type": ["string"]}
result = json_schema_to_sql.to_sql_type(vanilla)
Expand All @@ -717,6 +725,52 @@ def handle_raw_string(self, schema):
result = json_schema_to_sql.to_sql_type(image_type)
assert isinstance(result, sa.types.LargeBinary)

def test_singer_decimal(self):
    """A bare `x-singer.decimal` string schema resolves to DECIMAL."""
    converter = JSONSchemaToSQL()
    sql_type = converter.to_sql_type(
        {"type": ["string"], "format": "x-singer.decimal"},
    )
    assert isinstance(sql_type, sa.types.DECIMAL)

def test_singer_decimal_with_precision_scale(self):
    """Precision and scale from the schema are carried onto the DECIMAL."""
    expected = {"precision": 12, "scale": 3}
    schema = {"type": ["string"], "format": "x-singer.decimal", **expected}
    sql_type = JSONSchemaToSQL().to_sql_type(schema)
    assert isinstance(sql_type, sa.types.DECIMAL)
    assert sql_type.precision == expected["precision"]
    assert sql_type.scale == expected["scale"]

def test_handle_singer_decimal_missing_precision(self):
    """Without a 'precision' key the DECIMAL keeps only the scale."""
    converter = JSONSchemaToSQL(max_varchar_length=None)
    # 'precision' is deliberately left out of the payload.
    sql_type = converter.to_sql_type(
        {"type": ["string"], "format": "x-singer.decimal", "scale": 2},
    )
    assert isinstance(sql_type, sa.types.DECIMAL)
    assert sql_type.precision is None
    assert sql_type.scale == 2

def test_handle_singer_decimal_missing_scale(self):
    """Without a 'scale' key the DECIMAL keeps only the precision."""
    converter = JSONSchemaToSQL(max_varchar_length=None)
    # 'scale' is deliberately left out of the payload.
    sql_type = converter.to_sql_type(
        {"type": ["string"], "format": "x-singer.decimal", "precision": 10},
    )
    assert isinstance(sql_type, sa.types.DECIMAL)
    assert sql_type.precision == 10
    assert sql_type.scale is None

def test_annotation_sql_datatype(self):
json_schema_to_sql = JSONSchemaToSQL()
json_schema_to_sql.register_sql_datatype_handler("json", sa.types.JSON)
Expand Down