From 0d98e60d442ff6103be5c4fe51b9e2f3e2249cab Mon Sep 17 00:00:00 2001 From: Ayush Tripathi Date: Thu, 4 Jul 2024 00:47:35 +0530 Subject: [PATCH] Add connector for couchbasedb,test and docs. --- docs/docs/configuration/databases.mdx | 17 ++ superset/db_engine_specs/couchbase.py | 128 --------- superset/db_engine_specs/couchbasedb.py | 253 ++++++++++++++++++ superset/sql_parse.py | 2 +- .../db_engine_specs/test_couchbase.py | 49 ++++ 5 files changed, 320 insertions(+), 129 deletions(-) delete mode 100644 superset/db_engine_specs/couchbase.py create mode 100644 superset/db_engine_specs/couchbasedb.py diff --git a/docs/docs/configuration/databases.mdx b/docs/docs/configuration/databases.mdx index 911cd11da7237..1477b05ccda29 100644 --- a/docs/docs/configuration/databases.mdx +++ b/docs/docs/configuration/databases.mdx @@ -54,6 +54,7 @@ are compatible with Superset. | [Azure MS SQL](/docs/configuration/databases#sql-server) | `pip install pymssql` | `mssql+pymssql://UserName@presetSQL:TestPassword@presetSQL.database.windows.net:1433/TestSchema` | | [ClickHouse](/docs/configuration/databases#clickhouse) | `pip install clickhouse-connect` | `clickhousedb://{username}:{password}@{hostname}:{port}/{database}` | | [CockroachDB](/docs/configuration/databases#cockroachdb) | `pip install cockroachdb` | `cockroachdb://root@{hostname}:{port}/{database}?sslmode=disable` | +| [CouchbaseDB](/docs/configuration/databases#couchbaseDB) | `pip install couchbase-sqlalchemy` | `couchbasedb://{username}:{password}@{hostname}:{port}?truststorepath={ssl certificate path}` | | [Dremio](/docs/configuration/databases#dremio) | `pip install sqlalchemy_dremio` | `dremio://user:pwd@host:31010/` | | [Elasticsearch](/docs/configuration/databases#elasticsearch) | `pip install elasticsearch-dbapi` | `elasticsearch+http://{user}:{password}@{host}:9200/` | | [Exasol](/docs/configuration/databases#exasol) | `pip install sqlalchemy-exasol` | `exa+pyodbc://{username}:{password}@{hostname}:{port}/my_schema?CONNECTIONLCALL=en_US.UTF-8&driver=EXAODBC` | @@ -372,6 +373,22 @@ cockroachdb://root@{hostname}:{port}/{database}?sslmode=disable ``` + +#### CouchbaseDB + +The recommended connector library for CouchbaseDB is +[couchbase-sqlalchemy](https://github.com/couchbase/couchbase-sqlalchemy). +``` +pip install couchbase-sqlalchemy +``` + +The expected connection string is formatted as follows: + +``` +couchbasedb://{username}:{password}@{hostname}:{port}?truststorepath={certificate path}?ssl={true/false} +``` + + #### CrateDB The recommended connector library for CrateDB is diff --git a/superset/db_engine_specs/couchbase.py b/superset/db_engine_specs/couchbase.py deleted file mode 100644 index a5576719568c3..0000000000000 --- a/superset/db_engine_specs/couchbase.py +++ /dev/null @@ -1,128 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=too-many-lines -from __future__ import annotations - -from sqlalchemy.engine.url import URL -from flask_babel import gettext as __ -from superset.superset_typing import ResultSetColumnType -from marshmallow import fields, Schema -from marshmallow.validate import Range -from sqlalchemy import column -from superset.databases.utils import make_url_safe -from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin -from superset.constants import TimeGrain -import os -import datetime -from typing import ( - Any, - TypedDict -) -class BasicParametersType(TypedDict, total=False): - username: str | None - password: str | None - host: str - port: int - database: str - query: dict[str, Any] - encryption: bool - -class CouchbaseParametersSchema(Schema): - username = fields.String(allow_none=True, metadata={"description": __("Username")}) - password = fields.String(allow_none=True, metadata={"description": __("Password")}) - host = fields.String(required=True, metadata={"description": __("Hostname or IP address")}) - port = fields.Integer(allow_none=True, metadata={"description": __("Database port")}, validate=Range(min=0, max=65535)) - database = fields.String(allow_none=True, metadata={"description": __("Database name")}) - encryption = fields.Boolean(dump_default=False, metadata={"description": __("Use an encrypted connection to the database")}) - query = fields.Dict(keys=fields.Str(), values=fields.Raw(), metadata={"description": __("Additional parameters")}) - -class CouchbaseEngineSpec(BasicParametersMixin,BaseEngineSpec): - engine = 'couchbase' - engine_name = 'Couchbase Columnar' - allows_joins = False - allows_subqueries = False - default_driver = 'couchbase' - sqlalchemy_uri_placeholder = ("columnar+couchbase://user:password@host[:port][/dbname][?ssl=value&=value...]") - parameters_schema = CouchbaseParametersSchema() - encryption_parameters = {"sslmode": "require"} - - _time_grain_expressions = { - None: "{col}", - TimeGrain.SECOND: "DATE_TRUNC_STR(TOSTRING({col}),'second')", - TimeGrain.MINUTE: "DATE_TRUNC_STR(TOSTRING({col}),'minute')", - TimeGrain.HOUR: "DATE_TRUNC_STR(TOSTRING({col}),'hour')", - TimeGrain.DAY: "DATE_TRUNC_STR(TOSTRING({col}),'day')", - TimeGrain.MONTH: "DATE_TRUNC_STR(TOSTRING({col}),'month')", - TimeGrain.YEAR: "DATE_TRUNC_STR(TOSTRING({col}),'year')", - TimeGrain.QUARTER: "DATE_TRUNC_STR(TOSTRING({col}),'quarter')" - } - - @classmethod - def epoch_to_dttm(cls) -> str: - return "MILLIS_TO_STR({col} * 1000, '111')" - - @classmethod - def epoch_ms_to_dttm(cls) -> str: - return "MILLIS_TO_STR({col}, '111')" - - @classmethod - def convert_dttm( - cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None - ) -> str | None: - sqla_type = cls.get_sqla_column_type(target_type) - return f"DATETIME(DATE_FORMAT_STR(STR_TO_UTC('{dttm.date().isoformat()}'), 'iso8601'))" - - @classmethod - def build_sqlalchemy_uri(cls, parameters: dict, encrypted_extra=None): - query_params = parameters.get("query", {}).copy() - if parameters.get("encryption", False): - query_params.update(cls.encryption_parameters) - - uri = URL.create( - f"{cls.engine}+{cls.default_driver}", - username=parameters.get("username"), - password=parameters.get("password"), - host=parameters["host"], - port=parameters.get("port", 18091), # Default SSL port for Couchbase - database=parameters.get("database", "default"), - query=query_params - ) - return str(uri) - - - @classmethod - def get_parameters_from_uri( # pylint: disable=unused-argument - cls, uri: str, encrypted_extra: dict[str, Any] | None = None - ) -> BasicParametersType: - url = make_url_safe(uri) - query = { - key: value - for (key, value) in url.query.items() - if (key, value) not in cls.encryption_parameters.items() - } - encryption = all( - item in url.query.items() for item in cls.encryption_parameters.items() - ) - return { - "username": url.username, - "password": url.password, - "host": url.host, - "port": url.port, - "database": url.database, - "query": query, - "encryption": encryption, - } \ No newline at end of file diff --git a/superset/db_engine_specs/couchbasedb.py b/superset/db_engine_specs/couchbasedb.py new file mode 100644 index 0000000000000..ea2148a97268d --- /dev/null +++ b/superset/db_engine_specs/couchbasedb.py @@ -0,0 +1,253 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=too-many-lines + +from __future__ import annotations + +from datetime import datetime +from typing import Any, Optional, TypedDict +from urllib import parse + +from flask_babel import gettext as __ +from marshmallow import fields, Schema +from sqlalchemy.engine.url import URL + +from superset.constants import TimeGrain +from superset.databases.utils import make_url_safe +from superset.db_engine_specs.base import ( + BaseEngineSpec, + BasicParametersMixin, + BasicParametersType as BaseBasicParametersType, + BasicPropertiesType as BaseBasicPropertiesType, +) +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.utils.network import is_hostname_valid, is_port_open + + +class BasicParametersType(TypedDict, total=False): + username: Optional[str] + password: Optional[str] + host: str + database: str + port: Optional[int] + query: dict[str, Any] + encryption: bool + + +class BasicPropertiesType(TypedDict): + parameters: BasicParametersType + + +class CouchbaseParametersSchema(Schema): + username = fields.String(allow_none=True, metadata={"description": __("Username")}) + password = fields.String(allow_none=True, metadata={"description": __("Password")}) + host = fields.String( + required=True, metadata={"description": __("Hostname or IP address")} + ) + database = fields.String( + allow_none=True, metadata={"description": __("Database name")} + ) + port = fields.Integer( + allow_none=True, metadata={"description": __("Database port")} + ) + encryption = fields.Boolean( + dump_default=False, + metadata={"description": __("Use an encrypted connection to the database")}, + ) + query = fields.Dict( + keys=fields.Str(), + values=fields.Raw(), + metadata={"description": __("Additional parameters")}, + ) + + +class CouchbaseDbEngineSpec(BasicParametersMixin, BaseEngineSpec): + engine = "couchbasedb" + engine_name = "Couchbase" + default_driver = "couchbasedb" + allows_joins = False + allows_subqueries = False + sqlalchemy_uri_placeholder = ( + "couchbasedb://user:password@host[:port]?truststorepath=value?ssl=value" + ) + parameters_schema = CouchbaseParametersSchema() + + _time_grain_expressions = { + None: "{col}", + TimeGrain.SECOND: "DATE_TRUNC_STR(TOSTRING({col}),'second')", + TimeGrain.MINUTE: "DATE_TRUNC_STR(TOSTRING({col}),'minute')", + TimeGrain.HOUR: "DATE_TRUNC_STR(TOSTRING({col}),'hour')", + TimeGrain.DAY: "DATE_TRUNC_STR(TOSTRING({col}),'day')", + TimeGrain.MONTH: "DATE_TRUNC_STR(TOSTRING({col}),'month')", + TimeGrain.YEAR: "DATE_TRUNC_STR(TOSTRING({col}),'year')", + TimeGrain.QUARTER: "DATE_TRUNC_STR(TOSTRING({col}),'quarter')", + } + + @classmethod + def epoch_to_dttm(cls) -> str: + return "MILLIS_TO_STR({col} * 1000, '111')" + + @classmethod + def epoch_ms_to_dttm(cls) -> str: + return "MILLIS_TO_STR({col}, '111')" + + @classmethod + def convert_dttm( + cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None + ) -> Optional[str]: + return f"DATETIME(DATE_FORMAT_STR(STR_TO_UTC('{dttm.date().isoformat()}'), 'iso8601'))" + + @classmethod + def build_sqlalchemy_uri( + cls, + parameters: BaseBasicParametersType, + encrypted_extra: Optional[dict[str, Any]] = None, + ) -> str: + query_params = parameters.get("query", {}).copy() + if parameters.get("encryption"): + query_params["ssl"] = "true" + else: + query_params["ssl"] = "false" + + if parameters.get("port") is None: + uri = URL.create( + "couchbasedb", + username=parameters.get("username"), + password=parameters.get("password"), + host=parameters["host"], + port=None, + query=query_params, + ) + else: + uri = URL.create( + "couchbasedb", + username=parameters.get("username"), + password=parameters.get("password"), + host=parameters["host"], + port=parameters.get("port"), + query=query_params, + ) + print(uri) + return str(uri) + + @classmethod + def get_parameters_from_uri( + cls, uri: str, encrypted_extra: Optional[dict[str, Any]] = None + ) -> BaseBasicParametersType: + print("get_parameters is called : ", uri) + url = make_url_safe(uri) + query = { + key: value + for key, value in url.query.items() + if (key, value) not in cls.encryption_parameters.items() + } + ssl_value = url.query.get("ssl", "false").lower() + encryption = ssl_value == "true" + return BaseBasicParametersType( + username=url.username, + password=url.password, + host=url.host, + port=url.port, + database=url.database, + query=query, + encryption=encryption, + ) + + @classmethod + def validate_parameters( + cls, properties: BaseBasicPropertiesType + ) -> list[SupersetError]: + """ + Couchbase local server needs hostname and port but on cloud we need only connection String along with credentials to connect. + """ + errors: list[SupersetError] = [] + + required = {"host", "username", "password", "database"} + parameters = properties.get("parameters", {}) + present = {key for key in parameters if parameters.get(key, ())} + + if missing := sorted(required - present): + errors.append( + SupersetError( + message=f'One or more parameters are missing: {", ".join(missing)}', + error_type=SupersetErrorType.CONNECTION_MISSING_PARAMETERS_ERROR, + level=ErrorLevel.WARNING, + extra={"missing": missing}, + ), + ) + + host = parameters.get("host", None) + if not host: + return errors + # host can be a connection string in case of couchbase cloud. So Connection Check is not required in that case. + if not is_hostname_valid(host): + errors.append( + SupersetError( + message="The hostname provided can't be resolved.", + error_type=SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR, + level=ErrorLevel.ERROR, + extra={"invalid": ["host"]}, + ), + ) + return errors + + if port := parameters.get("port", None): + try: + port = int(port) + except (ValueError, TypeError): + errors.append( + SupersetError( + message="Port must be a valid integer.", + error_type=SupersetErrorType.CONNECTION_INVALID_PORT_ERROR, + level=ErrorLevel.ERROR, + extra={"invalid": ["port"]}, + ), + ) + if not (isinstance(port, int) and 0 <= port < 2**16): + errors.append( + SupersetError( + message=( + "The port must be an integer between 0 and 65535 " + "(inclusive)." + ), + error_type=SupersetErrorType.CONNECTION_INVALID_PORT_ERROR, + level=ErrorLevel.ERROR, + extra={"invalid": ["port"]}, + ), + ) + elif not is_port_open(host, port): + errors.append( + SupersetError( + message="The port is closed.", + error_type=SupersetErrorType.CONNECTION_PORT_CLOSED_ERROR, + level=ErrorLevel.ERROR, + extra={"invalid": ["port"]}, + ), + ) + + return errors + + @classmethod + def get_schema_from_engine_params( + cls, + sqlalchemy_uri: URL, + connect_args: dict[str, Any], + ) -> Optional[str]: + """ + Return the configured schema. + """ + return parse.unquote(sqlalchemy_uri.database) diff --git a/superset/sql_parse.py b/superset/sql_parse.py index cf78431753935..05bf9b19bb84a 100644 --- a/superset/sql_parse.py +++ b/superset/sql_parse.py @@ -102,7 +102,7 @@ "clickhouse": Dialects.CLICKHOUSE, "clickhousedb": Dialects.CLICKHOUSE, "cockroachdb": Dialects.POSTGRES, - "couchbase": Dialects.MYSQL, + "couchbasedb": Dialects.MYSQL, # "crate": ??? # "databend": ??? "databricks": Dialects.DATABRICKS, diff --git a/tests/unit_tests/db_engine_specs/test_couchbase.py b/tests/unit_tests/db_engine_specs/test_couchbase.py index e69de29bb2d1d..422200ce7f3c2 100644 --- a/tests/unit_tests/db_engine_specs/test_couchbase.py +++ b/tests/unit_tests/db_engine_specs/test_couchbase.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import datetime +from typing import Optional + +from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm + + +def test_epoch_to_dttm() -> None: + """ + DB Eng Specs (couchbase): Test epoch to dttm + """ + from superset.db_engine_specs.couchbasedb import CouchbaseDbEngineSpec + + assert CouchbaseDbEngineSpec.epoch_to_dttm() == "MILLIS_TO_STR({col} * 1000, '111')" + + +def test_epoch_ms_to_dttm() -> None: + """ + DB Eng Specs (couchbase): Test epoch ms to dttm + """ + from superset.db_engine_specs.couchbasedb import CouchbaseDbEngineSpec + + assert CouchbaseDbEngineSpec.epoch_ms_to_dttm() == "MILLIS_TO_STR({col}, '111')" + + +def test_convert_dttm( + target_type: str, + expected_result: Optional[str], + dttm: datetime, +) -> None: + from superset.db_engine_specs.couchbasedb import CouchbaseDbEngineSpec as spec + + assert_convert_dttm(spec, target_type, expected_result, dttm)