From eb2f099e0ffaaea0ccd63e97dc4dc469785d6454 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sat, 7 Dec 2024 09:26:31 +0100 Subject: [PATCH] records: Add example about library use --- .github/dependabot.yml | 5 + .github/workflows/framework-records.yml | 74 ++++++++++++ framework/records/README.md | 62 ++++++++++ framework/records/example_basic.py | 58 ++++++++++ framework/records/example_types.py | 148 ++++++++++++++++++++++++ framework/records/pyproject.toml | 12 ++ framework/records/requirements-test.txt | 1 + framework/records/requirements.txt | 3 + framework/records/test.py | 42 +++++++ 9 files changed, 405 insertions(+) create mode 100644 .github/workflows/framework-records.yml create mode 100644 framework/records/README.md create mode 100644 framework/records/example_basic.py create mode 100644 framework/records/example_types.py create mode 100644 framework/records/pyproject.toml create mode 100644 framework/records/requirements-test.txt create mode 100644 framework/records/requirements.txt create mode 100644 framework/records/test.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 010dcd14..a1b742ac 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -112,6 +112,11 @@ updates: schedule: interval: "daily" + - directory: "/framework/records" + package-ecosystem: "pip" + schedule: + interval: "daily" + - directory: "/framework/streamlit" package-ecosystem: "pip" schedule: diff --git a/.github/workflows/framework-records.yml b/.github/workflows/framework-records.yml new file mode 100644 index 00000000..1b42964c --- /dev/null +++ b/.github/workflows/framework-records.yml @@ -0,0 +1,74 @@ +name: records + +on: + pull_request: + branches: ~ + paths: + - '.github/workflows/framework-records.yml' + - 'framework/records/**' + - '/requirements.txt' + push: + branches: [ main ] + paths: + - '.github/workflows/framework-records.yml' + - 'framework/records/**' + - '/requirements.txt' + + # Allow job to be triggered manually. 
+ workflow_dispatch: + + # Run job each night after CrateDB nightly has been published. + schedule: + - cron: '0 3 * * *' + +# Cancel in-progress jobs when pushing to the same branch. +concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + test: + name: " + Python: ${{ matrix.python-version }} + CrateDB: ${{ matrix.cratedb-version }} + on ${{ matrix.os }}" + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ 'ubuntu-latest' ] + python-version: [ '3.9', '3.13' ] + cratedb-version: [ 'nightly' ] + + services: + cratedb: + image: crate/crate:${{ matrix.cratedb-version }} + ports: + - 4200:4200 + - 5432:5432 + env: + CRATE_HEAP_SIZE: 4g + + steps: + + - name: Acquire sources + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: | + requirements.txt + framework/records/requirements.txt + framework/records/requirements-test.txt + + - name: Install utilities + run: | + pip install -r requirements.txt + + - name: Validate framework/records + run: | + ngr test --accept-no-venv framework/records diff --git a/framework/records/README.md b/framework/records/README.md new file mode 100644 index 00000000..66a5662f --- /dev/null +++ b/framework/records/README.md @@ -0,0 +1,62 @@ +# Verify the `records` library with CrateDB + +Records: SQL for Humans™ + +## About + +This folder includes software integration tests for verifying +that the [Records] Python library works well together with [CrateDB]. + +Records is a very simple, but powerful, library for making raw SQL +queries to most relational databases. It uses [SQLAlchemy]. + +Records is intended for report-style exports of database queries, and +has not yet been optimized for extremely large data dumps. + +## What's Inside + +- `example_basic.py`: A few examples that read CrateDB's `sys.summits` table.
+ An example inquiring existing tables. + +- `example_types.py`: An example that exercises all data types supported by + CrateDB. + +## Install + +Set up sandbox and install packages. +```bash +pip install uv +uv venv .venv +source .venv/bin/activate +uv pip install -r requirements.txt -r requirements-test.txt +``` + +## Synopsis +```shell +pip install --upgrade records sqlalchemy-cratedb +``` +```python +from pprint import pprint +import records + +# Define database connection URL, suitable for CrateDB on localhost. +# For CrateDB Cloud, use `crate://<username>:<password>@<host>`. +db = records.Database("crate://") + +# Invoke query. +rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") +data = rows.all() +pprint(data) +``` + +## Tests + +Run integration tests. +```bash +pytest +``` + + +[CrateDB]: https://cratedb.com/database +[Records]: https://pypi.org/project/records/ +[SQLAlchemy]: https://www.sqlalchemy.org/ diff --git a/framework/records/example_basic.py b/framework/records/example_basic.py new file mode 100644 index 00000000..4cfab056 --- /dev/null +++ b/framework/records/example_basic.py @@ -0,0 +1,58 @@ +""" +Using `records` with CrateDB: Basic usage. + + pip install --upgrade records sqlalchemy-cratedb + +A few basic operations using the `records` library with CrateDB. + +- https://pypi.org/project/records/ +""" + +import records + + +def records_select_sys_summits(): + """ + Query CrateDB's built-in `sys.summits` table. + :return: + """ + db = records.Database("crate://", echo=True) + rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") + data = rows.all() + return data + + +def records_export_sys_summits_pandas(): + """ + Query CrateDB's built-in `sys.summits` table, returning a pandas dataframe.
+ """ + db = records.Database("crate://", echo=True) + rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") + data = rows.export("df") + return data + + +def records_export_sys_summits_csv(): + """ + Query CrateDB's built-in `sys.summits` table, returning CSV. + """ + db = records.Database("crate://", echo=True) + rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3") + data = rows.export("csv") + return data + + +def records_get_table_names(): + """ + Inquire table names of the system schema `sys`. + """ + db = records.Database("crate://?schema=sys", echo=True) + table_names = db.get_table_names() + return table_names + + +if __name__ == "__main__": + print(records_select_sys_summits()) + print(records_export_sys_summits_pandas()) + print(records_export_sys_summits_csv()) + print(records_get_table_names()) diff --git a/framework/records/example_types.py b/framework/records/example_types.py new file mode 100644 index 00000000..e57db0ba --- /dev/null +++ b/framework/records/example_types.py @@ -0,0 +1,148 @@ +""" +Using `records` with CrateDB: All data types. + + pip install --upgrade records sqlalchemy-cratedb + +An end-to-end lifecycle, defining a table, inserting data, and querying it. +This example uses all data types supported by CrateDB. + +- https://pypi.org/project/records/ +- https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#supported-types +""" + +from copy import deepcopy + +import pytest +import records + + +# The record that is inserted into the database. 
+RECORD_IN = dict( + null_integer=None, + integer=42, + bigint=42, + float=42.42, + double=42.42, + decimal=42.42, + bit="01010101", + bool=True, + text="foobar", + char="foo", + timestamp_tz="1970-01-02T00:00:00+01:00", + timestamp_notz="1970-01-02T00:00:00", + ip="127.0.0.1", + array=["foo", "bar"], + object={"for": "bar"}, + geopoint=[85.43, 66.23], + geoshape="POLYGON ((5 5, 10 5, 10 10, 5 10, 5 5))", + float_vector=[1.0, 2.0, 3.0], +) + +# When querying it, a few values will be canonicalized. +RECORD_OUT = deepcopy(RECORD_IN) +RECORD_OUT.update( + dict( + bit="B'01010101'", + char="foo ", + timestamp_tz=82800000, + timestamp_notz=86400000, + geopoint=[pytest.approx(85.43), pytest.approx(66.23)], + geoshape={ + "coordinates": [ + [[5.0, 5.0], [5.0, 10.0], [10.0, 10.0], [10.0, 5.0], [5.0, 5.0]] + ], + "type": "Polygon", + }, + ) +) + + +def records_ddl_dml_dql(): + """ + Validate all types of CrateDB. + + https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#supported-types + """ + db = records.Database("crate://", echo=True) + + # DDL + db.query("DROP TABLE IF EXISTS testdrive.example;") + db.query(""" + CREATE TABLE testdrive.example ( + -- Numeric types + null_integer INT, + integer INT, + bigint BIGINT, + float FLOAT, + double DOUBLE, + decimal DECIMAL(8, 2), + -- Other scalar types + bit BIT(8), + bool BOOLEAN, + text TEXT, + char CHARACTER(5), + timestamp_tz TIMESTAMP WITH TIME ZONE, + timestamp_notz TIMESTAMP WITHOUT TIME ZONE, + ip IP, + -- Container types + "array" ARRAY(STRING), + "object" OBJECT(DYNAMIC), + -- Geospatial types + geopoint GEO_POINT, + geoshape GEO_SHAPE, + -- Vector type + "float_vector" FLOAT_VECTOR(3) + ); + """) + + # DML + db.query( + """ + INSERT INTO testdrive.example ( + null_integer, + integer, + bigint, + float, + double, + decimal, + bit, + bool, + text, + char, + timestamp_tz, + timestamp_notz, + ip, + "array", + "object", + geopoint, + geoshape, + float_vector + ) VALUES ( + :null_integer, + 
:integer, + :bigint, + :float, + :double, + :decimal, + :bit, + :bool, + :text, + :char, + :timestamp_tz, + :timestamp_notz, + :ip, + :array, + :object, + :geopoint, + :geoshape, + :float_vector + ); + """, + **RECORD_IN, + ) + + # DQL + db.query("REFRESH TABLE testdrive.example") + rows = db.query("SELECT * FROM testdrive.example") + data = rows.all() + return data diff --git a/framework/records/pyproject.toml b/framework/records/pyproject.toml new file mode 100644 index 00000000..b7997815 --- /dev/null +++ b/framework/records/pyproject.toml @@ -0,0 +1,12 @@ +[tool.pytest.ini_options] +minversion = "2.0" +addopts = """ + -rfEXs -p pytester --strict-markers --verbosity=3 + --capture=no + """ +log_level = "DEBUG" +log_cli_level = "DEBUG" +testpaths = ["*.py"] +xfail_strict = true +markers = [ +] diff --git a/framework/records/requirements-test.txt b/framework/records/requirements-test.txt new file mode 100644 index 00000000..508a3d0d --- /dev/null +++ b/framework/records/requirements-test.txt @@ -0,0 +1 @@ +pytest<9 diff --git a/framework/records/requirements.txt b/framework/records/requirements.txt new file mode 100644 index 00000000..b7ab3b8f --- /dev/null +++ b/framework/records/requirements.txt @@ -0,0 +1,3 @@ +records<0.7 +sqlalchemy-cratedb<0.41 +tablib[pandas] diff --git a/framework/records/test.py b/framework/records/test.py new file mode 100644 index 00000000..ccefb9cc --- /dev/null +++ b/framework/records/test.py @@ -0,0 +1,42 @@ +from example_basic import ( + records_select_sys_summits, + records_get_table_names, + records_export_sys_summits_csv, + records_export_sys_summits_pandas, +) +from example_types import records_ddl_dml_dql, RECORD_OUT + + +def test_sys_summits(): + """ + Read built-in data from CrateDB's `sys` table through `records`. 
+ """ + data = records_select_sys_summits() + assert data[0]["mountain"] == "Mont Blanc" + + +def test_get_table_names(): + data = records_get_table_names() + assert "nodes" in data + assert "shards" in data + assert len(data) > 10 + + +def test_export_sys_summits_pandas(): + data = records_export_sys_summits_pandas() + assert list(data["mountain"]) == ["Mont Blanc", "Monte Rosa", "Dom"] + + +def test_export_sys_summits_csv(): + data = records_export_sys_summits_csv() + assert "classification,coordinates,country" in data + assert "Mont Blanc,4695,U-Savoy/Aosta" in data + + +def test_ddl_dml_dql(): + """ + Validate an end-to-end lifecycle, defining a table, inserting data, and querying it. + This example uses all data types supported by CrateDB. + """ + data = records_ddl_dml_dql() + assert data[0].as_dict() == RECORD_OUT