From eb2f099e0ffaaea0ccd63e97dc4dc469785d6454 Mon Sep 17 00:00:00 2001
From: Andreas Motl <andreas.motl@crate.io>
Date: Sat, 7 Dec 2024 09:26:31 +0100
Subject: [PATCH] records: Add example about library use

---
 .github/dependabot.yml                  |   5 +
 .github/workflows/framework-records.yml |  74 ++++++++++++
 framework/records/README.md             |  62 ++++++++++
 framework/records/example_basic.py      |  58 ++++++++++
 framework/records/example_types.py      | 148 ++++++++++++++++++++++++
 framework/records/pyproject.toml        |  12 ++
 framework/records/requirements-test.txt |   1 +
 framework/records/requirements.txt      |   3 +
 framework/records/test.py               |  42 +++++++
 9 files changed, 405 insertions(+)
 create mode 100644 .github/workflows/framework-records.yml
 create mode 100644 framework/records/README.md
 create mode 100644 framework/records/example_basic.py
 create mode 100644 framework/records/example_types.py
 create mode 100644 framework/records/pyproject.toml
 create mode 100644 framework/records/requirements-test.txt
 create mode 100644 framework/records/requirements.txt
 create mode 100644 framework/records/test.py

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 010dcd14..a1b742ac 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -112,6 +112,11 @@ updates:
     schedule:
       interval: "daily"
 
+  - directory: "/framework/records"
+    package-ecosystem: "pip"
+    schedule:
+      interval: "daily"
+
   - directory: "/framework/streamlit"
     package-ecosystem: "pip"
     schedule:
diff --git a/.github/workflows/framework-records.yml b/.github/workflows/framework-records.yml
new file mode 100644
index 00000000..1b42964c
--- /dev/null
+++ b/.github/workflows/framework-records.yml
@@ -0,0 +1,74 @@
+name: records
+
+on:
+  pull_request:
+    branches: ~
+    paths:
+    - '.github/workflows/framework-records.yml'
+    - 'framework/records/**'
+    - 'requirements.txt'
+  push:
+    branches: [ main ]
+    paths:
+    - '.github/workflows/framework-records.yml'
+    - 'framework/records/**'
+    - 'requirements.txt'
+
+  # Allow job to be triggered manually.
+  workflow_dispatch:
+
+  # Run job each night after CrateDB nightly has been published.
+  schedule:
+    - cron: '0 3 * * *'
+
+# Cancel in-progress jobs when pushing to the same branch.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  test:
+    name: "
+     Python: ${{ matrix.python-version }}
+     CrateDB: ${{ matrix.cratedb-version }}
+     on ${{ matrix.os }}"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ 'ubuntu-latest' ]
+        python-version: [ '3.9', '3.13' ]
+        cratedb-version: [ 'nightly' ]
+
+    services:
+      cratedb:
+        image: crate/crate:${{ matrix.cratedb-version }}
+        ports:
+          - 4200:4200
+          - 5432:5432
+        env:
+          CRATE_HEAP_SIZE: 4g
+
+    steps:
+
+      - name: Acquire sources
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+          cache: 'pip'
+          cache-dependency-path: |
+            requirements.txt
+            framework/records/requirements.txt
+            framework/records/requirements-test.txt
+
+      - name: Install utilities
+        run: |
+          pip install -r requirements.txt
+
+      - name: Validate framework/records
+        run: |
+          ngr test --accept-no-venv framework/records
diff --git a/framework/records/README.md b/framework/records/README.md
new file mode 100644
index 00000000..66a5662f
--- /dev/null
+++ b/framework/records/README.md
@@ -0,0 +1,62 @@
+# Verify the `records` library with CrateDB
+
+Records: SQL for Humans™
+
+## About
+
+This folder includes software integration tests for verifying
+that the [Records] Python library works well together with [CrateDB].
+
+Records is a very simple, but powerful, library for making raw SQL
+queries to most relational databases. It uses [SQLAlchemy].
+
+Records is intended for report-style exports of database queries, and
+has not yet been optimized for extremely large data dumps.
+
+## What's Inside
+
+- `example_basic.py`: A few examples that read CrateDB's `sys.summits` table,
+  and an example that inquires the names of existing tables.
+
+- `example_types.py`: An example that exercises all data types supported by
+  CrateDB.
+
+## Install
+
+Set up sandbox and install packages.
+```bash
+pip install uv
+uv venv .venv
+source .venv/bin/activate
+uv pip install -r requirements.txt -r requirements-test.txt
+```
+
+## Synopsis
+```shell
+pip install --upgrade records sqlalchemy-cratedb
+```
+```python
+from pprint import pprint
+import records
+
+# Define database connection URL, suitable for CrateDB on localhost.
+# For CrateDB Cloud, use `crate://<username>:<password>@<host>`.
+db = records.Database("crate://")
+
+# Invoke query.
+rows = db.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3")
+data = rows.all()
+pprint(data)
+```
+
+## Tests
+
+Run integration tests.
+```bash
+pytest
+```
+
+
+[CrateDB]: https://cratedb.com/database
+[Records]: https://pypi.org/project/records/
+[SQLAlchemy]: https://www.sqlalchemy.org/
diff --git a/framework/records/example_basic.py b/framework/records/example_basic.py
new file mode 100644
index 00000000..4cfab056
--- /dev/null
+++ b/framework/records/example_basic.py
@@ -0,0 +1,58 @@
+"""
+Using `records` with CrateDB: Basic usage.
+
+    pip install --upgrade records sqlalchemy-cratedb
+
+A few basic operations using the `records` library with CrateDB.
+
+- https://pypi.org/project/records/
+"""
+
+import records
+
+
+def records_select_sys_summits():
+    """
+    Select the three highest entries of CrateDB's built-in `sys.summits` table.
+
+    :return: The result of `all()` on the queried record collection.
+    """
+    database = records.Database("crate://", echo=True)
+    result = database.query("SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3")
+    return result.all()
+
+
+def records_export_sys_summits_pandas():
+    """
+    Select the three highest `sys.summits` entries, exported using the `df` format.
+    """
+    database = records.Database("crate://", echo=True)
+    statement = "SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3"
+    result = database.query(statement)
+    return result.export("df")
+
+
+def records_export_sys_summits_csv():
+    """
+    Select the three highest `sys.summits` entries, exported using the `csv` format.
+    """
+    database = records.Database("crate://", echo=True)
+    statement = "SELECT * FROM sys.summits ORDER BY height DESC LIMIT 3"
+    result = database.query(statement)
+    return result.export("csv")
+
+
+def records_get_table_names():
+    """
+    List the table names of the system schema `sys`.
+
+    The schema is selected through the `schema` parameter of the database URL.
+    """
+    return records.Database("crate://?schema=sys", echo=True).get_table_names()
+
+
+if __name__ == "__main__":
+    print(records_select_sys_summits())
+    print(records_export_sys_summits_pandas())
+    print(records_export_sys_summits_csv())
+    print(records_get_table_names())
diff --git a/framework/records/example_types.py b/framework/records/example_types.py
new file mode 100644
index 00000000..e57db0ba
--- /dev/null
+++ b/framework/records/example_types.py
@@ -0,0 +1,148 @@
+"""
+Using `records` with CrateDB: All data types.
+
+    pip install --upgrade records sqlalchemy-cratedb
+
+An end-to-end lifecycle, defining a table, inserting data, and querying it.
+This example uses all data types supported by CrateDB.
+
+- https://pypi.org/project/records/
+- https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#supported-types
+"""
+
+from copy import deepcopy
+
+import pytest
+import records
+
+
+# The record that gets inserted into the database, keyed by column name.
+RECORD_IN = {
+    "null_integer": None,
+    "integer": 42,
+    "bigint": 42,
+    "float": 42.42,
+    "double": 42.42,
+    "decimal": 42.42,
+    "bit": "01010101",
+    "bool": True,
+    "text": "foobar",
+    "char": "foo",
+    "timestamp_tz": "1970-01-02T00:00:00+01:00",
+    "timestamp_notz": "1970-01-02T00:00:00",
+    "ip": "127.0.0.1",
+    "array": ["foo", "bar"],
+    "object": {"for": "bar"},
+    "geopoint": [85.43, 66.23],
+    "geoshape": "POLYGON ((5 5, 10 5, 10 10, 5 10, 5 5))",
+    "float_vector": [1.0, 2.0, 3.0],
+}
+
+# The record expected when reading back: a few values come out canonicalized.
+RECORD_OUT = deepcopy(RECORD_IN)
+RECORD_OUT.update(
+    {
+        "bit": "B'01010101'",
+        "char": "foo  ",
+        "timestamp_tz": 82800000,
+        "timestamp_notz": 86400000,
+        "geopoint": [pytest.approx(85.43), pytest.approx(66.23)],
+        "geoshape": {
+            "coordinates": [
+                [[5.0, 5.0], [5.0, 10.0], [10.0, 10.0], [10.0, 5.0], [5.0, 5.0]]
+            ],
+            "type": "Polygon",
+        },
+    }
+)
+
+
+def records_ddl_dml_dql():
+    """
+    Create a table with all CrateDB types, insert `RECORD_IN`, and return all rows.
+
+    https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#supported-types
+    """
+    db = records.Database("crate://", echo=True)
+
+    # DDL: Drop a table left over from a previous run, then create it from scratch.
+    db.query("DROP TABLE IF EXISTS testdrive.example;")
+    db.query("""
+    CREATE TABLE testdrive.example (
+        -- Numeric types
+        null_integer INT,
+        integer INT,
+        bigint BIGINT,
+        float FLOAT,
+        double DOUBLE,
+        decimal DECIMAL(8, 2),
+        -- Other scalar types
+        bit BIT(8),
+        bool BOOLEAN,
+        text TEXT,
+        char CHARACTER(5),
+        timestamp_tz TIMESTAMP WITH TIME ZONE,
+        timestamp_notz TIMESTAMP WITHOUT TIME ZONE,
+        ip IP,
+        -- Container types
+        "array" ARRAY(STRING),
+        "object" OBJECT(DYNAMIC),
+        -- Geospatial types
+        geopoint GEO_POINT,
+        geoshape GEO_SHAPE,
+        -- Vector type
+        "float_vector" FLOAT_VECTOR(3)
+    );
+    """)
+
+    # DML: Insert a single record; the `:name` placeholders are bound from `RECORD_IN`.
+    db.query(
+        """
+    INSERT INTO testdrive.example (
+        null_integer,
+        integer,
+        bigint,
+        float,
+        double,
+        decimal,
+        bit,
+        bool,
+        text,
+        char,
+        timestamp_tz,
+        timestamp_notz,
+        ip,
+        "array",
+        "object",
+        geopoint,
+        geoshape,
+        float_vector
+    ) VALUES (
+        :null_integer,
+        :integer,
+        :bigint,
+        :float,
+        :double,
+        :decimal,
+        :bit,
+        :bool,
+        :text,
+        :char,
+        :timestamp_tz,
+        :timestamp_notz,
+        :ip,
+        :array,
+        :object,
+        :geopoint,
+        :geoshape,
+        :float_vector
+    );
+    """,
+        **RECORD_IN,
+    )
+
+    # DQL: Refresh first, presumably to make the new row visible to the SELECT.
+    db.query("REFRESH TABLE testdrive.example")
+    rows = db.query("SELECT * FROM testdrive.example")
+    data = rows.all()
+    return data
diff --git a/framework/records/pyproject.toml b/framework/records/pyproject.toml
new file mode 100644
index 00000000..b7997815
--- /dev/null
+++ b/framework/records/pyproject.toml
@@ -0,0 +1,12 @@
+[tool.pytest.ini_options]
+minversion = "2.0"
+addopts = """
+  -rfEXs -p pytester --strict-markers --verbosity=3
+  --capture=no
+  """
+log_level = "DEBUG"
+log_cli_level = "DEBUG"
+testpaths = ["*.py"]
+xfail_strict = true
+markers = [
+]
diff --git a/framework/records/requirements-test.txt b/framework/records/requirements-test.txt
new file mode 100644
index 00000000..508a3d0d
--- /dev/null
+++ b/framework/records/requirements-test.txt
@@ -0,0 +1 @@
+pytest<9
diff --git a/framework/records/requirements.txt b/framework/records/requirements.txt
new file mode 100644
index 00000000..b7ab3b8f
--- /dev/null
+++ b/framework/records/requirements.txt
@@ -0,0 +1,3 @@
+records<0.7
+sqlalchemy-cratedb<0.41
+tablib[pandas]
diff --git a/framework/records/test.py b/framework/records/test.py
new file mode 100644
index 00000000..ccefb9cc
--- /dev/null
+++ b/framework/records/test.py
@@ -0,0 +1,42 @@
+from example_basic import (
+    records_select_sys_summits,
+    records_get_table_names,
+    records_export_sys_summits_csv,
+    records_export_sys_summits_pandas,
+)
+from example_types import records_ddl_dml_dql, RECORD_OUT
+
+
+def test_sys_summits():
+    """
+    Verify that `records` reads CrateDB's built-in `sys.summits` table.
+    """
+    first_row = records_select_sys_summits()[0]
+    assert first_row["mountain"] == "Mont Blanc"
+
+
+def test_get_table_names():
+    """Verify the `sys` schema lists well-known tables like `nodes` and `shards`."""
+    table_names = records_get_table_names()
+    assert {"nodes", "shards"} <= set(table_names)
+    assert len(table_names) > 10
+
+
+def test_export_sys_summits_pandas():
+    mountains = records_export_sys_summits_pandas()["mountain"]
+    assert list(mountains) == ["Mont Blanc", "Monte Rosa", "Dom"]
+
+
+def test_export_sys_summits_csv():
+    csv_text = records_export_sys_summits_csv()
+    for fragment in ("classification,coordinates,country", "Mont Blanc,4695,U-Savoy/Aosta"):
+        assert fragment in csv_text
+
+
+def test_ddl_dml_dql():
+    """
+    Run the end-to-end lifecycle (define a table, insert data, query it) and
+    compare the first record read back with `RECORD_OUT`.
+    """
+    first_record = records_ddl_dml_dql()[0]
+    assert first_record.as_dict() == RECORD_OUT