add test data to getting_started

RTIInternational · Feb 7, 2024 · a3ec481 · a3ec481
1 parent bb697c4
commit a3ec481
Show file tree

Hide file tree

Showing 5 changed files with 28 additions and 6 deletions.
diff --git a/.github/workflows/docs-publish.yml b/.github/workflows/docs-publish.yml
@@ -27,8 +27,6 @@ jobs:
 
       - name: Sphinx build
         run: |
-          mkdir docs/sphinx/test_data
-          cp tests/data/test_study/timeseries/*.parquet docs/sphinx/test_data
           cd docs/sphinx
           make clean html
 

diff --git a/docs/sphinx/getting_started/index.rst b/docs/sphinx/getting_started/index.rst
@@ -119,10 +119,34 @@ to create a persistent database, allowing for efficient exploration and metric qu
    from teehr.database.teehr_dataset import TEEHRDatasetDB
 
    # Define file paths to the test data
-   PRIMARY_FILEPATH = "../../tests/data/test_study/timeseries/*short_obs.parquet"
-   SECONDARY_FILEPATH = "../../tests/data/test_study/timeseries/*_fcast.parquet"
-   CROSSWALK_FILEPATH = "../../tests/data/test_study/geo/crosswalk.parquet"
-   DATABASE_FILEPATH = Path("../../tests/data/temp/temp_test.db")
+   PRIMARY_FILEPATH = "getting_started/test_data/*short_obs.parquet"
+   SECONDARY_FILEPATH = "getting_started/test_data/*_fcast.parquet"
+   CROSSWALK_FILEPATH = "getting_started/test_data/crosswalk.parquet"
+   DATABASE_FILEPATH = Path("getting_started/test_data/temp_test.db")
+
+   # Delete the test database if it already exists.
+   if DATABASE_FILEPATH.is_file():
+       DATABASE_FILEPATH.unlink()
+
+   # Initialize a database.
+   tds = TEEHRDatasetDB(DATABASE_FILEPATH)
+
+   # Join the primary and secondary timeseries using the crosswalk table
+   # and insert the data into the `joined_timeseries` database table.
+   tds.insert_joined_timeseries(
+       primary_filepath=PRIMARY_FILEPATH,
+       secondary_filepath=SECONDARY_FILEPATH,
+       crosswalk_filepath=CROSSWALK_FILEPATH,
+       drop_added_fields=True,
+   )
+
+   # Let's look at the table schema.
+   schema_df = tds.get_joined_timeseries_schema()
+   schema_df
+
+   # Now we can perform queries and calculate metrics.
+   df = tds.query("SELECT * FROM joined_timeseries", format="df")
+   df
 
 
 Example notebooks

diff --git a/docs/sphinx/getting_started/test_data/crosswalk.parquet b/docs/sphinx/getting_started/test_data/crosswalk.parquet
diff --git a/docs/sphinx/getting_started/test_data/test_short_fcast.parquet b/docs/sphinx/getting_started/test_data/test_short_fcast.parquet
diff --git a/docs/sphinx/getting_started/test_data/test_short_obs.parquet b/docs/sphinx/getting_started/test_data/test_short_obs.parquet