diff --git a/.github/workflows/docs-publish.yml b/.github/workflows/docs-publish.yml index 9c063895..07680405 100644 --- a/.github/workflows/docs-publish.yml +++ b/.github/workflows/docs-publish.yml @@ -27,8 +27,6 @@ jobs: - name: Sphinx build run: | - mkdir docs/sphinx/test_data - cp tests/data/test_study/timeseries/*.parquet docs/sphinx/test_data cd docs/sphinx make clean html diff --git a/docs/sphinx/getting_started/index.rst b/docs/sphinx/getting_started/index.rst index 04f32b1a..d891ac1b 100644 --- a/docs/sphinx/getting_started/index.rst +++ b/docs/sphinx/getting_started/index.rst @@ -119,10 +119,34 @@ to create a persisent database, allowing for efficient exploration and metric qu from teehr.database.teehr_dataset import TEEHRDatasetDB # Define file paths the test data - PRIMARY_FILEPATH = "../../tests/data/test_study/timeseries/*short_obs.parquet" - SECONDARY_FILEPATH = "../../tests/data/test_study/timeseries/*_fcast.parquet" - CROSSWALK_FILEPATH = "../../tests/data/test_study/geo/crosswalk.parquet" - DATABASE_FILEPATH = Path("../../tests/data/temp/temp_test.db") + PRIMARY_FILEPATH = "getting_started/test_data/*short_obs.parquet" + SECONDARY_FILEPATH = "getting_started/test_data/*_fcast.parquet" + CROSSWALK_FILEPATH = "getting_started/test_data/crosswalk.parquet" + DATABASE_FILEPATH = Path("getting_started/test_data/temp_test.db") + + # Delete the test database if it already exists. + if DATABASE_FILEPATH.is_file(): + DATABASE_FILEPATH.unlink() + + # Initialize a database. + tds = TEEHRDatasetDB(DATABASE_FILEPATH) + + # Join the primary and secondary timeseries using the crosswalk table + # and insert the data into the `joined_timeseries` database table. + tds.insert_joined_timeseries( + primary_filepath=PRIMARY_FILEPATH, + secondary_filepath=SECONDARY_FILEPATH, + crosswalk_filepath=CROSSWALK_FILEPATH, + drop_added_fields=True, + ) + + # Let's look at the table schema. + schema_df = tds.get_joined_timeseries_schema() + schema_df + + # Now we can perform queries and calculate metrics. + df = tds.query("SELECT * FROM joined_timeseries", format="df") + df Example notebooks diff --git a/docs/sphinx/getting_started/test_data/crosswalk.parquet b/docs/sphinx/getting_started/test_data/crosswalk.parquet new file mode 100644 index 00000000..778bc968 Binary files /dev/null and b/docs/sphinx/getting_started/test_data/crosswalk.parquet differ diff --git a/docs/sphinx/getting_started/test_data/test_short_fcast.parquet b/docs/sphinx/getting_started/test_data/test_short_fcast.parquet new file mode 100644 index 00000000..e64dcdf7 Binary files /dev/null and b/docs/sphinx/getting_started/test_data/test_short_fcast.parquet differ diff --git a/docs/sphinx/getting_started/test_data/test_short_obs.parquet b/docs/sphinx/getting_started/test_data/test_short_obs.parquet new file mode 100644 index 00000000..be43f4b9 Binary files /dev/null and b/docs/sphinx/getting_started/test_data/test_short_obs.parquet differ