diff --git a/cumulus_library/.sqlfluff b/cumulus_library/.sqlfluff
index a3bccbd..2a69a5f 100644
--- a/cumulus_library/.sqlfluff
+++ b/cumulus_library/.sqlfluff
@@ -6,6 +6,7 @@ exclude_rules=
     # these rule overfires on athena nested arrays
    references.from,
    structure.column_order,
+   structure.unused_join,
    aliasing.unused,
    # this rule interferes with FHIR naming conventions
    capitalisation.identifiers
diff --git a/cumulus_library/actions/builder.py b/cumulus_library/actions/builder.py
index 119463a..39d874b 100644
--- a/cumulus_library/actions/builder.py
+++ b/cumulus_library/actions/builder.py
@@ -7,11 +7,13 @@ import re
 import sys
 import tomllib
+import zipfile
 
 from rich.progress import Progress, TaskID
 
 from cumulus_library import (
     BaseTableBuilder,
+    CountsBuilder,
     base_utils,
     databases,
     enums,
@@ -40,7 +42,10 @@ def _load_and_execute_builder(
     db_parser: databases.DatabaseParser = None,
     write_reference_sql: bool = False,
     doc_str: str | None = None,
-) -> None:
+    prepare: bool = False,
+    data_path: pathlib.Path | None,
+    query_count: int | None = None,
+) -> int:
     """Loads a table builder from a file.
 
     :param config: a StudyConfig object
@@ -49,6 +54,10 @@ def _load_and_execute_builder(
     :keyword db_parser: an object implementing DatabaseParser for the target database
     :keyword write_reference_sql: if true, writes sql to disk inside a study's directory
     :keyword doc_str: A string to insert between queries written to disk
+    :keyword prepare: If true, will render queries instead of executing them
+    :keyword data_path: If prepare is true, the path to write rendered data to
+    :keyword query_count: If prepare is true, the number of queries already rendered
+    :returns: number of processed queries
     """
 
     # Since we have to support arbitrary user-defined python files here, we
@@ -79,8 +88,7 @@ def _load_and_execute_builder(
             f"Error loading {manifest._study_path}{filename}\n"
             "Custom builders must extend the BaseTableBuilder class."
         )
-
-    # Remove instances of intermediate classes, if present
+    # Remove instances of intermediate classes, if present (usually there are none)
     table_builder_subclasses = list(
         filter(lambda x: x.__name__ != "CountsBuilder", table_builder_subclasses)
     )
@@ -89,7 +97,10 @@ def _load_and_execute_builder(
     # remove it so it doesn't interfere with the next python module to
     # execute, since the subclass would otherwise hang around.
     table_builder_class = table_builder_subclasses[0]
-    table_builder = table_builder_class()
+    if issubclass(table_builder_class, CountsBuilder):
+        table_builder = table_builder_class(study_prefix=manifest.get_study_prefix())
+    else:
+        table_builder = table_builder_class()
     if write_reference_sql:
         prefix = manifest.get_study_prefix()
         table_builder.prepare_queries(config=config, manifest=manifest, parser=db_parser)
@@ -105,19 +116,30 @@ def _load_and_execute_builder(
         table_builder.write_queries(
             path=pathlib.Path(f"{manifest._study_path}/reference_sql/" + new_filename)
         )
+    elif prepare:
+        table_builder.prepare_queries(
+            config=config,
+            manifest=manifest,
+            parser=db_parser,
+        )
+        _render_output(
+            manifest.get_study_prefix(), table_builder.queries, data_path, filename, query_count
+        )
     else:
         table_builder.execute_queries(
             config=config,
             manifest=manifest,
             parser=db_parser,
         )
-
+    num_queries = len(table_builder.queries)
     # After running the executor code, we'll remove
     # it so it doesn't interfere with the next python module to
     # execute, since the subclass would otherwise hang around.
     del sys.modules[table_builder_module.__name__]
     del table_builder_module
+    return num_queries
+
 
 def run_protected_table_builder(
     config: base_utils.StudyConfig,
@@ -136,13 +158,36 @@ def run_protected_table_builder(
 
 
 def _run_workflow(
-    config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
-) -> None:
+    config: base_utils.StudyConfig,
+    manifest: study_manifest.StudyManifest,
+    filename: str,
+    prepare: bool,
+    data_path: pathlib.Path | None,
+    query_count: int,
+) -> int:
     """Loads workflow config from toml definitions and executes workflow
 
     :param config: a StudyConfig object
     :param manifest: a StudyManifest object
+    :param filename: Filename of the workflow config
+    :param prepare: If true, will render queries instead of executing them
+    :param data_path: If prepare is true, the path to write rendered data to
+    :param query_count: If prepare is true, the number of queries already rendered
+    :returns: a count of processed queries
     """
+    toml_path = pathlib.Path(f"{manifest._study_path}/{filename}")
+    if prepare:
+        with open(toml_path, encoding="utf-8") as file:
+            workflow_config = file.read()
+        _render_output(
+            manifest.get_study_prefix(),
+            [workflow_config],
+            data_path,
+            filename,
+            query_count,
+            is_toml=True,
+        )
+        return 1
     existing_stats = []
     if not config.stats_build:
         existing_stats = (
@@ -157,14 +202,13 @@ def _run_workflow(
     # but we're letting it slide so that builders function similarly
     # across the board
     safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
-    toml_path = pathlib.Path(f"{manifest._study_path}/{filename}")
     with open(toml_path, "rb") as file:
         workflow_config = tomllib.load(file)
     config_type = workflow_config["config_type"]
     target_table = workflow_config.get("target_table", workflow_config.get("table_prefix", ""))
 
     if (target_table,) in existing_stats and not config.stats_build:
-        return
+        return 0
     match config_type:
         case "psm":
             builder = psm_builder.PsmBuilder(
@@ -195,6 +239,7 @@ def _run_workflow(
                 table_name=f"{target_table}_{safe_timestamp}",
                 view_name=target_table,
             )
+    return len(builder.queries)
 
 
 def build_matching_files(
     config: base_utils.StudyConfig,
@@ -203,13 +248,20 @@ def build_matching_files(
     *,
     builder: str | None,
     db_parser: databases.DatabaseParser = None,
+    prepare: bool,
+    data_path: pathlib.Path,
 ):
     """targets all table builders matching a target string for running
 
     :param config: a StudyConfig object
     :param manifest: a StudyManifest object
     :keyword builder: filename of a module implementing a TableBuilder
-    :keyword db_parser: an object implementing DatabaseParser for the target database"""
+    :keyword db_parser: an object implementing DatabaseParser for the target database
+    :keyword prepare: If true, will render queries instead of executing them
+    :keyword data_path: If prepare is true, the path to write rendered data to
+    """
+    if prepare:
+        _check_if_preparable(manifest.get_study_prefix())  # pragma: no cover
     all_generators = manifest.get_all_generators()
     matches = []
     if not builder:  # pragma: no cover
@@ -218,7 +270,14 @@ def build_matching_files(
     for file in all_generators:
         if file.find(builder) != -1:
             matches.append(file)
-    build_study(config, manifest, db_parser=db_parser, file_list=matches)
+    build_study(
+        config,
+        manifest,
+        db_parser=db_parser,
+        file_list=matches,
+        prepare=prepare,
+        data_path=data_path,
+    )
 
 
 def build_study(
     config: base_utils.StudyConfig,
@@ -228,35 +287,86 @@ def build_study(
     db_parser: databases.DatabaseParser = None,
     continue_from: str | None = None,
     file_list: list | None = None,
-) -> list:
+    prepare: bool,
+    data_path: pathlib.Path | None,
+) -> None:
"""Creates tables in the schema by iterating through the sql_config.file_names :param config: a StudyConfig object :param manifest: a StudyManifest object + :keyword db_parser: a parser for the target database :keyword continue_from: Name of a file to resume table creation from - :returns: loaded queries (for unit testing only) + :keyword prepare: If true, will render query instead of executing + :keyword data_path: If prepare is true, the path to write rendered data to """ + if prepare: + _check_if_preparable(manifest.get_study_prefix()) if file_list is None: file_list = manifest.get_file_list(continue_from) + if prepare: + data_dir = data_path / manifest.get_study_prefix() + for file in data_dir.glob("*"): + if file.is_file(): # pragma: no cover + file.unlink() + query_count = 0 for file in file_list: if file.endswith(".py"): - _load_and_execute_builder( + query_count += _load_and_execute_builder( config=config, manifest=manifest, filename=file, db_parser=db_parser, + data_path=data_path, + prepare=prepare, + query_count=query_count, ) elif file.endswith(".toml"): - _run_workflow(config=config, manifest=manifest, filename=file) + query_count += _run_workflow( + config=config, + manifest=manifest, + filename=file, + data_path=data_path, + prepare=prepare, + query_count=query_count, + ) elif file.endswith(".sql"): - _run_raw_queries(config=config, manifest=manifest, filename=file) + query_count += _run_raw_queries( + config=config, + manifest=manifest, + filename=file, + data_path=data_path, + prepare=prepare, + query_count=query_count, + ) else: raise errors.StudyManifestParsingError + if prepare: + with zipfile.ZipFile( + f"{data_path}/{manifest.get_study_prefix()}.zip", "w", zipfile.ZIP_DEFLATED + ) as z: + for file in data_dir.iterdir(): + z.write(file, file.relative_to(data_dir)) def _run_raw_queries( - config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str -): + config: base_utils.StudyConfig, + manifest: study_manifest.StudyManifest, + filename: str, + *, + data_path: pathlib.Path | None, + prepare: bool, + query_count: int, +) -> int: + """Creates tables in the schema by iterating through the sql_config.file_names + + :param config: a StudyConfig object + :param manifest: a StudyManifest object + :param filename: the name of the sql file to read + :param prepare: If true, will render query instead of executing + :param data_path: If prepare is true, the path to write rendered data to + :param query_count: the number of queries currently processed + :returns number of queries processed: + """ queries = [] for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{filename}")): queries.append([query, filename]) @@ -266,25 +376,75 @@ def _run_raw_queries( f"`{manifest.get_study_prefix()}__", "`", ) - # We'll explicitly create a cursor since recreating cursors for each - # table in a study is slightly slower for some databases - cursor = config.db.cursor() - # We want to only show a progress bar if we are :not: printing SQL lines - with base_utils.get_progress_bar(disable=config.verbose) as progress: - task = progress.add_task( - f"Building tables from {filename}...", - total=len(queries), - visible=not config.verbose, + if prepare: + _render_output( + manifest.get_study_prefix(), [q[0] for q in queries], data_path, filename, query_count ) - _execute_build_queries( - config=config, - manifest=manifest, - cursor=cursor, - queries=queries, - progress=progress, - task=task, + else: + # We'll explicitly create a cursor since recreating 
cursors for each + # table in a study is slightly slower for some databases + cursor = config.db.cursor() + # We want to only show a progress bar if we are :not: printing SQL lines + with base_utils.get_progress_bar(disable=config.verbose) as progress: + task = progress.add_task( + f"Building tables from {filename}...", + total=len(queries), + visible=not config.verbose, + ) + _execute_build_queries( + config=config, + manifest=manifest, + cursor=cursor, + queries=queries, + progress=progress, + task=task, + ) + return len(queries) + + +def _render_output( + study_name: str, + outputs: list, + data_path: pathlib.Path, + filename: str, + count: int, + *, + is_toml: bool = False, +): + if is_toml: + suffix = "toml" + else: + suffix = "sql" + for index, output in enumerate(outputs): + if is_toml: + name = "config" + else: + # This regex attempts to discover the table name, via looking for the first + # dunder, and then gets the start of the line its on as a non-quote requiring + # part of a file name. So for example, finding SQL that looks like this: + # CREATE TABLE foo__bar AS (varchar baz) + # would result in `create_table_foo__bar` being assigned to name. The goal + # is to make this at least mildly parsable at the file system level if someone + # is reviewing a prepared study + name = re.search(r"(.*)__\w*", output)[0].lower().replace(" ", "_") + total_index = count + index + new_filename = f"{total_index:04d}.{filename.rsplit('.', 1)[0]}.{index:02d}.{name}.{suffix}" + file_path = data_path / f"{study_name}/{new_filename}" + + file_path.parent.mkdir(exist_ok=True, parents=True) + with open(file_path, "w", encoding="UTF-8") as f: + f.write(output) + + +def _check_if_preparable(prefix): + # This list should include any study which requires interrogating the database to + # find if data is available to query (outside of a toml-driven workflow), + # which isn't doable as a distributed query + if prefix in ["core", "discovery", "data-metrics"]: + sys.exit( + f"Study '{prefix}'' does not support prepare mode. It must be run " + "directly against a target database." 
+        )
-    return queries
 
 
 def _query_error(
diff --git a/cumulus_library/actions/file_generator.py b/cumulus_library/actions/file_generator.py
index be4a721..1ed943d 100644
--- a/cumulus_library/actions/file_generator.py
+++ b/cumulus_library/actions/file_generator.py
@@ -31,7 +31,7 @@ def run_generate_sql(
     )
     for file in all_generators:
         if table_builder and file.find(table_builder) == -1:
-            continue
+            continue  # pragma: no cover
         builder._load_and_execute_builder(
             config=config,
             manifest=manifest,
@@ -39,6 +39,7 @@ def run_generate_sql(
             db_parser=db_parser,
             write_reference_sql=True,
             doc_str=doc_str,
+            data_path=None,
         )
diff --git a/cumulus_library/builders/valueset/umls.py b/cumulus_library/builders/valueset/umls.py
index 966452a..a6cc8c3 100644
--- a/cumulus_library/builders/valueset/umls.py
+++ b/cumulus_library/builders/valueset/umls.py
@@ -92,10 +92,12 @@ def generate_umls_tables(
         f"SELECT count(*) from {study_prefix}{table_prefix}umls_valuesets_rels"  # noqa: S608
     ).fetchone()[0]
 
-    cursor.execute(f"""CREATE TABLE {study_prefix}{table_prefix}umls_valuesets AS
+    cursor.execute(
+        f"""CREATE TABLE {study_prefix}{table_prefix}umls_valuesets AS
 SELECT distinct r.rxcui, v.* FROM {study_prefix}{table_prefix}umls_valuesets_rels as v,
 rxnorm.RXNCONSO as r
 WHERE v.code = r.code AND v.sab=r.sab
 --AND v.tty=r.tty
-""")  # noqa: S608
+"""  # noqa: S608
+    )
diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py
index f2cd0b7..547c3c7 100755
--- a/cumulus_library/cli.py
+++ b/cumulus_library/cli.py
@@ -93,55 +93,68 @@ def clean_and_build_study(
         *,
         options: dict[str, str],
         continue_from: str | None = None,
+        prepare: bool = False,
+        data_path: pathlib.Path | None = None,
     ) -> None:
         """Recreates study views/tables
 
         :param target: A path to the study directory
-        :param options: The dictionary of study-specific options
+        :keyword options: The dictionary of study-specific options
         :keyword continue_from: Restart a run from a specific sql file (for dev only)
+        :keyword prepare: If true, will render queries instead of executing them
+        :keyword data_path: If prepare is true, the path to write rendered data to
         """
         manifest = study_manifest.StudyManifest(target, self.data_path, options=options)
         try:
-            builder.run_protected_table_builder(config=self.get_config(manifest), manifest=manifest)
-            if not continue_from:
-                log_utils.log_transaction(
-                    config=self.get_config(manifest),
-                    manifest=manifest,
-                    status=enums.LogStatuses.STARTED,
-                )
-                cleaner.clean_study(
-                    config=self.get_config(manifest),
-                    manifest=manifest,
-                )
-
-            else:
-                log_utils.log_transaction(
-                    config=self.get_config(manifest),
-                    manifest=manifest,
-                    status=enums.LogStatuses.RESUMED,
-                )
+            if not prepare:
+                builder.run_protected_table_builder(
+                    config=self.get_config(manifest), manifest=manifest
+                )
+                if not continue_from:
+                    log_utils.log_transaction(
+                        config=self.get_config(manifest),
+                        manifest=manifest,
+                        status=enums.LogStatuses.STARTED,
+                    )
+                    cleaner.clean_study(
+                        config=self.get_config(manifest),
+                        manifest=manifest,
+                    )
+                else:
+                    log_utils.log_transaction(
+                        config=self.get_config(manifest),
+                        manifest=manifest,
+                        status=enums.LogStatuses.RESUMED,
+                    )
             builder.build_study(
                 config=self.get_config(manifest),
                 manifest=manifest,
                 continue_from=continue_from,
+                data_path=data_path,
+                prepare=prepare,
             )
-            log_utils.log_transaction(
-                config=self.get_config(manifest),
-                manifest=manifest,
-                status=enums.LogStatuses.FINISHED,
-            )
+            if not prepare:
+                log_utils.log_transaction(
+                    config=self.get_config(manifest),
+                    manifest=manifest,
+                    status=enums.LogStatuses.FINISHED,
+                )
         except errors.StudyManifestFilesystemError as e:
             # This should be thrown prior to any database connections, so
             # skipping logging
             raise e  # pragma: no cover
         except Exception as e:
-            log_utils.log_transaction(
-                config=self.get_config(manifest),
-                manifest=manifest,
-                status=enums.LogStatuses.ERROR,
-            )
+            if not prepare:
+                log_utils.log_transaction(
+                    config=self.get_config(manifest),
+                    manifest=manifest,
+                    status=enums.LogStatuses.ERROR,
+                )
             raise e
 
     def build_matching_files(
@@ -150,18 +163,24 @@ def build_matching_files(
         table_builder_name: str,
         *,
         options: dict[str, str],
+        prepare: bool = False,
+        data_path: pathlib.Path,
     ) -> None:
        """Runs a single table builder

        :param target: A path to the study directory
        :param table_builder_name: a builder file referenced in the study's manifest
-        :param options: The dictionary of study-specific options
+        :keyword options: The dictionary of study-specific options
+        :keyword prepare: If true, will render queries instead of executing them
+        :keyword data_path: If prepare is true, the path to write rendered data to
        """
        manifest = study_manifest.StudyManifest(target, options=options)
        builder.build_matching_files(
            config=self.get_config(manifest),
            manifest=manifest,
            builder=table_builder_name,
+            prepare=prepare,
+            data_path=data_path,
        )
 
     ### Data exporters
@@ -325,13 +344,19 @@ def run_cli(args: dict):
         for target in args["target"]:
             if args["builder"]:
                 runner.build_matching_files(
-                    study_dict[target], args["builder"], options=args["options"]
+                    study_dict[target],
+                    args["builder"],
+                    options=args["options"],
+                    prepare=args["prepare"],
+                    data_path=args["data_path"],
                 )
             else:
                 runner.clean_and_build_study(
                     study_dict[target],
                     continue_from=args["continue_from"],
                     options=args["options"],
+                    prepare=args["prepare"],
+                    data_path=args["data_path"],
                 )
 
     elif args["action"] == "export":
diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py
index ec1830b..a1fb80a 100644
--- a/cumulus_library/cli_parser.py
+++ b/cumulus_library/cli_parser.py
@@ -208,6 +208,11 @@ def get_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Forces file downloads/uploads to occur, even if they already exist",
     )
+    build.add_argument(
+        "--prepare",
+        action="store_true",
+        help=argparse.SUPPRESS,
+    )
     build.add_argument(
         "--statistics",
         action="store_true",
diff --git a/tests/conftest.py b/tests/conftest.py
index f602a24..824544d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,7 +3,9 @@
 import copy
 import datetime
 import json
+import os
 import pathlib
+from unittest import mock
 
 import numpy
 import pandas
@@ -197,6 +199,12 @@ def debug_diff_tables(cols, found, ref, pos=0):
 
 # Database fixtures
 
 
+@pytest.fixture(scope="session", autouse=True)
+def mock_env():
+    with mock.patch.dict(os.environ, clear=True):
+        yield
+
+
 @pytest.fixture
 def mock_db(tmp_path):
     """Provides a DuckDatabaseBackend for local testing"""
diff --git a/tests/test_actions.py b/tests/test_actions.py
index 9e3a27a..e963e5b 100644
--- a/tests/test_actions.py
+++ b/tests/test_actions.py
@@ -172,10 +172,7 @@ def test_run_protected_table_builder(mock_db_config, study_path, stats):
 def test_table_builder(mock_db_config, study_path, verbose, expects, raises):
     with raises:
         manifest = study_manifest.StudyManifest(pathlib.Path(study_path))
-        builder.build_study(
-            config=mock_db_config,
-            manifest=manifest,
-        )
+        builder.build_study(config=mock_db_config, manifest=manifest, data_path=None, prepare=False)
         tables = (
             mock_db_config.db.cursor()
             .execute("SELECT distinct(table_name) FROM information_schema.tables ")
@@ -248,10 +245,7 @@ def test_build_study(mock_db_config, study_path, verbose, expects, raises):
                 table_cols_types=table.column_types,
             )
             mock_db_config.db.cursor().execute(query)
-        builder.build_study(
-            config=mock_db_config,
-            manifest=manifest,
-        )
+        builder.build_study(config=mock_db_config, manifest=manifest, data_path=None, prepare=False)
         tables = (
             mock_db_config.db.cursor()
             .execute("SELECT distinct(table_name) FROM information_schema.tables ")
@@ -309,7 +303,7 @@ def test_run_psm_statistics_builders(
         table_name="psm_test__psm_encounter_2023_06_15",
         view_name="psm_test__psm_encounter_covariate",
     )
-    builder.build_study(config=config, manifest=manifest)
+    builder.build_study(config=config, manifest=manifest, prepare=False, data_path=None)
     tables = (
         mock_db_stats_config.db.cursor()
         .execute("SELECT distinct(table_name) FROM information_schema.tables")
@@ -333,10 +327,7 @@ def test_invoke_valueset_builder(mock_builder, mock_db_config, tmp_path):
     config = base_utils.StudyConfig(
         db=mock_db_config.db, schema=mock_db_config.schema, stats_build=True
     )
-    builder.build_study(
-        config=config,
-        manifest=manifest,
-    )
+    builder.build_study(config=config, manifest=manifest, prepare=False, data_path=None)
     assert mock_builder.is_called()
@@ -567,7 +558,7 @@ def test_generate_md(mock_db_config, tmp_path):
     manifest = study_manifest.StudyManifest(
         study_path=pathlib.Path(f"{tmp_path}/study_python_valid/")
     )
-    builder.build_study(config=mock_db_config, manifest=manifest)
+    builder.build_study(config=mock_db_config, manifest=manifest, prepare=False, data_path=None)
     file_generator.run_generate_markdown(config=mock_db_config, manifest=manifest)
     with open(f"{tmp_path}/study_python_valid/study_python_valid_generated.md") as f:
         generated_md = f.read()
diff --git a/tests/test_athena.py b/tests/test_athena.py
index 940b770..18e233f 100644
--- a/tests/test_athena.py
+++ b/tests/test_athena.py
@@ -1,7 +1,6 @@
 """Edge case testing for Athena database support"""
 
 import json
-import os
 import pathlib
 from unittest import mock
@@ -46,10 +45,6 @@ def test_schema_parsing():
     assert expected == parser.parse_found_schema(schema)
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("botocore.session.Session")
 def test_upload_parquet_response_handling(mock_session):
     path = pathlib.Path(__file__).resolve().parent
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 6d50bc2..46b6d39 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -673,7 +673,6 @@ def test_cli_upload_filter(mock_upload_data, mock_glob, args, calls):
 
 @pytest.mark.parametrize("mode", ["cli", "env"])
-@mock.patch.dict(os.environ, clear=True)
 # early exit with a dumb error
 @mock.patch("cumulus_library.base_utils.StudyConfig", side_effect=ZeroDivisionError)
 def test_cli_umls_parsing(mock_config, mode, tmp_path):
@@ -688,7 +687,6 @@ def test_cli_umls_parsing(mock_config, mode, tmp_path):
     assert mock_config.call_args[1]["umls_key"] == "MY-KEY"
 
 
-@mock.patch.dict(os.environ, clear=True)
 def test_cli_single_builder(tmp_path):
     cli.main(cli_args=duckdb_args(["build", "--builder=patient", "--target=core"], tmp_path))
     db = databases.DuckDatabaseBackend(f"{tmp_path}/duck.db")
@@ -700,7 +698,6 @@ def test_cli_single_builder(tmp_path):
     } == tables
 
 
-@mock.patch.dict(os.environ, clear=True)
 def test_cli_finds_study_from_manifest_prefix(tmp_path):
     # This study is located inside a folder called `study_different_dir`,
     # but we're going to find it using its real study prefix from the manifest.
@@ -715,7 +712,6 @@ def test_cli_finds_study_from_manifest_prefix(tmp_path):
     assert "study_different_name__table" in tables
 
 
-@mock.patch.dict(os.environ, clear=True)
 @pytest.mark.parametrize(
     "option,raises",
     [
@@ -745,7 +741,6 @@ def test_cli_custom_args(mock_config, tmp_path, option, raises):
         assert called_options[option.split(":")[0]] == option.split(":")[1]
 
 
-@mock.patch.dict(os.environ, clear=True)
 @mock.patch("cumulus_library.base_utils.StudyConfig")
 def test_cli_no_custom_args_yields_empty_dict(mock_config, tmp_path):
     mock_config.return_value.stats_clean = False
@@ -765,7 +760,6 @@ def test_cli_no_custom_args_yields_empty_dict(mock_config, tmp_path):
     assert {} == called_options
 
 
-@mock.patch.dict(os.environ, clear=True)
 def test_cli_import_study(tmp_path):
     test_data = {"string": ["a", "b", None]}
     df = pandas.DataFrame(test_data)
@@ -790,7 +784,6 @@ def test_cli_import_study(tmp_path):
     )
 
 
-@mock.patch.dict(os.environ, clear=True)
 def test_dedicated_schema(tmp_path):
     core_build_args = duckdb_args(
         [
@@ -827,7 +820,6 @@ def test_dedicated_schema(tmp_path):
         assert table in tables
 
 
-@mock.patch.dict(os.environ, clear=True)
 @mock.patch("cumulus_library.databases.duckdb.DuckDatabaseBackend")
 def test_sql_error_handling(mock_backend, tmp_path):
     mock_backend.return_value.cursor.return_value.execute.side_effect = [
@@ -848,7 +840,6 @@ def test_sql_error_handling(mock_backend, tmp_path):
     cli.main(cli_args=build_args)
 
 
-@mock.patch.dict(os.environ, clear=True)
 def test_version(capfd):
     out = None
     with pytest.raises(SystemExit):
@@ -870,7 +861,10 @@ def test_version(capfd):
     assert "study_invalid_bad_query: no version defined" in out
 
 
-@mock.patch.dict(os.environ, clear=True)
+@mock.patch.dict(
+    os.environ,
+    clear=True,
+)
 def test_study_dir(tmp_path):
     os.environ["CUMULUS_LIBRARY_STUDY_DIR"] = str(
         pathlib.Path(__file__).resolve().parent / "test_data/"
@@ -885,3 +879,89 @@ def test_study_dir(tmp_path):
     )
     with does_not_raise():
         cli.main(cli_args=build_args)
+
+
+@pytest.mark.parametrize(
+    "study,expected_queries,generated_query,toml_file,raises",
+    [
+        (
+            "study_valid",
+            [
+                "0000.test.00.create_table_study_valid__table.sql",
+                "0001.test2.00.create_table_study_valid__table2.sql",
+            ],
+            "CREATE TABLE study_valid__table (test int)",
+            None,
+            does_not_raise(),
+        ),
+        (
+            "study_python_counts_valid",
+            [
+                "0000.module1.00.create_table_if_not_exists_study_python_counts_valid__table1.sql",
+                "0001.module1.00.create_table_if_not_exists_study_python_counts_valid__table1.sql",
+                "0002.module2.00.create_table_if_not_exists_study_python_counts_valid__table2.sql",
+            ],
+            "CREATE TABLE IF NOT EXISTS study_python_counts_valid__table1 (test int);",
+            None,
+            does_not_raise(),
+        ),
+        (
+            "psm_test",
+            ["0000.psm_cohort.00.create_table_psm_test__psm_cohort.sql"],
+            """CREATE TABLE psm_test__psm_cohort AS (
+    SELECT * FROM core__condition --noqa: AM04
+    ORDER BY id DESC LIMIT 100
+)""",
+            [
+                "0001.psm_config.00.config.toml",
+                """config_type = "psm"
+classification_json = "dsm5_classifications.json"
+pos_source_table = "psm_test__psm_cohort"
+neg_source_table = "core__condition"
+target_table = "psm_test__psm_encounter_covariate"
+primary_ref = 'encounter_ref'
+count_ref = 'subject_ref'
+count_table = 'core__condition'
+dependent_variable = "example_diagnosis"
+pos_sample_size = 20
+neg_sample_size = 100
+seed = 1234567890
+[join_cols_by_table.core__encounter]
+join_id = "encounter_ref"
+included_cols = [
+    ["gender"],
+    ["race_display", "race"]
+]
+""",
+            ],
+            does_not_raise(),
+        ),
+        ("core", [], "", None, pytest.raises(SystemExit)),
+    ],
+)
+def test_prepare_study(tmp_path, study, expected_queries, generated_query, toml_file, raises):
+    with raises:
+        build_args = duckdb_args(
+            [
+                "build",
+                "-t",
+                study,
+                "-s",
+                "tests/test_data/",
+                str(tmp_path),
+                "--prepare",
+            ],
+            tmp_path,
+        )
+        with does_not_raise():
+            cli.main(cli_args=build_args)
+        queries = sorted(pathlib.Path(tmp_path).glob("**/*.sql"))
+        for query in queries:
+            assert query.name in expected_queries
+        with open(queries[0]) as f:
+            assert f.read() == generated_query
+        if toml_file:
+            config = next(pathlib.Path(tmp_path).glob("**/*.toml"))
+            assert config.name == toml_file[0]
+            with open(config) as f:
+                assert f.read() == toml_file[1]
diff --git a/tests/test_data/psm/psm_cohort.sql b/tests/test_data/psm/psm_cohort.sql
index eb97114..13a1981 100644
--- a/tests/test_data/psm/psm_cohort.sql
+++ b/tests/test_data/psm/psm_cohort.sql
@@ -1,3 +1,4 @@
 CREATE TABLE psm_test__psm_cohort AS (
-    SELECT * FROM core__condition ORDER BY id DESC LIMIT 100 --noqa: AM04
+    SELECT * FROM core__condition --noqa: AM04
+    ORDER BY id DESC LIMIT 100
 );
diff --git a/tests/test_data/study_python_counts_valid/module1.py b/tests/test_data/study_python_counts_valid/module1.py
index ecbaf6e..0871e90 100644
--- a/tests/test_data/study_python_counts_valid/module1.py
+++ b/tests/test_data/study_python_counts_valid/module1.py
@@ -4,8 +4,8 @@ class ModuleOneRunner(cumulus_library.CountsBuilder):
     display_text = "module1"
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, study_prefix):
+        super().__init__(study_prefix="study_python_counts_valid")
 
     def prepare_queries(self, *args, **kwargs):
         self.queries.append(
diff --git a/tests/test_data/study_python_counts_valid/module2.py b/tests/test_data/study_python_counts_valid/module2.py
index c6c7e80..9d96f13 100644
--- a/tests/test_data/study_python_counts_valid/module2.py
+++ b/tests/test_data/study_python_counts_valid/module2.py
@@ -4,7 +4,7 @@ class ModuleTwoRunner(cumulus_library.CountsBuilder):
     display_text = "module2"
 
-    def __init__(self):
+    def __init__(self, study_prefix):
         super().__init__(study_prefix="study_python_counts_valid")
 
     def prepare_queries(self, *args, **kwargs):
diff --git a/tests/test_databases.py b/tests/test_databases.py
index bd5d887..b726fe0 100644
--- a/tests/test_databases.py
+++ b/tests/test_databases.py
@@ -28,10 +28,6 @@
 }
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "db,data,expected,raises",
     [
@@ -77,10 +73,6 @@ def test_col_types_from_pandas(db, data, expected, raises):
     assert set(expected) == set(vals)
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "db,data,expected,raises",
     [
@@ -222,10 +214,6 @@ def test_pyarrow_types_from_sql(db, data, expected, raises):
         assert vals[index][-1] == expected[index][-1]
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "args,expected_type, raises",
     [
@@ -273,10 +261,6 @@ def test_create_db_backend(args, expected_type, raises):
 ### Database-specific edge case testing
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "args,sse,keycount,expected,raises",
     [
@@ -375,10 +359,6 @@ def test_upload_file_athena(mock_botocore, args, sse, keycount, expected, raises
         assert kwargs["Key"].endswith(args["file"].name)
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("pyathena.connect")
 def test_athena_pandas_cursor(mock_pyathena):
     mock_as_pandas = mock.MagicMock()
@@ -415,10 +395,6 @@ def test_athena_pandas_cursor(mock_pyathena):
     )
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("pyathena.connect")
 def test_athena_parser(mock_pyathena):
     db = databases.AthenaDatabaseBackend(**ATHENA_KWARGS)
@@ -426,6 +402,8 @@ def test_athena_parser(mock_pyathena):
     assert isinstance(parser, databases.AthenaParser)
 
 
+# Since this test sets an environment variable, create a dedicated
+# mock for it
 @mock.patch.dict(
     os.environ,
     clear=True,
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index e85ad24..48bb750 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -10,10 +10,6 @@
 from tests import conftest
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 def test_discovery(tmp_path):
     cli.main(
         cli_args=conftest.duckdb_args(
diff --git a/tests/test_duckdb.py b/tests/test_duckdb.py
index 876c858..1e8d476 100644
--- a/tests/test_duckdb.py
+++ b/tests/test_duckdb.py
@@ -6,7 +6,6 @@
 import tempfile
 from datetime import datetime
 from pathlib import Path
-from unittest import mock
 
 import pytest
 
@@ -14,10 +13,6 @@
 from cumulus_library.template_sql import base_templates
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 def test_duckdb_core_build_and_export(tmp_path):
     data_dir = f"{Path(__file__).parent}/test_data/duckdb_data"
     cli.main(
diff --git a/tests/test_dynamic_manifest.py b/tests/test_dynamic_manifest.py
index e82251b..6b76a51 100644
--- a/tests/test_dynamic_manifest.py
+++ b/tests/test_dynamic_manifest.py
@@ -20,10 +20,6 @@
 ]
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "options,result",
     [
@@ -45,10 +41,6 @@ def test_manifest_with_dynamic_prefix(options, result):
     assert manifest.get_study_prefix() == result
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("sys.executable", new=None)
 def test_manifest_with_dynamic_prefix_and_no_executable():
     """sys.executable must be valid for us to run a Python script"""
@@ -56,10 +48,6 @@ def test_manifest_with_dynamic_prefix_and_no_executable():
         study_manifest.StudyManifest(pathlib.Path("tests/test_data/study_dynamic_prefix"))
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 def test_cli_clean_with_dynamic_prefix(tmp_path):
     cli.main(cli_args=duckdb_args(["build", *STUDY_ARGS, "--option=prefix:dynamic2"], tmp_path))
     cli.main(cli_args=duckdb_args(["build", *STUDY_ARGS], tmp_path))
@@ -80,10 +68,6 @@ def test_cli_clean_with_dynamic_prefix(tmp_path):
     assert "dynamic2__meta_version" not in tables
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 def test_cli_export_with_dynamic_prefix(tmp_path):
     cli.main(cli_args=duckdb_args(["build", *STUDY_ARGS, "--option=prefix:abc"], tmp_path))
     cli.main(cli_args=duckdb_args(["export", *STUDY_ARGS, "--option=prefix:abc"], tmp_path))
diff --git a/tests/test_static_file.py b/tests/test_static_file.py
index bdea564..ff89ec7 100644
--- a/tests/test_static_file.py
+++ b/tests/test_static_file.py
@@ -1,14 +1,7 @@
-import os
-from unittest import mock
-
 from cumulus_library import cli, databases
 from tests import conftest
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 def test_static_file(tmp_path):
     cli.main(
         cli_args=conftest.duckdb_args(
diff --git a/tests/test_umls_api.py b/tests/test_umls_api.py
index 99c8a30..b2930b9 100644
--- a/tests/test_umls_api.py
+++ b/tests/test_umls_api.py
@@ -1,7 +1,6 @@
 import os
 import pathlib
 from contextlib import nullcontext as does_not_raise
-from unittest import mock
 
 import pytest
 import responses
@@ -18,10 +17,6 @@
 EXPANSION_VALUESET_OID = "2.16.840.1.113762.1.4.1106.68"
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "api_key,validator_key,res_text,res_status,raises",
     [
@@ -45,10 +40,6 @@ def test_auth(api_key, validator_key, res_text, res_status, raises):
         assert api.validator_key == api_key
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @responses.activate
 def test_auth_reads_env_var():
     responses.add(responses.GET, AUTH_URL, body="true", status=200)
@@ -63,10 +54,6 @@ def get_valueset_data(file_name):
         return f.read()
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @pytest.mark.parametrize(
     "action,url,oid,expected_oids,raises",
     [
@@ -159,10 +146,6 @@ def test_get_valueset(action, url, oid, expected_oids, raises):
             assert data[i]["id"] == expected_oids[i]
 
 
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @responses.activate
 def test_download_umls(tmp_path):
     # this zip file is just an archive made by targeting the other .json files
diff --git a/tests/valueset/test_additional_rules_builder.py b/tests/valueset/test_additional_rules_builder.py
index 755b909..a4fb309 100644
--- a/tests/valueset/test_additional_rules_builder.py
+++ b/tests/valueset/test_additional_rules_builder.py
@@ -1,5 +1,4 @@
 import json
-import os
 import pathlib
 import tomllib
 from unittest import mock
@@ -16,10 +15,6 @@
 
 
 @pytest.mark.parametrize("prefix", [(""), ("foo")])
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("cumulus_library.apis.umls.UmlsApi")
 def test_additional_rules(mock_api, mock_db_config_rxnorm, prefix):
     data_path = pathlib.Path(__file__).parent.parent / "test_data/valueset/"
diff --git a/tests/valueset/test_rxnorm_valueset_builder.py b/tests/valueset/test_rxnorm_valueset_builder.py
index 7494f37..1b2f7ed 100644
--- a/tests/valueset/test_rxnorm_valueset_builder.py
+++ b/tests/valueset/test_rxnorm_valueset_builder.py
@@ -1,5 +1,4 @@
 import json
-import os
 import pathlib
 import tomllib
 from unittest import mock
@@ -15,10 +14,6 @@
 
 
 @pytest.mark.parametrize("prefix", [(""), ("foo")])
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("cumulus_library.apis.umls.UmlsApi")
 def test_rxnorm_valueset_builder(mock_api, mock_db_config_rxnorm, prefix):
     data_path = pathlib.Path(__file__).parent.parent / "test_data/valueset/"
diff --git a/tests/valueset/test_umls.py b/tests/valueset/test_umls.py
index c4d1485..ea7c21e 100644
--- a/tests/valueset/test_umls.py
+++ b/tests/valueset/test_umls.py
@@ -1,6 +1,3 @@
-import os
-from unittest import mock
-
 import pytest
 
 from cumulus_library import StudyManifest
@@ -8,10 +5,6 @@
 
 
 @pytest.mark.parametrize("prefix", [(""), ("foo")])
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 def test_umls_lookup(mock_db_config_rxnorm, prefix, tmp_path):
     with open(f"{tmp_path}/manifest.toml", "w", encoding="utf8") as f:
         f.write('study_prefix="test"')
diff --git a/tests/valueset/test_valueset_builder.py b/tests/valueset/test_valueset_builder.py
index b5a8e76..41fd94a 100644
--- a/tests/valueset/test_valueset_builder.py
+++ b/tests/valueset/test_valueset_builder.py
@@ -1,5 +1,4 @@
 import json
-import os
 import pathlib
 from contextlib import nullcontext as does_not_raise
 from unittest import mock
@@ -19,10 +18,6 @@
         (data_path / "invalid.toml", pytest.raises(SystemExit)),
     ],
 )
-@mock.patch.dict(
-    os.environ,
-    clear=True,
-)
 @mock.patch("cumulus_library.apis.umls.UmlsApi")
 def test_valueset_builder(mock_api, mock_db_config_rxnorm, config_path, raises):
     with raises:
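
A sketch of what prepare mode is expected to produce, drawn from the expectations in test_prepare_study above (the invocation and tree are illustrative; duckdb_args in the tests supplies the database arguments):

    cumulus-library build -t study_valid -s tests/test_data/ <data_path> --prepare

    <data_path>/
        study_valid/
            0000.test.00.create_table_study_valid__table.sql
            0001.test2.00.create_table_study_valid__table2.sql
        study_valid.zip

Each rendered file is numbered by overall run order and by position within its source file, with a table-name hint recovered by _render_output; toml workflow configs are copied verbatim (e.g. 0001.psm_config.00.config.toml), and studies on _check_if_preparable's deny list (core, discovery, data-metrics) exit instead of rendering.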