opendp · mccalluc · Mar 14, 2025 · Mar 14, 2025 · Mar 14, 2025 · Mar 14, 2025
diff --git a/.flake8 b/.flake8
@@ -9,4 +9,4 @@ extend-ignore = E203,E501,E701
 
 per-file-ignores =
     # Ignore undefined names in templates.
-    */code_generators/no-tests/*.py:F821,F401,E302
+    **/no-tests/*.py:F821,F401,E302
diff --git a/.pytest.ini b/.pytest.ini
@@ -11,7 +11,7 @@ filterwarnings =
 	ignore:Exception ignored in.*:pytest.PytestUnraisableExceptionWarning
 	ignore:unclosed <socket.*:ResourceWarning
 
-addopts = --doctest-glob '*.md' --doctest-modules --ignore dp_wizard/utils/code_generators/no-tests --ignore dp_wizard/tests/fixtures/ --tracing=retain-on-failure
+addopts = --doctest-glob '*.md' --doctest-modules --ignore-glob '**/no-tests' --ignore dp_wizard/tests/fixtures/ --tracing=retain-on-failure
 
 # If an xfail starts passing unexpectedly, that should count as a failure:
 xfail_strict=true
diff --git a/dp_wizard/__init__.py b/dp_wizard/__init__.py
@@ -6,11 +6,6 @@
 __version__ = (Path(__file__).parent / "VERSION").read_text().strip()
 
 
-class AnalysisType:
-    HISTOGRAM = "Histogram"
-    MEAN = "Mean"
-
-
 def main():  # pragma: no cover
     import shiny
     from dp_wizard.utils.argparse_helpers import get_cli_info

diff --git a/dp_wizard/analyses/__init__.py b/dp_wizard/analyses/__init__.py
@@ -0,0 +1,11 @@
+def get_analysis_by_name(name):  # pragma: no cover
+    # Avoid circular import:
+    from dp_wizard.analyses import histogram, mean
+
+    match name:
+        case histogram.name:
+            return histogram
+        case mean.name:
+            return mean
+        case _:
+            raise Exception("Unrecognized analysis")
diff --git a/dp_wizard/analyses/common.py b/dp_wizard/analyses/common.py
@@ -0,0 +1,24 @@
+# Layout constants shared by the analysis UIs (imported by mean/shiny.py).
+label_width = "10em"  # Just wide enough so the text isn't truncated.
+col_widths = {
+    # Controls stay roughly a constant width;
+    # Graph expands to fill space.
+    "sm": [4, 8],
+    "md": [3, 9],
+    "lg": [2, 10],
+}
+
+# Choices for the per-column weight select input;
+# default_weight is the key that is preselected.
+default_weight = "2"
+weight_choices = {
+    "1": "Less accurate",
+    default_weight: "Default",
+    "4": "More accurate",
+}
+
+# Tooltip text explaining why lower and upper bounds must be supplied.
+bounds_tooltip_text = """
+DP requires that we limit the sensitivity to the contributions
+of any individual. To do this, we need an estimate of the lower
+and upper bounds for each variable. We should not look at the
+data when estimating the bounds! In this case, we could imagine
+that "class year" would vary between 1 and 4, and we could limit
+"grade" to values between 50 and 100.
+"""
diff --git a/dp_wizard/analyses/histogram/__init__.py b/dp_wizard/analyses/histogram/__init__.py
@@ -0,0 +1,5 @@
+from .codegen import *  # noqa: F401, F403
+
+
+# Display name of this analysis; get_analysis_by_name matches against it.
+name = "Histogram"
+# NOTE(review): presumably tells callers that a bin count applies here;
+# confirm against the code that reads has_bins.
+has_bins = True
diff --git a/dp_wizard/analyses/histogram/codegen.py b/dp_wizard/analyses/histogram/codegen.py
@@ -0,0 +1,69 @@
+from dp_wizard.utils.code_generators._template import Template
+
+
+def make_query(code_gen, identifier, accuracy_name, stats_name):
+    return (
+        Template("histogram_query", __file__)
+        .fill_values(
+            BIN_NAME=f"{identifier}_bin",
+            GROUP_NAMES=code_gen.groups,
+        )
+        .fill_expressions(
+            QUERY_NAME=f"{identifier}_query",
+            ACCURACY_NAME=accuracy_name,
+            STATS_NAME=stats_name,
+        )
+        .finish()
+    )
+
+
+def make_output(code_gen, column_name, accuracy_name, stats_name):
+    return (
+        Template(f"histogram_{code_gen.root_template}_output", __file__)
+        .fill_values(
+            COLUMN_NAME=column_name,
+            GROUP_NAMES=code_gen.groups,
+        )
+        .fill_expressions(
+            ACCURACY_NAME=accuracy_name,
+            HISTOGRAM_NAME=stats_name,
+            CONFIDENCE_NOTE=code_gen._make_confidence_note(),
+        )
+        .finish()
+    )
+
+
+def make_report_kv(name, confidence, identifier):
+    return (
+        Template("histogram_report_kv", __file__)
+        .fill_values(
+            NAME=name,
+            CONFIDENCE=confidence,
+        )
+        .fill_expressions(
+            IDENTIFIER_STATS=f"{identifier}_stats",
+            IDENTIFIER_ACCURACY=f"{identifier}_accuracy",
+        )
+        .finish()
+    )
+
+
+def make_column_config_block(column_name, lower_bound, upper_bound, bin_count):
+    from dp_wizard.utils.code_generators import _snake_case
+
+    snake_name = _snake_case(column_name)
+    return (
+        Template("histogram_config", __file__)
+        .fill_expressions(
+            CUT_LIST_NAME=f"{snake_name}_cut_points",
+            CONFIG_NAME=f"{snake_name}_config",
+        )
+        .fill_values(
+            LOWER_BOUND=lower_bound,
+            UPPER_BOUND=upper_bound,
+            BIN_COUNT=bin_count,
+            COLUMN_NAME=column_name,
+            BIN_COLUMN_NAME=f"{snake_name}_bin",
+        )
+        .finish()
+    )
diff --git a/..._generators/no-tests/_histogram_config.py → ...s/histogram/no-tests/_histogram_config.py b/..._generators/no-tests/_histogram_config.py → ...s/histogram/no-tests/_histogram_config.py
diff --git a/...rs/no-tests/_histogram_notebook_output.py → ...am/no-tests/_histogram_notebook_output.py b/...rs/no-tests/_histogram_notebook_output.py → ...am/no-tests/_histogram_notebook_output.py
diff --git a/...e_generators/no-tests/_histogram_query.py → ...es/histogram/no-tests/_histogram_query.py b/...e_generators/no-tests/_histogram_query.py → ...es/histogram/no-tests/_histogram_query.py
diff --git a/...nerators/no-tests/_histogram_report_kv.py → ...istogram/no-tests/_histogram_report_kv.py b/...nerators/no-tests/_histogram_report_kv.py → ...istogram/no-tests/_histogram_report_kv.py
diff --git a/...tors/no-tests/_histogram_script_output.py → ...gram/no-tests/_histogram_script_output.py b/...tors/no-tests/_histogram_script_output.py → ...gram/no-tests/_histogram_script_output.py
diff --git a/dp_wizard/analyses/mean/__init__.py b/dp_wizard/analyses/mean/__init__.py
@@ -0,0 +1,6 @@
+from .codegen import *  # noqa: F401, F403
+from .shiny import *  # noqa: F401, F403
+
+
+# Display name of this analysis; get_analysis_by_name matches against it.
+name = "Mean"
+# NOTE(review): presumably tells callers that no bin count applies here;
+# confirm against the code that reads has_bins.
+has_bins = False
diff --git a/dp_wizard/analyses/mean/codegen.py b/dp_wizard/analyses/mean/codegen.py
@@ -0,0 +1,48 @@
+from dp_wizard.utils.code_generators._template import Template
+
+
+def make_query(code_gen, identifier, accuracy_name, stats_name):
+    return (
+        Template("mean_query", __file__)
+        .fill_values(
+            GROUP_NAMES=code_gen.groups,
+        )
+        .fill_expressions(
+            QUERY_NAME=f"{identifier}_query",
+            STATS_NAME=stats_name,
+            CONFIG_NAME=f"{identifier}_config",
+        )
+        .finish()
+    )
+
+
+def make_output(code_gen, column_name, accuracy_name, stats_name):
+    return Template(f"mean_{code_gen.root_template}_output", __file__).finish()
+
+
+def make_report_kv(name, confidence, identifier):
+    return (
+        Template("mean_report_kv", __file__)
+        .fill_values(
+            NAME=name,
+        )
+        .finish()
+    )
+
+
+def make_column_config_block(column_name, lower_bound, upper_bound, bin_count):
+    from dp_wizard.utils.code_generators import _snake_case
+
+    snake_name = _snake_case(column_name)
+    return (
+        Template("mean_config", __file__)
+        .fill_expressions(
+            CONFIG_NAME=f"{snake_name}_config",
+        )
+        .fill_values(
+            COLUMN_NAME=column_name,
+            LOWER_BOUND=lower_bound,
+            UPPER_BOUND=upper_bound,
+        )
+        .finish()
+    )
diff --git a/.../code_generators/no-tests/_mean_config.py → ...rd/analyses/mean/no-tests/_mean_config.py b/.../code_generators/no-tests/_mean_config.py → ...rd/analyses/mean/no-tests/_mean_config.py
diff --git a/...erators/no-tests/_mean_notebook_output.py → ...es/mean/no-tests/_mean_notebook_output.py b/...erators/no-tests/_mean_notebook_output.py → ...es/mean/no-tests/_mean_notebook_output.py
diff --git a/...s/code_generators/no-tests/_mean_query.py → ...ard/analyses/mean/no-tests/_mean_query.py b/...s/code_generators/no-tests/_mean_query.py → ...ard/analyses/mean/no-tests/_mean_query.py
diff --git a/...de_generators/no-tests/_mean_report_kv.py → ...analyses/mean/no-tests/_mean_report_kv.py b/...de_generators/no-tests/_mean_report_kv.py → ...analyses/mean/no-tests/_mean_report_kv.py
diff --git a/...enerators/no-tests/_mean_script_output.py → ...yses/mean/no-tests/_mean_script_output.py b/...enerators/no-tests/_mean_script_output.py → ...yses/mean/no-tests/_mean_script_output.py
diff --git a/dp_wizard/analyses/mean/shiny.py b/dp_wizard/analyses/mean/shiny.py
@@ -0,0 +1,80 @@
+from shiny import ui, render, module, reactive, Inputs, Outputs, Session
+from dp_wizard.app.components.outputs import demo_tooltip, hide_if, output_code_sample
+from dp_wizard.analyses.common import (
+    default_weight,
+    label_width,
+    col_widths,
+    weight_choices,
+    bounds_tooltip_text,
+)
+
+
+@module.ui
+def mean_ui():  # pragma: no cover
+    return ui.output_ui("mean_inputs_preview_ui")
+
+
+# Server side of the mean analysis module: renders the lower/upper bound
+# inputs, an optional weight select, and a text note in place of a preview.
+#
+# name: key used to look up initial values in lower_bounds / upper_bounds.
+# lower_bounds, upper_bounds: reactive dicts read when the inputs render.
+# is_single_column: passed to hide_if together with the weight select.
+# is_demo: passed to demo_tooltip together with the bounds tooltip text.
+@module.server
+def mean_server(
+    input: Inputs,
+    output: Outputs,
+    session: Session,
+    name: str,
+    lower_bounds: reactive.Value[dict[str, float]],
+    upper_bounds: reactive.Value[dict[str, float]],
+    is_single_column: bool,
+    is_demo: bool,
+):  # pragma: no cover
+    @render.ui
+    def mean_inputs_preview_ui():
+        # Controls in the first column, the preview note in the second.
+        return ui.layout_columns(
+            [
+                ui.input_numeric(
+                    "lower",
+                    ["Lower", ui.output_ui("bounds_tooltip_ui")],
+                    # Falls back to 0 when no lower bound is stored for name.
+                    lower_bounds().get(name, 0),
+                    width=label_width,
+                ),
+                ui.input_numeric(
+                    "upper",
+                    "Upper",
+                    # Falls back to 10 when no upper bound is stored for name.
+                    upper_bounds().get(name, 10),
+                    width=label_width,
+                ),
+                ui.output_ui("optional_weight_ui"),
+            ],
+            ui.output_ui("mean_preview_ui"),
+            col_widths=col_widths,  # type: ignore
+        )
+
+    @render.ui
+    def mean_preview_ui():
+        return [
+            ui.p(
+                """
+                Since the mean is just a single number,
+                there is not a preview visualization.
+                """
+            ),
+            # NOTE(review): no "column_code" output is defined in this module
+            # server; confirm that this id resolves to the intended output.
+            output_code_sample("Column Definition", "column_code"),
+        ]
+
+    @render.ui
+    def bounds_tooltip_ui():
+        return demo_tooltip(
+            is_demo,
+            bounds_tooltip_text,
+        )
+
+    @render.ui
+    def optional_weight_ui():
+        return hide_if(
+            is_single_column,
+            ui.input_select(
+                "weight",
+                # NOTE(review): "weight_tooltip_ui" has no render function in
+                # this module server; confirm where it is meant to be defined.
+                ["Weight", ui.output_ui("weight_tooltip_ui")],
+                choices=weight_choices,
+                selected=default_weight,
+                width=label_width,
+            ),
+        )
diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py
@@ -5,7 +5,7 @@
 from shiny.types import SilentException
 import polars as pl
 
-from dp_wizard import AnalysisType
+from dp_wizard.analyses import histogram, mean
 from dp_wizard.utils.dp_helper import make_accuracy_histogram
 from dp_wizard.utils.shared import plot_histogram
 from dp_wizard.utils.code_generators import make_column_config_block
@@ -14,7 +14,7 @@
 from dp_wizard.utils.mock_data import mock_data, ColumnDef
 
 
-default_analysis_type = AnalysisType.HISTOGRAM
+default_analysis_type = histogram.name
 default_weight = "2"
 label_width = "10em"  # Just wide enough so the text isn't trucated.
 
@@ -26,7 +26,7 @@ def column_ui():  # pragma: no cover
         ui.input_select(
             "analysis_type",
             None,
-            [AnalysisType.HISTOGRAM, AnalysisType.MEAN],
+            [histogram.name, mean.name],
             width=label_width,
         ),
         ui.output_ui("analysis_config_ui"),
@@ -51,6 +51,15 @@ def column_server(
     is_demo: bool,
     is_single_column: bool,
 ):  # pragma: no cover
+    mean.shiny.mean_server(
+        name,
+        name=name,
+        lower_bounds=lower_bounds,
+        upper_bounds=upper_bounds,
+        is_single_column=is_single_column,
+        is_demo=is_demo,
+    )
+
     @reactive.effect
     def _set_hidden_inputs():
         # TODO: Is isolate still needed?
@@ -130,7 +139,7 @@ def analysis_config_ui():
             "lg": [2, 10],
         }
         match input.analysis_type():
-            case AnalysisType.HISTOGRAM:
+            case histogram.name:
                 return ui.layout_columns(
                     [
                         ui.input_numeric(
@@ -156,26 +165,8 @@ def analysis_config_ui():
                     ui.output_ui("histogram_preview_ui"),
                     col_widths=col_widths,  # type: ignore
                 )
-            case AnalysisType.MEAN:
-                return ui.layout_columns(
-                    [
-                        ui.input_numeric(
-                            "lower",
-                            ["Lower", ui.output_ui("bounds_tooltip_ui")],
-                            lower_bounds().get(name, 0),
-                            width=label_width,
-                        ),
-                        ui.input_numeric(
-                            "upper",
-                            "Upper",
-                            upper_bounds().get(name, 10),
-                            width=label_width,
-                        ),
-                        ui.output_ui("optional_weight_ui"),
-                    ],
-                    ui.output_ui("mean_preview_ui"),
-                    col_widths=col_widths,  # type: ignore
-                )
+            case mean.name:
+                return mean.shiny.mean_ui(name)
 
     @render.ui
     def bounds_tooltip_ui():
@@ -259,19 +250,6 @@ def histogram_preview_ui():
             ),
         ]
 
-    @render.ui
-    def mean_preview_ui():
-        # accuracy, histogram = accuracy_histogram()
-        return [
-            ui.p(
-                """
-                Since the mean is just a single number,
-                there is not a preview visualization.
-                """
-            ),
-            output_code_sample("Column Definition", "column_code"),
-        ]
-
     @render.data_frame
     def data_frame():
         accuracy, histogram = accuracy_histogram()