Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ extend-ignore = E203,E501,E701

per-file-ignores =
# Ignore undefined names in templates.
*/code_generators/no-tests/*.py:F821,F401,E302
**/no-tests/*.py:F821,F401,E302
2 changes: 1 addition & 1 deletion .pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ filterwarnings =
ignore:Exception ignored in.*:pytest.PytestUnraisableExceptionWarning
ignore:unclosed <socket.*:ResourceWarning

addopts = --doctest-glob '*.md' --doctest-modules --ignore dp_wizard/utils/code_generators/no-tests --ignore dp_wizard/tests/fixtures/ --tracing=retain-on-failure
addopts = --doctest-glob '*.md' --doctest-modules --ignore-glob '**/no-tests' --ignore dp_wizard/tests/fixtures/ --tracing=retain-on-failure

# If an xfail starts passing unexpectedly, that should count as a failure:
xfail_strict=true
5 changes: 0 additions & 5 deletions dp_wizard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@
__version__ = (Path(__file__).parent / "VERSION").read_text().strip()


class AnalysisType:
    """Names of the available analysis types, as plain string constants."""

    HISTOGRAM = "Histogram"
    MEAN = "Mean"


def main(): # pragma: no cover
import shiny
from dp_wizard.utils.argparse_helpers import get_cli_info
Expand Down
11 changes: 11 additions & 0 deletions dp_wizard/analyses/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def get_analysis_by_name(name):  # pragma: no cover
    """Return the analysis module whose ``name`` attribute equals ``name``.

    Args:
        name: The analysis name to look up; compared against
            ``histogram.name`` and ``mean.name``.

    Returns:
        The matching module from ``dp_wizard.analyses``.

    Raises:
        ValueError: If ``name`` does not match any known analysis.
            (``ValueError`` is a subclass of ``Exception``, so callers
            that catch ``Exception`` are unaffected.)
    """
    # Avoid circular import:
    from dp_wizard.analyses import histogram, mean

    for analysis in (histogram, mean):
        if analysis.name == name:
            return analysis
    # Include the offending value so a bad lookup can be diagnosed.
    raise ValueError(f"Unrecognized analysis: {name!r}")
24 changes: 24 additions & 0 deletions dp_wizard/analyses/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Layout settings, weight options, and tooltip text shared by analysis UIs.
label_width = "10em"  # Just wide enough so the text isn't truncated.
col_widths = {
    # Controls stay roughly a constant width;
    # Graph expands to fill space.
    "sm": [4, 8],
    "md": [3, 9],
    "lg": [2, 10],
}

# Key of the weight choice that is selected by default.
default_weight = "2"
# Maps each weight value to a label.
weight_choices = {
    "1": "Less accurate",
    default_weight: "Default",
    "4": "More accurate",
}

# Explanatory text about why lower and upper bounds must be supplied.
bounds_tooltip_text = """
DP requires that we limit the sensitivity to the contributions
of any individual. To do this, we need an estimate of the lower
and upper bounds for each variable. We should not look at the
data when estimating the bounds! In this case, we could imagine
that "class year" would vary between 1 and 4, and we could limit
"grade" to values between 50 and 100.
"""
5 changes: 5 additions & 0 deletions dp_wizard/analyses/histogram/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .codegen import * # noqa: F401, F403


name = "Histogram"
has_bins = True
69 changes: 69 additions & 0 deletions dp_wizard/analyses/histogram/codegen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from dp_wizard.utils.code_generators._template import Template


def make_query(code_gen, identifier, accuracy_name, stats_name):
    """Fill in the "histogram_query" template and return the finished result.

    Args:
        code_gen: Object providing ``groups`` (used for GROUP_NAMES).
        identifier: Prefix for the generated bin and query names.
        accuracy_name: Expression substituted for ACCURACY_NAME.
        stats_name: Expression substituted for STATS_NAME.

    Returns:
        Whatever ``Template.finish()`` returns (presumably the generated
        code as text -- confirm against ``Template``).
    """
    template = Template("histogram_query", __file__)
    with_values = template.fill_values(
        BIN_NAME=f"{identifier}_bin",
        GROUP_NAMES=code_gen.groups,
    )
    with_expressions = with_values.fill_expressions(
        QUERY_NAME=f"{identifier}_query",
        ACCURACY_NAME=accuracy_name,
        STATS_NAME=stats_name,
    )
    return with_expressions.finish()


def make_output(code_gen, column_name, accuracy_name, stats_name):
    """Fill in the histogram output template for the current root template.

    The template name is built from ``code_gen.root_template`` as
    ``histogram_<root_template>_output``.

    Args:
        code_gen: Object providing ``root_template``, ``groups`` and
            ``_make_confidence_note()``.
        column_name: Value substituted for COLUMN_NAME.
        accuracy_name: Expression substituted for ACCURACY_NAME.
        stats_name: Expression substituted for HISTOGRAM_NAME.

    Returns:
        Whatever ``Template.finish()`` returns.
    """
    template_name = f"histogram_{code_gen.root_template}_output"
    template = Template(template_name, __file__)
    with_values = template.fill_values(
        COLUMN_NAME=column_name,
        GROUP_NAMES=code_gen.groups,
    )
    confidence_note = code_gen._make_confidence_note()
    with_expressions = with_values.fill_expressions(
        ACCURACY_NAME=accuracy_name,
        HISTOGRAM_NAME=stats_name,
        CONFIDENCE_NOTE=confidence_note,
    )
    return with_expressions.finish()


def make_report_kv(name, confidence, identifier):
    """Fill in the "histogram_report_kv" template and return the result.

    Args:
        name: Value substituted for NAME.
        confidence: Value substituted for CONFIDENCE.
        identifier: Prefix used to build the ``<identifier>_stats`` and
            ``<identifier>_accuracy`` expressions.

    Returns:
        Whatever ``Template.finish()`` returns.
    """
    template = Template("histogram_report_kv", __file__)
    with_values = template.fill_values(
        NAME=name,
        CONFIDENCE=confidence,
    )
    with_expressions = with_values.fill_expressions(
        IDENTIFIER_STATS=f"{identifier}_stats",
        IDENTIFIER_ACCURACY=f"{identifier}_accuracy",
    )
    return with_expressions.finish()


def make_column_config_block(column_name, lower_bound, upper_bound, bin_count):
    """Fill in the "histogram_config" template for one column.

    Generated identifiers are prefixed with the snake-cased column name.

    Args:
        column_name: Value substituted for COLUMN_NAME; also the source of
            the identifier prefix.
        lower_bound: Value substituted for LOWER_BOUND.
        upper_bound: Value substituted for UPPER_BOUND.
        bin_count: Value substituted for BIN_COUNT.

    Returns:
        Whatever ``Template.finish()`` returns.
    """
    # Imported locally, presumably to avoid a circular import -- confirm.
    from dp_wizard.utils.code_generators import _snake_case

    prefix = _snake_case(column_name)
    template = Template("histogram_config", __file__)
    with_expressions = template.fill_expressions(
        CUT_LIST_NAME=f"{prefix}_cut_points",
        CONFIG_NAME=f"{prefix}_config",
    )
    with_values = with_expressions.fill_values(
        LOWER_BOUND=lower_bound,
        UPPER_BOUND=upper_bound,
        BIN_COUNT=bin_count,
        COLUMN_NAME=column_name,
        BIN_COLUMN_NAME=f"{prefix}_bin",
    )
    return with_values.finish()
6 changes: 6 additions & 0 deletions dp_wizard/analyses/mean/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .codegen import * # noqa: F401, F403
from .shiny import * # noqa: F401, F403


name = "Mean"
has_bins = False
48 changes: 48 additions & 0 deletions dp_wizard/analyses/mean/codegen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from dp_wizard.utils.code_generators._template import Template


def make_query(code_gen, identifier, accuracy_name, stats_name):
    """Fill in the "mean_query" template and return the finished result.

    Args:
        code_gen: Object providing ``groups`` (used for GROUP_NAMES).
        identifier: Prefix for the generated query and config names.
        accuracy_name: Not used by this function.
        stats_name: Expression substituted for STATS_NAME.

    Returns:
        Whatever ``Template.finish()`` returns.
    """
    template = Template("mean_query", __file__)
    with_values = template.fill_values(
        GROUP_NAMES=code_gen.groups,
    )
    with_expressions = with_values.fill_expressions(
        QUERY_NAME=f"{identifier}_query",
        STATS_NAME=stats_name,
        CONFIG_NAME=f"{identifier}_config",
    )
    return with_expressions.finish()


def make_output(code_gen, column_name, accuracy_name, stats_name):
    """Return the finished mean output template for the current root template.

    The template name is ``mean_<root_template>_output``; no slots are
    filled. Only ``code_gen.root_template`` is read -- ``column_name``,
    ``accuracy_name`` and ``stats_name`` are not used.
    """
    template_name = f"mean_{code_gen.root_template}_output"
    template = Template(template_name, __file__)
    return template.finish()


def make_report_kv(name, confidence, identifier):
    """Fill in the "mean_report_kv" template and return the result.

    Only ``name`` is substituted (for NAME); ``confidence`` and
    ``identifier`` are not used.
    """
    template = Template("mean_report_kv", __file__)
    with_values = template.fill_values(
        NAME=name,
    )
    return with_values.finish()


def make_column_config_block(column_name, lower_bound, upper_bound, bin_count):
    """Fill in the "mean_config" template for one column.

    Args:
        column_name: Value substituted for COLUMN_NAME; its snake-cased
            form prefixes the generated config name.
        lower_bound: Value substituted for LOWER_BOUND.
        upper_bound: Value substituted for UPPER_BOUND.
        bin_count: Not used by this function.

    Returns:
        Whatever ``Template.finish()`` returns.
    """
    # Imported locally, presumably to avoid a circular import -- confirm.
    from dp_wizard.utils.code_generators import _snake_case

    prefix = _snake_case(column_name)
    template = Template("mean_config", __file__)
    with_expressions = template.fill_expressions(
        CONFIG_NAME=f"{prefix}_config",
    )
    with_values = with_expressions.fill_values(
        COLUMN_NAME=column_name,
        LOWER_BOUND=lower_bound,
        UPPER_BOUND=upper_bound,
    )
    return with_values.finish()
80 changes: 80 additions & 0 deletions dp_wizard/analyses/mean/shiny.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from shiny import ui, render, module, reactive, Inputs, Outputs, Session
from dp_wizard.app.components.outputs import demo_tooltip, hide_if, output_code_sample
from dp_wizard.analyses.common import (
default_weight,
label_width,
col_widths,
weight_choices,
bounds_tooltip_text,
)


@module.ui
def mean_ui():  # pragma: no cover
    """Return the UI placeholder for the "mean_inputs_preview_ui" output."""
    inputs_and_preview = ui.output_ui("mean_inputs_preview_ui")
    return inputs_and_preview


@module.server
def mean_server(
    input: Inputs,
    output: Outputs,
    session: Session,
    name: str,
    lower_bounds: reactive.Value[dict[str, float]],
    upper_bounds: reactive.Value[dict[str, float]],
    is_single_column: bool,
    is_demo: bool,
):  # pragma: no cover
    """Register the renderers that back the mean analysis UI.

    Args:
        input: Shiny inputs for this module.
        output: Shiny outputs for this module.
        session: Shiny session for this module.
        name: Key used to look up this column's bounds in ``lower_bounds``
            and ``upper_bounds``.
        lower_bounds: Reactive dict of lower bounds; 0 is used when ``name``
            is absent.
        upper_bounds: Reactive dict of upper bounds; 10 is used when ``name``
            is absent.
        is_single_column: Passed to ``hide_if`` for the weight selector.
        is_demo: Passed to ``demo_tooltip`` for the bounds tooltip.
    """

    @render.ui
    def mean_inputs_preview_ui():
        # Two-part layout: numeric inputs (plus optional weight), then preview.
        return ui.layout_columns(
            [
                ui.input_numeric(
                    "lower",
                    ["Lower", ui.output_ui("bounds_tooltip_ui")],
                    lower_bounds().get(name, 0),
                    width=label_width,
                ),
                ui.input_numeric(
                    "upper",
                    "Upper",
                    upper_bounds().get(name, 10),
                    width=label_width,
                ),
                ui.output_ui("optional_weight_ui"),
            ],
            ui.output_ui("mean_preview_ui"),
            col_widths=col_widths,  # type: ignore
        )

    @render.ui
    def mean_preview_ui():
        # NOTE(review): "column_code" is not defined in this function;
        # confirm that the id resolves from inside this module.
        return [
            ui.p(
                """
Since the mean is just a single number,
there is not a preview visualization.
"""
            ),
            output_code_sample("Column Definition", "column_code"),
        ]

    @render.ui
    def bounds_tooltip_ui():
        return demo_tooltip(
            is_demo,
            bounds_tooltip_text,
        )

    @render.ui
    def optional_weight_ui():
        # Presumably hides the weight selector when there is only one column;
        # verify against hide_if.
        # NOTE(review): no "weight_tooltip_ui" renderer is defined in this
        # function; confirm one exists, or the tooltip slot will stay empty.
        return hide_if(
            is_single_column,
            ui.input_select(
                "weight",
                ["Weight", ui.output_ui("weight_tooltip_ui")],
                choices=weight_choices,
                selected=default_weight,
                width=label_width,
            ),
        )
52 changes: 15 additions & 37 deletions dp_wizard/app/components/column_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from shiny.types import SilentException
import polars as pl

from dp_wizard import AnalysisType
from dp_wizard.analyses import histogram, mean
from dp_wizard.utils.dp_helper import make_accuracy_histogram
from dp_wizard.utils.shared import plot_histogram
from dp_wizard.utils.code_generators import make_column_config_block
Expand All @@ -14,7 +14,7 @@
from dp_wizard.utils.mock_data import mock_data, ColumnDef


default_analysis_type = AnalysisType.HISTOGRAM
default_analysis_type = histogram.name
default_weight = "2"
label_width = "10em" # Just wide enough so the text isn't truncated.

Expand All @@ -26,7 +26,7 @@ def column_ui(): # pragma: no cover
ui.input_select(
"analysis_type",
None,
[AnalysisType.HISTOGRAM, AnalysisType.MEAN],
[histogram.name, mean.name],
width=label_width,
),
ui.output_ui("analysis_config_ui"),
Expand All @@ -51,6 +51,15 @@ def column_server(
is_demo: bool,
is_single_column: bool,
): # pragma: no cover
mean.shiny.mean_server(
name,
name=name,
lower_bounds=lower_bounds,
upper_bounds=upper_bounds,
is_single_column=is_single_column,
is_demo=is_demo,
)

@reactive.effect
def _set_hidden_inputs():
# TODO: Is isolate still needed?
Expand Down Expand Up @@ -130,7 +139,7 @@ def analysis_config_ui():
"lg": [2, 10],
}
match input.analysis_type():
case AnalysisType.HISTOGRAM:
case histogram.name:
return ui.layout_columns(
[
ui.input_numeric(
Expand All @@ -156,26 +165,8 @@ def analysis_config_ui():
ui.output_ui("histogram_preview_ui"),
col_widths=col_widths, # type: ignore
)
case AnalysisType.MEAN:
return ui.layout_columns(
[
ui.input_numeric(
"lower",
["Lower", ui.output_ui("bounds_tooltip_ui")],
lower_bounds().get(name, 0),
width=label_width,
),
ui.input_numeric(
"upper",
"Upper",
upper_bounds().get(name, 10),
width=label_width,
),
ui.output_ui("optional_weight_ui"),
],
ui.output_ui("mean_preview_ui"),
col_widths=col_widths, # type: ignore
)
case mean.name:
return mean.shiny.mean_ui(name)

@render.ui
def bounds_tooltip_ui():
Expand Down Expand Up @@ -259,19 +250,6 @@ def histogram_preview_ui():
),
]

@render.ui
def mean_preview_ui():
# accuracy, histogram = accuracy_histogram()
return [
ui.p(
"""
Since the mean is just a single number,
there is not a preview visualization.
"""
),
output_code_sample("Column Definition", "column_code"),
]

@render.data_frame
def data_frame():
accuracy, histogram = accuracy_histogram()
Expand Down
Loading
Loading