Skip to content

Commit

Permalink
Tuning for multiple columns part 2: Find candidate parameters for multiple aggregation (#524)
Browse files Browse the repository at this point in the history
  • Loading branch information
dvadym authored Sep 10, 2024
1 parent 71875ea commit 916bd8e
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 173 deletions.
6 changes: 4 additions & 2 deletions analysis/data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@ class MultiParameterConfiguration:
"""
max_partitions_contributed: Sequence[int] = None
max_contributions_per_partition: Sequence[int] = None
min_sum_per_partition: Sequence[float] = None
max_sum_per_partition: Sequence[float] = None
min_sum_per_partition: Union[Sequence[float],
Sequence[Sequence[float]]] = None
max_sum_per_partition: Union[Sequence[float],
Sequence[Sequence[float]]] = None
noise_kind: Sequence[pipeline_dp.NoiseKind] = None
partition_selection_strategy: Sequence[
pipeline_dp.PartitionSelectionStrategy] = None
Expand Down
42 changes: 24 additions & 18 deletions analysis/dp_strategy_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"""Choosing DP Strategy (i.e. noise_kind, partition selection strategy etc)
based on contribution bounding params."""
from dataclasses import dataclass
from typing import Optional
from typing import List, Optional

import pipeline_dp
from pipeline_dp import aggregate_params
Expand All @@ -41,38 +41,45 @@ class DPStrategySelector:
"""

def __init__(self, epsilon: float, delta: float,
metric: Optional[pipeline_dp.Metric],
is_public_partitions: bool):
metrics: List[pipeline_dp.Metric], is_public_partitions: bool):
input_validators.validate_epsilon_delta(epsilon, delta,
"DPStrategySelector")
if delta == 0 and not is_public_partitions:
raise ValueError("DPStrategySelector: when private partition "
"selection is used, delta must be positive")
self._epsilon = epsilon
self._delta = delta
self._metric = metric
self._metrics = metrics
self._is_public_partitions = is_public_partitions

@property
def is_public_partitions(self) -> bool:
return self._is_public_partitions

@property
def metric(self) -> Optional[pipeline_dp.Metric]:
return self._metric
def metrics(self) -> List[pipeline_dp.Metric]:
return self._metrics

def get_dp_strategy(
self, sensitivities: dp_computations.Sensitivities) -> DPStrategy:
"""Chooses DPStrategy for given sensitivities."""
if self._metric is None:
if not self._metrics:
# No metrics were requested: this is the select-partitions case.
return self._get_strategy_for_select_partition(sensitivities.l0)

n_metrics = len(self._metrics)
# Having n metrics is equivalent to contributing to n times more
# partitions, so the l0 sensitivity is scaled by n.
scaled_sensitivities = dp_computations.Sensitivities(
l0=sensitivities.l0 * n_metrics, linf=sensitivities.linf)

if self._is_public_partitions:
return self._get_dp_strategy_for_public_partitions(sensitivities)
if self.use_post_aggregation_thresholding(self._metric):
return self._get_dp_strategy_for_public_partitions(
scaled_sensitivities)
if self.use_post_aggregation_thresholding(self._metrics):
return self._get_dp_strategy_with_post_aggregation_threshold(
sensitivities.l0)
return self._get_dp_strategy_private_partition(sensitivities)
scaled_sensitivities.l0)
return self._get_dp_strategy_private_partition(scaled_sensitivities)

def _get_strategy_for_select_partition(self,
l0_sensitivity: int) -> DPStrategy:
Expand All @@ -92,7 +99,7 @@ def _get_dp_strategy_for_public_partitions(

def _get_dp_strategy_with_post_aggregation_threshold(
self, l0_sensitivity: int) -> DPStrategy:
assert self._metric == pipeline_dp.Metrics.PRIVACY_ID_COUNT
assert pipeline_dp.Metrics.PRIVACY_ID_COUNT in self._metrics
# Half delta goes to the noise, the other half for partition selection.
# For more details see
# https://github.com/google/differential-privacy/blob/main/common_docs/Delta_For_Thresholding.pdf
Expand Down Expand Up @@ -144,9 +151,9 @@ def select_noise_kind(
return pipeline_dp.NoiseKind.GAUSSIAN
return pipeline_dp.NoiseKind.LAPLACE

def use_post_aggregation_thresholding(self,
metric: pipeline_dp.Metric) -> bool:
return metric == pipeline_dp.Metrics.PRIVACY_ID_COUNT
def use_post_aggregation_thresholding(
self, metrics: List[pipeline_dp.Metric]) -> bool:
return pipeline_dp.Metrics.PRIVACY_ID_COUNT in metrics

def select_partition_selection_strategy(
self, epsilon: float, delta: float,
Expand Down Expand Up @@ -191,6 +198,5 @@ def create_mechanism(strategy: pipeline_dp.PartitionSelectionStrategy):
class DPStrategySelectorFactory:

def create(self, epsilon: float, delta: float,
metric: Optional[pipeline_dp.Metric],
is_public_partitions: bool):
return DPStrategySelector(epsilon, delta, metric, is_public_partitions)
metrics: List[pipeline_dp.Metric], is_public_partitions: bool):
return DPStrategySelector(epsilon, delta, metrics, is_public_partitions)
Loading

0 comments on commit 916bd8e

Please sign in to comment.