Skip to content

Commit

Permalink
CompoundCombiner test
Browse the repository at this point in the history
  • Loading branch information
dvadym committed Sep 11, 2024
1 parent 6f92164 commit 1675b5f
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
10 changes: 6 additions & 4 deletions analysis/per_partition_combiners.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import abc
import copy
from dataclasses import dataclass
from typing import Any, Iterable, List, Optional, Tuple, Union
from typing import Any, Iterable, List, Optional, Sequence, Tuple, Union
import numpy as np
import math

Expand All @@ -31,7 +31,7 @@

# Corresponds to data aggregated per (privacy_id, partition_key):
# (count, sum, num_partition_privacy_id_contributes).
PreaggregatedData = Tuple[int, Union[float, tuple[float]], int]
PreaggregatedData = Tuple[int, Union[float, Sequence[float]], int]


class UtilityAnalysisCombiner(pipeline_dp.Combiner):
Expand Down Expand Up @@ -239,7 +239,7 @@ def __init__(self,
metric: pipeline_dp.Metrics = pipeline_dp.Metrics.SUM,
i_column: Optional[int] = None):
self._spec = spec
self._params = copy.deepcopy(params)
self._params = params
self._metric = metric
self._i_column = i_column

Expand Down Expand Up @@ -383,7 +383,9 @@ class CompoundCombiner(pipeline_dp.combiners.CompoundCombiner):
# improvements, on converting from sparse to dense mode, the data are
# converted to NumPy arrays. And internal combiners perform NumPy vector
# aggregations.
SparseAccumulatorType = Tuple[List[int], List[float], List[int]]
SparseAccumulatorType = Tuple[List[int], Union[List[float],
List[Sequence[float]]],
List[int]]
DenseAccumulatorType = List[Any]
AccumulatorType = Tuple[Optional[SparseAccumulatorType],
Optional[DenseAccumulatorType]]
Expand Down
36 changes: 36 additions & 0 deletions analysis/tests/per_partition_combiners_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""UtilityAnalysisCountCombinerTest."""
import copy
import dataclasses

import numpy as np
Expand Down Expand Up @@ -488,7 +489,25 @@ def _create_combiner(self) -> combiners.CompoundCombiner:
return combiners.CompoundCombiner([count_combiner],
n_sum_aggregations=0)

def _create_combiner_2_columns(self) -> combiners.CompoundCombiner:
    """Returns a CompoundCombiner wrapping two SumCombiners.

    The first SumCombiner reads column 0 with the params returned by
    _create_combiner_params_for_sum(0, 1). The second reads column 1 with a
    deep copy of those params whose max_sum_per_partition is set to 5.
    """
    spec, first_params = _create_combiner_params_for_sum(0, 1)
    first_sum = combiners.SumCombiner(spec, first_params, i_column=0)

    # The copy is taken after the first combiner is constructed and is
    # independent of first_params, so changing the bound below does not
    # affect the first combiner.
    second_params = copy.deepcopy(first_params)
    second_params.max_sum_per_partition = 5
    second_sum = combiners.SumCombiner(spec, second_params, i_column=1)

    internal_combiners = [first_sum, second_sum]
    return combiners.CompoundCombiner(internal_combiners,
                                      n_sum_aggregations=2)

def test_create_accumulator_empty_data_multi_columns(self):
    """Empty input with the two-sum-column combiner.

    The sparse accumulator is expected to hold a single zero entry whose
    sum component is a pair (one value per sum column), and no dense
    accumulator is created.
    """
    # Name fixed: this body uses the 2-column combiner, so it is the
    # multi-column variant of the empty-data test.
    sparse, dense = self._create_combiner_2_columns().create_accumulator(())
    self.assertEqual(sparse, ([0], [(0, 0)], [0]))
    self.assertIsNone(dense)

def test_create_accumulator_empty_data(self):
    """Empty input with the combiner returned by _create_combiner().

    The sparse accumulator is expected to hold a single zero entry with a
    scalar sum, and no dense accumulator is created.
    """
    sparse, dense = self._create_combiner().create_accumulator(())
    self.assertEqual(sparse, ([0], [0], [0]))
    self.assertIsNone(dense)
Expand All @@ -502,6 +521,13 @@ def test_create_accumulator(self):
self.assertEqual(([len(data)], [sum(data)], [n_partitions]), sparse)
self.assertIsNone(dense)

def test_create_accumulator_2_sum_columns(self):
    """create_accumulator with two sum columns stays in sparse mode.

    The pre-aggregated input is (count, sums, n_partitions), where sums
    carries one value per sum column.
    """
    count, sums, n_partitions = 1, [2, 3], 4
    two_column_combiner = self._create_combiner_2_columns()

    accumulator = two_column_combiner.create_accumulator(
        [count, sums, n_partitions])
    sparse, dense = accumulator

    # Each component of the input is wrapped in a one-element list.
    self.assertEqual(([1], [[2, 3]], [4]), sparse)
    self.assertIsNone(dense)

def test_to_dense(self):
combiner = self._create_combiner()
sparse_acc = ([1, 3], [10, 20], [100, 200])
Expand All @@ -510,6 +536,16 @@ def test_to_dense(self):
self.assertEqual(2, num_privacy_ids)
self.assertSequenceEqual((4, 0, -1.0, -2.98, 0.0298), count_acc)

def test_to_dense_2_columns(self):
    """_to_dense on a sparse accumulator with two sum columns.

    The sparse accumulator holds two entries; each sum entry is a pair with
    one value per column. The dense result is unpacked as
    (num_privacy_ids, (accumulator for column 0, accumulator for column 1)).
    """
    sparse_acc = ([1, 3], [(10, 20), (100, 200)], [100, 200])
    dense_acc = self._create_combiner_2_columns()._to_dense(sparse_acc)

    num_privacy_ids, per_column_accs = dense_acc
    first_column_acc, second_column_acc = per_column_accs

    self.assertEqual(2, num_privacy_ids)
    expected_first = (110, 0, -108, -1.9849999999999999,
                      0.014875000000000001)
    self.assertSequenceEqual(expected_first, first_column_acc)
    expected_second = (220, 0, -210, -9.925, 0.371875)
    self.assertSequenceEqual(expected_second, second_column_acc)

def test_merge_sparse(self):
combiner = self._create_combiner()
sparse_acc1 = ([1], [10], [100])
Expand Down

0 comments on commit 1675b5f

Please sign in to comment.