Skip to content

Commit 68bdbb2

Browse files
Differential Privacy Team authored and dibakch committed
Privacy-on-Beam: support for quantiles, C++ accounting: fast delta
C++ DP Lib:
* Migrated to absl::Status

Privacy-on-Beam:
* Implement QuantilesPerKey along with pbeamtest support
* Updated dependencies

C++ accounting library:
* Fast computation of delta without convolution

GitOrigin-RevId: 9faba9cd2873465e0f52bc2f9304dd9fa27e6998
Change-Id: I908cb6281e0316be82ae7264a6a79dc3ac126f03
1 parent 2d07a0c commit 68bdbb2

26 files changed

+2195
-173
lines changed

cc/accounting/privacy_loss_distribution.cc

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "absl/strings/str_format.h"
2222
#include "accounting/common/common.h"
2323
#include "accounting/convolution.h"
24+
#include "base/status_macros.h"
2425

2526
namespace differential_privacy {
2627
namespace accounting {
@@ -206,8 +207,8 @@ PrivacyLossDistribution::CreateForPrivacyParameters(
206207
discretization_interval, /*infinity_mass=*/delta, rounded_pmf));
207208
}
208209

209-
absl::Status PrivacyLossDistribution::Compose(
210-
const PrivacyLossDistribution& other_pld, double tail_mass_truncation) {
210+
absl::Status PrivacyLossDistribution::ValidateComposition(
211+
const PrivacyLossDistribution& other_pld) const {
211212
if (other_pld.DiscretizationInterval() != discretization_interval_) {
212213
return absl::InvalidArgumentError(absl::StrFormat(
213214
"Cannot compose, discretization intervals are different "
@@ -220,6 +221,13 @@ absl::Status PrivacyLossDistribution::Compose(
220221
"Cannot compose, estimate types are different");
221222
}
222223

224+
return absl::OkStatus();
225+
}
226+
227+
absl::Status PrivacyLossDistribution::Compose(
228+
const PrivacyLossDistribution& other_pld, double tail_mass_truncation) {
229+
RETURN_IF_ERROR(ValidateComposition(other_pld));
230+
223231
double new_infinity_mass = infinity_mass_ + other_pld.InfinityMass() -
224232
infinity_mass_ * other_pld.InfinityMass();
225233

@@ -237,6 +245,51 @@ absl::Status PrivacyLossDistribution::Compose(
237245
return absl::OkStatus();
238246
}
239247

248+
base::StatusOr<double>
249+
PrivacyLossDistribution::GetDeltaForEpsilonForComposedPLD(
250+
const PrivacyLossDistribution& other_pld, double epsilon) const {
251+
RETURN_IF_ERROR(ValidateComposition(other_pld));
252+
253+
UnpackedProbabilityMassFunction this_pmf =
254+
UnpackProbabilityMassFunction(probability_mass_function_);
255+
UnpackedProbabilityMassFunction other_pmf =
256+
UnpackProbabilityMassFunction(other_pld.probability_mass_function_);
257+
258+
// Compute the hockey stick divergence using equation (2) in the
259+
// supplementary material. other_cumulative_upper_mass below represents the
260+
// summation in equation (3) and other_cumulative_lower_mass represents the
261+
// summation in equation (4).
262+
263+
double other_cumulative_upper_mass = 0;
264+
double other_cumulative_lower_mass = 0;
265+
int current_idx = other_pmf.items.size() - 1;
266+
double delta = 0;
267+
268+
for (int this_idx = 0; this_idx < this_pmf.items.size(); ++this_idx) {
269+
double this_privacy_loss =
270+
discretization_interval_ * (this_idx + this_pmf.min_key);
271+
double this_probability_mass = this_pmf.items[this_idx];
272+
while (current_idx >= 0) {
273+
double other_privacy_loss = other_pld.discretization_interval_ *
274+
(current_idx + other_pmf.min_key);
275+
if (other_privacy_loss + this_privacy_loss <= epsilon) break;
276+
other_cumulative_upper_mass += other_pmf.items[current_idx];
277+
other_cumulative_lower_mass +=
278+
other_pmf.items[current_idx] / std::exp(other_privacy_loss);
279+
--current_idx;
280+
}
281+
delta += this_probability_mass * (other_cumulative_upper_mass -
282+
std::exp(epsilon - this_privacy_loss) *
283+
other_cumulative_lower_mass);
284+
}
285+
286+
// The probability that the composed privacy loss is infinite.
287+
double composed_infinity_mass = infinity_mass_ + other_pld.InfinityMass() -
288+
infinity_mass_ * other_pld.InfinityMass();
289+
290+
return delta + composed_infinity_mass;
291+
}
292+
240293
void PrivacyLossDistribution::Compose(int num_times) {
241294
double new_infinity_mass = 1 - pow((1 - infinity_mass_), num_times);
242295

cc/accounting/privacy_loss_distribution.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,27 @@ class PrivacyLossDistribution {
146146
// Observation 1 in the supplementary material.)
147147
double GetEpsilonForDelta(double delta) const;
148148

149-
// Composes other PLD into itself. The discretization intervals should be
150-
// the same otherwise failure status is returned. Additional parameter:
149+
// Validates that a given PLD can be composed with this PLD. The
150+
// discretization intervals and the estimate types should be the same;
151+
// otherwise failure status is returned.
152+
absl::Status ValidateComposition(
153+
const PrivacyLossDistribution& other_pld) const;
154+
155+
// Composes other PLD into itself. Additional parameter:
151156
// tail_mass_truncation: an upper bound on the tails of the probability
152157
// mass of the PLD that might be truncated.
153158
absl::Status Compose(const PrivacyLossDistribution& other_pld,
154159
double tail_mass_truncation = 1e-15);
155160

161+
// Computes delta for given epsilon for the result of composing this PLD and a
162+
// given PLD. Note that this function does not modify the current PLD.
163+
//
164+
// The output of this function should be the same as first composing this PLD
165+
// and other_pld, and then calling GetDeltaForEpsilon on the resulting
166+
// PLD. The main advantage is that this function is faster.
167+
base::StatusOr<double> GetDeltaForEpsilonForComposedPLD(
168+
const PrivacyLossDistribution& other_pld, double epsilon) const;
169+
156170
// Composes PLD into itself num_times.
157171
void Compose(int num_times);
158172

cc/accounting/privacy_loss_distribution_test.cc

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,25 @@ TEST(PrivacyLossDistributionTest, Compose) {
160160
EXPECT_FALSE(pld->Pmf().empty());
161161
}
162162

163+
TEST(PrivacyLossDistributionTest, GetDeltaForEpsilonForComposedPLD) {
164+
ProbabilityMassFunction pmf = {{0, 0.1}, {1, 0.7}, {2, 0.1}};
165+
std::unique_ptr<PrivacyLossDistribution> pld =
166+
PrivacyLossDistributionTestPeer::Create(pmf,
167+
/*infinity_mass=*/0.1,
168+
/*discretization_interval=*/0.4);
169+
170+
ProbabilityMassFunction pmf_other = {{1, 0.1}, {2, 0.6}, {3, 0.25}};
171+
std::unique_ptr<PrivacyLossDistribution> pld_other =
172+
PrivacyLossDistributionTestPeer::Create(pmf_other,
173+
/*infinity_mass=*/0.05,
174+
/*discretization_interval=*/0.4);
175+
176+
base::StatusOr<double> delta =
177+
pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1.1);
178+
ASSERT_OK(delta);
179+
EXPECT_THAT(*delta, DoubleNear(0.2956, kMaxError));
180+
}
181+
163182
TEST(PrivacyLossDistributionTest, ComposeTruncation) {
164183
ProbabilityMassFunction pmf = {{0, 0.1}, {1, 0.7}, {2, 0.1}};
165184
std::unique_ptr<PrivacyLossDistribution> pld =
@@ -209,10 +228,16 @@ TEST(PrivacyLossDistributionTest,
209228
std::unique_ptr<PrivacyLossDistribution> pld_other =
210229
PrivacyLossDistributionTestPeer::Create(pmf, 0.3, 2e-4);
211230

212-
EXPECT_THAT(pld->Compose(*pld_other),
213-
StatusIs(absl::InvalidArgumentError("").code(),
214-
HasSubstr("Cannot compose, discretization intervals "
215-
"are different - 0.000200 vs 0.000100")));
231+
std::string error_msg = "discretization interval";
232+
EXPECT_THAT(
233+
pld->ValidateComposition(*pld_other),
234+
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
235+
EXPECT_THAT(
236+
pld->Compose(*pld_other),
237+
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
238+
EXPECT_THAT(
239+
pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1),
240+
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
216241
}
217242

218243
TEST(PrivacyLossDistributionTest, ComposeErrorDifferentEstimateTypes) {
@@ -227,9 +252,16 @@ TEST(PrivacyLossDistributionTest, ComposeErrorDifferentEstimateTypes) {
227252
pmf, /*infinity_mass=*/0.3, /*discretization_interval=*/1e-4,
228253
/*estimate_type=*/EstimateType::kOptimistic);
229254

230-
EXPECT_THAT(pld->Compose(*pld_other),
231-
StatusIs(absl::StatusCode::kInvalidArgument,
232-
Eq("Cannot compose, estimate types are different")));
255+
std::string error_msg = "estimate type";
256+
EXPECT_THAT(
257+
pld->ValidateComposition(*pld_other),
258+
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
259+
EXPECT_THAT(
260+
pld->Compose(*pld_other),
261+
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
262+
EXPECT_THAT(
263+
pld->GetDeltaForEpsilonForComposedPLD(*pld_other, /*epsilon=*/1),
264+
StatusIs(absl::StatusCode::kInvalidArgument, HasSubstr(error_msg)));
233265
}
234266

235267
struct GetEpsilonFromDeltaParam {

cc/algorithms/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ cc_library(
486486
"//base:logging",
487487
"@boringssl//:crypto",
488488
"@com_google_absl//absl/base:core_headers",
489+
"@com_google_absl//absl/numeric:bits",
489490
"@com_google_absl//absl/synchronization",
490491
],
491492
)

cc/algorithms/rand.cc

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,12 @@
2323
#include <limits>
2424

2525
#include "base/logging.h"
26+
#include "absl/numeric/bits.h"
2627
#include "absl/synchronization/mutex.h"
2728
#include "openssl/rand.h"
2829

2930
namespace differential_privacy {
3031
namespace {
31-
// From absl/base/internal/bits.h.
32-
int CountLeadingZeros64Slow(uint64_t n) {
33-
int zeroes = 60;
34-
if (n >> 32) zeroes -= 32, n >>= 32;
35-
if (n >> 16) zeroes -= 16, n >>= 16;
36-
if (n >> 8) zeroes -= 8, n >>= 8;
37-
if (n >> 4) zeroes -= 4, n >>= 4;
38-
return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
39-
}
40-
4132
// We usually expect DBL_MANT_DIG to be 53.
4233
static_assert(DBL_MANT_DIG < 64,
4334
"Double mantissa must have less than 64 bits.");
@@ -59,7 +50,7 @@ double UniformDouble() {
5950
uint64_t j = uint_64_number >> kMantDigits;
6051

6152
// exponent is the number of leading zeros in the first 11 bits plus one.
62-
uint64_t exponent = CountLeadingZeros64Slow(j) - kMantDigits + 1;
53+
uint64_t exponent = absl::countl_zero(j) - kMantDigits + 1;
6354

6455
// Extra geometric sampling is needed only when the leading 11 bits are all 0.
6556
if (j == 0) {
@@ -84,7 +75,7 @@ uint64_t Geometric() {
8475
uint64_t r = 0;
8576
while (r == 0 && result < 1023) {
8677
r = SecureURBG::GetSingleton()();
87-
result += CountLeadingZeros64Slow(r);
78+
result += absl::countl_zero(r);
8879
}
8980
return result;
9081
}

cc/cc_differential_privacy_deps.bzl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ def cc_differential_privacy_deps():
2727
# Abseil
2828
http_archive(
2929
name = "com_google_absl",
30-
url = "https://github.com/abseil/abseil-cpp/archive/20200923.3.tar.gz",
31-
sha256 = "ebe2ad1480d27383e4bf4211e2ca2ef312d5e6a09eba869fd2e8a5c5d553ded2",
32-
strip_prefix = "abseil-cpp-20200923.3",
30+
url = "https://github.com/abseil/abseil-cpp/archive/20210324.0.tar.gz",
31+
sha256 = "dd7db6815204c2a62a2160e32c55e97113b0a0178b2f090d6bab5ce36111db4b",
32+
strip_prefix = "abseil-cpp-20210324.0",
3333
)
3434

3535
# Common bazel rules

cc/docs/algorithms/algorithm.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ an error.
110110

111111
```
112112
Summary Serialize();
113-
util::Status Merge(const Summary& summary);
113+
absl::Status Merge(const Summary& summary);
114114
```
115115

116116
Serialization and merging can allow these algorithms to be used in a distributed

cc/postgres/postgres.BUILD

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
licenses(["notice"]) # Apache v2.0
1717

18-
load("@rules_foreign_cc//tools/build_defs:configure.bzl", "configure_make")
18+
load("@rules_foreign_cc//foreign_cc:configure.bzl", "configure_make")
1919

2020
package(
2121
default_visibility = ["//visibility:public"],
@@ -50,7 +50,7 @@ configure_make(
5050
"CFLAGS": "-fPIC",
5151
},
5252
}),
53-
headers_only = True,
53+
out_headers_only = True,
5454
lib_source = "@postgres//:all",
5555
)
5656

cc/testing/stochastic_tester_test.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ class NonDpSum : public Algorithm<T> {
5151
void ResetState() override { result_ = 0; }
5252

5353
Summary Serialize() const override { return Summary(); }
54-
base::Status Merge(const Summary& summary) override {
55-
return base::OkStatus();
54+
absl::Status Merge(const Summary& summary) override {
55+
return absl::OkStatus();
5656
}
5757
int64_t MemoryUsed() override { return sizeof(NonDpSum<T>); };
5858

@@ -74,8 +74,8 @@ class NonDpCount : public Algorithm<T> {
7474
void ResetState() override { result_ = 0; }
7575

7676
Summary Serialize() const override { return Summary(); }
77-
base::Status Merge(const Summary& summary) override {
78-
return base::OkStatus();
77+
absl::Status Merge(const Summary& summary) override {
78+
return absl::OkStatus();
7979
}
8080
int64_t MemoryUsed() override { return sizeof(NonDpCount<T>); };
8181

@@ -166,8 +166,8 @@ class AlwaysError : public Algorithm<T> {
166166
void ResetState() override {}
167167

168168
Summary Serialize() const override { return Summary(); }
169-
base::Status Merge(const Summary& summary) override {
170-
return base::OkStatus();
169+
absl::Status Merge(const Summary& summary) override {
170+
return absl::OkStatus();
171171
}
172172
int64_t MemoryUsed() override { return sizeof(AlwaysError<T>); };
173173

go/dpagg/quantiles.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,11 @@ import (
2525
"github.com/google/differential-privacy/go/noise"
2626
)
2727

28+
// Constants used for QuantileTrees.
2829
const (
2930
numericalTolerance = 1e-6
30-
defaultTreeHeight = 4
31-
defaultBranchingFactor = 16
31+
DefaultTreeHeight = 4
32+
DefaultBranchingFactor = 16
3233
rootIndex = 0
3334
// Fraction a node needs to contribute to the total count of itself and its siblings to be
3435
// considered during the search for a particular quantile. The idea of alpha is to filter out
@@ -61,7 +62,7 @@ type BoundedQuantiles struct {
6162
branchingFactor int
6263
l0Sensitivity int64
6364
lInfSensitivity float64
64-
noise noise.Noise
65+
Noise noise.Noise
6566
noiseKind noise.Kind // necessary for serializing noise.Noise information
6667

6768
// State variables
@@ -128,15 +129,15 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles {
128129
// Check tree height and branching factor, set defaults if not specified, and use them to compute numLeaves and leftmostLeafIndex.
129130
treeHeight := opt.TreeHeight
130131
if treeHeight == 0 {
131-
treeHeight = defaultTreeHeight
132+
treeHeight = DefaultTreeHeight
132133
}
133134
if err := checks.CheckTreeHeight("NewBoundedQuantiles", treeHeight); err != nil {
134135
// TODO: do not exit the program from within library code
135136
log.Fatalf("CheckTreeHeight failed with %v", err)
136137
}
137138
branchingFactor := opt.BranchingFactor
138139
if branchingFactor == 0 {
139-
branchingFactor = defaultBranchingFactor
140+
branchingFactor = DefaultBranchingFactor
140141
}
141142
if err := checks.CheckBranchingFactor("NewBoundedQuantiles", branchingFactor); err != nil {
142143
// TODO: do not exit the program from within library code
@@ -171,7 +172,7 @@ func NewBoundedQuantiles(opt *BoundedQuantilesOptions) *BoundedQuantiles {
171172
branchingFactor: branchingFactor,
172173
l0Sensitivity: l0Sensitivity,
173174
lInfSensitivity: lInfSensitivity,
174-
noise: n,
175+
Noise: n,
175176
noiseKind: noise.ToKind(n),
176177
tree: make(map[int]int64),
177178
noisedTree: make(map[int]float64),
@@ -328,7 +329,7 @@ func (bq *BoundedQuantiles) getNoisedCount(index int) float64 {
328329
return noisedCount
329330
}
330331
rawCount := bq.tree[index]
331-
noisedCount := bq.noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta)
332+
noisedCount := bq.Noise.AddNoiseFloat64(float64(rawCount), bq.l0Sensitivity, bq.lInfSensitivity, bq.epsilon, bq.delta)
332333
bq.noisedTree[index] = noisedCount
333334
return noisedCount
334335
}
@@ -359,6 +360,7 @@ func (bq *BoundedQuantiles) Merge(bq2 *BoundedQuantiles) {
359360
for index, count := range bq2.tree {
360361
bq.tree[index] += count
361362
}
363+
bq2.state = merged
362364
}
363365

364366
func checkMergeBoundedQuantiles(bq1, bq2 *BoundedQuantiles) error {
@@ -421,7 +423,7 @@ func (bq *BoundedQuantiles) GobEncode() ([]byte, error) {
421423
Upper: bq.upper,
422424
NumLeaves: bq.numLeaves,
423425
LeftmostLeafIndex: bq.leftmostLeafIndex,
424-
NoiseKind: noise.ToKind(bq.noise),
426+
NoiseKind: noise.ToKind(bq.Noise),
425427
QuantileTree: bq.tree,
426428
}
427429
bq.state = serialized
@@ -446,7 +448,7 @@ func (bq *BoundedQuantiles) GobDecode(data []byte) error {
446448
lower: enc.Lower,
447449
upper: enc.Upper,
448450
noiseKind: enc.NoiseKind,
449-
noise: noise.ToNoise(enc.NoiseKind),
451+
Noise: noise.ToNoise(enc.NoiseKind),
450452
numLeaves: enc.NumLeaves,
451453
leftmostLeafIndex: enc.LeftmostLeafIndex,
452454
tree: enc.QuantileTree,

0 commit comments

Comments
 (0)