Hello guys,
I have converted the hospital example to logistic regression (binary classification), and it seems to work, but could somebody please check my math?
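For reference, this is the update I believe the code implements: gradient descent on the binary cross-entropy loss, with the sigmoid applied to the linear scores (please double-check the derivation):

$$\ell(w) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y_i \log \sigma(x_i^\top w) + (1 - y_i)\log\big(1 - \sigma(x_i^\top w)\big)\Big], \qquad \sigma(z) = \frac{1}{1 + e^{-z}}$$

$$\nabla_w \ell(w) = \frac{1}{m} X^\top\big(\sigma(X w) - y\big)$$

Note that `compute_gradient` below omits the $1/m$ factor, so that scaling is absorbed into the learning rate `eta`.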
Code below:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import phe as paillier

seed = 42
np.random.seed(seed)
def get_data_artificial(n_clients):
"""
Import the dataset via sklearn, shuffle and split train/test.
Return training, target lists for `n_clients` and a holdout test set
"""
print("Loading data")
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1)
X = StandardScaler().fit_transform(X)
# add some noise
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
# Add constant to emulate intercept
X = np.c_[X, np.ones(X.shape[0])]
# The features are already preprocessed
# Shuffle
perm = np.random.permutation(X.shape[0])
X, y = X[perm, :], y[perm]
# Select test at random
test_size = round(X.shape[0]*20/100)
test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
train_idx = np.ones(X.shape[0], dtype=bool)
train_idx[test_idx] = False
X_test, y_test = X[test_idx, :], y[test_idx]
X_train, y_train = X[train_idx, :], y[train_idx]
# Split train among multiple clients.
# The selection is not at random. We simulate the fact that each client
# sees a potentially very different sample of patients.
X, y = [], []
step = int(X_train.shape[0] / n_clients)
for c in range(n_clients):
X.append(X_train[step * c: step * (c + 1), :])
y.append(y_train[step * c: step * (c + 1)])
return X, y, X_test, y_test
def mean_square_error(y_pred, y):
""" 1/m * \sum_{i=1..m} (y_pred_i - y_i)^2 """
return np.mean((y - y_pred) ** 2)
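# Note: mean_square_error is unused below; the loss that actually matches the
# sigmoid gradient in Client.compute_gradient is the binary cross-entropy.
# A minimal sketch (the `log_loss` name and the `eps` clipping are my
# additions), in case it helps for monitoring convergence:
def log_loss(y_pred, y, eps=1e-12):
    r""" -1/m * \sum_{i=1..m} [y_i log(p_i) + (1 - y_i) log(1 - p_i)] """
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))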
def encrypt_vector(public_key, x):
return [public_key.encrypt(i) for i in x]
def decrypt_vector(private_key, x):
return np.array([private_key.decrypt(i) for i in x])
def sum_encrypted_vectors(x, y):
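    # phe's EncryptedNumber overloads `+`, so `x[i] + y[i]` below adds the
    # ciphertexts homomorphically without decrypting them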
if len(x) != len(y):
raise ValueError('Encrypted vectors must have the same size')
return [x[i] + y[i] for i in range(len(x))]
class Server:
"""Private key holder. Decrypts the average gradient"""
def __init__(self, key_length):
keypair = paillier.generate_paillier_keypair(n_length=key_length)
self.pubkey, self.privkey = keypair
def decrypt_aggregate(self, input_model, n_clients):
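        # Decrypting only the element-wise sum (then averaging) means the
        # server never sees an individual client's gradient, only the aggregate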
return decrypt_vector(self.privkey, input_model) / n_clients
def sigmoid(scores):
return 1 / (1 + np.exp(-scores))
class Client:
    """Runs logistic regression with local data or by gradient steps,
    where the gradient can be passed in.
    Using the public key, it can encrypt locally computed gradients.
    """
def __init__(self, name, X, y, pubkey):
self.name = name
self.pubkey = pubkey
self.X, self.y = X, y
self.weights = np.zeros(X.shape[1])
    def fit(self, n_iter, eta=0.01):
        """Run `n_iter` gradient-descent steps of logistic regression"""
for _ in range(n_iter):
gradient = self.compute_gradient()
self.gradient_step(gradient, eta)
def gradient_step(self, gradient, eta=0.01):
"""Update the model with the given gradient"""
self.weights -= eta * gradient
def compute_gradient(self):
"""Compute the gradient of the current model using the training set
"""
delta = self.predict(self.X) - self.y
return delta.dot(self.X)
    def predict(self, X):
        """Return predicted probabilities (sigmoid scores) for the given data"""
scores = X.dot(self.weights)
return sigmoid(scores)
def encrypted_gradient(self, sum_to=None):
"""Compute and encrypt gradient.
        When `sum_to` is given, add the encrypted gradient to it; `sum_to` is
        assumed to be another encrypted vector of the same size
"""
gradient = self.compute_gradient()
encrypted_gradient = encrypt_vector(self.pubkey, gradient)
if sum_to is not None:
return sum_encrypted_vectors(sum_to, encrypted_gradient)
else:
return encrypted_gradient
def federated_learning(n_iter, eta, n_clients, key_length):
names = ['CTA Member {}'.format(i) for i in range(1, n_clients + 1)]
X, y, X_test, y_test = get_data_artificial(n_clients=n_clients)
# Instantiate the server and generate private and public keys
    # NOTE: using smaller key sizes wouldn't be cryptographically safe
server = Server(key_length=key_length)
# Instantiate the clients.
# Each client gets the public key at creation and its own local dataset
clients = []
for i in range(n_clients):
clients.append(Client(names[i], X[i], y[i], server.pubkey))
    # Each client trains a logistic regression model on its own data
print('Accuracy that each client gets on test set by '
'training only on own local data:')
for c in clients:
c.fit(n_iter, eta)
y_pred = c.predict(X_test)
acc = accuracy_score(y_pred.round(), y_test)
print('{:s}:\t{:.2f}'.format(c.name, acc))
        # clf = LogisticRegression()
        # clf.fit(c.X, c.y)
        # y_pred = clf.predict(X_test)
        # acc = accuracy_score(y_pred, y_test)
        # print('Vanilla Logistic Regression {:s}:\t{:.2f}'.format(c.name, acc))
# The federated learning with gradient descent
print('Running distributed gradient aggregation for {:d} iterations'
.format(n_iter))
for i in range(n_iter):
# Compute gradients, encrypt and aggregate
        encrypt_aggr = clients[0].encrypted_gradient(sum_to=None)
        # Skip clients[0] here, otherwise its gradient is counted twice
        for c in clients[1:]:
            encrypt_aggr = c.encrypted_gradient(sum_to=encrypt_aggr)
# Send aggregate to server and decrypt it
aggr = server.decrypt_aggregate(encrypt_aggr, n_clients)
# Take gradient steps
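        # (each client applies the same averaged gradient to its own weights,
        # which still differ because of the local pre-training above)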
for c in clients:
c.gradient_step(aggr, eta)
print('Accuracy that each client gets after running the protocol:')
for c in clients:
y_pred = c.predict(X_test)
acc = accuracy_score(y_pred.round(), y_test)
print('{:s}:\t{:.2f}'.format(c.name, acc))
if __name__ == '__main__':
# Set learning, data split, and security params
federated_learning(n_iter=50, eta=0.01, n_clients=4, key_length=1024)
Output:
Accuracy that each client gets on test set by training only on own local data:
CTA Member 1: 1.00
CTA Member 2: 0.85
CTA Member 3: 0.95
CTA Member 4: 0.95
Running distributed gradient aggregation for 50 iterations
Accuracy that each client gets after running the protocol:
CTA Member 1: 1.00
CTA Member 2: 0.85
CTA Member 3: 1.00
CTA Member 4: 1.00
I am not sure whether I can use the sigmoid function like that, or whether it has to be approximated with a Taylor expansion?
Do we have to do something like gradient clipping?
Cheers.