
suggestion code for logistic classification #63

@robomotic

Hello guys,
I have converted the hospital example to a logistic regression (binary classification) and it seems to work, but could somebody please check my math? A quick sklearn sanity check is sketched after the output below.
Code below:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import phe as paillier

seed = 42
np.random.seed(seed)

def get_data_artificial(n_clients):
	"""
	Import the dataset via sklearn, shuffle and split train/test.
	Return training, target lists for `n_clients` and a holdout test set
	"""
	print("Loading data")
	X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1)
	X = StandardScaler().fit_transform(X)
	# add some noise
	rng = np.random.RandomState(2)
	X += 2 * rng.uniform(size=X.shape)

	# Add constant to emulate intercept
	X = np.c_[X, np.ones(X.shape[0])]

	# The features are already preprocessed
	# Shuffle
	perm = np.random.permutation(X.shape[0])
	X, y = X[perm, :], y[perm]

	# Select test at random
	test_size = round(X.shape[0]*20/100)
	test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
	train_idx = np.ones(X.shape[0], dtype=bool)
	train_idx[test_idx] = False
	X_test, y_test = X[test_idx, :], y[test_idx]
	X_train, y_train = X[train_idx, :], y[train_idx]

	# Split train among multiple clients.
	# The selection is not at random. We simulate the fact that each client
	# sees a potentially very different sample of patients.
	X, y = [], []
	step = int(X_train.shape[0] / n_clients)
	for c in range(n_clients):
		X.append(X_train[step * c: step * (c + 1), :])
		y.append(y_train[step * c: step * (c + 1)])

	return X, y, X_test, y_test


def mean_square_error(y_pred, y):
	""" 1/m * \sum_{i=1..m} (y_pred_i - y_i)^2 """
	return np.mean((y - y_pred) ** 2)


def encrypt_vector(public_key, x):
	return [public_key.encrypt(i) for i in x]


def decrypt_vector(private_key, x):
	return np.array([private_key.decrypt(i) for i in x])


def sum_encrypted_vectors(x, y):
	if len(x) != len(y):
		raise ValueError('Encrypted vectors must have the same size')
	return [x[i] + y[i] for i in range(len(x))]


class Server:
	"""Private key holder. Decrypts the average gradient"""

	def __init__(self, key_length):
		keypair = paillier.generate_paillier_keypair(n_length=key_length)
		self.pubkey, self.privkey = keypair

	def decrypt_aggregate(self, input_model, n_clients):
		return decrypt_vector(self.privkey, input_model) / n_clients

def sigmoid(scores):
	return 1 / (1 + np.exp(-scores))


class Client:
	"""Runs linear regression with local data or by gradient steps,
	where gradient can be passed in.

	Using public key can encrypt locally computed gradients.
	"""

	def __init__(self, name, X, y, pubkey):
		self.name = name
		self.pubkey = pubkey
		self.X, self.y = X, y
		self.weights = np.zeros(X.shape[1])

	def fit(self, n_iter, eta=0.01):
		"""Linear regression for n_iter"""
		for _ in range(n_iter):
			gradient = self.compute_gradient()
			self.gradient_step(gradient, eta)

	def gradient_step(self, gradient, eta=0.01):
		"""Update the model with the given gradient"""
		self.weights -= eta * gradient

	def compute_gradient(self):
		"""Compute the gradient of the current model using the training set
		"""
		delta = self.predict(self.X) - self.y
		return delta.dot(self.X)

	def predict(self, X):
		"""Score test data"""
		scores = X.dot(self.weights)
		return sigmoid(scores)

	def encrypted_gradient(self, sum_to=None):
		"""Compute and encrypt gradient.

		When `sum_to` is given, sum the encrypted gradient to it, assumed
		to be another vector of the same size
		"""
		gradient = self.compute_gradient()
		encrypted_gradient = encrypt_vector(self.pubkey, gradient)

		if sum_to is not None:
			return sum_encrypted_vectors(sum_to, encrypted_gradient)
		else:
			return encrypted_gradient


def federated_learning(n_iter, eta, n_clients, key_length):
	names = ['CTA Member {}'.format(i) for i in range(1, n_clients + 1)]

	X, y, X_test, y_test = get_data_artificial(n_clients=n_clients)

	# Instantiate the server and generate private and public keys
	# NOTE: using smaller key sizes wouldn't be cryptographically safe
	server = Server(key_length=key_length)

	# Instantiate the clients.
	# Each client gets the public key at creation and its own local dataset
	clients = []
	for i in range(n_clients):
		clients.append(Client(names[i], X[i], y[i], server.pubkey))

	# Each client trains a logistic regressor on its own data
	print('Accuracy that each client gets on test set by '
		  'training only on own local data:')
	for c in clients:
		c.fit(n_iter, eta)
		y_pred = c.predict(X_test)
		acc = accuracy_score(y_pred.round(), y_test)
		print('{:s}:\t{:.2f}'.format(c.name, acc))

		#clf = LogisticRegression()
		#clf.fit(c.X, c.y)
		#y_pred = clf.predict(X_test)
		#acc = accuracy_score(y_pred, y_test)
		#print('Vanilla Logistic Regression {:s}:\t{:.2f}'.format(c.name, acc))

	# Federated learning with gradient descent
	print('Running distributed gradient aggregation for {:d} iterations'
		  .format(n_iter))
	for i in range(n_iter):

		# Compute gradients, encrypt and aggregate
		encrypt_aggr = clients[0].encrypted_gradient(sum_to=None)
		for c in clients[1:]:
			encrypt_aggr = c.encrypted_gradient(sum_to=encrypt_aggr)

		# Send aggregate to server and decrypt it
		aggr = server.decrypt_aggregate(encrypt_aggr, n_clients)

		# Take gradient steps
		for c in clients:
			c.gradient_step(aggr, eta)

	print('Accuracy that each client gets after running the protocol:')
	for c in clients:
		y_pred = c.predict(X_test)
		acc = accuracy_score(y_pred.round(), y_test)
		print('{:s}:\t{:.2f}'.format(c.name, acc))


if __name__ == '__main__':
	# Set learning rate, data split, and security parameters
	federated_learning(n_iter=50, eta=0.01, n_clients=4, key_length=1024)

Output:

Accuracy that each client gets on test set by training only on own local data:
CTA Member 1: 1.00
CTA Member 2: 0.85
CTA Member 3: 0.95
CTA Member 4: 0.95
Running distributed gradient aggregation for 50 iterations
Accuracy that each client gets after running the protocol:
CTA Member 1: 1.00
CTA Member 2: 0.85
CTA Member 3: 1.00
CTA Member 4: 1.00
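
To check the math, one quick sanity check (just a sketch, assuming it is fine to pool the training data in the clear for testing purposes) is to append something like the following to the script above and compare its test accuracy with the federated result. X_pooled and y_pooled are just illustrative names; the last column of X (the constant term added in get_data_artificial) is dropped because sklearn fits its own intercept:

# Sanity check (sketch): plain logistic regression on the pooled, unencrypted data
X, y, X_test, y_test = get_data_artificial(n_clients=4)
X_pooled = np.concatenate(X)[:, :-1]  # drop the constant column
y_pooled = np.concatenate(y)
clf = LogisticRegression().fit(X_pooled, y_pooled)
print('Pooled sklearn LogisticRegression accuracy: {:.2f}'
      .format(clf.score(X_test[:, :-1], y_test)))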

I am not sure whether I can use the sigmoid function like that, or whether it has to be approximated with a Taylor expansion?
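
For reference, if the sigmoid ever had to be evaluated on encrypted values (it is not in the code above, since each client applies it to its own plaintext scores), the usual workaround is a low-degree polynomial such as the truncated Taylor (Maclaurin) series sigmoid(x) ≈ 1/2 + x/4 - x^3/48. A minimal sketch (sigmoid_taylor is just an illustrative name, and the approximation is only reasonable for small |x|):

def sigmoid_taylor(x):
	# Degree-3 Maclaurin approximation of the sigmoid; it diverges for large |x|,
	# so inputs would have to be kept in a small range (e.g. by scaling or clipping)
	return 0.5 + x / 4 - x ** 3 / 48

(As far as I understand, Paillier only supports additions of ciphertexts and multiplications by plaintext scalars, so even this cubic would only matter if the protocol were changed so that the scores themselves are computed under encryption.)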

Do we have to do something like gradient clipping?
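
If gradient clipping does turn out to be necessary (e.g. to bound each client's contribution before encryption), a minimal sketch of L2-norm clipping might look like the following; clip_gradient and clip_norm are just illustrative names, and it would be applied to the result of compute_gradient before encrypting:

def clip_gradient(gradient, clip_norm=1.0):
	# Rescale the gradient so its L2 norm is at most clip_norm;
	# leave it unchanged if it is already within the bound
	norm = np.linalg.norm(gradient)
	if norm > clip_norm:
		gradient = gradient * (clip_norm / norm)
	return gradient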

Cheers.
