Merge pull request #18 from FETS-AI/train_loop_stops

sarthakpati · web-flow · commit b4c783b4c9ee · 2021-06-04T13:25:07.000-04:00
Train loop stops
diff --git a/Task_1/FeTS_Challenge.ipynb b/Task_1/FeTS_Challenge.ipynb
@@ -17,7 +17,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -106,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -256,7 +256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -345,7 +345,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -489,7 +489,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -606,46 +606,81 @@
     "- ```device``` : Which device to use for training and validation"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setting up the experiment\n",
+    "Now that we've defined our custom functions, the last thing to do is to configure the experiment. The following cell shows the various settings you can change in your experiment.\n",
+    "\n",
+    "Note that ```rounds_to_train``` can be set as high as you want. However, the experiment will exit once the simulated time exceeds 1 week, or once the specified number of rounds has completed, whichever comes first."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
+    "# change any of these to your own custom functions. You may leave the defaults if you wish.\n",
     "aggregation_function = weighted_average_aggregation\n",
     "choose_training_collaborators = all_collaborators_train\n",
     "training_hyper_parameters_for_round = constant_hyper_parameters\n",
     "validation_functions = [('sensitivity', sensitivity), ('specificity', specificity)]\n",
+    "\n",
+    "# Final scoring will be on partitioning_1, partitioning_2, and a hidden partitioning\n",
+    "# We encourage you to experiment with other partitionings\n",
     "institution_split_csv_filename = 'partitioning_1.csv'\n",
+    "\n",
+    "# change this to point to the parent directory of the data\n",
     "brats_training_data_parent_dir = '/raid/datasets/FeTS21/MICCAI_FeTS2021_TrainingData'\n",
+    "\n",
+    "# increase this if you need a longer history for your algorithms\n",
+    "# decrease this if you need to reduce system RAM consumption\n",
     "db_store_rounds = 5\n",
-    "rounds_to_train = 5\n",
+    "\n",
+    "# this is passed to PyTorch, so set it accordingly for your system\n",
     "device = 'cuda'\n",
     "\n",
-    "run_challenge_experiment(aggregation_function=aggregation_function,\n",
-    "                         choose_training_collaborators=choose_training_collaborators,\n",
-    "                         training_hyper_parameters_for_round=training_hyper_parameters_for_round,\n",
-    "                         validation_functions=validation_functions,\n",
-    "                         institution_split_csv_filename=institution_split_csv_filename,\n",
-    "                         brats_training_data_parent_dir=brats_training_data_parent_dir,\n",
-    "                         db_store_rounds=db_store_rounds,\n",
-    "                         rounds_to_train=rounds_to_train,\n",
-    "                         device=device)"
+    "# you will most likely want to increase this. You can set it as high as you like;\n",
+    "# however, the experiment will exit once the simulated time exceeds one week.\n",
+    "rounds_to_train = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the scores are returned in a Pandas dataframe\n",
+    "scores_dataframe = run_challenge_experiment(\n",
+    "    aggregation_function=aggregation_function,\n",
+    "    choose_training_collaborators=choose_training_collaborators,\n",
+    "    training_hyper_parameters_for_round=training_hyper_parameters_for_round,\n",
+    "    validation_functions=validation_functions,\n",
+    "    institution_split_csv_filename=institution_split_csv_filename,\n",
+    "    brats_training_data_parent_dir=brats_training_data_parent_dir,\n",
+    "    db_store_rounds=db_store_rounds,\n",
+    "    rounds_to_train=rounds_to_train,\n",
+    "    device=device)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "scores_dataframe"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "fets_challenge_test_2",
+   "display_name": "openfl",
    "language": "python",
-   "name": "fets_challenge_test_2"
+   "name": "openfl"
   },
   "language_info": {
    "codemirror_mode": {
@@ -657,7 +692,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.12"
+   "version": "3.6.13"
   }
  },
  "nbformat": 4,
diff --git a/Task_1/README.md b/Task_1/README.md
@@ -32,6 +32,9 @@ Along with the typical DICE and Hausdorff metrics, we include a "time to converg
 
 The time to convergence metric will be computed as the area under the validation learning curve over 1 week of simulated time where the horizontal axis measures simulated runtime and the vertical axis measures the current best score, computed as the average of enhancing tumor, tumor core, and whole tumor DICE scores over the validation split of the training data.
 
+You can find the code for the "time to convergence metric" in the experiment.py file by searching for ## CONVERGENCE METRIC COMPUTATION.
+
+### How Simulated Time is computed
 The simulated time is stochastic, and computed per collaborator, per round, with the round time equaling the greatest round time of all collaborators in the round.
  
 A given collaborator's round time is computed as the sum of:
@@ -57,8 +60,6 @@ We assign these network and compute distributions by drawing uniform-randomly fr
 
 For a given collaborator, these normal distributions are constant throughout the experiment. Again, each possible timing distribution is based on actual timing information from a subset of the hospitals in the FeTS initiative. You can find these distributions in the experiment.py file (search for ## COLLABORATOR TIMING DISTRIBUTIONS), as well as the random seed used to ensure reproducibility.
 
-You can find the code for the "time to convergence metric" in the experiment.py file by searching for ## CONVERGENCE METRIC COMPUTATION.
-
 ## Data Partitioning and Sharding
 The FeTS 2021 data release consists of a training set and two CSV files - each providing information for how to partition the training data into non-IID institutional subsets. The release will contain subfolders for single patient records whose names have the format `FeTS21_Training_###`, and two CSV files: 
 - **partitioning_1.csv**
diff --git a/Task_1/fets_challenge/experiment.py b/Task_1/fets_challenge/experiment.py
@@ -11,6 +11,7 @@
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 from openfl.utilities import split_tensor_dict_for_holdouts, TensorKey
 from openfl.protocols import utils
 import openfl.native as fx
@@ -19,6 +20,10 @@
 from .custom_aggregation_wrapper import CustomAggregationWrapper
 
 # one week
+# MINUTE = 60
+# HOUR = 60 * MINUTE
+# DAY = 24 * HOUR
+# WEEK = 7 * DAY
 MAX_SIMULATION_TIME = 7 * 24 * 60 * 60 
 
 ## COLLABORATOR TIMING DISTRIBUTIONS
@@ -194,6 +199,13 @@ def compute_times_per_collaborator(collaborator_names,
                 data_size *= epochs_per_round
             time += data_size * training_time_per
             
+            # if training, we also validate the locally updated model 
+            data_size = data.get_valid_data_size()
+            validation_time_per = np.random.normal(loc=stats.validation_mean,
+                                                   scale=stats.validation_std)
+            validation_time_per = max(1, validation_time_per)
+            time += data_size * validation_time_per
+
             # upload time
             upload_time = np.random.normal(loc=stats.upload_speed_mean,
                                            scale=stats.upload_speed_std)
@@ -295,6 +307,19 @@ def run_challenge_experiment(aggregation_function,
     best_dice = -1.0
     best_dice_over_time_auc = 0
 
+    # results dataframe data
+    experiment_results = {
+        'round':[],
+        'time': [],
+        'convergence_score': [],
+        'binary_dice_wt': [],
+        'binary_dice_et': [],
+        'binary_dice_tc': [],
+        'hausdorff95_wt': [],
+        'hausdorff95_et': [],
+        'hausdorff95_tc': [],
+    }
+
     for round_num in range(rounds_to_train):
         # pick collaborators to train for the round
         training_collaborators = choose_training_collaborators(collaborator_names,
@@ -416,13 +441,32 @@ def run_challenge_experiment(aggregation_function,
         # End of round summary
         summary = '"**** END OF ROUND {} SUMMARY *****"'.format(round_num)
         summary += "\n\tSimulation Time: {} minutes".format(round(total_simulated_time / 60, 2))
-        summary += "\n\tProjected Convergence Score: {}".format(projected_auc)
+        summary += "\n\t(Projected) Convergence Score: {}".format(projected_auc)
         summary += "\n\tBinary DICE WT: {}".format(binary_dice_wt)
         summary += "\n\tBinary DICE ET: {}".format(binary_dice_et)
         summary += "\n\tBinary DICE TC: {}".format(binary_dice_tc)
         summary += "\n\tHausdorff95 WT: {}".format(hausdorff95_wt)
         summary += "\n\tHausdorff95 ET: {}".format(hausdorff95_et)
         summary += "\n\tHausdorff95 TC: {}".format(hausdorff95_tc)
 
+        experiment_results['round'].append(round_num)
+        experiment_results['time'].append(total_simulated_time)
+        experiment_results['convergence_score'].append(projected_auc)
+        experiment_results['binary_dice_wt'].append(binary_dice_wt)
+        experiment_results['binary_dice_et'].append(binary_dice_et)
+        experiment_results['binary_dice_tc'].append(binary_dice_tc)
+        experiment_results['hausdorff95_wt'].append(hausdorff95_wt)
+        experiment_results['hausdorff95_et'].append(hausdorff95_et)
+        experiment_results['hausdorff95_tc'].append(hausdorff95_tc)
+
         logger.info(summary)
-        
+
+        # if the total_simulated_time has exceeded the maximum time, we break
+        # in practice, this means that the previous round's model is the last model scored,
+        # so a long final round should not actually benefit the competitor, since that final
+        # model is never globally validated
+        if total_simulated_time > MAX_SIMULATION_TIME:
+            logger.info("Simulation time exceeded. Ending Experiment")
+            break
+
+    return pd.DataFrame.from_dict(experiment_results)