guillaume-chevalier · alexbrillant · Nov 1, 2019 · Nov 1, 2019 · Nov 2, 2019 · Nov 3, 2019
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,4 @@ steps/one_hot_encoder.py
 steps/transform_expected_output_only_wrapper.py
 venv/**
 cache/**
+neuraxle_tensorflow/**
diff --git a/data_reading.py b/data_reading.py
@@ -1,3 +1,5 @@
+import os
+
 import numpy as np
 
 INPUT_SIGNAL_TYPES = [
@@ -68,3 +70,25 @@ def load_y(y_path):
 
     # Substract 1 to each output class for friendly 0-based indexing
     return y_ - 1
+
+
+def load_data():
+    """Load the training inputs and labels, print a short summary, and return them.
+
+    The inputs come from ``load_X(X_train_signals_paths)`` and the labels from
+    ``load_y`` applied to ``DATASET_PATH/TRAIN/TRAIN_FILE_NAME``. Only the
+    training split is loaded by this function; the test split is not read.
+
+    :return: tuple ``(X_train, y_train)``
+    """
+    # "X": the neural network's training inputs.
+    train_inputs = load_X(X_train_signals_paths)
+
+    # "y": the neural network's training outputs.
+    train_labels = load_y(os.path.join(DATASET_PATH, TRAIN, TRAIN_FILE_NAME))
+
+    # Shapes plus global mean / standard deviation of the inputs.
+    print("Some useful info to get an insight on dataset's shape and normalisation:")
+    print("(data_inputs shape, expected_outputs shape, every data input mean, every data input standard deviation)")
+    print(train_inputs.shape, train_labels.shape, np.mean(train_inputs), np.std(train_inputs))
+    print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")
+
+    return train_inputs, train_labels
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
 tensorflow==1.15
 tensorflow-gpu==1.15
 conv==0.2
-git+git://github.com/alexbrillant/Neuraxle@one-hot-encoder-step#egg=Neuraxle
+-e git://github.com/alexbrillant/Neuraxle.git@a270fe2b2f73c9350d76fcf4b6f058b764a8c8f7#egg=neuraxle
diff --git a/steps/forma_data.py b/steps/forma_data.py
@@ -0,0 +1,26 @@
+import numpy as np
+from neuraxle.base import BaseStep, NonFittableMixin
+from neuraxle.steps.output_handlers import InputAndOutputTransformerMixin
+
+
+class FormatData(NonFittableMixin, InputAndOutputTransformerMixin, BaseStep):
+    """Step that converts data inputs and expected outputs to numpy arrays.
+
+    Expected outputs, when present, are additionally reshaped to
+    ``(len(data_inputs), n_classes)``.
+    """
+
+    def __init__(self, n_classes):
+        NonFittableMixin.__init__(self)
+        InputAndOutputTransformerMixin.__init__(self)
+        BaseStep.__init__(self)
+        # Number of columns the expected outputs are reshaped to.
+        self.n_classes = n_classes
+
+    def transform(self, data_inputs):
+        """Return the ``(data_inputs, expected_outputs)`` pair as numpy arrays.
+
+        :param data_inputs: a pair ``(data_inputs, expected_outputs)``;
+            ``expected_outputs`` may be None
+        :return: tuple ``(data_inputs, expected_outputs)``; the second item
+            stays None when it was None
+        """
+        inputs, targets = data_inputs
+        inputs = inputs if isinstance(inputs, np.ndarray) else np.array(inputs)
+
+        # Nothing to format on the output side when no expected outputs are given.
+        if targets is None:
+            return inputs, targets
+
+        targets = targets if isinstance(targets, np.ndarray) else np.array(targets)
+        wanted_shape = (len(inputs), self.n_classes)
+        if targets.shape != wanted_shape:
+            targets = np.reshape(targets, wanted_shape)
+
+        return inputs, targets
diff --git a/train_and_save.py b/train_and_save.py
@@ -0,0 +1,171 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
+from neuraxle.api import DeepLearningPipeline
+from neuraxle.base import ExecutionContext, DEFAULT_CACHE_FOLDER
+from neuraxle.hyperparams.space import HyperparameterSamples
+from neuraxle.pipeline import Pipeline
+from neuraxle.steps.numpy import OneHotEncoder
+from neuraxle.steps.output_handlers import OutputTransformerWrapper
+from sklearn.metrics import accuracy_score
+
+from data_reading import load_data
+from neuraxle_tensorflow.tensorflow_v1 import TensorflowV1ModelStep
+from steps.forma_data import FormatData
+
+
+def create_graph(step: TensorflowV1ModelStep):
+    """Build the graph of a two-layer stacked LSTM classifier and return its output tensor.
+
+    Sizes are read from ``step.hyperparams``: ``n_steps``, ``n_inputs``,
+    ``n_hidden`` and ``n_classes``. Two placeholders are created, named
+    ``data_inputs`` (shape ``[None, n_steps, n_inputs]``) and
+    ``expected_outputs`` (shape ``[None, n_classes]``).
+
+    Some of this code is inspired by a slightly different RNN architecture
+    used on another dataset; some of the credit goes to "aymericdamien",
+    under the MIT license.
+
+    :param step: model step whose ``hyperparams`` provide the sizes above
+    :return: linear (un-normalised) class scores computed from the last time
+        step of the stacked LSTM
+    """
+    hyperparams = step.hyperparams
+    n_steps, n_inputs = hyperparams['n_steps'], hyperparams['n_inputs']
+    n_hidden, n_classes = hyperparams['n_hidden'], hyperparams['n_classes']
+
+    # Graph input, shape: (batch_size, n_steps, n_inputs).
+    signal = tf.placeholder(tf.float32, [None, n_steps, n_inputs], name='data_inputs')
+    # Not used locally: created so that the graph holds a tensor named
+    # 'expected_outputs' (presumably what create_loss reads through
+    # step['expected_outputs'] -- confirm against TensorflowV1ModelStep).
+    tf.placeholder(tf.float32, [None, n_classes], name='expected_outputs')
+
+    # Graph weights and biases, created in a fixed order: hidden weights,
+    # output weights, hidden biases, output biases.
+    hidden_weights = tf.Variable(tf.random_normal([n_inputs, n_hidden]))
+    out_weights = tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
+    hidden_biases = tf.Variable(tf.random_normal([n_hidden]))
+    out_biases = tf.Variable(tf.random_normal([n_classes]))
+
+    # Permute n_steps and batch_size, then flatten to (n_steps*batch_size, n_inputs)
+    # to prepare the input of the hidden activation.
+    time_major = tf.transpose(signal, [1, 0, 2])
+    flattened = tf.reshape(time_major, [-1, n_inputs])
+
+    # ReLU activation, thanks to Yu Zhao for adding this improvement.
+    projected = tf.nn.relu(tf.matmul(flattened, hidden_weights) + hidden_biases)
+
+    # The RNN inner loop needs a list of inputs: n_steps * (batch_size, n_hidden).
+    per_step_inputs = tf.split(projected, n_steps, 0)
+
+    # Two stacked LSTM cells (two recurrent layers deep).
+    cells = [
+        tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
+        for _ in range(2)
+    ]
+    stacked_cells = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
+    outputs, _ = tf.contrib.rnn.static_rnn(stacked_cells, per_step_inputs, dtype=tf.float32)
+
+    # "Many-to-one" classifier: only the last time step's output feeds the
+    # final linear layer.
+    return tf.matmul(outputs[-1], out_weights) + out_biases
+
+
+def create_optimizer(step: TensorflowV1ModelStep):
+    """Return an Adam optimizer using the step's ``learning_rate`` hyperparameter.
+
+    :param step: model step whose ``hyperparams`` hold ``learning_rate``
+    :return: a ``tf.train.AdamOptimizer``
+    """
+    learning_rate = step.hyperparams['learning_rate']
+    return tf.train.AdamOptimizer(learning_rate=learning_rate)
+
+
+def create_loss(step: TensorflowV1ModelStep):
+    """Return the training loss: mean softmax cross-entropy plus an L2 penalty.
+
+    The L2 penalty is the sum of ``tf.nn.l2_loss`` over every trainable
+    variable, scaled by the ``lambda_loss_amount`` hyperparameter; it is there
+    to keep this oversized network from overfitting the data.
+
+    :param step: model step giving access to ``hyperparams`` and, through
+        ``step['expected_outputs']`` and ``step['output']``, to the labels and
+        logits used by the cross-entropy
+    :return: scalar loss tensor
+    """
+    # L2 regularisation term.
+    l2_sum = sum(tf.nn.l2_loss(variable) for variable in tf.trainable_variables())
+    l2_penalty = step.hyperparams['lambda_loss_amount'] * l2_sum
+
+    # Softmax loss.
+    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+        labels=step['expected_outputs'],
+        logits=step['output']
+    )
+    return tf.reduce_mean(cross_entropy) + l2_penalty
+
+
+def accuracy_score_classification(data_inputs, expected_outputs):
+    """Return the fraction of rows whose argmax matches between both arguments.
+
+    :param data_inputs: 2D array-like of per-class scores, one row per sample
+    :param expected_outputs: 2D array-like with the same number of rows
+    :return: mean of the row-wise argmax equality
+    """
+    predicted_classes = np.argmax(data_inputs, axis=1)
+    expected_classes = np.argmax(expected_outputs, axis=1)
+    return np.mean(predicted_classes == expected_classes)
+
+
+class HumanActivityRecognitionPipeline(DeepLearningPipeline):
+    """Deep learning pipeline: one-hot encode the labels, format the arrays, train the model step.
+
+    The class constants below are the hyperparameters handed to the model step
+    and to the ``DeepLearningPipeline`` constructor.
+    """
+
+    N_HIDDEN = 32  # hidden layer number of features
+    N_STEPS = 128  # timesteps per series
+    N_INPUTS = 9  # input parameters per timestep
+    LAMBDA_LOSS_AMOUNT = 0.0015
+    LEARNING_RATE = 0.0025
+    N_CLASSES = 6  # total number of classes
+    BATCH_SIZE = 1500
+    EPOCHS = 14
+
+    def __init__(self):
+        # Steps are built in the order in which they run in the pipeline.
+        label_encoder = OutputTransformerWrapper(
+            OneHotEncoder(nb_columns=self.N_CLASSES, name='one_hot_encoded_label')
+        )
+        formatter = FormatData(n_classes=self.N_CLASSES)
+        model = TensorflowV1ModelStep(
+            create_graph=create_graph,
+            create_loss=create_loss,
+            create_optimizer=create_optimizer
+        ).set_hyperparams(
+            HyperparameterSamples({
+                'n_steps': self.N_STEPS,
+                'n_inputs': self.N_INPUTS,
+                'n_hidden': self.N_HIDDEN,
+                'n_classes': self.N_CLASSES,
+                'learning_rate': self.LEARNING_RATE,
+                'lambda_loss_amount': self.LAMBDA_LOSS_AMOUNT,
+                'batch_size': self.BATCH_SIZE
+            })
+        )
+
+        # NOTE(review): sklearn's accuracy_score is used for every metric below,
+        # while this module also defines the argmax-based
+        # accuracy_score_classification -- confirm which one matches the format
+        # of the model step's outputs.
+        super().__init__(
+            Pipeline([label_encoder, formatter, model]),
+            validation_size=0.15,
+            batch_size=self.BATCH_SIZE,
+            batch_metrics={'accuracy': accuracy_score},
+            shuffle_in_each_epoch_at_train=True,
+            n_epochs=self.EPOCHS,
+            epochs_metrics={'accuracy': accuracy_score},
+            scoring_function=accuracy_score
+        )
+
+
+def main():
+    """Train the pipeline, plot its per-epoch accuracy curves, then save it.
+
+    Steps:
+      1. fit the pipeline on the data returned by ``load_data()``;
+      2. plot the training and validation 'accuracy' epoch metrics on the same axes;
+      3. save the pipeline with ``ExecutionContext(DEFAULT_CACHE_FOLDER)`` and tear it down.
+    """
+    pipeline = HumanActivityRecognitionPipeline()
+
+    data_inputs, expected_outputs = load_data()
+    # The transformed outputs returned next to the fitted pipeline are not used here.
+    pipeline, _ = pipeline.fit_transform(data_inputs, expected_outputs)
+
+    # Both curves share one set of axes, so they are told apart by the legend.
+    train_accuracies = pipeline.get_epoch_metric_train('accuracy')
+    plt.plot(range(len(train_accuracies)), train_accuracies, label='training')
+
+    validation_accuracies = pipeline.get_epoch_metric_validation('accuracy')
+    plt.plot(range(len(validation_accuracies)), validation_accuracies, label='validation')
+
+    plt.xlabel('epochs')
+    # Accuracy goes on the y axis; setting it through xlabel would overwrite 'epochs'.
+    plt.ylabel('accuracy')
+    plt.title('Training and validation accuracy')
+    plt.legend()
+    plt.show()
+
+    pipeline.save(ExecutionContext(DEFAULT_CACHE_FOLDER))
+    pipeline.teardown()
+
+
+# Run the training script only when this file is executed directly, not when it is imported.
+if __name__ == '__main__':
+    main()