From d47056a1470146ff29aa9cbf02fcade12c3e7db8 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 23 Dec 2024 03:27:48 -0800
Subject: [PATCH 01/13] code changes

Signed-off-by: yes
---
 .../keras_cnn_mnist}/.workspace | 0
 .../keras_cnn_mnist/plan/cols.yaml | 0
 .../keras_cnn_mnist/plan/data.yaml | 0
 .../keras_cnn_mnist}/plan/defaults | 0
 .../keras_cnn_mnist/plan/plan.yaml | 0
 .../keras_cnn_mnist/requirements.txt | 2 +
 .../keras_cnn_mnist}/src/__init__.py | 0
 .../keras_cnn_mnist/src/dataloader.py | 0
 .../keras_cnn_mnist/src/mnist_utils.py | 0
 .../keras_cnn_mnist/src/taskrunner.py | 0
 .../keras_2dunet}/.workspace | 0
 .../{ => keras_tf}/keras_2dunet/README.md | 0
 .../keras_2dunet/plan/cols.yaml | 0
 .../keras_2dunet/plan/data.yaml | 0
 .../keras_2dunet}/plan/defaults | 0
 .../keras_2dunet/plan/plan.yaml | 0
 .../keras_2dunet/requirements.txt | 0
 .../keras_2dunet}/src/__init__.py | 0
 .../keras_2dunet/src/brats_utils.py | 0
 .../keras_2dunet/src/dataloader.py | 0
 .../keras_2dunet/src/nii_reader.py | 0
 .../keras_2dunet/src/taskrunner.py | 0
 .../keras_cnn_mnist}/.workspace | 0
 .../keras_cnn_mnist}/plan/cols.yaml | 0
 .../keras_cnn_mnist}/plan/data.yaml | 0
 .../keras_tf/keras_cnn_mnist/plan/defaults | 2 +
 .../keras_tf/keras_cnn_mnist/plan/plan.yaml | 46 +++++++
 .../keras_cnn_mnist/requirements.txt | 0
 .../keras_tf/keras_cnn_mnist/src/__init__.py | 3 +
 .../keras_cnn_mnist/src/dataloader.py | 47 +++++++
 .../keras_cnn_mnist/src/mnist_utils.py | 118 ++++++++++++++++++
 .../keras_cnn_mnist/src/taskrunner.py | 78 ++++++++++++
 .../keras_tf/keras_nlp/.workspace | 2 +
 .../keras_tf/keras_nlp/plan/cols.yaml | 5 +
 .../keras_tf/keras_nlp/plan/data.yaml | 7 ++
 .../{ => keras_tf}/keras_nlp/plan/plan.yaml | 0
 .../{ => keras_tf}/keras_nlp/requirements.txt | 0
 .../{ => keras_tf}/keras_nlp/src/__init__.py | 0
 .../keras_nlp/src/dataloader.py | 0
 .../keras_nlp/src/dataloader_utils.py | 0
 .../keras_nlp/src/taskrunner.py | 0
 .../keras_torch/keras_cnn_mnist/.workspace | 2 +
 .../keras_cnn_mnist/plan/cols.yaml | 5 +
 .../keras_cnn_mnist/plan/data.yaml | 7 ++
 .../keras_torch/keras_cnn_mnist/plan/defaults | 2 +
 .../keras_cnn_mnist/plan/plan.yaml | 46 +++++++
 .../keras_cnn_mnist/requirements.txt | 3 +
 .../keras_cnn_mnist/src/__init__.py | 3 +
 .../keras_cnn_mnist/src/dataloader.py | 47 +++++++
 .../keras_cnn_mnist/src/mnist_utils.py | 118 ++++++++++++++++++
 .../keras_cnn_mnist/src/taskrunner.py | 78 ++++++++++++
 51 files changed, 621 insertions(+)
 rename openfl-workspace/{keras_2dunet => keras_jax/keras_cnn_mnist}/.workspace (100%)
 rename openfl-workspace/{ => keras_jax}/keras_cnn_mnist/plan/cols.yaml (100%)
 rename openfl-workspace/{ => keras_jax}/keras_cnn_mnist/plan/data.yaml (100%)
 rename openfl-workspace/{keras_2dunet => keras_jax/keras_cnn_mnist}/plan/defaults (100%)
 rename openfl-workspace/{ => keras_jax}/keras_cnn_mnist/plan/plan.yaml (100%)
 create mode 100644 openfl-workspace/keras_jax/keras_cnn_mnist/requirements.txt
 rename openfl-workspace/{keras_2dunet => keras_jax/keras_cnn_mnist}/src/__init__.py (100%)
 rename openfl-workspace/{ => keras_jax}/keras_cnn_mnist/src/dataloader.py (100%)
 rename openfl-workspace/{ => keras_jax}/keras_cnn_mnist/src/mnist_utils.py (100%)
 rename openfl-workspace/{ => keras_jax}/keras_cnn_mnist/src/taskrunner.py (100%)
 rename openfl-workspace/{keras_cnn_mnist => keras_tf/keras_2dunet}/.workspace (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/README.md (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/plan/cols.yaml (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/plan/data.yaml (100%)
 rename openfl-workspace/{keras_cnn_mnist => keras_tf/keras_2dunet}/plan/defaults (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/plan/plan.yaml (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/requirements.txt (100%)
 rename openfl-workspace/{keras_cnn_mnist => keras_tf/keras_2dunet}/src/__init__.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/src/brats_utils.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/src/dataloader.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/src/nii_reader.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_2dunet/src/taskrunner.py (100%)
 rename openfl-workspace/{keras_nlp => keras_tf/keras_cnn_mnist}/.workspace (100%)
 rename openfl-workspace/{keras_nlp => keras_tf/keras_cnn_mnist}/plan/cols.yaml (100%)
 rename openfl-workspace/{keras_nlp => keras_tf/keras_cnn_mnist}/plan/data.yaml (100%)
 create mode 100644 openfl-workspace/keras_tf/keras_cnn_mnist/plan/defaults
 create mode 100644 openfl-workspace/keras_tf/keras_cnn_mnist/plan/plan.yaml
 rename openfl-workspace/{ => keras_tf}/keras_cnn_mnist/requirements.txt (100%)
 create mode 100644 openfl-workspace/keras_tf/keras_cnn_mnist/src/__init__.py
 create mode 100644 openfl-workspace/keras_tf/keras_cnn_mnist/src/dataloader.py
 create mode 100644 openfl-workspace/keras_tf/keras_cnn_mnist/src/mnist_utils.py
 create mode 100644 openfl-workspace/keras_tf/keras_cnn_mnist/src/taskrunner.py
 create mode 100644 openfl-workspace/keras_tf/keras_nlp/.workspace
 create mode 100644 openfl-workspace/keras_tf/keras_nlp/plan/cols.yaml
 create mode 100644 openfl-workspace/keras_tf/keras_nlp/plan/data.yaml
 rename openfl-workspace/{ => keras_tf}/keras_nlp/plan/plan.yaml (100%)
 rename openfl-workspace/{ => keras_tf}/keras_nlp/requirements.txt (100%)
 rename openfl-workspace/{ => keras_tf}/keras_nlp/src/__init__.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_nlp/src/dataloader.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_nlp/src/dataloader_utils.py (100%)
 rename openfl-workspace/{ => keras_tf}/keras_nlp/src/taskrunner.py (100%)
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/.workspace
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/plan/cols.yaml
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/plan/data.yaml
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/plan/defaults
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/plan/plan.yaml
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/requirements.txt
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/src/__init__.py
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/src/dataloader.py
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py
 create mode 100644 openfl-workspace/keras_torch/keras_cnn_mnist/src/taskrunner.py

diff --git a/openfl-workspace/keras_2dunet/.workspace b/openfl-workspace/keras_jax/keras_cnn_mnist/.workspace
similarity index 100%
rename from openfl-workspace/keras_2dunet/.workspace
rename to openfl-workspace/keras_jax/keras_cnn_mnist/.workspace
diff --git a/openfl-workspace/keras_cnn_mnist/plan/cols.yaml b/openfl-workspace/keras_jax/keras_cnn_mnist/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/plan/cols.yaml
rename to openfl-workspace/keras_jax/keras_cnn_mnist/plan/cols.yaml
diff --git a/openfl-workspace/keras_cnn_mnist/plan/data.yaml b/openfl-workspace/keras_jax/keras_cnn_mnist/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/plan/data.yaml
rename to openfl-workspace/keras_jax/keras_cnn_mnist/plan/data.yaml
diff --git a/openfl-workspace/keras_2dunet/plan/defaults b/openfl-workspace/keras_jax/keras_cnn_mnist/plan/defaults
similarity index 100%
rename from openfl-workspace/keras_2dunet/plan/defaults
rename to openfl-workspace/keras_jax/keras_cnn_mnist/plan/defaults
diff --git a/openfl-workspace/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_jax/keras_cnn_mnist/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/plan/plan.yaml
rename to openfl-workspace/keras_jax/keras_cnn_mnist/plan/plan.yaml
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_jax/keras_cnn_mnist/requirements.txt
new file mode 100644
index 0000000000..d8b1358d95
--- /dev/null
+++ b/openfl-workspace/keras_jax/keras_cnn_mnist/requirements.txt
@@ -0,0 +1,2 @@
+keras==3.6.0
+jaxlib==0.4.38
\ No newline at end of file
diff --git a/openfl-workspace/keras_2dunet/src/__init__.py b/openfl-workspace/keras_jax/keras_cnn_mnist/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_2dunet/src/__init__.py
rename to openfl-workspace/keras_jax/keras_cnn_mnist/src/__init__.py
diff --git a/openfl-workspace/keras_cnn_mnist/src/dataloader.py b/openfl-workspace/keras_jax/keras_cnn_mnist/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/src/dataloader.py
rename to openfl-workspace/keras_jax/keras_cnn_mnist/src/dataloader.py
diff --git a/openfl-workspace/keras_cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_jax/keras_cnn_mnist/src/mnist_utils.py
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/src/mnist_utils.py
rename to openfl-workspace/keras_jax/keras_cnn_mnist/src/mnist_utils.py
diff --git a/openfl-workspace/keras_cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_jax/keras_cnn_mnist/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/src/taskrunner.py
rename to openfl-workspace/keras_jax/keras_cnn_mnist/src/taskrunner.py
diff --git a/openfl-workspace/keras_cnn_mnist/.workspace b/openfl-workspace/keras_tf/keras_2dunet/.workspace
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/.workspace
rename to openfl-workspace/keras_tf/keras_2dunet/.workspace
diff --git a/openfl-workspace/keras_2dunet/README.md b/openfl-workspace/keras_tf/keras_2dunet/README.md
similarity index 100%
rename from openfl-workspace/keras_2dunet/README.md
rename to openfl-workspace/keras_tf/keras_2dunet/README.md
diff --git a/openfl-workspace/keras_2dunet/plan/cols.yaml b/openfl-workspace/keras_tf/keras_2dunet/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_2dunet/plan/cols.yaml
rename to openfl-workspace/keras_tf/keras_2dunet/plan/cols.yaml
diff --git a/openfl-workspace/keras_2dunet/plan/data.yaml b/openfl-workspace/keras_tf/keras_2dunet/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_2dunet/plan/data.yaml
rename to openfl-workspace/keras_tf/keras_2dunet/plan/data.yaml
diff --git a/openfl-workspace/keras_cnn_mnist/plan/defaults b/openfl-workspace/keras_tf/keras_2dunet/plan/defaults
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/plan/defaults
rename to openfl-workspace/keras_tf/keras_2dunet/plan/defaults
diff --git a/openfl-workspace/keras_2dunet/plan/plan.yaml b/openfl-workspace/keras_tf/keras_2dunet/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_2dunet/plan/plan.yaml
rename to openfl-workspace/keras_tf/keras_2dunet/plan/plan.yaml
diff --git a/openfl-workspace/keras_2dunet/requirements.txt b/openfl-workspace/keras_tf/keras_2dunet/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_2dunet/requirements.txt
rename to openfl-workspace/keras_tf/keras_2dunet/requirements.txt
diff --git a/openfl-workspace/keras_cnn_mnist/src/__init__.py b/openfl-workspace/keras_tf/keras_2dunet/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/src/__init__.py
rename to openfl-workspace/keras_tf/keras_2dunet/src/__init__.py
diff --git a/openfl-workspace/keras_2dunet/src/brats_utils.py b/openfl-workspace/keras_tf/keras_2dunet/src/brats_utils.py
similarity index 100%
rename from openfl-workspace/keras_2dunet/src/brats_utils.py
rename to openfl-workspace/keras_tf/keras_2dunet/src/brats_utils.py
diff --git a/openfl-workspace/keras_2dunet/src/dataloader.py b/openfl-workspace/keras_tf/keras_2dunet/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_2dunet/src/dataloader.py
rename to openfl-workspace/keras_tf/keras_2dunet/src/dataloader.py
diff --git a/openfl-workspace/keras_2dunet/src/nii_reader.py b/openfl-workspace/keras_tf/keras_2dunet/src/nii_reader.py
similarity index 100%
rename from openfl-workspace/keras_2dunet/src/nii_reader.py
rename to openfl-workspace/keras_tf/keras_2dunet/src/nii_reader.py
diff --git a/openfl-workspace/keras_2dunet/src/taskrunner.py b/openfl-workspace/keras_tf/keras_2dunet/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_2dunet/src/taskrunner.py
rename to openfl-workspace/keras_tf/keras_2dunet/src/taskrunner.py
diff --git a/openfl-workspace/keras_nlp/.workspace b/openfl-workspace/keras_tf/keras_cnn_mnist/.workspace
similarity index 100%
rename from openfl-workspace/keras_nlp/.workspace
rename to openfl-workspace/keras_tf/keras_cnn_mnist/.workspace
diff --git a/openfl-workspace/keras_nlp/plan/cols.yaml b/openfl-workspace/keras_tf/keras_cnn_mnist/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_nlp/plan/cols.yaml
rename to openfl-workspace/keras_tf/keras_cnn_mnist/plan/cols.yaml
diff --git a/openfl-workspace/keras_nlp/plan/data.yaml b/openfl-workspace/keras_tf/keras_cnn_mnist/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_nlp/plan/data.yaml
rename to openfl-workspace/keras_tf/keras_cnn_mnist/plan/data.yaml
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/plan/defaults b/openfl-workspace/keras_tf/keras_cnn_mnist/plan/defaults
new file mode 100644
index 0000000000..fb82f9c5b6
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_cnn_mnist/plan/defaults
@@ -0,0 +1,2 @@
+../../workspace/plan/defaults
+
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_tf/keras_cnn_mnist/plan/plan.yaml
new file mode 100644
index 0000000000..54867f4578
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_cnn_mnist/plan/plan.yaml
@@ -0,0 +1,46 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+aggregator :
+  defaults : plan/defaults/aggregator.yaml
+  template : openfl.component.Aggregator
+  settings :
+    init_state_path : save/init.pbuf
+    best_state_path : save/best.pbuf
+    last_state_path : save/last.pbuf
+    rounds_to_train : 10
+
+collaborator :
+  defaults : plan/defaults/collaborator.yaml
+  template : openfl.component.Collaborator
+  settings :
+    delta_updates : false
+    opt_treatment : RESET
+
+data_loader :
+  defaults : plan/defaults/data_loader.yaml
+  template : src.dataloader.KerasMNISTInMemory
+  settings :
+    collaborator_count : 2
+    data_group_name : mnist
+    batch_size : 256
+
+task_runner :
+  defaults : plan/defaults/task_runner.yaml
+  template : src.taskrunner.KerasCNN
+
+network :
+  defaults : plan/defaults/network.yaml
+
+assigner :
+  defaults : plan/defaults/assigner.yaml
+
+tasks :
+  defaults : plan/defaults/tasks_keras.yaml
+
+compression_pipeline :
+  defaults : plan/defaults/compression_pipeline.yaml
+  # To use different Compression Pipeline, uncomment the following lines
+  # template : openfl.pipelines.KCPipeline
+  # settings :
+  #     n_clusters : 6
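Each `template` entry in the plan above is a dotted Python import path that OpenFL resolves against the workspace when the plan is parsed: `src.dataloader.KerasMNISTInMemory` and `src.taskrunner.KerasCNN` refer to the modules created under `src/` later in this patch. A minimal sketch of that kind of resolution (illustration only, assuming the workspace root is on `sys.path`; `resolve_template` is a hypothetical helper, not an OpenFL API):

    import importlib

    def resolve_template(dotted_path):
        # Split 'src.taskrunner.KerasCNN' into a module path and a class
        # name, import the module, and return the class object it names.
        module_path, class_name = dotted_path.rsplit('.', 1)
        module = importlib.import_module(module_path)
        return getattr(module, class_name)

    TaskRunnerCls = resolve_template('src.taskrunner.KerasCNN')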
diff --git a/openfl-workspace/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_tf/keras_cnn_mnist/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_cnn_mnist/requirements.txt
rename to openfl-workspace/keras_tf/keras_cnn_mnist/requirements.txt
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/__init__.py b/openfl-workspace/keras_tf/keras_cnn_mnist/src/__init__.py
new file mode 100644
index 0000000000..f1410b1298
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_cnn_mnist/src/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""You may copy this file as the starting point of your own model."""
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/dataloader.py b/openfl-workspace/keras_tf/keras_cnn_mnist/src/dataloader.py
new file mode 100644
index 0000000000..040e8091c9
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_cnn_mnist/src/dataloader.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+from openfl.federated import KerasDataLoader
+from .mnist_utils import load_mnist_shard
+
+
+class KerasMNISTInMemory(KerasDataLoader):
+    """Data Loader for MNIST Dataset."""
+
+    def __init__(self, data_path, batch_size, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            data_path: File path for the dataset
+            batch_size (int): The batch size for the data loader
+            **kwargs: Additional arguments, passed to super init and load_mnist_shard
+        """
+        super().__init__(batch_size, **kwargs)
+
+        # TODO: We should be downloading the dataset shard into a directory
+        # TODO: There needs to be a method to ask how many collaborators and
+        #  what index/rank is this collaborator.
+        # Then we have a way to automatically shard based on rank and size of
+        # collaborator list.
+        try:
+            int(data_path)
+        except ValueError:
+            raise ValueError(
+                f"Expected `{data_path}` to be representable as `int`, "
+                "as it refers to the data shard number used by the collaborator."
+            )
+
+        _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard(
+            shard_num=int(data_path), **kwargs
+        )
+
+        self.X_train = X_train
+        self.y_train = y_train
+        self.X_valid = X_valid
+        self.y_valid = y_valid
+
+        self.num_classes = num_classes
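Note that `data_path` here is a shard index, not a filesystem path: the `one,1` entries in the data.yaml files later in this patch map collaborator `one` to shard `1`. A hypothetical standalone instantiation, assuming the extra keyword arguments are forwarded to `load_mnist_shard` as the code above suggests:

    # Illustration only: collaborator 'one' holding shard 1 in a
    # two-collaborator federation (matching collaborator_count: 2 above).
    loader = KerasMNISTInMemory(data_path='1', batch_size=256,
                                collaborator_count=2)
    print(loader.num_classes)  # 10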
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_tf/keras_cnn_mnist/src/mnist_utils.py
new file mode 100644
index 0000000000..d19e13d9dd
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_cnn_mnist/src/mnist_utils.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+from logging import getLogger
+
+import numpy as np
+from tensorflow.python.keras.utils.data_utils import get_file
+
+logger = getLogger(__name__)
+
+
+def one_hot(labels, classes):
+    """
+    One Hot encode a vector.
+
+    Args:
+        labels (list): List of labels to onehot encode
+        classes (int): Total number of categorical classes
+
+    Returns:
+        np.array: Matrix of one-hot encoded labels
+    """
+    return np.eye(classes)[labels]
+
+
+def _load_raw_datashards(shard_num, collaborator_count):
+    """
+    Load the raw data by shard.
+
+    Returns tuples of the dataset shard divided into training and validation.
+
+    Args:
+        shard_num (int): The shard number to use
+        collaborator_count (int): The number of collaborators in the federation
+
+    Returns:
+        2 tuples: (image, label) of the training, validation dataset
+    """
+    origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
+    path = get_file('mnist.npz',
+                    origin=origin_folder + 'mnist.npz',
+                    file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1')
+
+    with np.load(path) as f:
+        # get all of mnist
+        X_train_tot = f['x_train']
+        y_train_tot = f['y_train']
+
+        X_valid_tot = f['x_test']
+        y_valid_tot = f['y_test']
+
+    # create the shards
+    shard_num = int(shard_num)
+    X_train = X_train_tot[shard_num::collaborator_count]
+    y_train = y_train_tot[shard_num::collaborator_count]
+
+    X_valid = X_valid_tot[shard_num::collaborator_count]
+    y_valid = y_valid_tot[shard_num::collaborator_count]
+
+    return (X_train, y_train), (X_valid, y_valid)
+
+
+def load_mnist_shard(shard_num, collaborator_count, categorical=True,
+                     channels_last=True, **kwargs):
+    """
+    Load the MNIST dataset.
+
+    Args:
+        shard_num (int): The shard to use from the dataset
+        collaborator_count (int): The number of collaborators in the federation
+        categorical (bool): True = convert the labels to one-hot encoded
+            vectors (Default = True)
+        channels_last (bool): True = The input images have the channels
+            last (Default = True)
+        **kwargs: Additional parameters to pass to the function
+
+    Returns:
+        list: The input shape
+        int: The number of classes
+        numpy.ndarray: The training data
+        numpy.ndarray: The training labels
+        numpy.ndarray: The validation data
+        numpy.ndarray: The validation labels
+    """
+    img_rows, img_cols = 28, 28
+    num_classes = 10
+
+    (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards(
+        shard_num, collaborator_count
+    )
+
+    if channels_last:
+        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
+        X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1)
+        input_shape = (img_rows, img_cols, 1)
+    else:
+        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
+        X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols)
+        input_shape = (1, img_rows, img_cols)
+
+    X_train = X_train.astype('float32')
+    X_valid = X_valid.astype('float32')
+    X_train /= 255
+    X_valid /= 255
+
+    logger.info(f'MNIST > X_train Shape : {X_train.shape}')
+    logger.info(f'MNIST > y_train Shape : {y_train.shape}')
+    logger.info(f'MNIST > Train Samples : {X_train.shape[0]}')
+    logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}')
+
+    if categorical:
+        # convert class vectors to binary class matrices
+        y_train = one_hot(y_train, num_classes)
+        y_valid = one_hot(y_valid, num_classes)
+
+    return input_shape, num_classes, X_train, y_train, X_valid, y_valid
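Two idioms above deserve a note: `np.eye(classes)[labels]` one-hot encodes by indexing rows of an identity matrix, and the `[shard_num::collaborator_count]` slices deal samples out round-robin, so the shards are disjoint and together cover the whole dataset. A small self-contained check:

    import numpy as np

    # One-hot: row i of the identity matrix encodes label i.
    print(np.eye(3)[[0, 2, 1]])
    # [[1. 0. 0.]
    #  [0. 0. 1.]
    #  [0. 1. 0.]]

    # Stride sharding with 2 collaborators: shard 0 takes even indices,
    # shard 1 takes odd indices; no sample is shared or dropped.
    samples = np.arange(8)
    print(samples[0::2])  # [0 2 4 6]
    print(samples[1::2])  # [1 3 5 7]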
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_tf/keras_cnn_mnist/src/taskrunner.py
new file mode 100644
index 0000000000..165861033c
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_cnn_mnist/src/taskrunner.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2020-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+from keras.models import Sequential
+from keras.layers import Conv2D
+from keras.layers import Dense
+from keras.layers import Flatten
+
+from openfl.federated import KerasTaskRunner
+
+
+class KerasCNN(KerasTaskRunner):
+    """A basic convolutional neural network model."""
+
+    def __init__(self, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            **kwargs: Additional parameters to pass to the function
+        """
+        super().__init__(**kwargs)
+
+        self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs)
+
+        self.initialize_tensorkeys_for_functions()
+
+        self.model.summary(print_fn=self.logger.info)
+
+        self.logger.info(f'Train Set Size : {self.get_train_data_size()}')
+        self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}')
+
+    def build_model(self,
+                    input_shape,
+                    num_classes,
+                    conv_kernel_size=(4, 4),
+                    conv_strides=(2, 2),
+                    conv1_channels_out=16,
+                    conv2_channels_out=32,
+                    final_dense_inputsize=100,
+                    **kwargs):
+        """
+        Define the model architecture.
+
+        Args:
+            input_shape (numpy.ndarray): The shape of the data
+            num_classes (int): The number of classes of the dataset
+
+        Returns:
+            keras.models.Sequential: The model defined in Keras
+
+        """
+        model = Sequential()
+
+        model.add(Conv2D(conv1_channels_out,
+                         kernel_size=conv_kernel_size,
+                         strides=conv_strides,
+                         activation='relu',
+                         input_shape=input_shape))
+
+        model.add(Conv2D(conv2_channels_out,
+                         kernel_size=conv_kernel_size,
+                         strides=conv_strides,
+                         activation='relu'))
+
+        model.add(Flatten())
+
+        model.add(Dense(final_dense_inputsize, activation='relu'))
+
+        model.add(Dense(num_classes, activation='softmax'))
+
+        model.compile(loss="categorical_crossentropy",
+                      optimizer="adam",
+                      metrics=["accuracy"])
+
+        return model
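With the default hyperparameters the shapes work out as follows: a 'valid'-padded Conv2D maps spatial size n to floor((n - kernel) / stride) + 1, so the 28x28 MNIST inputs become 13x13x16 after the first convolution, 5x5x32 after the second, and Flatten feeds 800 features to the first Dense layer. A quick check of that arithmetic:

    def conv_out(n, kernel=4, stride=2):
        # Keras Conv2D with its default 'valid' padding:
        # output size = floor((n - kernel) / stride) + 1
        return (n - kernel) // stride + 1

    assert conv_out(28) == 13  # after the first Conv2D
    assert conv_out(13) == 5   # after the second Conv2D
    print(5 * 5 * 32)          # 800 features into Dense(100)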
diff --git a/openfl-workspace/keras_tf/keras_nlp/.workspace b/openfl-workspace/keras_tf/keras_nlp/.workspace
new file mode 100644
index 0000000000..3c2c5d08b4
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_nlp/.workspace
@@ -0,0 +1,2 @@
+current_plan_name: default
+
diff --git a/openfl-workspace/keras_tf/keras_nlp/plan/cols.yaml b/openfl-workspace/keras_tf/keras_nlp/plan/cols.yaml
new file mode 100644
index 0000000000..95307de3bc
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_nlp/plan/cols.yaml
@@ -0,0 +1,5 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+collaborators:
+
\ No newline at end of file
diff --git a/openfl-workspace/keras_tf/keras_nlp/plan/data.yaml b/openfl-workspace/keras_tf/keras_nlp/plan/data.yaml
new file mode 100644
index 0000000000..257c7825fe
--- /dev/null
+++ b/openfl-workspace/keras_tf/keras_nlp/plan/data.yaml
@@ -0,0 +1,7 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+# collaborator_name,data_directory_path
+one,1
+
+
diff --git a/openfl-workspace/keras_nlp/plan/plan.yaml b/openfl-workspace/keras_tf/keras_nlp/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_nlp/plan/plan.yaml
rename to openfl-workspace/keras_tf/keras_nlp/plan/plan.yaml
diff --git a/openfl-workspace/keras_nlp/requirements.txt b/openfl-workspace/keras_tf/keras_nlp/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_nlp/requirements.txt
rename to openfl-workspace/keras_tf/keras_nlp/requirements.txt
diff --git a/openfl-workspace/keras_nlp/src/__init__.py b/openfl-workspace/keras_tf/keras_nlp/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_nlp/src/__init__.py
rename to openfl-workspace/keras_tf/keras_nlp/src/__init__.py
diff --git a/openfl-workspace/keras_nlp/src/dataloader.py b/openfl-workspace/keras_tf/keras_nlp/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_nlp/src/dataloader.py
rename to openfl-workspace/keras_tf/keras_nlp/src/dataloader.py
diff --git a/openfl-workspace/keras_nlp/src/dataloader_utils.py b/openfl-workspace/keras_tf/keras_nlp/src/dataloader_utils.py
similarity index 100%
rename from openfl-workspace/keras_nlp/src/dataloader_utils.py
rename to openfl-workspace/keras_tf/keras_nlp/src/dataloader_utils.py
diff --git a/openfl-workspace/keras_nlp/src/taskrunner.py b/openfl-workspace/keras_tf/keras_nlp/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_nlp/src/taskrunner.py
rename to openfl-workspace/keras_tf/keras_nlp/src/taskrunner.py
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/.workspace b/openfl-workspace/keras_torch/keras_cnn_mnist/.workspace
new file mode 100644
index 0000000000..3c2c5d08b4
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/.workspace
@@ -0,0 +1,2 @@
+current_plan_name: default
+
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/cols.yaml b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/cols.yaml
new file mode 100644
index 0000000000..95307de3bc
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/cols.yaml
@@ -0,0 +1,5 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+collaborators:
+
\ No newline at end of file
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/data.yaml b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/data.yaml
new file mode 100644
index 0000000000..257c7825fe
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/data.yaml
@@ -0,0 +1,7 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+# collaborator_name,data_directory_path
+one,1
+
+
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/defaults b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/defaults
new file mode 100644
index 0000000000..fb82f9c5b6
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/defaults
@@ -0,0 +1,2 @@
+../../workspace/plan/defaults
+
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/plan.yaml
new file mode 100644
index 0000000000..54867f4578
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/plan/plan.yaml
@@ -0,0 +1,46 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+aggregator :
+  defaults : plan/defaults/aggregator.yaml
+  template : openfl.component.Aggregator
+  settings :
+    init_state_path : save/init.pbuf
+    best_state_path : save/best.pbuf
+    last_state_path : save/last.pbuf
+    rounds_to_train : 10
+
+collaborator :
+  defaults : plan/defaults/collaborator.yaml
+  template : openfl.component.Collaborator
+  settings :
+    delta_updates : false
+    opt_treatment : RESET
+
+data_loader :
+  defaults : plan/defaults/data_loader.yaml
+  template : src.dataloader.KerasMNISTInMemory
+  settings :
+    collaborator_count : 2
+    data_group_name : mnist
+    batch_size : 256
+
+task_runner :
+  defaults : plan/defaults/task_runner.yaml
+  template : src.taskrunner.KerasCNN
+
+network :
+  defaults : plan/defaults/network.yaml
+
+assigner :
+  defaults : plan/defaults/assigner.yaml
+
+tasks :
+  defaults : plan/defaults/tasks_keras.yaml
+
+compression_pipeline :
+  defaults : plan/defaults/compression_pipeline.yaml
+  # To use different Compression Pipeline, uncomment the following lines
+  # template : openfl.pipelines.KCPipeline
+  # settings :
+  #     n_clusters : 6
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_torch/keras_cnn_mnist/requirements.txt
new file mode 100644
index 0000000000..5fa9907811
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/requirements.txt
@@ -0,0 +1,3 @@
+keras==3.6.0
+tensorflow==2.18.0
+
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/__init__.py b/openfl-workspace/keras_torch/keras_cnn_mnist/src/__init__.py
new file mode 100644
index 0000000000..f1410b1298
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/src/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""You may copy this file as the starting point of your own model."""
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/dataloader.py b/openfl-workspace/keras_torch/keras_cnn_mnist/src/dataloader.py
new file mode 100644
index 0000000000..040e8091c9
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/src/dataloader.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+from openfl.federated import KerasDataLoader
+from .mnist_utils import load_mnist_shard
+
+
+class KerasMNISTInMemory(KerasDataLoader):
+    """Data Loader for MNIST Dataset."""
+
+    def __init__(self, data_path, batch_size, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            data_path: File path for the dataset
+            batch_size (int): The batch size for the data loader
+            **kwargs: Additional arguments, passed to super init and load_mnist_shard
+        """
+        super().__init__(batch_size, **kwargs)
+
+        # TODO: We should be downloading the dataset shard into a directory
+        # TODO: There needs to be a method to ask how many collaborators and
+        #  what index/rank is this collaborator.
+        # Then we have a way to automatically shard based on rank and size of
+        # collaborator list.
+        try:
+            int(data_path)
+        except ValueError:
+            raise ValueError(
+                f"Expected `{data_path}` to be representable as `int`, "
+                "as it refers to the data shard number used by the collaborator."
+            )
+
+        _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard(
+            shard_num=int(data_path), **kwargs
+        )
+
+        self.X_train = X_train
+        self.y_train = y_train
+        self.X_valid = X_valid
+        self.y_valid = y_valid
+
+        self.num_classes = num_classes
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py
new file mode 100644
index 0000000000..d19e13d9dd
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py
@@ -0,0 +1,118 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+from logging import getLogger
+
+import numpy as np
+from tensorflow.python.keras.utils.data_utils import get_file
+
+logger = getLogger(__name__)
+
+
+def one_hot(labels, classes):
+    """
+    One Hot encode a vector.
+
+    Args:
+        labels (list): List of labels to onehot encode
+        classes (int): Total number of categorical classes
+
+    Returns:
+        np.array: Matrix of one-hot encoded labels
+    """
+    return np.eye(classes)[labels]
+
+
+def _load_raw_datashards(shard_num, collaborator_count):
+    """
+    Load the raw data by shard.
+
+    Returns tuples of the dataset shard divided into training and validation.
+
+    Args:
+        shard_num (int): The shard number to use
+        collaborator_count (int): The number of collaborators in the federation
+
+    Returns:
+        2 tuples: (image, label) of the training, validation dataset
+    """
+    origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
+    path = get_file('mnist.npz',
+                    origin=origin_folder + 'mnist.npz',
+                    file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1')
+
+    with np.load(path) as f:
+        # get all of mnist
+        X_train_tot = f['x_train']
+        y_train_tot = f['y_train']
+
+        X_valid_tot = f['x_test']
+        y_valid_tot = f['y_test']
+
+    # create the shards
+    shard_num = int(shard_num)
+    X_train = X_train_tot[shard_num::collaborator_count]
+    y_train = y_train_tot[shard_num::collaborator_count]
+
+    X_valid = X_valid_tot[shard_num::collaborator_count]
+    y_valid = y_valid_tot[shard_num::collaborator_count]
+
+    return (X_train, y_train), (X_valid, y_valid)
+
+
+def load_mnist_shard(shard_num, collaborator_count, categorical=True,
+                     channels_last=True, **kwargs):
+    """
+    Load the MNIST dataset.
+
+    Args:
+        shard_num (int): The shard to use from the dataset
+        collaborator_count (int): The number of collaborators in the federation
+        categorical (bool): True = convert the labels to one-hot encoded
+            vectors (Default = True)
+        channels_last (bool): True = The input images have the channels
+            last (Default = True)
+        **kwargs: Additional parameters to pass to the function
+
+    Returns:
+        list: The input shape
+        int: The number of classes
+        numpy.ndarray: The training data
+        numpy.ndarray: The training labels
+        numpy.ndarray: The validation data
+        numpy.ndarray: The validation labels
+    """
+    img_rows, img_cols = 28, 28
+    num_classes = 10
+
+    (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards(
+        shard_num, collaborator_count
+    )
+
+    if channels_last:
+        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
+        X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1)
+        input_shape = (img_rows, img_cols, 1)
+    else:
+        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
+        X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols)
+        input_shape = (1, img_rows, img_cols)
+
+    X_train = X_train.astype('float32')
+    X_valid = X_valid.astype('float32')
+    X_train /= 255
+    X_valid /= 255
+
+    logger.info(f'MNIST > X_train Shape : {X_train.shape}')
+    logger.info(f'MNIST > y_train Shape : {y_train.shape}')
+    logger.info(f'MNIST > Train Samples : {X_train.shape[0]}')
+    logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}')
+
+    if categorical:
+        # convert class vectors to binary class matrices
+        y_train = one_hot(y_train, num_classes)
+        y_valid = one_hot(y_valid, num_classes)
+
+    return input_shape, num_classes, X_train, y_train, X_valid, y_valid
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_torch/keras_cnn_mnist/src/taskrunner.py
new file mode 100644
index 0000000000..165861033c
--- /dev/null
+++ b/openfl-workspace/keras_torch/keras_cnn_mnist/src/taskrunner.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2020-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+from keras.models import Sequential
+from keras.layers import Conv2D
+from keras.layers import Dense
+from keras.layers import Flatten
+
+from openfl.federated import KerasTaskRunner
+
+
+class KerasCNN(KerasTaskRunner):
+    """A basic convolutional neural network model."""
+
+    def __init__(self, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            **kwargs: Additional parameters to pass to the function
+        """
+        super().__init__(**kwargs)
+
+        self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs)
+
+        self.initialize_tensorkeys_for_functions()
+
+        self.model.summary(print_fn=self.logger.info)
+
+        self.logger.info(f'Train Set Size : {self.get_train_data_size()}')
+        self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}')
+
+    def build_model(self,
+                    input_shape,
+                    num_classes,
+                    conv_kernel_size=(4, 4),
+                    conv_strides=(2, 2),
+                    conv1_channels_out=16,
+                    conv2_channels_out=32,
+                    final_dense_inputsize=100,
+                    **kwargs):
+        """
+        Define the model architecture.
+
+        Args:
+            input_shape (numpy.ndarray): The shape of the data
+            num_classes (int): The number of classes of the dataset
+
+        Returns:
+            keras.models.Sequential: The model defined in Keras
+
+        """
+        model = Sequential()
+
+        model.add(Conv2D(conv1_channels_out,
+                         kernel_size=conv_kernel_size,
+                         strides=conv_strides,
+                         activation='relu',
+                         input_shape=input_shape))
+
+        model.add(Conv2D(conv2_channels_out,
+                         kernel_size=conv_kernel_size,
+                         strides=conv_strides,
+                         activation='relu'))
+
+        model.add(Flatten())
+
+        model.add(Dense(final_dense_inputsize, activation='relu'))
+
+        model.add(Dense(num_classes, activation='softmax'))
+
+        model.compile(loss="categorical_crossentropy",
+                      optimizer="adam",
+                      metrics=["accuracy"])
+
+        return model
From 4850493b433c2487fc479450b01096d7bca81789 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 23 Dec 2024 03:28:02 -0800
Subject: [PATCH 02/13] code changes

Signed-off-by: yes
---
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/.workspace | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/cols.yaml | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/data.yaml | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/defaults | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/plan.yaml | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/requirements.txt | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/src/__init__.py | 0
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/src/dataloader.py | 0
 .../keras_cnn_mnist => keras_jax/cnn_mnist}/src/mnist_utils.py | 2 +-
 .../keras_jax/{keras_cnn_mnist => cnn_mnist}/src/taskrunner.py | 0
 openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/.workspace | 0
 openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/README.md | 0
 .../keras_tf/{keras_2dunet => 2dunet}/plan/cols.yaml | 0
 .../keras_tf/{keras_2dunet => 2dunet}/plan/data.yaml | 0
 .../keras_tf/{keras_2dunet => 2dunet}/plan/defaults | 0
 .../keras_tf/{keras_2dunet => 2dunet}/plan/plan.yaml | 0
 .../keras_tf/{keras_2dunet => 2dunet}/requirements.txt | 0
 .../keras_tf/{keras_2dunet => 2dunet}/src/__init__.py | 0
 .../keras_tf/{keras_2dunet => 2dunet}/src/brats_utils.py | 0
 .../keras_tf/{keras_2dunet => 2dunet}/src/dataloader.py | 0
 .../keras_tf/{keras_2dunet => 2dunet}/src/nii_reader.py | 0
 .../keras_tf/{keras_2dunet => 2dunet}/src/taskrunner.py | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/.workspace | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/cols.yaml | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/data.yaml | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/defaults | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/plan.yaml | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/requirements.txt | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/src/__init__.py | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/src/dataloader.py | 0
 .../keras_cnn_mnist => keras_tf/cnn_mnist}/src/mnist_utils.py | 0
 .../keras_tf/{keras_cnn_mnist => cnn_mnist}/src/taskrunner.py | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/.workspace | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/plan/cols.yaml | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/plan/data.yaml | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/plan/plan.yaml | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/requirements.txt | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/src/__init__.py | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/src/dataloader.py | 0
 .../keras_tf/{keras_nlp => nlp}/src/dataloader_utils.py | 0
 openfl-workspace/keras_tf/{keras_nlp => nlp}/src/taskrunner.py | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/.workspace | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/cols.yaml | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/data.yaml | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/defaults | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/plan.yaml | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/requirements.txt | 0
 .../keras_torch/{keras_cnn_mnist => cnn_mnist}/src/__init__.py | 0
 .../{keras_cnn_mnist => cnn_mnist}/src/dataloader.py | 0
 .../cnn_mnist}/src/mnist_utils.py | 0
 .../{keras_cnn_mnist => cnn_mnist}/src/taskrunner.py | 0
 51 files changed, 1 insertion(+), 1 deletion(-)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/.workspace (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/cols.yaml (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/data.yaml (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/defaults (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/plan/plan.yaml (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/requirements.txt (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/src/__init__.py (100%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/src/dataloader.py (100%)
 rename openfl-workspace/{keras_torch/keras_cnn_mnist => keras_jax/cnn_mnist}/src/mnist_utils.py (98%)
 rename openfl-workspace/keras_jax/{keras_cnn_mnist => cnn_mnist}/src/taskrunner.py (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/.workspace (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/README.md (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/plan/cols.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/plan/data.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/plan/defaults (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/plan/plan.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/requirements.txt (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/src/__init__.py (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/src/brats_utils.py (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/src/dataloader.py (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/src/nii_reader.py (100%)
 rename openfl-workspace/keras_tf/{keras_2dunet => 2dunet}/src/taskrunner.py (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/.workspace (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/cols.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/data.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/defaults (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/plan/plan.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/requirements.txt (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/src/__init__.py (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/src/dataloader.py (100%)
 rename openfl-workspace/{keras_jax/keras_cnn_mnist => keras_tf/cnn_mnist}/src/mnist_utils.py (100%)
 rename openfl-workspace/keras_tf/{keras_cnn_mnist => cnn_mnist}/src/taskrunner.py (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/.workspace (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/plan/cols.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/plan/data.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/plan/plan.yaml (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/requirements.txt (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/src/__init__.py (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/src/dataloader.py (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/src/dataloader_utils.py (100%)
 rename openfl-workspace/keras_tf/{keras_nlp => nlp}/src/taskrunner.py (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/.workspace (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/cols.yaml (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/data.yaml (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/defaults (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/plan/plan.yaml (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/requirements.txt (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/src/__init__.py (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/src/dataloader.py (100%)
 rename openfl-workspace/{keras_tf/keras_cnn_mnist => keras_torch/cnn_mnist}/src/mnist_utils.py (100%)
 rename openfl-workspace/keras_torch/{keras_cnn_mnist => cnn_mnist}/src/taskrunner.py (100%)

diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/.workspace b/openfl-workspace/keras_jax/cnn_mnist/.workspace
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/.workspace
rename to openfl-workspace/keras_jax/cnn_mnist/.workspace
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/plan/cols.yaml b/openfl-workspace/keras_jax/cnn_mnist/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/plan/cols.yaml
rename to openfl-workspace/keras_jax/cnn_mnist/plan/cols.yaml
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/plan/data.yaml b/openfl-workspace/keras_jax/cnn_mnist/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/plan/data.yaml
rename to openfl-workspace/keras_jax/cnn_mnist/plan/data.yaml
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/plan/defaults b/openfl-workspace/keras_jax/cnn_mnist/plan/defaults
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/plan/defaults
rename to openfl-workspace/keras_jax/cnn_mnist/plan/defaults
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_jax/cnn_mnist/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/plan/plan.yaml
rename to openfl-workspace/keras_jax/cnn_mnist/plan/plan.yaml
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_jax/cnn_mnist/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/requirements.txt
rename to openfl-workspace/keras_jax/cnn_mnist/requirements.txt
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/src/__init__.py b/openfl-workspace/keras_jax/cnn_mnist/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/src/__init__.py
rename to openfl-workspace/keras_jax/cnn_mnist/src/__init__.py
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/src/dataloader.py b/openfl-workspace/keras_jax/cnn_mnist/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/src/dataloader.py
rename to openfl-workspace/keras_jax/cnn_mnist/src/dataloader.py
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py
similarity index 98%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py
rename to openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py
index d19e13d9dd..87c440f272 100644
--- a/openfl-workspace/keras_torch/keras_cnn_mnist/src/mnist_utils.py
+++ b/openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py
@@ -6,7 +6,7 @@
 from logging import getLogger
 
 import numpy as np
-from tensorflow.python.keras.utils.data_utils import get_file
+from tensorflow.python.keras.utils.data_utils import get_file # modify this
 
 logger = getLogger(__name__)
 
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_jax/cnn_mnist/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/src/taskrunner.py
rename to openfl-workspace/keras_jax/cnn_mnist/src/taskrunner.py
diff --git a/openfl-workspace/keras_tf/keras_2dunet/.workspace b/openfl-workspace/keras_tf/2dunet/.workspace
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/.workspace
rename to openfl-workspace/keras_tf/2dunet/.workspace
diff --git a/openfl-workspace/keras_tf/keras_2dunet/README.md b/openfl-workspace/keras_tf/2dunet/README.md
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/README.md
rename to openfl-workspace/keras_tf/2dunet/README.md
diff --git a/openfl-workspace/keras_tf/keras_2dunet/plan/cols.yaml b/openfl-workspace/keras_tf/2dunet/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/plan/cols.yaml
rename to openfl-workspace/keras_tf/2dunet/plan/cols.yaml
diff --git a/openfl-workspace/keras_tf/keras_2dunet/plan/data.yaml b/openfl-workspace/keras_tf/2dunet/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/plan/data.yaml
rename to openfl-workspace/keras_tf/2dunet/plan/data.yaml
diff --git a/openfl-workspace/keras_tf/keras_2dunet/plan/defaults b/openfl-workspace/keras_tf/2dunet/plan/defaults
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/plan/defaults
rename to openfl-workspace/keras_tf/2dunet/plan/defaults
diff --git a/openfl-workspace/keras_tf/keras_2dunet/plan/plan.yaml b/openfl-workspace/keras_tf/2dunet/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/plan/plan.yaml
rename to openfl-workspace/keras_tf/2dunet/plan/plan.yaml
diff --git a/openfl-workspace/keras_tf/keras_2dunet/requirements.txt b/openfl-workspace/keras_tf/2dunet/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/requirements.txt
rename to openfl-workspace/keras_tf/2dunet/requirements.txt
diff --git a/openfl-workspace/keras_tf/keras_2dunet/src/__init__.py b/openfl-workspace/keras_tf/2dunet/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/src/__init__.py
rename to openfl-workspace/keras_tf/2dunet/src/__init__.py
diff --git a/openfl-workspace/keras_tf/keras_2dunet/src/brats_utils.py b/openfl-workspace/keras_tf/2dunet/src/brats_utils.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/src/brats_utils.py
rename to openfl-workspace/keras_tf/2dunet/src/brats_utils.py
diff --git a/openfl-workspace/keras_tf/keras_2dunet/src/dataloader.py b/openfl-workspace/keras_tf/2dunet/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/src/dataloader.py
rename to openfl-workspace/keras_tf/2dunet/src/dataloader.py
diff --git a/openfl-workspace/keras_tf/keras_2dunet/src/nii_reader.py b/openfl-workspace/keras_tf/2dunet/src/nii_reader.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/src/nii_reader.py
rename to openfl-workspace/keras_tf/2dunet/src/nii_reader.py
diff --git a/openfl-workspace/keras_tf/keras_2dunet/src/taskrunner.py b/openfl-workspace/keras_tf/2dunet/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_2dunet/src/taskrunner.py
rename to openfl-workspace/keras_tf/2dunet/src/taskrunner.py
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/.workspace b/openfl-workspace/keras_tf/cnn_mnist/.workspace
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/.workspace
rename to openfl-workspace/keras_tf/cnn_mnist/.workspace
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/plan/cols.yaml b/openfl-workspace/keras_tf/cnn_mnist/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/plan/cols.yaml
rename to openfl-workspace/keras_tf/cnn_mnist/plan/cols.yaml
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/plan/data.yaml b/openfl-workspace/keras_tf/cnn_mnist/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/plan/data.yaml
rename to openfl-workspace/keras_tf/cnn_mnist/plan/data.yaml
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/plan/defaults b/openfl-workspace/keras_tf/cnn_mnist/plan/defaults
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/plan/defaults
rename to openfl-workspace/keras_tf/cnn_mnist/plan/defaults
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_tf/cnn_mnist/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/plan/plan.yaml
rename to openfl-workspace/keras_tf/cnn_mnist/plan/plan.yaml
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_tf/cnn_mnist/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/requirements.txt
rename to openfl-workspace/keras_tf/cnn_mnist/requirements.txt
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/__init__.py b/openfl-workspace/keras_tf/cnn_mnist/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/src/__init__.py
rename to openfl-workspace/keras_tf/cnn_mnist/src/__init__.py
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/dataloader.py b/openfl-workspace/keras_tf/cnn_mnist/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/src/dataloader.py
rename to openfl-workspace/keras_tf/cnn_mnist/src/dataloader.py
diff --git a/openfl-workspace/keras_jax/keras_cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_tf/cnn_mnist/src/mnist_utils.py
similarity index 100%
rename from openfl-workspace/keras_jax/keras_cnn_mnist/src/mnist_utils.py
rename to openfl-workspace/keras_tf/cnn_mnist/src/mnist_utils.py
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_tf/cnn_mnist/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/src/taskrunner.py
rename to openfl-workspace/keras_tf/cnn_mnist/src/taskrunner.py
diff --git a/openfl-workspace/keras_tf/keras_nlp/.workspace b/openfl-workspace/keras_tf/nlp/.workspace
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/.workspace
rename to openfl-workspace/keras_tf/nlp/.workspace
diff --git a/openfl-workspace/keras_tf/keras_nlp/plan/cols.yaml b/openfl-workspace/keras_tf/nlp/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/plan/cols.yaml
rename to openfl-workspace/keras_tf/nlp/plan/cols.yaml
diff --git a/openfl-workspace/keras_tf/keras_nlp/plan/data.yaml b/openfl-workspace/keras_tf/nlp/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/plan/data.yaml
rename to openfl-workspace/keras_tf/nlp/plan/data.yaml
diff --git a/openfl-workspace/keras_tf/keras_nlp/plan/plan.yaml b/openfl-workspace/keras_tf/nlp/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/plan/plan.yaml
rename to openfl-workspace/keras_tf/nlp/plan/plan.yaml
diff --git a/openfl-workspace/keras_tf/keras_nlp/requirements.txt b/openfl-workspace/keras_tf/nlp/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/requirements.txt
rename to openfl-workspace/keras_tf/nlp/requirements.txt
diff --git a/openfl-workspace/keras_tf/keras_nlp/src/__init__.py b/openfl-workspace/keras_tf/nlp/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/src/__init__.py
rename to openfl-workspace/keras_tf/nlp/src/__init__.py
diff --git a/openfl-workspace/keras_tf/keras_nlp/src/dataloader.py b/openfl-workspace/keras_tf/nlp/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/src/dataloader.py
rename to openfl-workspace/keras_tf/nlp/src/dataloader.py
diff --git a/openfl-workspace/keras_tf/keras_nlp/src/dataloader_utils.py b/openfl-workspace/keras_tf/nlp/src/dataloader_utils.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/src/dataloader_utils.py
rename to openfl-workspace/keras_tf/nlp/src/dataloader_utils.py
diff --git a/openfl-workspace/keras_tf/keras_nlp/src/taskrunner.py b/openfl-workspace/keras_tf/nlp/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_nlp/src/taskrunner.py
rename to openfl-workspace/keras_tf/nlp/src/taskrunner.py
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/.workspace b/openfl-workspace/keras_torch/cnn_mnist/.workspace
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/.workspace
rename to openfl-workspace/keras_torch/cnn_mnist/.workspace
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/cols.yaml b/openfl-workspace/keras_torch/cnn_mnist/plan/cols.yaml
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/plan/cols.yaml
rename to openfl-workspace/keras_torch/cnn_mnist/plan/cols.yaml
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/data.yaml b/openfl-workspace/keras_torch/cnn_mnist/plan/data.yaml
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/plan/data.yaml
rename to openfl-workspace/keras_torch/cnn_mnist/plan/data.yaml
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/defaults b/openfl-workspace/keras_torch/cnn_mnist/plan/defaults
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/plan/defaults
rename to openfl-workspace/keras_torch/cnn_mnist/plan/defaults
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_torch/cnn_mnist/plan/plan.yaml
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/plan/plan.yaml
rename to openfl-workspace/keras_torch/cnn_mnist/plan/plan.yaml
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_torch/cnn_mnist/requirements.txt
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/requirements.txt
rename to openfl-workspace/keras_torch/cnn_mnist/requirements.txt
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/__init__.py b/openfl-workspace/keras_torch/cnn_mnist/src/__init__.py
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/src/__init__.py
rename to openfl-workspace/keras_torch/cnn_mnist/src/__init__.py
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/dataloader.py b/openfl-workspace/keras_torch/cnn_mnist/src/dataloader.py
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/src/dataloader.py
rename to openfl-workspace/keras_torch/cnn_mnist/src/dataloader.py
diff --git a/openfl-workspace/keras_tf/keras_cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_torch/cnn_mnist/src/mnist_utils.py
similarity index 100%
rename from openfl-workspace/keras_tf/keras_cnn_mnist/src/mnist_utils.py
rename to openfl-workspace/keras_torch/cnn_mnist/src/mnist_utils.py
diff --git a/openfl-workspace/keras_torch/keras_cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_torch/cnn_mnist/src/taskrunner.py
similarity index 100%
rename from openfl-workspace/keras_torch/keras_cnn_mnist/src/taskrunner.py
rename to openfl-workspace/keras_torch/cnn_mnist/src/taskrunner.py
.../keras_jax/nlp/src/dataloader.py | 142 +++++++++++ .../keras_jax/nlp/src/dataloader_utils.py | 230 ++++++++++++++++++ .../keras_jax/nlp/src/taskrunner.py | 74 ++++++ .../keras_tf/cnn_mnist/plan/defaults | 2 - .../keras_tf/cnn_mnist/src/__init__.py | 3 - .../keras_tf/cnn_mnist/src/dataloader.py | 47 ---- .../keras_tf/cnn_mnist/src/taskrunner.py | 78 ------ .../keras_tf/nlp/requirements.txt | 2 +- .../keras_tf/nlp/src/taskrunner.py | 2 + .../keras_torch/cnn_mnist/plan/defaults | 2 - .../keras_torch/cnn_mnist/requirements.txt | 3 - .../keras_torch/cnn_mnist/src/__init__.py | 3 - .../keras_torch/cnn_mnist/src/dataloader.py | 47 ---- .../keras_torch/cnn_mnist/src/mnist_utils.py | 118 --------- .../keras_torch/cnn_mnist/src/taskrunner.py | 78 ------ openfl-workspace/keras_torch/nlp/.workspace | 2 + .../keras_torch/nlp/plan/cols.yaml | 5 + .../keras_torch/nlp/plan/data.yaml | 7 + .../keras_torch/nlp/plan/plan.yaml | 46 ++++ .../keras_torch/nlp/requirements.txt | 2 + .../keras_torch/nlp/src/__init__.py | 4 + .../keras_torch/nlp/src/dataloader.py | 142 +++++++++++ .../keras_torch/nlp/src/dataloader_utils.py | 230 ++++++++++++++++++ .../keras_torch/nlp/src/taskrunner.py | 74 ++++++ openfl/federated/task/runner_keras.py | 3 + 64 files changed, 1436 insertions(+), 519 deletions(-) rename openfl-workspace/{keras_jax/cnn_mnist => keras/2dunet}/.workspace (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/README.md (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/plan/cols.yaml (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/plan/data.yaml (100%) rename openfl-workspace/{keras_jax/cnn_mnist => keras/2dunet}/plan/defaults (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/plan/plan.yaml (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/requirements.txt (100%) rename openfl-workspace/{keras_jax/cnn_mnist => keras/2dunet}/src/__init__.py (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/src/brats_utils.py (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/src/dataloader.py (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/src/nii_reader.py (100%) rename openfl-workspace/{keras_tf => keras}/2dunet/src/taskrunner.py (100%) rename openfl-workspace/{keras_tf/2dunet => keras/cnn_mnist}/.workspace (100%) rename openfl-workspace/{keras_jax => keras}/cnn_mnist/plan/cols.yaml (100%) rename openfl-workspace/{keras_jax => keras}/cnn_mnist/plan/data.yaml (100%) rename openfl-workspace/{keras_tf/2dunet => keras/cnn_mnist}/plan/defaults (100%) rename openfl-workspace/{keras_jax => keras}/cnn_mnist/plan/plan.yaml (100%) create mode 100644 openfl-workspace/keras/cnn_mnist/requirements.txt rename openfl-workspace/{keras_tf/2dunet => keras/cnn_mnist}/src/__init__.py (100%) rename openfl-workspace/{keras_jax => keras}/cnn_mnist/src/dataloader.py (100%) rename openfl-workspace/{keras_tf => keras}/cnn_mnist/src/mnist_utils.py (100%) rename openfl-workspace/{keras_jax => keras}/cnn_mnist/src/taskrunner.py (100%) rename openfl-workspace/{keras_tf/cnn_mnist => keras/nlp}/.workspace (100%) rename openfl-workspace/{keras_tf/cnn_mnist => keras/nlp}/plan/cols.yaml (100%) rename openfl-workspace/{keras_tf/cnn_mnist => keras/nlp}/plan/data.yaml (100%) rename openfl-workspace/{keras_torch/cnn_mnist => keras/nlp}/plan/plan.yaml (76%) rename openfl-workspace/{keras_tf/cnn_mnist => keras/nlp}/requirements.txt (96%) create mode 100644 openfl-workspace/keras/nlp/src/__init__.py create mode 100644 openfl-workspace/keras/nlp/src/dataloader.py create mode 100644 
openfl-workspace/keras/nlp/src/dataloader_utils.py create mode 100644 openfl-workspace/keras/nlp/src/taskrunner.py delete mode 100644 openfl-workspace/keras_jax/cnn_mnist/requirements.txt delete mode 100644 openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py rename openfl-workspace/{keras_torch/cnn_mnist => keras_jax/nlp}/.workspace (100%) rename openfl-workspace/{keras_torch/cnn_mnist => keras_jax/nlp}/plan/cols.yaml (100%) rename openfl-workspace/{keras_torch/cnn_mnist => keras_jax/nlp}/plan/data.yaml (100%) rename openfl-workspace/{keras_tf/cnn_mnist => keras_jax/nlp}/plan/plan.yaml (76%) create mode 100644 openfl-workspace/keras_jax/nlp/requirements.txt create mode 100644 openfl-workspace/keras_jax/nlp/src/__init__.py create mode 100644 openfl-workspace/keras_jax/nlp/src/dataloader.py create mode 100644 openfl-workspace/keras_jax/nlp/src/dataloader_utils.py create mode 100644 openfl-workspace/keras_jax/nlp/src/taskrunner.py delete mode 100644 openfl-workspace/keras_tf/cnn_mnist/plan/defaults delete mode 100644 openfl-workspace/keras_tf/cnn_mnist/src/__init__.py delete mode 100644 openfl-workspace/keras_tf/cnn_mnist/src/dataloader.py delete mode 100644 openfl-workspace/keras_tf/cnn_mnist/src/taskrunner.py delete mode 100644 openfl-workspace/keras_torch/cnn_mnist/plan/defaults delete mode 100644 openfl-workspace/keras_torch/cnn_mnist/requirements.txt delete mode 100644 openfl-workspace/keras_torch/cnn_mnist/src/__init__.py delete mode 100644 openfl-workspace/keras_torch/cnn_mnist/src/dataloader.py delete mode 100644 openfl-workspace/keras_torch/cnn_mnist/src/mnist_utils.py delete mode 100644 openfl-workspace/keras_torch/cnn_mnist/src/taskrunner.py create mode 100644 openfl-workspace/keras_torch/nlp/.workspace create mode 100644 openfl-workspace/keras_torch/nlp/plan/cols.yaml create mode 100644 openfl-workspace/keras_torch/nlp/plan/data.yaml create mode 100644 openfl-workspace/keras_torch/nlp/plan/plan.yaml create mode 100644 openfl-workspace/keras_torch/nlp/requirements.txt create mode 100644 openfl-workspace/keras_torch/nlp/src/__init__.py create mode 100644 openfl-workspace/keras_torch/nlp/src/dataloader.py create mode 100644 openfl-workspace/keras_torch/nlp/src/dataloader_utils.py create mode 100644 openfl-workspace/keras_torch/nlp/src/taskrunner.py diff --git a/openfl-workspace/keras_jax/cnn_mnist/.workspace b/openfl-workspace/keras/2dunet/.workspace similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/.workspace rename to openfl-workspace/keras/2dunet/.workspace diff --git a/openfl-workspace/keras_tf/2dunet/README.md b/openfl-workspace/keras/2dunet/README.md similarity index 100% rename from openfl-workspace/keras_tf/2dunet/README.md rename to openfl-workspace/keras/2dunet/README.md diff --git a/openfl-workspace/keras_tf/2dunet/plan/cols.yaml b/openfl-workspace/keras/2dunet/plan/cols.yaml similarity index 100% rename from openfl-workspace/keras_tf/2dunet/plan/cols.yaml rename to openfl-workspace/keras/2dunet/plan/cols.yaml diff --git a/openfl-workspace/keras_tf/2dunet/plan/data.yaml b/openfl-workspace/keras/2dunet/plan/data.yaml similarity index 100% rename from openfl-workspace/keras_tf/2dunet/plan/data.yaml rename to openfl-workspace/keras/2dunet/plan/data.yaml diff --git a/openfl-workspace/keras_jax/cnn_mnist/plan/defaults b/openfl-workspace/keras/2dunet/plan/defaults similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/plan/defaults rename to openfl-workspace/keras/2dunet/plan/defaults diff --git 
a/openfl-workspace/keras_tf/2dunet/plan/plan.yaml b/openfl-workspace/keras/2dunet/plan/plan.yaml similarity index 100% rename from openfl-workspace/keras_tf/2dunet/plan/plan.yaml rename to openfl-workspace/keras/2dunet/plan/plan.yaml diff --git a/openfl-workspace/keras_tf/2dunet/requirements.txt b/openfl-workspace/keras/2dunet/requirements.txt similarity index 100% rename from openfl-workspace/keras_tf/2dunet/requirements.txt rename to openfl-workspace/keras/2dunet/requirements.txt diff --git a/openfl-workspace/keras_jax/cnn_mnist/src/__init__.py b/openfl-workspace/keras/2dunet/src/__init__.py similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/src/__init__.py rename to openfl-workspace/keras/2dunet/src/__init__.py diff --git a/openfl-workspace/keras_tf/2dunet/src/brats_utils.py b/openfl-workspace/keras/2dunet/src/brats_utils.py similarity index 100% rename from openfl-workspace/keras_tf/2dunet/src/brats_utils.py rename to openfl-workspace/keras/2dunet/src/brats_utils.py diff --git a/openfl-workspace/keras_tf/2dunet/src/dataloader.py b/openfl-workspace/keras/2dunet/src/dataloader.py similarity index 100% rename from openfl-workspace/keras_tf/2dunet/src/dataloader.py rename to openfl-workspace/keras/2dunet/src/dataloader.py diff --git a/openfl-workspace/keras_tf/2dunet/src/nii_reader.py b/openfl-workspace/keras/2dunet/src/nii_reader.py similarity index 100% rename from openfl-workspace/keras_tf/2dunet/src/nii_reader.py rename to openfl-workspace/keras/2dunet/src/nii_reader.py diff --git a/openfl-workspace/keras_tf/2dunet/src/taskrunner.py b/openfl-workspace/keras/2dunet/src/taskrunner.py similarity index 100% rename from openfl-workspace/keras_tf/2dunet/src/taskrunner.py rename to openfl-workspace/keras/2dunet/src/taskrunner.py diff --git a/openfl-workspace/keras_tf/2dunet/.workspace b/openfl-workspace/keras/cnn_mnist/.workspace similarity index 100% rename from openfl-workspace/keras_tf/2dunet/.workspace rename to openfl-workspace/keras/cnn_mnist/.workspace diff --git a/openfl-workspace/keras_jax/cnn_mnist/plan/cols.yaml b/openfl-workspace/keras/cnn_mnist/plan/cols.yaml similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/plan/cols.yaml rename to openfl-workspace/keras/cnn_mnist/plan/cols.yaml diff --git a/openfl-workspace/keras_jax/cnn_mnist/plan/data.yaml b/openfl-workspace/keras/cnn_mnist/plan/data.yaml similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/plan/data.yaml rename to openfl-workspace/keras/cnn_mnist/plan/data.yaml diff --git a/openfl-workspace/keras_tf/2dunet/plan/defaults b/openfl-workspace/keras/cnn_mnist/plan/defaults similarity index 100% rename from openfl-workspace/keras_tf/2dunet/plan/defaults rename to openfl-workspace/keras/cnn_mnist/plan/defaults diff --git a/openfl-workspace/keras_jax/cnn_mnist/plan/plan.yaml b/openfl-workspace/keras/cnn_mnist/plan/plan.yaml similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/plan/plan.yaml rename to openfl-workspace/keras/cnn_mnist/plan/plan.yaml diff --git a/openfl-workspace/keras/cnn_mnist/requirements.txt b/openfl-workspace/keras/cnn_mnist/requirements.txt new file mode 100644 index 0000000000..34a7b94009 --- /dev/null +++ b/openfl-workspace/keras/cnn_mnist/requirements.txt @@ -0,0 +1,2 @@ +keras==3.6.0 +tensorflow==2.18.0 \ No newline at end of file diff --git a/openfl-workspace/keras_tf/2dunet/src/__init__.py b/openfl-workspace/keras/cnn_mnist/src/__init__.py similarity index 100% rename from openfl-workspace/keras_tf/2dunet/src/__init__.py rename to 
openfl-workspace/keras/cnn_mnist/src/__init__.py diff --git a/openfl-workspace/keras_jax/cnn_mnist/src/dataloader.py b/openfl-workspace/keras/cnn_mnist/src/dataloader.py similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/src/dataloader.py rename to openfl-workspace/keras/cnn_mnist/src/dataloader.py diff --git a/openfl-workspace/keras_tf/cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras/cnn_mnist/src/mnist_utils.py similarity index 100% rename from openfl-workspace/keras_tf/cnn_mnist/src/mnist_utils.py rename to openfl-workspace/keras/cnn_mnist/src/mnist_utils.py diff --git a/openfl-workspace/keras_jax/cnn_mnist/src/taskrunner.py b/openfl-workspace/keras/cnn_mnist/src/taskrunner.py similarity index 100% rename from openfl-workspace/keras_jax/cnn_mnist/src/taskrunner.py rename to openfl-workspace/keras/cnn_mnist/src/taskrunner.py diff --git a/openfl-workspace/keras_tf/cnn_mnist/.workspace b/openfl-workspace/keras/nlp/.workspace similarity index 100% rename from openfl-workspace/keras_tf/cnn_mnist/.workspace rename to openfl-workspace/keras/nlp/.workspace diff --git a/openfl-workspace/keras_tf/cnn_mnist/plan/cols.yaml b/openfl-workspace/keras/nlp/plan/cols.yaml similarity index 100% rename from openfl-workspace/keras_tf/cnn_mnist/plan/cols.yaml rename to openfl-workspace/keras/nlp/plan/cols.yaml diff --git a/openfl-workspace/keras_tf/cnn_mnist/plan/data.yaml b/openfl-workspace/keras/nlp/plan/data.yaml similarity index 100% rename from openfl-workspace/keras_tf/cnn_mnist/plan/data.yaml rename to openfl-workspace/keras/nlp/plan/data.yaml diff --git a/openfl-workspace/keras_torch/cnn_mnist/plan/plan.yaml b/openfl-workspace/keras/nlp/plan/plan.yaml similarity index 76% rename from openfl-workspace/keras_torch/cnn_mnist/plan/plan.yaml rename to openfl-workspace/keras/nlp/plan/plan.yaml index 54867f4578..ce7476ab85 100644 --- a/openfl-workspace/keras_torch/cnn_mnist/plan/plan.yaml +++ b/openfl-workspace/keras/nlp/plan/plan.yaml @@ -14,20 +14,24 @@ collaborator : defaults : plan/defaults/collaborator.yaml template : openfl.component.Collaborator settings : + db_store_rounds: 2 delta_updates : false opt_treatment : RESET data_loader : defaults : plan/defaults/data_loader.yaml - template : src.dataloader.KerasMNISTInMemory + template : src.dataloader.NLPDataLoader settings : collaborator_count : 2 - data_group_name : mnist - batch_size : 256 + batch_size : 64 + split_ratio: 0.2 + num_samples: 10000 task_runner : defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.KerasCNN + template : src.taskrunner.KerasNLP + settings : + latent_dim : 256 network : defaults : plan/defaults/network.yaml @@ -40,7 +44,3 @@ tasks : compression_pipeline : defaults : plan/defaults/compression_pipeline.yaml - # To use different Compression Pipeline, uncomment the following lines - # template : openfl.pipelines.KCPipeline - # settings : - # n_clusters : 6 diff --git a/openfl-workspace/keras_tf/cnn_mnist/requirements.txt b/openfl-workspace/keras/nlp/requirements.txt similarity index 96% rename from openfl-workspace/keras_tf/cnn_mnist/requirements.txt rename to openfl-workspace/keras/nlp/requirements.txt index 5fa9907811..6bba4a277d 100644 --- a/openfl-workspace/keras_tf/cnn_mnist/requirements.txt +++ b/openfl-workspace/keras/nlp/requirements.txt @@ -1,3 +1,2 @@ keras==3.6.0 tensorflow==2.18.0 - diff --git a/openfl-workspace/keras/nlp/src/__init__.py b/openfl-workspace/keras/nlp/src/__init__.py new file mode 100644 index 0000000000..8201974015 --- /dev/null +++ 
b/openfl-workspace/keras/nlp/src/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2021-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""openfl nlp keras template."""
diff --git a/openfl-workspace/keras/nlp/src/dataloader.py b/openfl-workspace/keras/nlp/src/dataloader.py
new file mode 100644
index 0000000000..d7180b4d3c
--- /dev/null
+++ b/openfl-workspace/keras/nlp/src/dataloader.py
@@ -0,0 +1,142 @@
+"""Copyright (C) 2020-2021 Intel Corporation
+   SPDX-License-Identifier: Apache-2.0
+
+Licensed subject to the terms of the separately executed evaluation
+license agreement between Intel Corporation and you.
+"""
+from logging import getLogger
+from typing import Optional
+from typing import Iterator
+from typing import Tuple
+from typing import Union
+
+import numpy as np
+import src.dataloader_utils as dlu
+
+from openfl.federated import KerasDataLoader
+
+logger = getLogger(__name__)
+
+
+class NLPDataLoader(KerasDataLoader):
+    """NLP Dataloader template."""
+
+    def __init__(self, collaborator_count: int, split_ratio: float,
+                 num_samples: int, data_path: str, batch_size: int, **kwargs) -> None:
+        """Instantiate the data object.
+
+        Args:
+            collaborator_count: Number of collaborators in the federation
+            split_ratio: Fraction of the samples reserved for validation
+            num_samples: Number of sentence pairs to load from the dataset
+            data_path: The shard number used by this collaborator
+            batch_size: The batch size of the data loader
+            **kwargs: Additional keyword arguments (currently unused)
+
+        Returns:
+            None
+        """
+        self.shard_num = data_path
+        self.data_path = dlu.download_data_()
+
+        self.batch_size = batch_size
+
+        train, valid, details = dlu.load_shard(collaborator_count, self.shard_num,
+                                               self.data_path, num_samples, split_ratio)
+
+        self.num_samples = details['num_samples']
+        self.num_encoder_tokens = details['num_encoder_tokens']
+        self.num_decoder_tokens = details['num_decoder_tokens']
+        self.max_encoder_seq_length = details['max_encoder_seq_length']
+        self.max_decoder_seq_length = details['max_decoder_seq_length']
+
+        self.X_train = [train[0], train[1]]
+        self.y_train = train[2]
+        self.X_valid = [valid[0], valid[1]]
+        self.y_valid = valid[2]
+
+    def get_feature_shape(self) -> Tuple[int, ...]:
+        """Get the shape of an example feature array."""
+        return self.X_train[0].shape
+
+    def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Get training data loader.
+
+        Returns:
+            loader object
+        """
+        return self._get_batch_generator(X1=self.X_train[0], X2=self.X_train[1],
+                                         y=self.y_train, batch_size=batch_size)
+
+    def get_valid_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Get validation data loader.
+
+        Returns:
+            loader object
+        """
+        return self._get_batch_generator(X1=self.X_valid[0], X2=self.X_valid[1],
+                                         y=self.y_valid, batch_size=batch_size)
+
+    def get_train_data_size(self) -> int:
+        """
+        Get total number of training samples.
+
+        Returns:
+            int: number of training samples
+        """
+        return self.X_train[0].shape[0]
+
+    def get_valid_data_size(self) -> int:
+        """
+        Get total number of validation samples.
+
+        Returns:
+            int: number of validation samples
+        """
+        return self.X_valid[0].shape[0]
+
+    @staticmethod
+    def _batch_generator(X1: np.ndarray, X2: np.ndarray,
+                         y: np.ndarray, idxs: np.ndarray,
+                         batch_size: int,
+                         num_batches: int) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Generate batch of data.
+
+        Args:
+            X1: encoder input data
+            X2: decoder input data
+            y: label data
+            idxs: The shuffled sample indices of the dataset
+            batch_size: The batch size for the data loader
+            num_batches: The number of batches
+        Yields:
+            tuple: input data, label data
+        """
+        for i in range(num_batches):
+            a = i * batch_size
+            b = a + batch_size
+            yield (X1[idxs[a:b]], X2[idxs[a:b]]), y[idxs[a:b]]
+
+    def _get_batch_generator(self, X1: np.ndarray, X2: np.ndarray,
+                             y: np.ndarray,
+                             batch_size: Union[int, None]):
+        """
+        Return the dataset generator.
+
+        Args:
+            X1: input data (encoder)
+            X2: input data (decoder)
+            y: label data
+            batch_size: The batch size for the data loader
+        """
+        if batch_size is None:
+            batch_size = self.batch_size
+        # shuffle data indices
+        idxs = np.random.permutation(np.arange(X1.shape[0]))
+        # compute the number of batches
+        num_batches = int(np.ceil(X1.shape[0] / batch_size))
+        # build the generator and return it
+        # TODO: _batch_generator takes X1 as its first positional parameter; if it
+        # ever loses its @staticmethod decorator, every argument passed below would
+        # shift by one position (X1 -> X2, X2 -> y, y -> idxs, idxs -> batch_size,
+        # batch_size -> num_batches) and num_batches would become unexpected.
+        return self._batch_generator(X1, X2, y, idxs, batch_size, num_batches)
diff --git a/openfl-workspace/keras/nlp/src/dataloader_utils.py b/openfl-workspace/keras/nlp/src/dataloader_utils.py
new file mode 100644
index 0000000000..6e86ee5dcb
--- /dev/null
+++ b/openfl-workspace/keras/nlp/src/dataloader_utils.py
@@ -0,0 +1,230 @@
+"""Copyright (C) 2020-2021 Intel Corporation
+   SPDX-License-Identifier: Apache-2.0
+
+Licensed subject to the terms of the separately executed evaluation
+license agreement between Intel Corporation and you.
+"""
+from logging import getLogger
+from os import getcwd
+from os import path
+from os import remove
+from typing import Dict
+from typing import Tuple
+from zipfile import ZipFile
+
+import numpy as np
+import requests
+
+logger = getLogger(__name__)
+
+
+def download_data_() -> str:
+    """Download data.
+
+    Returns:
+        string: relative path to data file
+    """
+    pkg = 'fra-eng.zip'  # Language file: change this to change the language
+    data_dir = 'data'
+    url = 'https://www.manythings.org/anki/' + pkg
+    filename = pkg.split('-')[0] + '.txt'
+
+    workspace_dir = getcwd()
+    default_path = path.join(workspace_dir, data_dir)
+    pkgpath = path.join(default_path, pkg)  # path to downloaded zipfile
+    filepath = path.join(default_path, filename)  # path to extracted file
+
+    if path.isfile(filepath):
+        return path.join(data_dir, filename)
+    try:
+        response = requests.get(url, headers={'User-Agent': 'openfl'})
+        if response.status_code == 200:
+            with open(pkgpath, 'wb') as f:
+                f.write(response.content)
+        else:
+            print(f'Error while downloading {pkg} from {url}: Aborting!')
+            exit()
+    except Exception:
+        print(f'Error while downloading {pkg} from {url}: Aborting!')
+        exit()
+
+    try:
+        with ZipFile(pkgpath, 'r') as z:
+            z.extract(filename, default_path)
+    except Exception:
+        print(f'Error while extracting {pkgpath}: Aborting!')
+        exit()
+
+    if path.isfile(filepath):
+        remove(pkgpath)
+        return path.join(data_dir, filename)
+    else:
+        return ''
+
+
+def import_raw_data_(
+    data_path: str = '',
+    num_samples: int = 0
+) -> Tuple[Dict[str, int], np.ndarray, np.ndarray, np.ndarray]:
+    """Import data.
+
+    Returns:
+        dict: variable details
+        numpy.ndarray: encoder input data
+        numpy.ndarray: decoder input data
+        numpy.ndarray: decoder labels
+    """
+    # Vectorize the data.
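+    # The arrays built below are character-level one-hot encodings with shape
+    # (num_samples, max_seq_length, num_tokens): one timestep per character,
+    # one channel per unique character, and trailing timesteps padded with the
+    # one-hot vector for ' '.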
+ input_texts = [] + target_texts = [] + input_characters = set() + target_characters = set() + with open(data_path, 'r', encoding='utf-8') as f: + lines = f.read().split('\n') + for line in lines[: min(num_samples, len(lines) - 1)]: + input_text, target_text, _ = line.split('\t') + # We use 'tab' as the 'start sequence' character + # for the targets, and '\n' as 'end sequence' character. + target_text = '\t' + target_text + '\n' + input_texts.append(input_text) + target_texts.append(target_text) + for char in input_text: + if char not in input_characters: + input_characters.add(char) + for char in target_text: + if char not in target_characters: + target_characters.add(char) + + input_characters = sorted(input_characters) + target_characters = sorted(target_characters) + num_encoder_tokens = len(input_characters) + num_decoder_tokens = len(target_characters) + max_encoder_seq_length = max([len(txt) for txt in input_texts]) + max_decoder_seq_length = max([len(txt) for txt in target_texts]) + + details = {'num_samples': len(input_texts), + 'num_encoder_tokens': num_encoder_tokens, + 'num_decoder_tokens': num_decoder_tokens, + 'max_encoder_seq_length': max_encoder_seq_length, + 'max_decoder_seq_length': max_decoder_seq_length} + + input_token_index = {char: i for i, char in enumerate(input_characters)} + target_token_index = {char: i for i, char in enumerate(target_characters)} + + encoder_input_data = np.zeros( + (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') + + decoder_input_data = np.zeros( + (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') + + decoder_target_data = np.zeros( + (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') + + for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): + for t, char in enumerate(input_text): + encoder_input_data[i, t, input_token_index[char]] = 1.0 + encoder_input_data[i, t + 1:, input_token_index[' ']] = 1.0 + for t, char in enumerate(target_text): + # decoder_target_data is ahead of decoder_input_data by one timestep + decoder_input_data[i, t, target_token_index[char]] = 1.0 + if t > 0: + # decoder_target_data will be ahead by one timestep + # and will not include the start character. + decoder_target_data[i, t - 1, target_token_index[char]] = 1.0 + decoder_input_data[i, t + 1:, target_token_index[' ']] = 1.0 + decoder_target_data[i, t:, target_token_index[' ']] = 1.0 + + logger.info(f'[DL]-import_raw_data: Number of samples = {len(input_texts)}') + logger.info(f'[DL]-import_raw_data: Number of unique input tokens = {num_encoder_tokens}') + logger.info(f'[DL]-import_raw_data: ' + f'Number of unique decoder tokens = {num_decoder_tokens}') + + logger.info(f'[DL]-import_raw_data: ' + f'Max sequence length for inputs = {max_encoder_seq_length}') + + logger.info(f'[DL]-import_raw_data: ' + f'Max sequence length for outputs = {max_decoder_seq_length}') + + logger.info(f'[DL]-import_raw_data: encoder_input_data = {encoder_input_data.shape}') + logger.info(f'[DL]-import_raw_data: decoder_input_data = {decoder_input_data.shape}') + logger.info(f'[DL]-import_raw_data: decoder_target_data = {decoder_target_data.shape}') + + return details, encoder_input_data, decoder_input_data, decoder_target_data + + +def get_datasets_(encoder_input_data: np.ndarray, decoder_input_data: np.ndarray, + decoder_target_data: np.ndarray, + num_samples: int, split_ratio: float) -> Dict[str, np.ndarray]: + """Create train/val. 
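+
+    Shuffles sample indices with a fixed seed (42) so that every collaborator
+    derives the same train/valid partition before sharding.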
+
+    Returns:
+        dict: Results, containing the train-valid split of the dataset
+              (controlled by split_ratio, e.g. 0.2)
+    """
+    import random
+
+    random.seed(42)
+    train_indexes = random.sample(range(num_samples), int(num_samples * (1 - split_ratio)))
+    valid_indexes = np.delete(range(num_samples), train_indexes)
+
+    # Dataset creation (2 inputs, 1 output)
+    encoder_train_input = encoder_input_data[train_indexes, :, :]
+    decoder_train_input = decoder_input_data[train_indexes, :, :]
+    decoder_train_labels = decoder_target_data[train_indexes, :, :]
+
+    encoder_valid_input = encoder_input_data[valid_indexes, :, :]
+    decoder_valid_input = decoder_input_data[valid_indexes, :, :]
+    decoder_valid_labels = decoder_target_data[valid_indexes, :, :]
+
+    results = {'encoder_train_input': encoder_train_input,
+               'decoder_train_input': decoder_train_input,
+               'decoder_train_labels': decoder_train_labels,
+               'encoder_valid_input': encoder_valid_input,
+               'decoder_valid_input': decoder_valid_input,
+               'decoder_valid_labels': decoder_valid_labels}
+
+    logger.info(f'[DL]get_datasets: encoder_train_input = {encoder_train_input.shape}')
+    logger.info(f'[DL]get_datasets: decoder_train_labels = {decoder_train_labels.shape}')
+
+    return results
+
+
+def load_shard(
+    collaborator_count: int, shard_num: str, data_path: str,
+    num_samples: int, split_ratio: float
+) -> Tuple[Tuple[np.ndarray, ...], Tuple[np.ndarray, ...], Dict[str, int]]:
+    """Load data-shards.
+
+    Returns:
+        Tuple: (numpy.ndarray: X_train_encoder,
+                numpy.ndarray: X_train_decoder,
+                numpy.ndarray: y_train)
+        Tuple: (numpy.ndarray: X_valid_encoder,
+                numpy.ndarray: X_valid_decoder,
+                numpy.ndarray: y_valid)
+        Dict: details, from dataloader_utils.get_datasets_
+    """
+    details, encoder_input_data, decoder_input_data, decoder_target_data = import_raw_data_(
+        data_path,
+        num_samples
+    )
+
+    train_val_dataset = get_datasets_(encoder_input_data, decoder_input_data,
+                                      decoder_target_data, num_samples, split_ratio)
+    # Get the data shards
+    shard_num = int(shard_num)
+    X_train_encoder = train_val_dataset['encoder_train_input'][shard_num::collaborator_count]
+    X_train_decoder = train_val_dataset['decoder_train_input'][shard_num::collaborator_count]
+    y_train = train_val_dataset['decoder_train_labels'][shard_num::collaborator_count]
+
+    X_valid_encoder = train_val_dataset['encoder_valid_input'][shard_num::collaborator_count]
+    X_valid_decoder = train_val_dataset['decoder_valid_input'][shard_num::collaborator_count]
+    y_valid = train_val_dataset['decoder_valid_labels'][shard_num::collaborator_count]
+
+    logger.info(f'[DL]load_shard: X_train_encoder = {X_train_encoder.shape}')
+    logger.info(f'[DL]load_shard: y_train = {y_train.shape}')
+
+    return (
+        (X_train_encoder, X_train_decoder, y_train),
+        (X_valid_encoder, X_valid_decoder, y_valid),
+        details
+    )
diff --git a/openfl-workspace/keras/nlp/src/taskrunner.py b/openfl-workspace/keras/nlp/src/taskrunner.py
new file mode 100644
index 0000000000..88563452f7
--- /dev/null
+++ b/openfl-workspace/keras/nlp/src/taskrunner.py
@@ -0,0 +1,72 @@
+"""Copyright (C) 2020-2024 Intel Corporation
+   SPDX-License-Identifier: Apache-2.0
+
+Licensed subject to the terms of the separately executed evaluation
+license agreement between Intel Corporation and you.
+"""
+import keras
+
+from openfl.federated import KerasTaskRunner
+
+
+class KerasNLP(KerasTaskRunner):
+    """An encoder-decoder (seq2seq) LSTM model for character-level translation."""
+
+    def __init__(self, latent_dim, **kwargs):
+        """
+        Init taskrunner.
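+
+        Builds the encoder-decoder model from latent_dim and the token counts
+        exposed by the attached data loader.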
+
+        Args:
+            latent_dim (int): Dimensionality of the LSTM hidden state
+            **kwargs: Additional parameters to pass to the function
+        """
+        super().__init__(**kwargs)
+
+        self.model = self.build_model(latent_dim,
+                                      self.data_loader.num_encoder_tokens,
+                                      self.data_loader.num_decoder_tokens,
+                                      **kwargs)
+
+        self.initialize_tensorkeys_for_functions()
+
+        self.model.summary(print_fn=self.logger.info)
+
+        self.logger.info(f'Train Set Size : {self.get_train_data_size()}')
+
+    def build_model(self, latent_dim, num_encoder_tokens, num_decoder_tokens, **kwargs):
+        """
+        Define the model architecture.
+
+        Args:
+            latent_dim (int): Dimensionality of the encoder/decoder LSTM states
+            num_encoder_tokens (int): Number of unique input characters
+            num_decoder_tokens (int): Number of unique target characters
+        Returns:
+            keras.Model: The compiled seq2seq model
+        """
+        encoder_inputs = keras.Input(shape=(None, num_encoder_tokens))
+        encoder = keras.layers.LSTM(latent_dim, return_state=True)
+        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
+
+        # We discard `encoder_outputs` and only keep the states.
+        encoder_states = [state_h, state_c]
+
+        # Set up the decoder, using `encoder_states` as initial state.
+        decoder_inputs = keras.Input(shape=(None, num_decoder_tokens))
+
+        # We set up our decoder to return full output sequences,
+        # and to return internal states as well. We don't use the
+        # return states in the training model, but we will use them in inference.
+        decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
+        decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
+        decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax')
+        decoder_outputs = decoder_dense(decoder_outputs)
+
+        # Define the model that will turn
+        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
+        model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+
+        model.compile(
+            optimizer="RMSprop",
+            loss='categorical_crossentropy', metrics=['accuracy']
+        )
+
+        return model
diff --git a/openfl-workspace/keras_jax/cnn_mnist/requirements.txt b/openfl-workspace/keras_jax/cnn_mnist/requirements.txt
deleted file mode 100644
index d8b1358d95..0000000000
--- a/openfl-workspace/keras_jax/cnn_mnist/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-keras==3.6.0
-jaxlib==0.4.38
\ No newline at end of file
diff --git a/openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py
deleted file mode 100644
index 87c440f272..0000000000
--- a/openfl-workspace/keras_jax/cnn_mnist/src/mnist_utils.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""You may copy this file as the starting point of your own model."""
-
-from logging import getLogger
-
-import numpy as np
-from tensorflow.python.keras.utils.data_utils import get_file  # modify this
-
-logger = getLogger(__name__)
-
-
-def one_hot(labels, classes):
-    """
-    One Hot encode a vector.
-
-    Args:
-        labels (list): List of labels to onehot encode
-        classes (int): Total number of categorical classes
-
-    Returns:
-        np.array: Matrix of one-hot encoded labels
-    """
-    return np.eye(classes)[labels]
-
-
-def _load_raw_datashards(shard_num, collaborator_count):
-    """
-    Load the raw data by shard.
-
-    Returns tuples of the dataset shard divided into training and validation.
- - Args: - shard_num (int): The shard number to use - collaborator_count (int): The number of collaborators in the federation - - Returns: - 2 tuples: (image, label) of the training, validation dataset - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file('mnist.npz', - origin=origin_folder + 'mnist.npz', - file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') - - with np.load(path) as f: - # get all of mnist - X_train_tot = f['x_train'] - y_train_tot = f['y_train'] - - X_valid_tot = f['x_test'] - y_valid_tot = f['y_test'] - - # create the shards - shard_num = int(shard_num) - X_train = X_train_tot[shard_num::collaborator_count] - y_train = y_train_tot[shard_num::collaborator_count] - - X_valid = X_valid_tot[shard_num::collaborator_count] - y_valid = y_valid_tot[shard_num::collaborator_count] - - return (X_train, y_train), (X_valid, y_valid) - - -def load_mnist_shard(shard_num, collaborator_count, categorical=True, - channels_last=True, **kwargs): - """ - Load the MNIST dataset. - - Args: - shard_num (int): The shard to use from the dataset - collaborator_count (int): The number of collaborators in the federation - categorical (bool): True = convert the labels to one-hot encoded - vectors (Default = True) - channels_last (bool): True = The input images have the channels - last (Default = True) - **kwargs: Additional parameters to pass to the function - - Returns: - list: The input shape - int: The number of classes - numpy.ndarray: The training data - numpy.ndarray: The training labels - numpy.ndarray: The validation data - numpy.ndarray: The validation labels - """ - img_rows, img_cols = 28, 28 - num_classes = 10 - - (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( - shard_num, collaborator_count - ) - - if channels_last: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) - X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - else: - X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) - X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) - - X_train = X_train.astype('float32') - X_valid = X_valid.astype('float32') - X_train /= 255 - X_valid /= 255 - - logger.info(f'MNIST > X_train Shape : {X_train.shape}') - logger.info(f'MNIST > y_train Shape : {y_train.shape}') - logger.info(f'MNIST > Train Samples : {X_train.shape[0]}') - logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}') - - if categorical: - # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) - - return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/keras_torch/cnn_mnist/.workspace b/openfl-workspace/keras_jax/nlp/.workspace similarity index 100% rename from openfl-workspace/keras_torch/cnn_mnist/.workspace rename to openfl-workspace/keras_jax/nlp/.workspace diff --git a/openfl-workspace/keras_torch/cnn_mnist/plan/cols.yaml b/openfl-workspace/keras_jax/nlp/plan/cols.yaml similarity index 100% rename from openfl-workspace/keras_torch/cnn_mnist/plan/cols.yaml rename to openfl-workspace/keras_jax/nlp/plan/cols.yaml diff --git a/openfl-workspace/keras_torch/cnn_mnist/plan/data.yaml b/openfl-workspace/keras_jax/nlp/plan/data.yaml similarity index 100% rename from openfl-workspace/keras_torch/cnn_mnist/plan/data.yaml rename to openfl-workspace/keras_jax/nlp/plan/data.yaml diff 
--git a/openfl-workspace/keras_tf/cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_jax/nlp/plan/plan.yaml similarity index 76% rename from openfl-workspace/keras_tf/cnn_mnist/plan/plan.yaml rename to openfl-workspace/keras_jax/nlp/plan/plan.yaml index 54867f4578..ce7476ab85 100644 --- a/openfl-workspace/keras_tf/cnn_mnist/plan/plan.yaml +++ b/openfl-workspace/keras_jax/nlp/plan/plan.yaml @@ -14,20 +14,24 @@ collaborator : defaults : plan/defaults/collaborator.yaml template : openfl.component.Collaborator settings : + db_store_rounds: 2 delta_updates : false opt_treatment : RESET data_loader : defaults : plan/defaults/data_loader.yaml - template : src.dataloader.KerasMNISTInMemory + template : src.dataloader.NLPDataLoader settings : collaborator_count : 2 - data_group_name : mnist - batch_size : 256 + batch_size : 64 + split_ratio: 0.2 + num_samples: 10000 task_runner : defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.KerasCNN + template : src.taskrunner.KerasNLP + settings : + latent_dim : 256 network : defaults : plan/defaults/network.yaml @@ -40,7 +44,3 @@ tasks : compression_pipeline : defaults : plan/defaults/compression_pipeline.yaml - # To use different Compression Pipeline, uncomment the following lines - # template : openfl.pipelines.KCPipeline - # settings : - # n_clusters : 6 diff --git a/openfl-workspace/keras_jax/nlp/requirements.txt b/openfl-workspace/keras_jax/nlp/requirements.txt new file mode 100644 index 0000000000..38c991c679 --- /dev/null +++ b/openfl-workspace/keras_jax/nlp/requirements.txt @@ -0,0 +1,2 @@ +keras==3.6.0 +jax==0.4.38 \ No newline at end of file diff --git a/openfl-workspace/keras_jax/nlp/src/__init__.py b/openfl-workspace/keras_jax/nlp/src/__init__.py new file mode 100644 index 0000000000..8201974015 --- /dev/null +++ b/openfl-workspace/keras_jax/nlp/src/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2021-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""openfl nlp keras template.""" diff --git a/openfl-workspace/keras_jax/nlp/src/dataloader.py b/openfl-workspace/keras_jax/nlp/src/dataloader.py new file mode 100644 index 0000000000..d7180b4d3c --- /dev/null +++ b/openfl-workspace/keras_jax/nlp/src/dataloader.py @@ -0,0 +1,142 @@ +"""Copyright (C) 2020-2021 Intel Corporation + SPDX-License-Identifier: Apache-2.0 + +Licensed subject to the terms of the separately executed evaluation +license agreement between Intel Corporation and you. +""" +from logging import getLogger +from typing import Optional +from typing import Iterator +from typing import Tuple +from typing import Union + +import numpy as np +import src.dataloader_utils as dlu + +from openfl.federated import KerasDataLoader + +logger = getLogger(__name__) + + +class NLPDataLoader(KerasDataLoader): + """NLP Dataloader template.""" + + def __init__(self, collaborator_count: int, split_ratio: float, + num_samples: int, data_path: str, batch_size: int, **kwargs) -> None: + """Instantiate the data object. 
+
+        Args:
+            collaborator_count: Number of collaborators in the federation
+            split_ratio: Fraction of the samples reserved for validation
+            num_samples: Number of sentence pairs to load from the dataset
+            data_path: The shard number used by this collaborator
+            batch_size: The batch size of the data loader
+            **kwargs: Additional keyword arguments (currently unused)
+
+        Returns:
+            None
+        """
+        self.shard_num = data_path
+        self.data_path = dlu.download_data_()
+
+        self.batch_size = batch_size
+
+        train, valid, details = dlu.load_shard(collaborator_count, self.shard_num,
+                                               self.data_path, num_samples, split_ratio)
+
+        self.num_samples = details['num_samples']
+        self.num_encoder_tokens = details['num_encoder_tokens']
+        self.num_decoder_tokens = details['num_decoder_tokens']
+        self.max_encoder_seq_length = details['max_encoder_seq_length']
+        self.max_decoder_seq_length = details['max_decoder_seq_length']
+
+        self.X_train = [train[0], train[1]]
+        self.y_train = train[2]
+        self.X_valid = [valid[0], valid[1]]
+        self.y_valid = valid[2]
+
+    def get_feature_shape(self) -> Tuple[int, ...]:
+        """Get the shape of an example feature array."""
+        return self.X_train[0].shape
+
+    def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Get training data loader.
+
+        Returns:
+            loader object
+        """
+        return self._get_batch_generator(X1=self.X_train[0], X2=self.X_train[1],
+                                         y=self.y_train, batch_size=batch_size)
+
+    def get_valid_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Get validation data loader.
+
+        Returns:
+            loader object
+        """
+        return self._get_batch_generator(X1=self.X_valid[0], X2=self.X_valid[1],
+                                         y=self.y_valid, batch_size=batch_size)
+
+    def get_train_data_size(self) -> int:
+        """
+        Get total number of training samples.
+
+        Returns:
+            int: number of training samples
+        """
+        return self.X_train[0].shape[0]
+
+    def get_valid_data_size(self) -> int:
+        """
+        Get total number of validation samples.
+
+        Returns:
+            int: number of validation samples
+        """
+        return self.X_valid[0].shape[0]
+
+    @staticmethod
+    def _batch_generator(X1: np.ndarray, X2: np.ndarray,
+                         y: np.ndarray, idxs: np.ndarray,
+                         batch_size: int,
+                         num_batches: int) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Generate batch of data.
+
+        Args:
+            X1: encoder input data
+            X2: decoder input data
+            y: label data
+            idxs: The shuffled sample indices of the dataset
+            batch_size: The batch size for the data loader
+            num_batches: The number of batches
+        Yields:
+            tuple: input data, label data
+        """
+        for i in range(num_batches):
+            a = i * batch_size
+            b = a + batch_size
+            yield (X1[idxs[a:b]], X2[idxs[a:b]]), y[idxs[a:b]]
+
+    def _get_batch_generator(self, X1: np.ndarray, X2: np.ndarray,
+                             y: np.ndarray,
+                             batch_size: Union[int, None]):
+        """
+        Return the dataset generator.
+
+        Args:
+            X1: input data (encoder)
+            X2: input data (decoder)
+            y: label data
+            batch_size: The batch size for the data loader
+        """
+        if batch_size is None:
+            batch_size = self.batch_size
+        # shuffle data indices
+        idxs = np.random.permutation(np.arange(X1.shape[0]))
+        # compute the number of batches
+        num_batches = int(np.ceil(X1.shape[0] / batch_size))
+        # build the generator and return it
+        # TODO: _batch_generator takes X1 as its first positional parameter; if it
+        # ever loses its @staticmethod decorator, every argument passed below would
+        # shift by one position (X1 -> X2, X2 -> y, y -> idxs, idxs -> batch_size,
+        # batch_size -> num_batches) and num_batches would become unexpected.
+        return self._batch_generator(X1, X2, y, idxs, batch_size, num_batches)
diff --git a/openfl-workspace/keras_jax/nlp/src/dataloader_utils.py b/openfl-workspace/keras_jax/nlp/src/dataloader_utils.py
new file mode 100644
index 0000000000..6e86ee5dcb
--- /dev/null
+++ b/openfl-workspace/keras_jax/nlp/src/dataloader_utils.py
@@ -0,0 +1,230 @@
+"""Copyright (C) 2020-2021 Intel Corporation
+   SPDX-License-Identifier: Apache-2.0
+
+Licensed subject to the terms of the separately executed evaluation
+license agreement between Intel Corporation and you.
+"""
+from logging import getLogger
+from os import getcwd
+from os import path
+from os import remove
+from typing import Dict
+from typing import Tuple
+from zipfile import ZipFile
+
+import numpy as np
+import requests
+
+logger = getLogger(__name__)
+
+
+def download_data_() -> str:
+    """Download data.
+
+    Returns:
+        string: relative path to data file
+    """
+    pkg = 'fra-eng.zip'  # Language file: change this to change the language
+    data_dir = 'data'
+    url = 'https://www.manythings.org/anki/' + pkg
+    filename = pkg.split('-')[0] + '.txt'
+
+    workspace_dir = getcwd()
+    default_path = path.join(workspace_dir, data_dir)
+    pkgpath = path.join(default_path, pkg)  # path to downloaded zipfile
+    filepath = path.join(default_path, filename)  # path to extracted file
+
+    if path.isfile(filepath):
+        return path.join(data_dir, filename)
+    try:
+        response = requests.get(url, headers={'User-Agent': 'openfl'})
+        if response.status_code == 200:
+            with open(pkgpath, 'wb') as f:
+                f.write(response.content)
+        else:
+            print(f'Error while downloading {pkg} from {url}: Aborting!')
+            exit()
+    except Exception:
+        print(f'Error while downloading {pkg} from {url}: Aborting!')
+        exit()
+
+    try:
+        with ZipFile(pkgpath, 'r') as z:
+            z.extract(filename, default_path)
+    except Exception:
+        print(f'Error while extracting {pkgpath}: Aborting!')
+        exit()
+
+    if path.isfile(filepath):
+        remove(pkgpath)
+        return path.join(data_dir, filename)
+    else:
+        return ''
+
+
+def import_raw_data_(
+    data_path: str = '',
+    num_samples: int = 0
+) -> Tuple[Dict[str, int], np.ndarray, np.ndarray, np.ndarray]:
+    """Import data.
+
+    Returns:
+        dict: variable details
+        numpy.ndarray: encoder input data
+        numpy.ndarray: decoder input data
+        numpy.ndarray: decoder labels
+    """
+    # Vectorize the data.
+    input_texts = []
+    target_texts = []
+    input_characters = set()
+    target_characters = set()
+    with open(data_path, 'r', encoding='utf-8') as f:
+        lines = f.read().split('\n')
+    for line in lines[: min(num_samples, len(lines) - 1)]:
+        input_text, target_text, _ = line.split('\t')
+        # We use 'tab' as the 'start sequence' character
+        # for the targets, and '\n' as 'end sequence' character.
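+        # e.g. a pair like ('Go.', 'Va !') becomes input_text = 'Go.' and
+        # target_text = '\tVa !\n' after framing.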
+ target_text = '\t' + target_text + '\n' + input_texts.append(input_text) + target_texts.append(target_text) + for char in input_text: + if char not in input_characters: + input_characters.add(char) + for char in target_text: + if char not in target_characters: + target_characters.add(char) + + input_characters = sorted(input_characters) + target_characters = sorted(target_characters) + num_encoder_tokens = len(input_characters) + num_decoder_tokens = len(target_characters) + max_encoder_seq_length = max([len(txt) for txt in input_texts]) + max_decoder_seq_length = max([len(txt) for txt in target_texts]) + + details = {'num_samples': len(input_texts), + 'num_encoder_tokens': num_encoder_tokens, + 'num_decoder_tokens': num_decoder_tokens, + 'max_encoder_seq_length': max_encoder_seq_length, + 'max_decoder_seq_length': max_decoder_seq_length} + + input_token_index = {char: i for i, char in enumerate(input_characters)} + target_token_index = {char: i for i, char in enumerate(target_characters)} + + encoder_input_data = np.zeros( + (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') + + decoder_input_data = np.zeros( + (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') + + decoder_target_data = np.zeros( + (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') + + for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): + for t, char in enumerate(input_text): + encoder_input_data[i, t, input_token_index[char]] = 1.0 + encoder_input_data[i, t + 1:, input_token_index[' ']] = 1.0 + for t, char in enumerate(target_text): + # decoder_target_data is ahead of decoder_input_data by one timestep + decoder_input_data[i, t, target_token_index[char]] = 1.0 + if t > 0: + # decoder_target_data will be ahead by one timestep + # and will not include the start character. + decoder_target_data[i, t - 1, target_token_index[char]] = 1.0 + decoder_input_data[i, t + 1:, target_token_index[' ']] = 1.0 + decoder_target_data[i, t:, target_token_index[' ']] = 1.0 + + logger.info(f'[DL]-import_raw_data: Number of samples = {len(input_texts)}') + logger.info(f'[DL]-import_raw_data: Number of unique input tokens = {num_encoder_tokens}') + logger.info(f'[DL]-import_raw_data: ' + f'Number of unique decoder tokens = {num_decoder_tokens}') + + logger.info(f'[DL]-import_raw_data: ' + f'Max sequence length for inputs = {max_encoder_seq_length}') + + logger.info(f'[DL]-import_raw_data: ' + f'Max sequence length for outputs = {max_decoder_seq_length}') + + logger.info(f'[DL]-import_raw_data: encoder_input_data = {encoder_input_data.shape}') + logger.info(f'[DL]-import_raw_data: decoder_input_data = {decoder_input_data.shape}') + logger.info(f'[DL]-import_raw_data: decoder_target_data = {decoder_target_data.shape}') + + return details, encoder_input_data, decoder_input_data, decoder_target_data + + +def get_datasets_(encoder_input_data: np.ndarray, decoder_input_data: np.ndarray, + decoder_target_data: np.ndarray, + num_samples: int, split_ratio: float) -> Dict[str, np.ndarray]: + """Create train/val. 
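+
+    Shuffles sample indices with a fixed seed (42) so that every collaborator
+    derives the same train/valid partition before sharding.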
+
+    Returns:
+        dict: Results, containing the train-valid split of the dataset
+              (controlled by split_ratio, e.g. 0.2)
+    """
+    import random
+
+    random.seed(42)
+    train_indexes = random.sample(range(num_samples), int(num_samples * (1 - split_ratio)))
+    valid_indexes = np.delete(range(num_samples), train_indexes)
+
+    # Dataset creation (2 inputs, 1 output)
+    encoder_train_input = encoder_input_data[train_indexes, :, :]
+    decoder_train_input = decoder_input_data[train_indexes, :, :]
+    decoder_train_labels = decoder_target_data[train_indexes, :, :]
+
+    encoder_valid_input = encoder_input_data[valid_indexes, :, :]
+    decoder_valid_input = decoder_input_data[valid_indexes, :, :]
+    decoder_valid_labels = decoder_target_data[valid_indexes, :, :]
+
+    results = {'encoder_train_input': encoder_train_input,
+               'decoder_train_input': decoder_train_input,
+               'decoder_train_labels': decoder_train_labels,
+               'encoder_valid_input': encoder_valid_input,
+               'decoder_valid_input': decoder_valid_input,
+               'decoder_valid_labels': decoder_valid_labels}
+
+    logger.info(f'[DL]get_datasets: encoder_train_input = {encoder_train_input.shape}')
+    logger.info(f'[DL]get_datasets: decoder_train_labels = {decoder_train_labels.shape}')
+
+    return results
+
+
+def load_shard(
+    collaborator_count: int, shard_num: str, data_path: str,
+    num_samples: int, split_ratio: float
+) -> Tuple[Tuple[np.ndarray, ...], Tuple[np.ndarray, ...], Dict[str, int]]:
+    """Load data-shards.
+
+    Returns:
+        Tuple: (numpy.ndarray: X_train_encoder,
+                numpy.ndarray: X_train_decoder,
+                numpy.ndarray: y_train)
+        Tuple: (numpy.ndarray: X_valid_encoder,
+                numpy.ndarray: X_valid_decoder,
+                numpy.ndarray: y_valid)
+        Dict: details, from dataloader_utils.get_datasets_
+    """
+    details, encoder_input_data, decoder_input_data, decoder_target_data = import_raw_data_(
+        data_path,
+        num_samples
+    )
+
+    train_val_dataset = get_datasets_(encoder_input_data, decoder_input_data,
+                                      decoder_target_data, num_samples, split_ratio)
+    # Get the data shards
+    shard_num = int(shard_num)
+    X_train_encoder = train_val_dataset['encoder_train_input'][shard_num::collaborator_count]
+    X_train_decoder = train_val_dataset['decoder_train_input'][shard_num::collaborator_count]
+    y_train = train_val_dataset['decoder_train_labels'][shard_num::collaborator_count]
+
+    X_valid_encoder = train_val_dataset['encoder_valid_input'][shard_num::collaborator_count]
+    X_valid_decoder = train_val_dataset['decoder_valid_input'][shard_num::collaborator_count]
+    y_valid = train_val_dataset['decoder_valid_labels'][shard_num::collaborator_count]
+
+    logger.info(f'[DL]load_shard: X_train_encoder = {X_train_encoder.shape}')
+    logger.info(f'[DL]load_shard: y_train = {y_train.shape}')
+
+    return (
+        (X_train_encoder, X_train_decoder, y_train),
+        (X_valid_encoder, X_valid_decoder, y_valid),
+        details
+    )
diff --git a/openfl-workspace/keras_jax/nlp/src/taskrunner.py b/openfl-workspace/keras_jax/nlp/src/taskrunner.py
new file mode 100644
index 0000000000..68ba3c7f3d
--- /dev/null
+++ b/openfl-workspace/keras_jax/nlp/src/taskrunner.py
@@ -0,0 +1,74 @@
+"""Copyright (C) 2020-2024 Intel Corporation
+   SPDX-License-Identifier: Apache-2.0
+
+Licensed subject to the terms of the separately executed evaluation
+license agreement between Intel Corporation and you.
+"""
+import os
+os.environ["KERAS_BACKEND"] = "jax"
+import keras
+
+from openfl.federated import KerasTaskRunner
+
+
+class KerasNLP(KerasTaskRunner):
+    """An encoder-decoder (seq2seq) LSTM model for character-level translation."""
+
+    def __init__(self, latent_dim, **kwargs):
+        """
+        Init taskrunner.
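+
+        Builds the encoder-decoder model from latent_dim and the token counts
+        exposed by the attached data loader.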
+
+        Args:
+            latent_dim (int): Dimensionality of the LSTM hidden state
+            **kwargs: Additional parameters to pass to the function
+        """
+        super().__init__(**kwargs)
+
+        self.model = self.build_model(latent_dim,
+                                      self.data_loader.num_encoder_tokens,
+                                      self.data_loader.num_decoder_tokens,
+                                      **kwargs)
+
+        self.initialize_tensorkeys_for_functions()
+
+        self.model.summary(print_fn=self.logger.info)
+
+        self.logger.info(f'Train Set Size : {self.get_train_data_size()}')
+
+    def build_model(self, latent_dim, num_encoder_tokens, num_decoder_tokens, **kwargs):
+        """
+        Define the model architecture.
+
+        Args:
+            latent_dim (int): Dimensionality of the encoder/decoder LSTM states
+            num_encoder_tokens (int): Number of unique input characters
+            num_decoder_tokens (int): Number of unique target characters
+        Returns:
+            keras.Model: The compiled seq2seq model
+        """
+        encoder_inputs = keras.Input(shape=(None, num_encoder_tokens))
+        encoder = keras.layers.LSTM(latent_dim, return_state=True)
+        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
+
+        # We discard `encoder_outputs` and only keep the states.
+        encoder_states = [state_h, state_c]
+
+        # Set up the decoder, using `encoder_states` as initial state.
+        decoder_inputs = keras.Input(shape=(None, num_decoder_tokens))
+
+        # We set up our decoder to return full output sequences,
+        # and to return internal states as well. We don't use the
+        # return states in the training model, but we will use them in inference.
+        decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
+        decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
+        decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax')
+        decoder_outputs = decoder_dense(decoder_outputs)
+
+        # Define the model that will turn
+        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
+        model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
+
+        model.compile(
+            optimizer="RMSprop",
+            loss='categorical_crossentropy', metrics=['accuracy']
+        )
+
+        return model
diff --git a/openfl-workspace/keras_tf/cnn_mnist/plan/defaults b/openfl-workspace/keras_tf/cnn_mnist/plan/defaults
deleted file mode 100644
index fb82f9c5b6..0000000000
--- a/openfl-workspace/keras_tf/cnn_mnist/plan/defaults
+++ /dev/null
@@ -1,2 +0,0 @@
-../../workspace/plan/defaults
-
diff --git a/openfl-workspace/keras_tf/cnn_mnist/src/__init__.py b/openfl-workspace/keras_tf/cnn_mnist/src/__init__.py
deleted file mode 100644
index f1410b1298..0000000000
--- a/openfl-workspace/keras_tf/cnn_mnist/src/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-"""You may copy this file as the starting point of your own model."""
diff --git a/openfl-workspace/keras_tf/cnn_mnist/src/dataloader.py b/openfl-workspace/keras_tf/cnn_mnist/src/dataloader.py
deleted file mode 100644
index 040e8091c9..0000000000
--- a/openfl-workspace/keras_tf/cnn_mnist/src/dataloader.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""You may copy this file as the starting point of your own model."""
-
-from openfl.federated import KerasDataLoader
-from .mnist_utils import load_mnist_shard
-
-
-class KerasMNISTInMemory(KerasDataLoader):
-    """Data Loader for MNIST Dataset."""
-
-    def __init__(self, data_path, batch_size, **kwargs):
-        """
-        Initialize.
- - Args: - data_path: File path for the dataset - batch_size (int): The batch size for the data loader - **kwargs: Additional arguments, passed to super init and load_mnist_shard - """ - super().__init__(batch_size, **kwargs) - - # TODO: We should be downloading the dataset shard into a directory - # TODO: There needs to be a method to ask how many collaborators and - # what index/rank is this collaborator. - # Then we have a way to automatically shard based on rank and size of - # collaborator list. - try: - int(data_path) - except: - raise ValueError( - "Expected `%s` to be representable as `int`, as it refers to the data shard " + - "number used by the collaborator.", - data_path - ) - - _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard( - shard_num=int(data_path), **kwargs - ) - - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid - - self.num_classes = num_classes diff --git a/openfl-workspace/keras_tf/cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_tf/cnn_mnist/src/taskrunner.py deleted file mode 100644 index 165861033c..0000000000 --- a/openfl-workspace/keras_tf/cnn_mnist/src/taskrunner.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from keras.models import Sequential -from keras.layers import Conv2D -from keras.layers import Dense -from keras.layers import Flatten - -from openfl.federated import KerasTaskRunner - - -class KerasCNN(KerasTaskRunner): - """A basic convolutional neural network model.""" - - def __init__(self, **kwargs): - """ - Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - """ - super().__init__(**kwargs) - - self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs) - - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info) - - self.logger.info(f'Train Set Size : {self.get_train_data_size()}') - self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}') - - def build_model(self, - input_shape, - num_classes, - conv_kernel_size=(4, 4), - conv_strides=(2, 2), - conv1_channels_out=16, - conv2_channels_out=32, - final_dense_inputsize=100, - **kwargs): - """ - Define the model architecture. 
- - Args: - input_shape (numpy.ndarray): The shape of the data - num_classes (int): The number of classes of the dataset - - Returns: - keras.models.Sequential: The model defined in Keras - - """ - model = Sequential() - - model.add(Conv2D(conv1_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu', - input_shape=input_shape)) - - model.add(Conv2D(conv2_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu')) - - model.add(Flatten()) - - model.add(Dense(final_dense_inputsize, activation='relu')) - - model.add(Dense(num_classes, activation='softmax')) - - model.compile(loss="categorical_crossentropy", - optimizer="adam", - metrics=["accuracy"]) - - return model diff --git a/openfl-workspace/keras_tf/nlp/requirements.txt b/openfl-workspace/keras_tf/nlp/requirements.txt index 6bba4a277d..34a7b94009 100644 --- a/openfl-workspace/keras_tf/nlp/requirements.txt +++ b/openfl-workspace/keras_tf/nlp/requirements.txt @@ -1,2 +1,2 @@ keras==3.6.0 -tensorflow==2.18.0 +tensorflow==2.18.0 \ No newline at end of file diff --git a/openfl-workspace/keras_tf/nlp/src/taskrunner.py b/openfl-workspace/keras_tf/nlp/src/taskrunner.py index 88563452f7..0e9047d894 100644 --- a/openfl-workspace/keras_tf/nlp/src/taskrunner.py +++ b/openfl-workspace/keras_tf/nlp/src/taskrunner.py @@ -4,6 +4,8 @@ Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. """ +import os +os.environ["KERAS_BACKEND"] = "tensorflow" import keras from openfl.federated import KerasTaskRunner diff --git a/openfl-workspace/keras_torch/cnn_mnist/plan/defaults b/openfl-workspace/keras_torch/cnn_mnist/plan/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/openfl-workspace/keras_torch/cnn_mnist/plan/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/openfl-workspace/keras_torch/cnn_mnist/requirements.txt b/openfl-workspace/keras_torch/cnn_mnist/requirements.txt deleted file mode 100644 index 5fa9907811..0000000000 --- a/openfl-workspace/keras_torch/cnn_mnist/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -keras==3.6.0 -tensorflow==2.18.0 - diff --git a/openfl-workspace/keras_torch/cnn_mnist/src/__init__.py b/openfl-workspace/keras_torch/cnn_mnist/src/__init__.py deleted file mode 100644 index f1410b1298..0000000000 --- a/openfl-workspace/keras_torch/cnn_mnist/src/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""You may copy this file as the starting point of your own model.""" diff --git a/openfl-workspace/keras_torch/cnn_mnist/src/dataloader.py b/openfl-workspace/keras_torch/cnn_mnist/src/dataloader.py deleted file mode 100644 index 040e8091c9..0000000000 --- a/openfl-workspace/keras_torch/cnn_mnist/src/dataloader.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from openfl.federated import KerasDataLoader -from .mnist_utils import load_mnist_shard - - -class KerasMNISTInMemory(KerasDataLoader): - """Data Loader for MNIST Dataset.""" - - def __init__(self, data_path, batch_size, **kwargs): - """ - Initialize. 
- - Args: - data_path: File path for the dataset - batch_size (int): The batch size for the data loader - **kwargs: Additional arguments, passed to super init and load_mnist_shard - """ - super().__init__(batch_size, **kwargs) - - # TODO: We should be downloading the dataset shard into a directory - # TODO: There needs to be a method to ask how many collaborators and - # what index/rank is this collaborator. - # Then we have a way to automatically shard based on rank and size of - # collaborator list. - try: - int(data_path) - except: - raise ValueError( - "Expected `%s` to be representable as `int`, as it refers to the data shard " + - "number used by the collaborator.", - data_path - ) - - _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard( - shard_num=int(data_path), **kwargs - ) - - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid - - self.num_classes = num_classes diff --git a/openfl-workspace/keras_torch/cnn_mnist/src/mnist_utils.py b/openfl-workspace/keras_torch/cnn_mnist/src/mnist_utils.py deleted file mode 100644 index d19e13d9dd..0000000000 --- a/openfl-workspace/keras_torch/cnn_mnist/src/mnist_utils.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from logging import getLogger - -import numpy as np -from tensorflow.python.keras.utils.data_utils import get_file - -logger = getLogger(__name__) - - -def one_hot(labels, classes): - """ - One Hot encode a vector. - - Args: - labels (list): List of labels to onehot encode - classes (int): Total number of categorical classes - - Returns: - np.array: Matrix of one-hot encoded labels - """ - return np.eye(classes)[labels] - - -def _load_raw_datashards(shard_num, collaborator_count): - """ - Load the raw data by shard. - - Returns tuples of the dataset shard divided into training and validation. - - Args: - shard_num (int): The shard number to use - collaborator_count (int): The number of collaborators in the federation - - Returns: - 2 tuples: (image, label) of the training, validation dataset - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file('mnist.npz', - origin=origin_folder + 'mnist.npz', - file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') - - with np.load(path) as f: - # get all of mnist - X_train_tot = f['x_train'] - y_train_tot = f['y_train'] - - X_valid_tot = f['x_test'] - y_valid_tot = f['y_test'] - - # create the shards - shard_num = int(shard_num) - X_train = X_train_tot[shard_num::collaborator_count] - y_train = y_train_tot[shard_num::collaborator_count] - - X_valid = X_valid_tot[shard_num::collaborator_count] - y_valid = y_valid_tot[shard_num::collaborator_count] - - return (X_train, y_train), (X_valid, y_valid) - - -def load_mnist_shard(shard_num, collaborator_count, categorical=True, - channels_last=True, **kwargs): - """ - Load the MNIST dataset. 
- - Args: - shard_num (int): The shard to use from the dataset - collaborator_count (int): The number of collaborators in the federation - categorical (bool): True = convert the labels to one-hot encoded - vectors (Default = True) - channels_last (bool): True = The input images have the channels - last (Default = True) - **kwargs: Additional parameters to pass to the function - - Returns: - list: The input shape - int: The number of classes - numpy.ndarray: The training data - numpy.ndarray: The training labels - numpy.ndarray: The validation data - numpy.ndarray: The validation labels - """ - img_rows, img_cols = 28, 28 - num_classes = 10 - - (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( - shard_num, collaborator_count - ) - - if channels_last: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) - X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - else: - X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) - X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) - - X_train = X_train.astype('float32') - X_valid = X_valid.astype('float32') - X_train /= 255 - X_valid /= 255 - - logger.info(f'MNIST > X_train Shape : {X_train.shape}') - logger.info(f'MNIST > y_train Shape : {y_train.shape}') - logger.info(f'MNIST > Train Samples : {X_train.shape[0]}') - logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}') - - if categorical: - # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) - - return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/keras_torch/cnn_mnist/src/taskrunner.py b/openfl-workspace/keras_torch/cnn_mnist/src/taskrunner.py deleted file mode 100644 index 165861033c..0000000000 --- a/openfl-workspace/keras_torch/cnn_mnist/src/taskrunner.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from keras.models import Sequential -from keras.layers import Conv2D -from keras.layers import Dense -from keras.layers import Flatten - -from openfl.federated import KerasTaskRunner - - -class KerasCNN(KerasTaskRunner): - """A basic convolutional neural network model.""" - - def __init__(self, **kwargs): - """ - Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - """ - super().__init__(**kwargs) - - self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs) - - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info) - - self.logger.info(f'Train Set Size : {self.get_train_data_size()}') - self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}') - - def build_model(self, - input_shape, - num_classes, - conv_kernel_size=(4, 4), - conv_strides=(2, 2), - conv1_channels_out=16, - conv2_channels_out=32, - final_dense_inputsize=100, - **kwargs): - """ - Define the model architecture. 
- - Args: - input_shape (numpy.ndarray): The shape of the data - num_classes (int): The number of classes of the dataset - - Returns: - keras.models.Sequential: The model defined in Keras - - """ - model = Sequential() - - model.add(Conv2D(conv1_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu', - input_shape=input_shape)) - - model.add(Conv2D(conv2_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu')) - - model.add(Flatten()) - - model.add(Dense(final_dense_inputsize, activation='relu')) - - model.add(Dense(num_classes, activation='softmax')) - - model.compile(loss="categorical_crossentropy", - optimizer="adam", - metrics=["accuracy"]) - - return model diff --git a/openfl-workspace/keras_torch/nlp/.workspace b/openfl-workspace/keras_torch/nlp/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/openfl-workspace/keras_torch/nlp/plan/cols.yaml b/openfl-workspace/keras_torch/nlp/plan/cols.yaml new file mode 100644 index 0000000000..95307de3bc --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/plan/cols.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +collaborators: + \ No newline at end of file diff --git a/openfl-workspace/keras_torch/nlp/plan/data.yaml b/openfl-workspace/keras_torch/nlp/plan/data.yaml new file mode 100644 index 0000000000..257c7825fe --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/plan/data.yaml @@ -0,0 +1,7 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +# collaborator_name,data_directory_path +one,1 + + diff --git a/openfl-workspace/keras_torch/nlp/plan/plan.yaml b/openfl-workspace/keras_torch/nlp/plan/plan.yaml new file mode 100644 index 0000000000..ce7476ab85 --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/plan/plan.yaml @@ -0,0 +1,46 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
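+#
+# Plan for the Keras NLP workspace on the torch backend; the settings below
+# are identical to the keras_tf/nlp variant of this workspace.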
+ +aggregator : + defaults : plan/defaults/aggregator.yaml + template : openfl.component.Aggregator + settings : + init_state_path : save/init.pbuf + best_state_path : save/best.pbuf + last_state_path : save/last.pbuf + rounds_to_train : 10 + +collaborator : + defaults : plan/defaults/collaborator.yaml + template : openfl.component.Collaborator + settings : + db_store_rounds: 2 + delta_updates : false + opt_treatment : RESET + +data_loader : + defaults : plan/defaults/data_loader.yaml + template : src.dataloader.NLPDataLoader + settings : + collaborator_count : 2 + batch_size : 64 + split_ratio: 0.2 + num_samples: 10000 + +task_runner : + defaults : plan/defaults/task_runner.yaml + template : src.taskrunner.KerasNLP + settings : + latent_dim : 256 + +network : + defaults : plan/defaults/network.yaml + +assigner : + defaults : plan/defaults/assigner.yaml + +tasks : + defaults : plan/defaults/tasks_keras.yaml + +compression_pipeline : + defaults : plan/defaults/compression_pipeline.yaml diff --git a/openfl-workspace/keras_torch/nlp/requirements.txt b/openfl-workspace/keras_torch/nlp/requirements.txt new file mode 100644 index 0000000000..b35bba448a --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/requirements.txt @@ -0,0 +1,2 @@ +keras==3.6.0 +torch==2.5.1 \ No newline at end of file diff --git a/openfl-workspace/keras_torch/nlp/src/__init__.py b/openfl-workspace/keras_torch/nlp/src/__init__.py new file mode 100644 index 0000000000..8201974015 --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/src/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2021-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""openfl nlp keras template.""" diff --git a/openfl-workspace/keras_torch/nlp/src/dataloader.py b/openfl-workspace/keras_torch/nlp/src/dataloader.py new file mode 100644 index 0000000000..d7180b4d3c --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/src/dataloader.py @@ -0,0 +1,142 @@ +"""Copyright (C) 2020-2021 Intel Corporation + SPDX-License-Identifier: Apache-2.0 + +Licensed subject to the terms of the separately executed evaluation +license agreement between Intel Corporation and you. +""" +from logging import getLogger +from typing import Optional +from typing import Iterator +from typing import Tuple +from typing import Union + +import numpy as np +import src.dataloader_utils as dlu + +from openfl.federated import KerasDataLoader + +logger = getLogger(__name__) + + +class NLPDataLoader(KerasDataLoader): + """NLP Dataloader template.""" + + def __init__(self, collaborator_count: int, split_ratio: float, + num_samples: int, data_path: str, batch_size: int, **kwargs) -> None: + """Instantiate the data object. 
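+
+        Downloads the anki fra-eng corpus on first use, then loads this
+        collaborator's shard of the train/valid split.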
+
+        Args:
+            collaborator_count: Number of collaborators in the federation
+            split_ratio: Fraction of the dataset reserved for validation
+            num_samples: Number of corpus samples to load
+            data_path: The data shard number used by this collaborator
+            batch_size: The batch size of the data loader
+            **kwargs: Additional arguments (currently unused)
+
+        Returns:
+            None
+        """
+        self.shard_num = data_path
+        self.data_path = dlu.download_data_()
+
+        self.batch_size = batch_size
+
+        train, valid, details = dlu.load_shard(collaborator_count, self.shard_num,
+                                               self.data_path, num_samples, split_ratio)
+
+        self.num_samples = details['num_samples']
+        self.num_encoder_tokens = details['num_encoder_tokens']
+        self.num_decoder_tokens = details['num_decoder_tokens']
+        self.max_encoder_seq_length = details['max_encoder_seq_length']
+        self.max_decoder_seq_length = details['max_decoder_seq_length']
+
+        self.X_train = [train[0], train[1]]
+        self.y_train = train[2]
+        self.X_valid = [valid[0], valid[1]]
+        self.y_valid = valid[2]
+
+    def get_feature_shape(self) -> Tuple[int, ...]:
+        """Get the shape of an example feature array."""
+        return self.X_train[0].shape
+
+    def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Get training data loader.
+
+        Returns:
+            loader object
+        """
+        return self._get_batch_generator(X1=self.X_train[0], X2=self.X_train[1],
+                                         y=self.y_train, batch_size=batch_size)
+
+    def get_valid_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Get validation data loader.
+
+        Returns:
+            loader object
+        """
+        return self._get_batch_generator(X1=self.X_valid[0], X2=self.X_valid[1],
+                                         y=self.y_valid, batch_size=batch_size)
+
+    def get_train_data_size(self) -> int:
+        """
+        Get total number of training samples.
+
+        Returns:
+            int: number of training samples
+        """
+        return self.X_train[0].shape[0]
+
+    def get_valid_data_size(self) -> int:
+        """
+        Get total number of validation samples.
+
+        Returns:
+            int: number of validation samples
+        """
+        return self.X_valid[0].shape[0]
+
+    @staticmethod
+    def _batch_generator(X1: np.ndarray, X2: np.ndarray,
+                         y: np.ndarray, idxs: np.ndarray,
+                         batch_size: int,
+                         num_batches: int) -> Iterator[Tuple[np.ndarray]]:
+        """
+        Generate batch of data.
+
+        Args:
+            X1: encoder input data
+            X2: decoder input data
+            y: label data
+            idxs: The index of the dataset
+            batch_size: The batch size for the data loader
+            num_batches: The number of batches
+        Yields:
+            tuple: input data, label data
+        """
+        for i in range(num_batches):
+            a = i * batch_size
+            b = a + batch_size
+            yield (X1[idxs[a:b]], X2[idxs[a:b]]), y[idxs[a:b]]
+
+    def _get_batch_generator(self, X1: np.ndarray, X2: np.ndarray,
+                             y: np.ndarray,
+                             batch_size: Union[int, None]):
+        """
+        Return the dataset generator.
+
+        Args:
+            X1: input data (encoder)
+            X2: input data (decoder)
+            y: label data
+            batch_size: The batch size for the data loader
+        """
+        if batch_size is None:
+            batch_size = self.batch_size
+        # shuffle data indices
+        idxs = np.random.permutation(np.arange(X1.shape[0]))
+        # compute the number of batches
+        num_batches = int(np.ceil(X1.shape[0] / batch_size))
+        # build the generator and return it
+        # NOTE: _batch_generator is a staticmethod, so calling it through
+        #       self does not prepend self to the argument list; the six
+        #       positional arguments below map one-to-one onto its parameters.
+        return self._batch_generator(X1, X2, y, idxs, batch_size, num_batches)
diff --git a/openfl-workspace/keras_torch/nlp/src/dataloader_utils.py b/openfl-workspace/keras_torch/nlp/src/dataloader_utils.py
new file mode 100644
index 0000000000..6e86ee5dcb
--- /dev/null
+++ b/openfl-workspace/keras_torch/nlp/src/dataloader_utils.py
@@ -0,0 +1,230 @@
+"""Copyright (C) 2020-2021 Intel Corporation
+ SPDX-License-Identifier: Apache-2.0
+
+Licensed subject to the terms of the separately executed evaluation
+license agreement between Intel Corporation and you.
+"""
+from logging import getLogger
+from os import getcwd
+from os import path
+from os import remove
+from typing import Dict
+from typing import Tuple
+from zipfile import ZipFile
+
+import numpy as np
+import requests
+
+logger = getLogger(__name__)
+
+
+def download_data_() -> str:
+    """Download data.
+
+    Returns:
+        string: relative path to data file
+    """
+    pkg = 'fra-eng.zip'  # Language file: change this to change the language
+    data_dir = 'data'
+    url = 'https://www.manythings.org/anki/' + pkg
+    filename = pkg.split('-')[0] + '.txt'
+
+    workspace_dir = getcwd()
+    default_path = path.join(workspace_dir, data_dir)
+    pkgpath = path.join(default_path, pkg)  # path to downloaded zipfile
+    filepath = path.join(default_path, filename)  # path to extracted file
+
+    if path.isfile(filepath):
+        return path.join(data_dir, filename)
+    try:
+        response = requests.get(url, headers={'User-Agent': 'openfl'})
+        if response.status_code == 200:
+            with open(pkgpath, 'wb') as f:
+                f.write(response.content)
+        else:
+            print(f'Error while downloading {pkg} from {url}: Aborting!')
+            exit()
+    except Exception:
+        print(f'Error while downloading {pkg} from {url}: Aborting!')
+        exit()
+
+    try:
+        with ZipFile(pkgpath, 'r') as z:
+            z.extract(filename, default_path)
+    except Exception:
+        print(f'Error while extracting {pkgpath}: Aborting!')
+        exit()
+
+    if path.isfile(filepath):
+        remove(pkgpath)
+        return path.join(data_dir, filename)
+    else:
+        return ''
+
+
+def import_raw_data_(
+        data_path: str = '',
+        num_samples: int = 0
+) -> Tuple[Dict[str, int], np.ndarray, np.ndarray, np.ndarray]:
+    """Import data.
+
+    Returns:
+        dict: variable details
+        numpy.ndarray: encoder input data
+        numpy.ndarray: decoder input data
+        numpy.ndarray: decoder labels
+    """
+    # Vectorize the data.
+    input_texts = []
+    target_texts = []
+    input_characters = set()
+    target_characters = set()
+    with open(data_path, 'r', encoding='utf-8') as f:
+        lines = f.read().split('\n')
+    for line in lines[: min(num_samples, len(lines) - 1)]:
+        input_text, target_text, _ = line.split('\t')
+        # We use 'tab' as the 'start sequence' character
+        # for the targets, and '\n' as 'end sequence' character.
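+        # Wrapping each target in these markers gives the decoder an
+        # explicit start-of-sequence and end-of-sequence signal to learn.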
+ target_text = '\t' + target_text + '\n' + input_texts.append(input_text) + target_texts.append(target_text) + for char in input_text: + if char not in input_characters: + input_characters.add(char) + for char in target_text: + if char not in target_characters: + target_characters.add(char) + + input_characters = sorted(input_characters) + target_characters = sorted(target_characters) + num_encoder_tokens = len(input_characters) + num_decoder_tokens = len(target_characters) + max_encoder_seq_length = max([len(txt) for txt in input_texts]) + max_decoder_seq_length = max([len(txt) for txt in target_texts]) + + details = {'num_samples': len(input_texts), + 'num_encoder_tokens': num_encoder_tokens, + 'num_decoder_tokens': num_decoder_tokens, + 'max_encoder_seq_length': max_encoder_seq_length, + 'max_decoder_seq_length': max_decoder_seq_length} + + input_token_index = {char: i for i, char in enumerate(input_characters)} + target_token_index = {char: i for i, char in enumerate(target_characters)} + + encoder_input_data = np.zeros( + (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') + + decoder_input_data = np.zeros( + (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') + + decoder_target_data = np.zeros( + (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') + + for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): + for t, char in enumerate(input_text): + encoder_input_data[i, t, input_token_index[char]] = 1.0 + encoder_input_data[i, t + 1:, input_token_index[' ']] = 1.0 + for t, char in enumerate(target_text): + # decoder_target_data is ahead of decoder_input_data by one timestep + decoder_input_data[i, t, target_token_index[char]] = 1.0 + if t > 0: + # decoder_target_data will be ahead by one timestep + # and will not include the start character. + decoder_target_data[i, t - 1, target_token_index[char]] = 1.0 + decoder_input_data[i, t + 1:, target_token_index[' ']] = 1.0 + decoder_target_data[i, t:, target_token_index[' ']] = 1.0 + + logger.info(f'[DL]-import_raw_data: Number of samples = {len(input_texts)}') + logger.info(f'[DL]-import_raw_data: Number of unique input tokens = {num_encoder_tokens}') + logger.info(f'[DL]-import_raw_data: ' + f'Number of unique decoder tokens = {num_decoder_tokens}') + + logger.info(f'[DL]-import_raw_data: ' + f'Max sequence length for inputs = {max_encoder_seq_length}') + + logger.info(f'[DL]-import_raw_data: ' + f'Max sequence length for outputs = {max_decoder_seq_length}') + + logger.info(f'[DL]-import_raw_data: encoder_input_data = {encoder_input_data.shape}') + logger.info(f'[DL]-import_raw_data: decoder_input_data = {decoder_input_data.shape}') + logger.info(f'[DL]-import_raw_data: decoder_target_data = {decoder_target_data.shape}') + + return details, encoder_input_data, decoder_input_data, decoder_target_data + + +def get_datasets_(encoder_input_data: np.ndarray, decoder_input_data: np.ndarray, + decoder_target_data: np.ndarray, + num_samples: int, split_ratio: float) -> Dict[str, np.ndarray]: + """Create train/val. 
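+
+    Shuffles the sample indices with a fixed seed so that every
+    collaborator derives the same train/valid partition before sharding.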
+ + Returns: + dict: Results, containing the train-valid split of the dataset (split_ratio = 0.2) + """ + import random + + random.seed(42) + train_indexes = random.sample(range(num_samples), int(num_samples * (1 - split_ratio))) + valid_indexes = np.delete(range(num_samples), train_indexes) + + # Dataset creation (2 inputs , 1 output ) + encoder_train_input = encoder_input_data[train_indexes, :, :] + decoder_train_input = decoder_input_data[train_indexes, :, :] + decoder_train_labels = decoder_target_data[train_indexes, :, :] + + encoder_valid_input = encoder_input_data[valid_indexes, :, :] + decoder_valid_input = decoder_input_data[valid_indexes, :, :] + decoder_valid_labels = decoder_target_data[valid_indexes, :, :] + + results = {'encoder_train_input': encoder_train_input, + 'decoder_train_input': decoder_train_input, + 'decoder_train_labels': decoder_train_labels, + 'encoder_valid_input': encoder_valid_input, + 'decoder_valid_input': decoder_valid_input, + 'decoder_valid_labels': decoder_valid_labels} + + logger.info(f'[DL]get_datasets: encoder_train_input = {encoder_train_input.shape}') + logger.info(f'[DL]get_datasets: decoder_train_labels= {decoder_train_labels.shape}') + + return results + + +def load_shard( + collaborator_count: int, shard_num: str, data_path: str, + num_samples: int, split_ratio: float +) -> Tuple[Tuple[np.ndarray, ...], Tuple[np.ndarray, ...], Dict[str, int]]: + """Load data-shards. + + Returns: + Tuple: ( numpy.ndarray: X_train_encoder, + numpy.ndarray: X_train_decoder, + numpy.ndarray: y_train) + Tuple: ( numpy.ndarray: X_valid_encoder, + numpy.ndarray: X_valid_decoder, + numpy.ndarray: y_valid) + Dict: details, from DataLoader_utils.get_datasets + """ + details, encoder_input_data, decoder_input_data, decoder_target_data = import_raw_data_( + data_path, + num_samples + ) + + train_val_dataset = get_datasets_(encoder_input_data, decoder_input_data, + decoder_target_data, num_samples, split_ratio) + # Get the data shards + shard_num = int(shard_num) + X_train_encoder = train_val_dataset['encoder_train_input'][shard_num::collaborator_count] + X_train_decoder = train_val_dataset['decoder_train_input'][shard_num::collaborator_count] + y_train = train_val_dataset['decoder_train_labels'][shard_num::collaborator_count] + + X_valid_encoder = train_val_dataset['encoder_valid_input'][shard_num::collaborator_count] + X_valid_decoder = train_val_dataset['decoder_valid_input'][shard_num::collaborator_count] + y_valid = train_val_dataset['decoder_valid_labels'][shard_num::collaborator_count] + + logger.info(f'[DL]load_shard: X_train_encoder = {X_train_encoder.shape}') + logger.info(f'[DL]load_shard: y_train = {y_train.shape}') + + return ( + (X_train_encoder, X_train_decoder, y_train), + (X_valid_encoder, X_valid_decoder, y_valid), + details + ) diff --git a/openfl-workspace/keras_torch/nlp/src/taskrunner.py b/openfl-workspace/keras_torch/nlp/src/taskrunner.py new file mode 100644 index 0000000000..4fc5276043 --- /dev/null +++ b/openfl-workspace/keras_torch/nlp/src/taskrunner.py @@ -0,0 +1,74 @@ +"""Copyright (C) 2020-2024 Intel Corporation + SPDX-License-Identifier: Apache-2.0 + +Licensed subject to the terms of the separately executed evaluation +license agreement between Intel Corporation and you. +""" +import os +os.environ["KERAS_BACKEND"] = "torch" +import keras + +from openfl.federated import KerasTaskRunner + + +class KerasNLP(KerasTaskRunner): + """A basic convolutional neural network model.""" + + def __init__(self, latent_dim, **kwargs): + """ + Init taskrunner. 
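+
+        Builds the same character-level encoder-decoder LSTM as the jax and
+        tf variants of this workspace; only the KERAS_BACKEND setting differs.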
+ + Args: + **kwargs: Additional parameters to pass to the function + """ + super().__init__(**kwargs) + + self.model = self.build_model(latent_dim, + self.data_loader.num_encoder_tokens, + self.data_loader.num_decoder_tokens, + **kwargs) + + self.initialize_tensorkeys_for_functions() + + self.model.summary(print_fn=self.logger.info) + + self.logger.info(f'Train Set Size : {self.get_train_data_size()}') + + def build_model(self, latent_dim, num_encoder_tokens, num_decoder_tokens, **kwargs): + """ + Define the model architecture. + + Args: + input_shape (numpy.ndarray): The shape of the data + num_classes (int): The number of classes of the dataset + Returns: + tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras + """ + encoder_inputs = keras.Input(shape=(None, num_encoder_tokens)) + encoder = keras.layers.LSTM(latent_dim, return_state=True) + encoder_outputs, state_h, state_c = encoder(encoder_inputs) + + # We discard `encoder_outputs` and only keep the states. + encoder_states = [state_h, state_c] + + # Set up the decoder, using `encoder_states` as initial state. + decoder_inputs = keras.Input(shape=(None, num_decoder_tokens)) + + # We set up our decoder to return full output sequences, + # and to return internal states as well. We don't use the + # return states in the training model, but we will use them in inference. + decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True) + decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) + decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax') + decoder_outputs = decoder_dense(decoder_outputs) + + # Define the model that will turn + # `encoder_input_data` & `decoder_input_data` into `decoder_target_data` + model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) + + model.compile( + optimizer="RMSprop", + loss='categorical_crossentropy', metrics=['accuracy'] + ) + + return model diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py index e2dd069f72..6691cc3698 100644 --- a/openfl/federated/task/runner_keras.py +++ b/openfl/federated/task/runner_keras.py @@ -9,6 +9,7 @@ """ import copy +import os from warnings import catch_warnings, simplefilter import numpy as np @@ -17,6 +18,8 @@ from openfl.utilities import Metric, TensorKey, change_tags from openfl.utilities.split import split_tensor_dict_for_holdouts +os.environ["KERAS_BACKEND"] = "torch" + with catch_warnings(): simplefilter(action="ignore") import keras From e526aeb48fcfcb5d14e9aca460133d60ea8212e1 Mon Sep 17 00:00:00 2001 From: yes Date: Mon, 6 Jan 2025 01:06:33 -0800 Subject: [PATCH 04/13] code changes Signed-off-by: yes --- .../keras_jax/nlp/src/taskrunner.py | 2 - openfl-workspace/keras_tf/nlp/plan/cols.yaml | 5 - openfl-workspace/keras_tf/nlp/plan/data.yaml | 7 - openfl-workspace/keras_tf/nlp/plan/plan.yaml | 46 ---- .../keras_tf/nlp/requirements.txt | 2 - openfl-workspace/keras_tf/nlp/src/__init__.py | 4 - .../keras_tf/nlp/src/dataloader.py | 142 ----------- .../keras_tf/nlp/src/dataloader_utils.py | 230 ------------------ .../keras_tf/nlp/src/taskrunner.py | 74 ------ .../keras_torch/nlp/src/taskrunner.py | 2 - .../nlp => torch_cnn_histology}/.workspace | 0 .../torch_cnn_histology_fedcurv/.workspace | 2 + openfl-workspace/torch_cnn_mnist/.workspace | 2 + openfl-workspace/xgb_higgs/.workspace | 2 + openfl/federated/task/runner_keras.py | 8 +- openfl/interface/workspace.py | 13 +- 16 files changed, 20 insertions(+), 521 
deletions(-) delete mode 100644 openfl-workspace/keras_tf/nlp/plan/cols.yaml delete mode 100644 openfl-workspace/keras_tf/nlp/plan/data.yaml delete mode 100644 openfl-workspace/keras_tf/nlp/plan/plan.yaml delete mode 100644 openfl-workspace/keras_tf/nlp/requirements.txt delete mode 100644 openfl-workspace/keras_tf/nlp/src/__init__.py delete mode 100644 openfl-workspace/keras_tf/nlp/src/dataloader.py delete mode 100644 openfl-workspace/keras_tf/nlp/src/dataloader_utils.py delete mode 100644 openfl-workspace/keras_tf/nlp/src/taskrunner.py rename openfl-workspace/{keras_tf/nlp => torch_cnn_histology}/.workspace (100%) create mode 100644 openfl-workspace/torch_cnn_histology_fedcurv/.workspace create mode 100644 openfl-workspace/torch_cnn_mnist/.workspace create mode 100644 openfl-workspace/xgb_higgs/.workspace diff --git a/openfl-workspace/keras_jax/nlp/src/taskrunner.py b/openfl-workspace/keras_jax/nlp/src/taskrunner.py index 68ba3c7f3d..88563452f7 100644 --- a/openfl-workspace/keras_jax/nlp/src/taskrunner.py +++ b/openfl-workspace/keras_jax/nlp/src/taskrunner.py @@ -4,8 +4,6 @@ Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. """ -import os -os.environ["KERAS_BACKEND"] = "jax" import keras from openfl.federated import KerasTaskRunner diff --git a/openfl-workspace/keras_tf/nlp/plan/cols.yaml b/openfl-workspace/keras_tf/nlp/plan/cols.yaml deleted file mode 100644 index 95307de3bc..0000000000 --- a/openfl-workspace/keras_tf/nlp/plan/cols.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -collaborators: - \ No newline at end of file diff --git a/openfl-workspace/keras_tf/nlp/plan/data.yaml b/openfl-workspace/keras_tf/nlp/plan/data.yaml deleted file mode 100644 index 257c7825fe..0000000000 --- a/openfl-workspace/keras_tf/nlp/plan/data.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -# collaborator_name,data_directory_path -one,1 - - diff --git a/openfl-workspace/keras_tf/nlp/plan/plan.yaml b/openfl-workspace/keras_tf/nlp/plan/plan.yaml deleted file mode 100644 index ce7476ab85..0000000000 --- a/openfl-workspace/keras_tf/nlp/plan/plan.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
- -aggregator : - defaults : plan/defaults/aggregator.yaml - template : openfl.component.Aggregator - settings : - init_state_path : save/init.pbuf - best_state_path : save/best.pbuf - last_state_path : save/last.pbuf - rounds_to_train : 10 - -collaborator : - defaults : plan/defaults/collaborator.yaml - template : openfl.component.Collaborator - settings : - db_store_rounds: 2 - delta_updates : false - opt_treatment : RESET - -data_loader : - defaults : plan/defaults/data_loader.yaml - template : src.dataloader.NLPDataLoader - settings : - collaborator_count : 2 - batch_size : 64 - split_ratio: 0.2 - num_samples: 10000 - -task_runner : - defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.KerasNLP - settings : - latent_dim : 256 - -network : - defaults : plan/defaults/network.yaml - -assigner : - defaults : plan/defaults/assigner.yaml - -tasks : - defaults : plan/defaults/tasks_keras.yaml - -compression_pipeline : - defaults : plan/defaults/compression_pipeline.yaml diff --git a/openfl-workspace/keras_tf/nlp/requirements.txt b/openfl-workspace/keras_tf/nlp/requirements.txt deleted file mode 100644 index 34a7b94009..0000000000 --- a/openfl-workspace/keras_tf/nlp/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -keras==3.6.0 -tensorflow==2.18.0 \ No newline at end of file diff --git a/openfl-workspace/keras_tf/nlp/src/__init__.py b/openfl-workspace/keras_tf/nlp/src/__init__.py deleted file mode 100644 index 8201974015..0000000000 --- a/openfl-workspace/keras_tf/nlp/src/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""openfl nlp keras template.""" diff --git a/openfl-workspace/keras_tf/nlp/src/dataloader.py b/openfl-workspace/keras_tf/nlp/src/dataloader.py deleted file mode 100644 index d7180b4d3c..0000000000 --- a/openfl-workspace/keras_tf/nlp/src/dataloader.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Copyright (C) 2020-2021 Intel Corporation - SPDX-License-Identifier: Apache-2.0 - -Licensed subject to the terms of the separately executed evaluation -license agreement between Intel Corporation and you. -""" -from logging import getLogger -from typing import Optional -from typing import Iterator -from typing import Tuple -from typing import Union - -import numpy as np -import src.dataloader_utils as dlu - -from openfl.federated import KerasDataLoader - -logger = getLogger(__name__) - - -class NLPDataLoader(KerasDataLoader): - """NLP Dataloader template.""" - - def __init__(self, collaborator_count: int, split_ratio: float, - num_samples: int, data_path: str, batch_size: int, **kwargs) -> None: - """Instantiate the data object. 
- - Args: - data_path: The file path to the data Returns: - batch_size: The batch size of the data loader tuple: shape of an example feature array - **kwargs: Additional arguments, passed to super init and load_mnist_shard - - Returns: - none - """ - self.shard_num = data_path - self.data_path = dlu.download_data_() - - self.batch_size = batch_size - - train, valid, details = dlu.load_shard(collaborator_count, self.shard_num, - self.data_path, num_samples, split_ratio) - - self.num_samples = details['num_samples'] - self.num_encoder_tokens = details['num_encoder_tokens'] - self.num_decoder_tokens = details['num_decoder_tokens'] - self.max_encoder_seq_length = details['max_encoder_seq_length'] - self.max_decoder_seq_length = details['max_decoder_seq_length'] - - self.X_train = [train[0], train[1]] - self.y_train = train[2] - self.X_valid = [valid[0], valid[1]] - self.y_valid = valid[2] - - def get_feature_shape(self) -> Tuple[int, ...]: - """Get the shape of an example feature array.""" - return self.X_train[0].shape - - def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]: - """ - Get training data loader. - - Returns - ------- - loader object - """ - return self._get_batch_generator(X1=self.X_train[0], X2=self.X_train[1], - y=self.y_train, batch_size=batch_size) - - def get_valid_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]: - """ - Get validation data loader. - - Returns: - loader object - """ - return self._get_batch_generator(X1=self.X_valid[0], X2=self.X_valid[1], - y=self.y_valid, batch_size=batch_size) - - def get_train_data_size(self) -> int: - """ - Get total number of training samples. - - Returns: - int: number of training samples - """ - return self.X_train[0].shape[0] - - def get_valid_data_size(self) -> int: - """ - Get total number of validation samples. - - Returns: - int: number of validation samples - """ - return self.X_valid[0].shape[0] - - @staticmethod - def _batch_generator(X1: np.ndarray, X2: np.ndarray, - y: np.ndarray, idxs: np.ndarray, - batch_size: int, - num_batches: int) -> Iterator[Tuple[np.ndarray]]: - """ - Generate batch of data. - - Args: - X: input data - y: label data - idxs: The index of the dataset - batch_size: The batch size for the data loader - num_batches: The number of batches - Yields: - tuple: input data, label data - """ - for i in range(num_batches): - a = i * batch_size - b = a + batch_size - yield (X1[idxs[a:b]], X2[idxs[a:b]]), y[idxs[a:b]] - - def _get_batch_generator(self, X1: np.ndarray, X2: np.ndarray, - y: np.ndarray, - batch_size: Union[int, None]): - """ - Return the dataset generator. - - Args: - X1: input data (encoder) - X2: input data (decoder) - y: label data - batch_size: The batch size for the data loader - """ - if batch_size is None: - batch_size = self.batch_size - # shuffle data indices - idxs = np.random.permutation(np.arange(X1.shape[0])) - # compute the number of batches - num_batches = int(np.ceil(X1.shape[0] / batch_size)) - # build the generator and return it - # TODO: due to _batch_generator(X1, ...) 
has first param X1, all params here will be moved, - # X1 -> X2, X2 -> y, y -> idxs, idxs -> batch_size, batch_size -> num_batches, - # and num_batches -> should be unexpected in this function - return self._batch_generator(X1, X2, y, idxs, batch_size, num_batches) diff --git a/openfl-workspace/keras_tf/nlp/src/dataloader_utils.py b/openfl-workspace/keras_tf/nlp/src/dataloader_utils.py deleted file mode 100644 index 6e86ee5dcb..0000000000 --- a/openfl-workspace/keras_tf/nlp/src/dataloader_utils.py +++ /dev/null @@ -1,230 +0,0 @@ -"""Copyright (C) 2020-2021 Intel Corporation - SPDX-License-Identifier: Apache-2.0 - -Licensed subject to the terms of the separately executed evaluation -license agreement between Intel Corporation and you. -""" -from logging import getLogger -from os import getcwd -from os import path -from os import remove -from typing import Dict -from typing import Tuple -from zipfile import ZipFile - -import numpy as np -import requests - -logger = getLogger(__name__) - - -def download_data_() -> str: - """Download data. - - Returns: - string: relative path to data file - """ - pkg = 'fra-eng.zip' # Language file: change this to change the language - data_dir = 'data' - url = 'https://www.manythings.org/anki/' + pkg - filename = pkg.split('-')[0] + '.txt' - - workspace_dir = getcwd() - default_path = path.join(workspace_dir, data_dir) - pkgpath = path.join(default_path, pkg) # path to downloaded zipfile - filepath = path.join(default_path, filename) # path to extracted file - - if path.isfile(filepath): - return path.join(data_dir, filename) - try: - response = requests.get(url, headers={'User-Agent': 'openfl'}) - if response.status_code == 200: - with open(pkgpath, 'wb') as f: - f.write(response.content) - else: - print(f'Error while downloading {pkg} from {url}: Aborting!') - exit() - except Exception: - print(f'Error while downloading {pkg} from {url}: Aborting!') - exit() - - try: - with ZipFile(pkgpath, 'r') as z: - z.extract(filename, default_path) - except Exception: - print(f'Error while extracting {pkgpath}: Aborting!') - exit() - - if path.isfile(filepath): - remove(pkgpath) - return path.join(data_dir, filename) - else: - return '' - - -def import_raw_data_( - data_path: str = '', - num_samples: int = 0 -) -> Tuple[Dict[str, int], np.ndarray, np.ndarray, np.ndarray]: - """Import data. - - Returns: - dict: variable details - numpy.ndarray: encoder input data - numpy.ndarray: decoder input data - numpy.ndarray: decoder labels - """ - # Vectorize the data. - input_texts = [] - target_texts = [] - input_characters = set() - target_characters = set() - with open(data_path, 'r', encoding='utf-8') as f: - lines = f.read().split('\n') - for line in lines[: min(num_samples, len(lines) - 1)]: - input_text, target_text, _ = line.split('\t') - # We use 'tab' as the 'start sequence' character - # for the targets, and '\n' as 'end sequence' character. 
- target_text = '\t' + target_text + '\n' - input_texts.append(input_text) - target_texts.append(target_text) - for char in input_text: - if char not in input_characters: - input_characters.add(char) - for char in target_text: - if char not in target_characters: - target_characters.add(char) - - input_characters = sorted(input_characters) - target_characters = sorted(target_characters) - num_encoder_tokens = len(input_characters) - num_decoder_tokens = len(target_characters) - max_encoder_seq_length = max([len(txt) for txt in input_texts]) - max_decoder_seq_length = max([len(txt) for txt in target_texts]) - - details = {'num_samples': len(input_texts), - 'num_encoder_tokens': num_encoder_tokens, - 'num_decoder_tokens': num_decoder_tokens, - 'max_encoder_seq_length': max_encoder_seq_length, - 'max_decoder_seq_length': max_decoder_seq_length} - - input_token_index = {char: i for i, char in enumerate(input_characters)} - target_token_index = {char: i for i, char in enumerate(target_characters)} - - encoder_input_data = np.zeros( - (len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32') - - decoder_input_data = np.zeros( - (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') - - decoder_target_data = np.zeros( - (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32') - - for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): - for t, char in enumerate(input_text): - encoder_input_data[i, t, input_token_index[char]] = 1.0 - encoder_input_data[i, t + 1:, input_token_index[' ']] = 1.0 - for t, char in enumerate(target_text): - # decoder_target_data is ahead of decoder_input_data by one timestep - decoder_input_data[i, t, target_token_index[char]] = 1.0 - if t > 0: - # decoder_target_data will be ahead by one timestep - # and will not include the start character. - decoder_target_data[i, t - 1, target_token_index[char]] = 1.0 - decoder_input_data[i, t + 1:, target_token_index[' ']] = 1.0 - decoder_target_data[i, t:, target_token_index[' ']] = 1.0 - - logger.info(f'[DL]-import_raw_data: Number of samples = {len(input_texts)}') - logger.info(f'[DL]-import_raw_data: Number of unique input tokens = {num_encoder_tokens}') - logger.info(f'[DL]-import_raw_data: ' - f'Number of unique decoder tokens = {num_decoder_tokens}') - - logger.info(f'[DL]-import_raw_data: ' - f'Max sequence length for inputs = {max_encoder_seq_length}') - - logger.info(f'[DL]-import_raw_data: ' - f'Max sequence length for outputs = {max_decoder_seq_length}') - - logger.info(f'[DL]-import_raw_data: encoder_input_data = {encoder_input_data.shape}') - logger.info(f'[DL]-import_raw_data: decoder_input_data = {decoder_input_data.shape}') - logger.info(f'[DL]-import_raw_data: decoder_target_data = {decoder_target_data.shape}') - - return details, encoder_input_data, decoder_input_data, decoder_target_data - - -def get_datasets_(encoder_input_data: np.ndarray, decoder_input_data: np.ndarray, - decoder_target_data: np.ndarray, - num_samples: int, split_ratio: float) -> Dict[str, np.ndarray]: - """Create train/val. 
- - Returns: - dict: Results, containing the train-valid split of the dataset (split_ratio = 0.2) - """ - import random - - random.seed(42) - train_indexes = random.sample(range(num_samples), int(num_samples * (1 - split_ratio))) - valid_indexes = np.delete(range(num_samples), train_indexes) - - # Dataset creation (2 inputs , 1 output ) - encoder_train_input = encoder_input_data[train_indexes, :, :] - decoder_train_input = decoder_input_data[train_indexes, :, :] - decoder_train_labels = decoder_target_data[train_indexes, :, :] - - encoder_valid_input = encoder_input_data[valid_indexes, :, :] - decoder_valid_input = decoder_input_data[valid_indexes, :, :] - decoder_valid_labels = decoder_target_data[valid_indexes, :, :] - - results = {'encoder_train_input': encoder_train_input, - 'decoder_train_input': decoder_train_input, - 'decoder_train_labels': decoder_train_labels, - 'encoder_valid_input': encoder_valid_input, - 'decoder_valid_input': decoder_valid_input, - 'decoder_valid_labels': decoder_valid_labels} - - logger.info(f'[DL]get_datasets: encoder_train_input = {encoder_train_input.shape}') - logger.info(f'[DL]get_datasets: decoder_train_labels= {decoder_train_labels.shape}') - - return results - - -def load_shard( - collaborator_count: int, shard_num: str, data_path: str, - num_samples: int, split_ratio: float -) -> Tuple[Tuple[np.ndarray, ...], Tuple[np.ndarray, ...], Dict[str, int]]: - """Load data-shards. - - Returns: - Tuple: ( numpy.ndarray: X_train_encoder, - numpy.ndarray: X_train_decoder, - numpy.ndarray: y_train) - Tuple: ( numpy.ndarray: X_valid_encoder, - numpy.ndarray: X_valid_decoder, - numpy.ndarray: y_valid) - Dict: details, from DataLoader_utils.get_datasets - """ - details, encoder_input_data, decoder_input_data, decoder_target_data = import_raw_data_( - data_path, - num_samples - ) - - train_val_dataset = get_datasets_(encoder_input_data, decoder_input_data, - decoder_target_data, num_samples, split_ratio) - # Get the data shards - shard_num = int(shard_num) - X_train_encoder = train_val_dataset['encoder_train_input'][shard_num::collaborator_count] - X_train_decoder = train_val_dataset['decoder_train_input'][shard_num::collaborator_count] - y_train = train_val_dataset['decoder_train_labels'][shard_num::collaborator_count] - - X_valid_encoder = train_val_dataset['encoder_valid_input'][shard_num::collaborator_count] - X_valid_decoder = train_val_dataset['decoder_valid_input'][shard_num::collaborator_count] - y_valid = train_val_dataset['decoder_valid_labels'][shard_num::collaborator_count] - - logger.info(f'[DL]load_shard: X_train_encoder = {X_train_encoder.shape}') - logger.info(f'[DL]load_shard: y_train = {y_train.shape}') - - return ( - (X_train_encoder, X_train_decoder, y_train), - (X_valid_encoder, X_valid_decoder, y_valid), - details - ) diff --git a/openfl-workspace/keras_tf/nlp/src/taskrunner.py b/openfl-workspace/keras_tf/nlp/src/taskrunner.py deleted file mode 100644 index 0e9047d894..0000000000 --- a/openfl-workspace/keras_tf/nlp/src/taskrunner.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Copyright (C) 2020-2024 Intel Corporation - SPDX-License-Identifier: Apache-2.0 - -Licensed subject to the terms of the separately executed evaluation -license agreement between Intel Corporation and you. -""" -import os -os.environ["KERAS_BACKEND"] = "tensorflow" -import keras - -from openfl.federated import KerasTaskRunner - - -class KerasNLP(KerasTaskRunner): - """A basic convolutional neural network model.""" - - def __init__(self, latent_dim, **kwargs): - """ - Init taskrunner. 
- - Args: - **kwargs: Additional parameters to pass to the function - """ - super().__init__(**kwargs) - - self.model = self.build_model(latent_dim, - self.data_loader.num_encoder_tokens, - self.data_loader.num_decoder_tokens, - **kwargs) - - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info) - - self.logger.info(f'Train Set Size : {self.get_train_data_size()}') - - def build_model(self, latent_dim, num_encoder_tokens, num_decoder_tokens, **kwargs): - """ - Define the model architecture. - - Args: - input_shape (numpy.ndarray): The shape of the data - num_classes (int): The number of classes of the dataset - Returns: - tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras - """ - encoder_inputs = keras.Input(shape=(None, num_encoder_tokens)) - encoder = keras.layers.LSTM(latent_dim, return_state=True) - encoder_outputs, state_h, state_c = encoder(encoder_inputs) - - # We discard `encoder_outputs` and only keep the states. - encoder_states = [state_h, state_c] - - # Set up the decoder, using `encoder_states` as initial state. - decoder_inputs = keras.Input(shape=(None, num_decoder_tokens)) - - # We set up our decoder to return full output sequences, - # and to return internal states as well. We don't use the - # return states in the training model, but we will use them in inference. - decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True) - decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) - decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax') - decoder_outputs = decoder_dense(decoder_outputs) - - # Define the model that will turn - # `encoder_input_data` & `decoder_input_data` into `decoder_target_data` - model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) - - model.compile( - optimizer="RMSprop", - loss='categorical_crossentropy', metrics=['accuracy'] - ) - - return model diff --git a/openfl-workspace/keras_torch/nlp/src/taskrunner.py b/openfl-workspace/keras_torch/nlp/src/taskrunner.py index 4fc5276043..88563452f7 100644 --- a/openfl-workspace/keras_torch/nlp/src/taskrunner.py +++ b/openfl-workspace/keras_torch/nlp/src/taskrunner.py @@ -4,8 +4,6 @@ Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
""" -import os -os.environ["KERAS_BACKEND"] = "torch" import keras from openfl.federated import KerasTaskRunner diff --git a/openfl-workspace/keras_tf/nlp/.workspace b/openfl-workspace/torch_cnn_histology/.workspace similarity index 100% rename from openfl-workspace/keras_tf/nlp/.workspace rename to openfl-workspace/torch_cnn_histology/.workspace diff --git a/openfl-workspace/torch_cnn_histology_fedcurv/.workspace b/openfl-workspace/torch_cnn_histology_fedcurv/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/openfl-workspace/torch_cnn_histology_fedcurv/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/openfl-workspace/torch_cnn_mnist/.workspace b/openfl-workspace/torch_cnn_mnist/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/openfl-workspace/torch_cnn_mnist/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/openfl-workspace/xgb_higgs/.workspace b/openfl-workspace/xgb_higgs/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/openfl-workspace/xgb_higgs/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py index 6691cc3698..0642c35996 100644 --- a/openfl/federated/task/runner_keras.py +++ b/openfl/federated/task/runner_keras.py @@ -10,6 +10,7 @@ import copy import os +from importlib import util from warnings import catch_warnings, simplefilter import numpy as np @@ -18,7 +19,12 @@ from openfl.utilities import Metric, TensorKey, change_tags from openfl.utilities.split import split_tensor_dict_for_holdouts -os.environ["KERAS_BACKEND"] = "torch" +if util.find_spec("tensorflow") is not None: + os.environ["KERAS_BACKEND"] = "tensorflow" +elif util.find_spec("torch") is not None: + os.environ["KERAS_BACKEND"] = "torch" +elif util.find_spec("jax") is not None: + os.environ["KERAS_BACKEND"] = "jax" with catch_warnings(): simplefilter(action="ignore") diff --git a/openfl/interface/workspace.py b/openfl/interface/workspace.py index d3cb1713c5..6e042f928e 100644 --- a/openfl/interface/workspace.py +++ b/openfl/interface/workspace.py @@ -95,12 +95,13 @@ def get_templates(): Returns: list: A list of default templates. """ - - return [ - d.name - for d in WORKSPACE.glob("*") - if d.is_dir() and d.name not in ["__pycache__", "workspace", "experimental"] - ] + templates = [] + for root, dirs, files in os.walk(WORKSPACE): + if any(file.endswith(".workspace") for file in files): + for dir in str(root).split(str(WORKSPACE) + "/"): + if dir and not any(dir.startswith(prefix) for prefix in ["__pycache__", "workspace", "experimental"]): + templates.append(dir) + return templates @workspace.command(name="create") From 858b783f1bc14c8e891ca769e51a9df43410ff95 Mon Sep 17 00:00:00 2001 From: yes Date: Mon, 6 Jan 2025 01:19:11 -0800 Subject: [PATCH 05/13] formating issue fix Signed-off-by: yes --- openfl/interface/workspace.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openfl/interface/workspace.py b/openfl/interface/workspace.py index 6e042f928e..990fc45fc2 100644 --- a/openfl/interface/workspace.py +++ b/openfl/interface/workspace.py @@ -96,10 +96,11 @@ def get_templates(): list: A list of default templates. 
""" templates = [] - for root, dirs, files in os.walk(WORKSPACE): + excluded_dirs = ["workspace", "experimental"] + for root, _, files in os.walk(WORKSPACE): if any(file.endswith(".workspace") for file in files): for dir in str(root).split(str(WORKSPACE) + "/"): - if dir and not any(dir.startswith(prefix) for prefix in ["__pycache__", "workspace", "experimental"]): + if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs): templates.append(dir) return templates From 4e77c383275b8e04cb81da4a527808b4c9bfe0af Mon Sep 17 00:00:00 2001 From: yes Date: Mon, 6 Jan 2025 01:24:55 -0800 Subject: [PATCH 06/13] code changes Signed-off-by: yes --- .github/workflows/task_runner_basic_e2e.yml | 2 +- .github/workflows/taskrunner.yml | 2 +- .github/workflows/tr_docker_gramine_direct.yml | 2 +- .github/workflows/tr_docker_native.yml | 2 +- .github/workflows/ubuntu.yml | 2 +- .github/workflows/windows.yml | 2 +- Jenkinsfile | 5 ++--- docs/tutorials/taskrunner.ipynb | 2 +- tests/end_to_end/README.md | 2 +- tests/github/test_double_ws_export.py | 2 +- tests/github/test_gandlf.py | 2 +- tests/github/test_hello_federation.py | 2 +- 12 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/task_runner_basic_e2e.yml b/.github/workflows/task_runner_basic_e2e.yml index b50eedd526..b9f7832d98 100644 --- a/.github/workflows/task_runner_basic_e2e.yml +++ b/.github/workflows/task_runner_basic_e2e.yml @@ -36,7 +36,7 @@ jobs: matrix: # There are open issues for some of the models, so excluding them for now: # model_name: [ "torch_cnn_mnist", "keras_cnn_mnist", "torch_cnn_histology" ] - model_name: ["torch_cnn_mnist", "keras_cnn_mnist"] + model_name: ["torch_cnn_mnist", "keras/cnn_mnist"] python_version: ["3.10", "3.11", "3.12"] fail-fast: false # do not immediately fail if one of the combinations fail diff --git a/.github/workflows/taskrunner.yml b/.github/workflows/taskrunner.yml index a9093be4c1..77a87a16d5 100644 --- a/.github/workflows/taskrunner.yml +++ b/.github/workflows/taskrunner.yml @@ -42,4 +42,4 @@ jobs: pip install . 
- name: Test TaskRunner API run: | - python -m tests.github.test_hello_federation --template keras_cnn_mnist --fed_workspace aggregator --col1 col1 --col2 col2 --rounds-to-train 3 --save-model output_model + python -m tests.github.test_hello_federation --template keras/cnn_mnist --fed_workspace aggregator --col1 col1 --col2 col2 --rounds-to-train 3 --save-model output_model diff --git a/.github/workflows/tr_docker_gramine_direct.yml b/.github/workflows/tr_docker_gramine_direct.yml index 309351f385..2ae6936495 100644 --- a/.github/workflows/tr_docker_gramine_direct.yml +++ b/.github/workflows/tr_docker_gramine_direct.yml @@ -27,7 +27,7 @@ jobs: - name: Create workspace image run: | - fx workspace create --prefix example_workspace --template keras_cnn_mnist + fx workspace create --prefix example_workspace --template keras/cnn_mnist cd example_workspace fx plan initialize -a localhost diff --git a/.github/workflows/tr_docker_native.yml b/.github/workflows/tr_docker_native.yml index f5af424a18..2b5f490a30 100644 --- a/.github/workflows/tr_docker_native.yml +++ b/.github/workflows/tr_docker_native.yml @@ -27,7 +27,7 @@ jobs: - name: Create workspace image run: | - fx workspace create --prefix example_workspace --template keras_cnn_mnist + fx workspace create --prefix example_workspace --template keras/cnn_mnist cd example_workspace fx plan initialize -a localhost fx workspace dockerize --save --revision https://github.com/${GITHUB_REPOSITORY}.git@${{ github.event.pull_request.head.sha }} diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index c968e85f11..80f3ea2678 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -50,4 +50,4 @@ jobs: pip install . - name: Test TaskRunner API run: | - python -m tests.github.test_hello_federation --template keras_cnn_mnist --fed_workspace aggregator --col1 col1 --col2 col2 --rounds-to-train 3 --save-model output_model + python -m tests.github.test_hello_federation --template keras/cnn_mnist --fed_workspace aggregator --col1 col1 --col2 col2 --rounds-to-train 3 --save-model output_model diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 341b93b7f1..5486ea56fc 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -48,4 +48,4 @@ jobs: pip install . 
       - name: Test TaskRunner API
         run: |
-          python -m tests.github.test_hello_federation --template keras_cnn_mnist --fed_workspace aggregator --col1 col1 --col2 col2 --rounds-to-train 3 --save-model output_model
\ No newline at end of file
+          python -m tests.github.test_hello_federation --template keras/cnn_mnist --fed_workspace aggregator --col1 col1 --col2 col2 --rounds-to-train 3 --save-model output_model
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 73f919c844..8b9155410c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -6,13 +6,12 @@ def snykData = [
     // CN-14619 snyk test CLI does not support -f in requirements.txt file
     // 'openfl-workspace_torch_cnn_histology': 'openfl-workspace/torch_cnn_histology/requirements.txt',
     'openfl-workspace_torch_cnn_histology_src': 'openfl-workspace/torch_cnn_histology/src/requirements.txt',
-    'openfl-workspace_keras_nlp': 'openfl-workspace/keras_nlp/requirements.txt',
+    'openfl-workspace_keras_nlp': 'openfl-workspace/keras/nlp/requirements.txt',
     'openfl-workspace_torch_cnn_mnist': 'openfl-workspace/torch_cnn_mnist/requirements.txt',
     'openfl-workspace_torch_unet_kvasir': 'openfl-workspace/torch_unet_kvasir/requirements.txt',
     'openfl-workspace_tf_cnn_histology': 'openfl-workspace/tf_cnn_histology/requirements.txt',
     'openfl-workspace_tf_3dunet_brats': 'openfl-workspace/tf_3dunet_brats/requirements.txt',
-    'openfl-workspace_keras_cnn_with_compression': 'openfl-workspace/keras_cnn_with_compression/requirements.txt',
-    'openfl-workspace_keras_cnn_mnist': 'openfl-workspace/keras_cnn_mnist/requirements.txt',
+    'openfl-workspace_keras_cnn_mnist': 'openfl-workspace/keras/cnn_mnist/requirements.txt',
     'openfl-tutorials_interactive_api_pytorch_medmnist_2d_envoy': 'openfl-tutorials/interactive_api/PyTorch_MedMNIST_2D/envoy/requirements.txt',
     'openfl-tutorials_interactive_api_pytorch_dogscats_vit_workspace': 'openfl-tutorials/interactive_api/PyTorch_DogsCats_ViT/workspace/requirements.txt',
     'openfl-tutorials_interactive_api_pytorch_histology_envoy': 'openfl-tutorials/interactive_api/PyTorch_Histology/envoy/requirements.txt',
diff --git a/docs/tutorials/taskrunner.ipynb b/docs/tutorials/taskrunner.ipynb
index d19fcdc6d0..a95236e17b 100644
--- a/docs/tutorials/taskrunner.ipynb
+++ b/docs/tutorials/taskrunner.ipynb
@@ -36,7 +36,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!fx workspace create --prefix ./mnist_example --template keras_cnn_mnist\n",
+    "!fx workspace create --prefix ./mnist_example --template keras/cnn_mnist\n",
     "%cd ./mnist_example"
    ]
   },
diff --git a/tests/end_to_end/README.md b/tests/end_to_end/README.md
index 191cfd0db4..b71910632d 100644
--- a/tests/end_to_end/README.md
+++ b/tests/end_to_end/README.md
@@ -55,7 +55,7 @@ For example, to run Task runner (bare metal approach) with - torch_cnn_mnist mod
 python -m pytest -s tests/end_to_end/test_suites/task_runner_tests.py -m task_runner_basic --num_rounds 5 --num_collaborators 3 --model_name torch_cnn_mnist --disable_tls
 ```
 
-And, to run Task runner (via dockerized workspace) with keras_cnn_mnist, 2 collaborators, 3 rounds:
+And, to run Task runner (via dockerized workspace) with keras/cnn_mnist, 2 collaborators, 3 rounds:
 
 ```sh
 python -m pytest -s tests/end_to_end/test_suites/task_runner_tests.py -m task_runner_dockerized_ws --num_rounds 3 --num_collaborators 2 --model_name keras_cnn_mnist
diff --git a/tests/github/test_double_ws_export.py b/tests/github/test_double_ws_export.py
index 95c9440b31..7e9b42bec3 100644
--- a/tests/github/test_double_ws_export.py
+++ b/tests/github/test_double_ws_export.py
@@ -22,7 +22,7 @@ def main():
     for entry in iterator:
         if entry.name not in ['__init__.py', 'workspace', 'default']:
             workspace_choice.append(entry.name)
-    parser.add_argument('--template', default='keras_cnn_mnist', choices=workspace_choice)
+    parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')
     parser.add_argument('--col2', default='beta34unicorns')
diff --git a/tests/github/test_gandlf.py b/tests/github/test_gandlf.py
index a57f9f53a0..08e80e2118 100644
--- a/tests/github/test_gandlf.py
+++ b/tests/github/test_gandlf.py
@@ -21,7 +21,7 @@ def exec(command, directory):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--template', default='keras_cnn_mnist')
+    parser.add_argument('--template', default='keras/cnn_mnist')
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one')
     parser.add_argument('--col2', default='two')
diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index e6b84b8de2..2044b753c1 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -21,7 +21,7 @@ def main():
     for entry in iterator:
         if entry.name not in ['__init__.py', 'workspace', 'default']:
             workspace_choice.append(entry.name)
-    parser.add_argument('--template', default='keras_cnn_mnist', choices=workspace_choice)
+    parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')
     parser.add_argument('--col2', default='beta34unicorns')

From b6acb6e88c2534b476fd571be57a9f2a04a099e3 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 6 Jan 2025 01:29:34 -0800
Subject: [PATCH 07/13] discover workspace templates recursively in test_hello_federation.py

Signed-off-by: yes
---
 tests/github/test_hello_federation.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index 2044b753c1..3e86cc5e9a 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -17,10 +17,12 @@ def main():
     # Test the pipeline
     parser = argparse.ArgumentParser()
     workspace_choice = []
-    with os.scandir('openfl-workspace') as iterator:
-        for entry in iterator:
-            if entry.name not in ['__init__.py', 'workspace', 'default']:
-                workspace_choice.append(entry.name)
+    excluded_dirs = ['workspace', 'default']
+    for root, _, files in os.walk('openfl-workspace'):
+        if any(file.endswith(".workspace") for file in files):
+            for dir in str(root).split(str('openfl-workspace') + "/"):
+                if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
+                    workspace_choice.append(dir)
     parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')

From b5eb73f1ca80330d9fed4e66d0c97a00161ab1af Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 6 Jan 2025 21:13:22 -0800
Subject: [PATCH 08/13] add gandlf_seg_test workspace marker and comment the backend selection

Signed-off-by: yes
---
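For context on the runner_keras.py hunk below: the runner now picks the Keras 3 backend
from whichever framework is installed, preferring TensorFlow, then PyTorch, then JAX.
find_spec() only probes importability without importing anything, and the environment
variable is set before `import keras` runs, which is when Keras reads it. A standalone
sketch of the same probe (the helper name is illustrative, not part of this patch):

    import os
    from importlib import util

    def pick_keras_backend(priority=("tensorflow", "torch", "jax")):
        """Return the first importable framework name, or None if none is installed."""
        for name in priority:
            # find_spec() checks whether the module could be imported,
            # without paying the cost (or side effects) of importing it.
            if util.find_spec(name) is not None:
                return name
        return None

    backend = pick_keras_backend()
    if backend is not None:
        os.environ["KERAS_BACKEND"] = backend  # must happen before `import keras`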
openfl-workspace/gandlf_seg_test/.workspace diff --git a/openfl-workspace/gandlf_seg_test/.workspace b/openfl-workspace/gandlf_seg_test/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/openfl-workspace/gandlf_seg_test/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py index 0642c35996..a9e5c8672e 100644 --- a/openfl/federated/task/runner_keras.py +++ b/openfl/federated/task/runner_keras.py @@ -19,12 +19,13 @@ from openfl.utilities import Metric, TensorKey, change_tags from openfl.utilities.split import split_tensor_dict_for_holdouts +# Set the KERAS_BACKEND environment variable based on the available deep learning framework if util.find_spec("tensorflow") is not None: - os.environ["KERAS_BACKEND"] = "tensorflow" + os.environ["KERAS_BACKEND"] = "tensorflow" # Use TensorFlow as the backend elif util.find_spec("torch") is not None: - os.environ["KERAS_BACKEND"] = "torch" + os.environ["KERAS_BACKEND"] = "torch" # Use PyTorch as the backend elif util.find_spec("jax") is not None: - os.environ["KERAS_BACKEND"] = "jax" + os.environ["KERAS_BACKEND"] = "jax" # Use JAX as the backend with catch_warnings(): simplefilter(action="ignore") diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py index 3e86cc5e9a..49880453e2 100644 --- a/tests/github/test_hello_federation.py +++ b/tests/github/test_hello_federation.py @@ -17,7 +17,7 @@ def main(): # Test the pipeline parser = argparse.ArgumentParser() workspace_choice = [] - excluded_dirs = ['workspace', 'default'] + excluded_dirs = ['workspace', 'default', "experimental"] for root, _, files in os.walk('openfl-workspace'): if any(file.endswith(".workspace") for file in files): for dir in str(root).split(str('openfl-workspace') + "/"): From 21b774afe40efdccce2ff05f4c32cd261c08e487 Mon Sep 17 00:00:00 2001 From: yes Date: Mon, 6 Jan 2025 21:21:22 -0800 Subject: [PATCH 09/13] code changes Signed-off-by: yes --- tests/github/test_hello_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py index 49880453e2..548c2723b5 100644 --- a/tests/github/test_hello_federation.py +++ b/tests/github/test_hello_federation.py @@ -20,7 +20,7 @@ def main(): excluded_dirs = ['workspace', 'default', "experimental"] for root, _, files in os.walk('openfl-workspace'): if any(file.endswith(".workspace") for file in files): - for dir in str(root).split(str('openfl-workspace') + "/"): + for dir in str(root).split(str('openfl-workspace') + "\\"): if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs): workspace_choice.append(dir) parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice) From c6405c8de36e5e48558ea4ef1a0b12410b77f494 Mon Sep 17 00:00:00 2001 From: yes Date: Mon, 6 Jan 2025 21:27:34 -0800 Subject: [PATCH 10/13] code changes Signed-off-by: yes --- tests/github/test_hello_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py index 548c2723b5..0f3018263f 100644 --- a/tests/github/test_hello_federation.py +++ b/tests/github/test_hello_federation.py @@ -22,7 +22,7 @@ def main(): if any(file.endswith(".workspace") for file in files): for dir in str(root).split(str('openfl-workspace') + "\\"): if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs): - 
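This step hard-codes the Windows separator, which fixes template discovery on Windows at
the cost of Linux; the next patches work around that, and PATCH 13 resolves it with
os.path.relpath. The underlying pitfall, illustrated with the platform-specific stdlib
modules so the snippet runs the same way everywhere:

    import ntpath
    import posixpath

    # os.walk() joins paths with the host separator, so splitting on a
    # hard-coded "/" or "\\" only matches on one platform. relpath is
    # separator-aware and sidesteps the problem entirely.
    print(posixpath.relpath("openfl-workspace/keras/cnn_mnist", "openfl-workspace"))
    # -> keras/cnn_mnist   (POSIX rules)
    print(ntpath.relpath(r"openfl-workspace\keras\cnn_mnist", "openfl-workspace"))
    # -> keras\cnn_mnist   (Windows rules)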
 tests/github/test_hello_federation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index 49880453e2..548c2723b5 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -20,7 +20,7 @@ def main():
     excluded_dirs = ['workspace', 'default', "experimental"]
     for root, _, files in os.walk('openfl-workspace'):
         if any(file.endswith(".workspace") for file in files):
-            for dir in str(root).split(str('openfl-workspace') + "/"):
+            for dir in str(root).split(str('openfl-workspace') + "\\"):
                 if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
                     workspace_choice.append(dir)
     parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)

From c6405c8de36e5e48558ea4ef1a0b12410b77f494 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 6 Jan 2025 21:27:34 -0800
Subject: [PATCH 10/13] strip the openfl-workspace prefix from template names

Signed-off-by: yes
---
 tests/github/test_hello_federation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index 548c2723b5..0f3018263f 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -22,7 +22,7 @@ def main():
         if any(file.endswith(".workspace") for file in files):
             for dir in str(root).split(str('openfl-workspace') + "\\"):
                 if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
-                    workspace_choice.append(dir)
+                    workspace_choice.append(dir.replace("openfl-workspace/", ""))
     parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')

From 0a0981de004e37d8669e5f24c5eb2cf79acff476 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 6 Jan 2025 21:31:13 -0800
Subject: [PATCH 11/13] normalize template path separators to forward slashes

Signed-off-by: yes
---
 tests/github/test_hello_federation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index 0f3018263f..3c0a6d171c 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -22,7 +22,7 @@ def main():
         if any(file.endswith(".workspace") for file in files):
             for dir in str(root).split(str('openfl-workspace') + "\\"):
                 if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
-                    workspace_choice.append(dir.replace("openfl-workspace/", ""))
+                    workspace_choice.append(dir.replace("openfl-workspace/", "").replace("\\", "/"))
     parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')

From a460f4127e34c311437b3e53c0426c236f3be0f0 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 6 Jan 2025 22:12:37 -0800
Subject: [PATCH 12/13] split template paths on os.path.sep for portability

Signed-off-by: yes
---
 tests/github/test_hello_federation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index 3c0a6d171c..5fa28e7b46 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -20,9 +20,9 @@ def main():
     excluded_dirs = ['workspace', 'default', "experimental"]
     for root, _, files in os.walk('openfl-workspace'):
         if any(file.endswith(".workspace") for file in files):
-            for dir in str(root).split(str('openfl-workspace') + "\\"):
+            for dir in str(root).split(str('openfl-workspace') + os.path.sep):
                 if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
-                    workspace_choice.append(dir.replace("openfl-workspace/", "").replace("\\", "/"))
+                    workspace_choice.append(dir)
     parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')

From afe07ebc336fd8b369c327918d003958083f2268 Mon Sep 17 00:00:00 2001
From: yes
Date: Mon, 6 Jan 2025 22:48:59 -0800
Subject: [PATCH 13/13] derive template names with os.path.relpath

Signed-off-by: yes
---
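os.path.relpath makes the walk independent of which separator the OS used to build the
path, and a single replace(os.sep, "/") keeps the published template names uniform across
platforms. The discovery logic this patch arrives at, condensed into a runnable sketch
(root and exclusion prefixes mirror the patch; the wrapper name is illustrative):

    import os

    def list_templates(workspace="openfl-workspace",
                       excluded=("workspace", "experimental")):
        """Template names such as 'keras/cnn_mnist', relative to the workspace root."""
        templates = []
        for root, _, files in os.walk(workspace):
            # A directory is a template root iff it carries a .workspace marker file.
            if any(f.endswith(".workspace") for f in files):
                # relpath is separator-agnostic; normalize for display and CLI use.
                name = os.path.relpath(root, workspace).replace(os.sep, "/")
                # Guard against the root itself ('.'), which is not a template.
                if name != "." and not any(name.startswith(p) for p in excluded):
                    templates.append(name)
        return templates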
 openfl/interface/workspace.py | 7 ++++---
 tests/github/test_hello_federation.py | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/openfl/interface/workspace.py b/openfl/interface/workspace.py
index 990fc45fc2..238ab101d1 100644
--- a/openfl/interface/workspace.py
+++ b/openfl/interface/workspace.py
@@ -99,9 +99,10 @@ def get_templates():
     excluded_dirs = ["workspace", "experimental"]
     for root, _, files in os.walk(WORKSPACE):
         if any(file.endswith(".workspace") for file in files):
-            for dir in str(root).split(str(WORKSPACE) + "/"):
-                if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
-                    templates.append(dir)
+            dir_path = os.path.relpath(root, WORKSPACE)
+            dir_path = dir_path.replace(os.sep, "/")
+            if dir_path and not any(dir_path.startswith(prefix) for prefix in excluded_dirs):
+                templates.append(dir_path)
     return templates
 
 
diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py
index 5fa28e7b46..a3ef7296a8 100644
--- a/tests/github/test_hello_federation.py
+++ b/tests/github/test_hello_federation.py
@@ -20,9 +20,9 @@ def main():
     excluded_dirs = ['workspace', 'default', "experimental"]
     for root, _, files in os.walk('openfl-workspace'):
         if any(file.endswith(".workspace") for file in files):
-            for dir in str(root).split(str('openfl-workspace') + os.path.sep):
-                if dir and not any(dir.startswith(prefix) for prefix in excluded_dirs):
-                    workspace_choice.append(dir)
+            dir_path = os.path.relpath(root, 'openfl-workspace')
+            dir_path = dir_path.replace(os.sep, '/')
+            if dir_path and not any(dir_path.startswith(prefix) for prefix in excluded_dirs):
+                workspace_choice.append(dir_path)
     parser.add_argument('--template', default='keras/cnn_mnist', choices=workspace_choice)
     parser.add_argument('--fed_workspace', default='fed_work12345alpha81671')
     parser.add_argument('--col1', default='one123dragons')
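With discovery settled, a normalized template name is exactly the value the tooling
consumes, both as `fx workspace create --template keras/cnn_mnist` and as an argparse
choice in the test above. A minimal, self-contained sketch of that wiring (the template
list is a sample of names appearing in this series, e.g. as returned by get_templates()):

    import argparse

    templates = ['keras/cnn_mnist', 'keras/nlp', 'torch_cnn_mnist']  # illustrative sample

    parser = argparse.ArgumentParser()
    parser.add_argument('--template', default='keras/cnn_mnist', choices=templates)
    args = parser.parse_args(['--template', 'keras/nlp'])
    print(args.template)  # keras/nlp; an unknown name would be rejected by argparse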