Add tests for Fed Eval
Signed-off-by: Chaurasiya, Payal <[email protected]>
payalcha committed Jan 22, 2025
1 parent 4a1a135 commit 56d8817
Showing 10 changed files with 716 additions and 216 deletions.
150 changes: 150 additions & 0 deletions .github/workflows/task_runner_eval_dws_e2e.yml
@@ -0,0 +1,150 @@
---
# Task Runner E2E tests for dockerized approach

name: Task_Runner_Eval_DWS_E2E # Please do not modify the name as it is used in the composite action

on:
  workflow_dispatch:
    inputs:
      num_rounds:
        description: "Number of rounds to train"
        required: false
        default: "5"
        type: string
      num_collaborators:
        description: "Number of collaborators"
        required: false
        default: "2"
        type: string

permissions:
  contents: read

# Environment variables common for all the jobs
env:
  NUM_ROUNDS: ${{ inputs.num_rounds || '5' }}
  NUM_COLLABORATORS: ${{ inputs.num_collaborators || '2' }}

jobs:
  test_with_tls_dockerized_ws:
    name: tr_tls_dockerized_ws
    runs-on: ubuntu-22.04
    timeout-minutes: 15
    strategy:
      matrix:
        model_name: ["keras_cnn_mnist"]
        python_version: ["3.10", "3.11", "3.12"]
      fail-fast: false # do not immediately fail if one of the combinations fail

    env:
      MODEL_NAME: ${{ matrix.model_name }}
      PYTHON_VERSION: ${{ matrix.python_version }}

    steps:
      - name: Checkout OpenFL repository
        id: checkout_openfl
        uses: actions/[email protected]
        with:
          fetch-depth: 2 # needed for detecting changes
          submodules: "true"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pre test run
        uses: ./.github/actions/tr_pre_test_run
        if: ${{ always() }}

      - name: Run Task Runner E2E tests with TLS
        id: run_tests
        run: |
          python -m pytest -s tests/end_to_end/test_suites/tr_with_eval_tests.py \
          -m task_runner_dockerized_ws --model_name ${{ env.MODEL_NAME }} \
          --num_rounds ${{ env.NUM_ROUNDS }} --num_collaborators ${{ env.NUM_COLLABORATORS }}
          echo "Task runner end to end test run completed"
      - name: Post test run
        uses: ./.github/actions/tr_post_test_run
        if: ${{ always() }}
        with:
          test_type: "tr_tls_dockerized_ws"

  test_with_non_tls_dockerized_ws:
    name: tr_non_tls_dockerized_ws
    runs-on: ubuntu-22.04
    timeout-minutes: 15
    strategy:
      matrix:
        model_name: ["keras_cnn_mnist"]
        python_version: ["3.10"]
      fail-fast: false # do not immediately fail if one of the combinations fail

    env:
      MODEL_NAME: ${{ matrix.model_name }}
      PYTHON_VERSION: ${{ matrix.python_version }}

    steps:
      - name: Checkout OpenFL repository
        id: checkout_openfl
        uses: actions/[email protected]
        with:
          fetch-depth: 2 # needed for detecting changes
          submodules: "true"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pre test run
        uses: ./.github/actions/tr_pre_test_run
        if: ${{ always() }}

      - name: Run Task Runner E2E tests without TLS
        id: run_tests
        run: |
          python -m pytest -s tests/end_to_end/test_suites/tr_with_eval_tests.py \
          -m task_runner_dockerized_ws --model_name ${{ env.MODEL_NAME }} \
          --num_rounds ${{ env.NUM_ROUNDS }} --num_collaborators ${{ env.NUM_COLLABORATORS }} --disable_tls
          echo "Task runner end to end test run completed"
      - name: Post test run
        uses: ./.github/actions/tr_post_test_run
        if: ${{ always() }}
        with:
          test_type: "tr_non_tls_dockerized_ws"

  test_with_no_client_auth_dockerized_ws:
    name: tr_no_client_auth_dockerized_ws
    runs-on: ubuntu-22.04
    timeout-minutes: 15
    strategy:
      matrix:
        model_name: ["keras_cnn_mnist"]
        python_version: ["3.10"]
      fail-fast: false # do not immediately fail if one of the combinations fail

    env:
      MODEL_NAME: ${{ matrix.model_name }}
      PYTHON_VERSION: ${{ matrix.python_version }}

    steps:
      - name: Checkout OpenFL repository
        id: checkout_openfl
        uses: actions/[email protected]
        with:
          fetch-depth: 2 # needed for detecting changes
          submodules: "true"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pre test run
        uses: ./.github/actions/tr_pre_test_run
        if: ${{ always() }}

      - name: Run Task Runner E2E tests without client authentication
        id: run_tests
        run: |
          python -m pytest -s tests/end_to_end/test_suites/tr_with_eval_tests.py \
          -m task_runner_dockerized_ws --model_name ${{ env.MODEL_NAME }} \
          --num_rounds ${{ env.NUM_ROUNDS }} --num_collaborators ${{ env.NUM_COLLABORATORS }} --disable_client_auth
          echo "Task runner end to end test run completed"
      - name: Post test run
        uses: ./.github/actions/tr_post_test_run
        if: ${{ always() }}
        with:
          test_type: "tr_no_client_auth_dockerized_ws"
158 changes: 158 additions & 0 deletions .github/workflows/task_runner_eval_e2e.yml
@@ -0,0 +1,158 @@
---
# Task Runner E2E tests for bare metal approach

name: Task_Runner_Eval_E2E # Please do not modify the name as it is used in the composite action

on:
  schedule:
    - cron: "0 0 * * *" # Run every day at midnight
  workflow_dispatch:
    inputs:
      num_rounds:
        description: "Number of rounds to train"
        required: false
        default: "5"
        type: string
      num_collaborators:
        description: "Number of collaborators"
        required: false
        default: "2"
        type: string

permissions:
  contents: read

# Environment variables common for all the jobs
env:
  NUM_ROUNDS: ${{ inputs.num_rounds || '5' }}
  NUM_COLLABORATORS: ${{ inputs.num_collaborators || '2' }}

jobs:
  test_with_tls:
    name: tr_tls
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    strategy:
      matrix:
        # Models like XGBoost (xgb_higgs) and torch_cnn_histology require runners with higher memory and CPU to run.
        # Thus these models are excluded from the matrix for now.
        model_name: ["torch_cnn_mnist", "keras_cnn_mnist"]
        python_version: ["3.10", "3.11", "3.12"]
      fail-fast: false # do not immediately fail if one of the combinations fail

    env:
      MODEL_NAME: ${{ matrix.model_name }}
      PYTHON_VERSION: ${{ matrix.python_version }}

    steps:
      - name: Checkout OpenFL repository
        id: checkout_openfl
        uses: actions/[email protected]
        with:
          fetch-depth: 2 # needed for detecting changes
          submodules: "true"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pre test run
        uses: ./.github/actions/tr_pre_test_run
        if: ${{ always() }}

      - name: Run Task Runner E2E tests with TLS
        id: run_tests
        run: |
          python -m pytest -s tests/end_to_end/test_suites/tr_with_eval_tests.py \
          -m task_runner_basic --model_name ${{ env.MODEL_NAME }} \
          --num_rounds ${{ env.NUM_ROUNDS }} --num_collaborators ${{ env.NUM_COLLABORATORS }}
          echo "Task runner end to end test run completed"
      - name: Post test run
        uses: ./.github/actions/tr_post_test_run
        if: ${{ always() }}
        with:
          test_type: "tr_tls"

  test_with_non_tls:
    name: tr_non_tls
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    strategy:
      matrix:
        # Testing this scenario only for torch_cnn_mnist model and python 3.10
        # If required, this can be extended to other models and python versions
        model_name: ["torch_cnn_mnist"]
        python_version: ["3.10"]
      fail-fast: false # do not immediately fail if one of the combinations fail

    env:
      MODEL_NAME: ${{ matrix.model_name }}
      PYTHON_VERSION: ${{ matrix.python_version }}

    steps:
      - name: Checkout OpenFL repository
        id: checkout_openfl
        uses: actions/[email protected]
        with:
          fetch-depth: 2 # needed for detecting changes
          submodules: "true"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pre test run
        uses: ./.github/actions/tr_pre_test_run
        if: ${{ always() }}

      - name: Run Task Runner E2E tests without TLS
        id: run_tests
        run: |
          python -m pytest -s tests/end_to_end/test_suites/tr_with_eval_tests.py \
          -m task_runner_basic --model_name ${{ env.MODEL_NAME }} \
          --num_rounds ${{ env.NUM_ROUNDS }} --num_collaborators ${{ env.NUM_COLLABORATORS }} --disable_tls
          echo "Task runner end to end test run completed"
      - name: Post test run
        uses: ./.github/actions/tr_post_test_run
        if: ${{ always() }}
        with:
          test_type: "tr_non_tls"

  test_with_no_client_auth:
    name: tr_no_client_auth
    runs-on: ubuntu-22.04
    timeout-minutes: 30
    strategy:
      matrix:
        # Testing this scenario for keras_cnn_mnist model and python 3.10
        # If required, this can be extended to other models and python versions
        model_name: ["keras_cnn_mnist"]
        python_version: ["3.10"]
      fail-fast: false # do not immediately fail if one of the combinations fail

    env:
      MODEL_NAME: ${{ matrix.model_name }}
      PYTHON_VERSION: ${{ matrix.python_version }}

    steps:
      - name: Checkout OpenFL repository
        id: checkout_openfl
        uses: actions/[email protected]
        with:
          fetch-depth: 2 # needed for detecting changes
          submodules: "true"
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Pre test run
        uses: ./.github/actions/tr_pre_test_run
        if: ${{ always() }}

      - name: Run Task Runner E2E tests without client authentication
        id: run_tests
        run: |
          python -m pytest -s tests/end_to_end/test_suites/tr_with_eval_tests.py \
          -m task_runner_basic --model_name ${{ env.MODEL_NAME }} \
          --num_rounds ${{ env.NUM_ROUNDS }} --num_collaborators ${{ env.NUM_COLLABORATORS }} --disable_client_auth
          echo "Task runner end to end test run completed"
      - name: Post test run
        uses: ./.github/actions/tr_post_test_run
        if: ${{ always() }}
        with:
          test_type: "tr_no_client_auth"
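
For reference, the same evaluation test suite that these workflow jobs drive can be run outside CI with an equivalent pytest invocation. A minimal local sketch, assuming OpenFL and the test dependencies are installed, the repository root is the working directory, and the custom options (--model_name, --num_rounds, --num_collaborators, --disable_tls, --disable_client_auth) are registered by the suite's conftest:

# Minimal local driver mirroring the CI "Run Task Runner E2E tests" steps above.
# The marker and option values shown are examples taken from the workflows.
import pytest

exit_code = pytest.main([
    "-s", "tests/end_to_end/test_suites/tr_with_eval_tests.py",
    "-m", "task_runner_basic",  # use "task_runner_dockerized_ws" to mirror the DWS workflow
    "--model_name", "keras_cnn_mnist",
    "--num_rounds", "5",
    "--num_collaborators", "2",
])
raise SystemExit(exit_code)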
8 changes: 6 additions & 2 deletions tests/end_to_end/models/aggregator.py
@@ -19,7 +19,7 @@ class Aggregator():
         2. Starting the aggregator
     """

-    def __init__(self, agg_domain_name=None, workspace_path=None, container_id=None):
+    def __init__(self, agg_domain_name=None, workspace_path=None, container_id=None, eval_scope=False):
         """
         Initialize the Aggregator class
         Args:
@@ -31,6 +31,7 @@ def __init__(self, agg_domain_name=None, workspace_path=None, container_id=None)
         self.agg_domain_name = agg_domain_name
         self.workspace_path = workspace_path
         self.container_id = container_id
+        self.eval_scope = eval_scope

     def generate_sign_request(self):
         """
@@ -63,8 +64,11 @@ def start(self, res_file, with_docker=False):
         log.info(f"Starting {self.name}")
         res_file = res_file if not with_docker else os.path.basename(res_file)
         error_msg = "Failed to start the aggregator"
+        command = "fx aggregator start"
+        if self.eval_scope:
+            command = f"{command} --task_group evaluation"
         fh.run_command(
-            "fx aggregator start",
+            command=command,
             error_msg=error_msg,
             container_id=self.container_id,
             workspace_path=self.workspace_path if not with_docker else "",
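
The new eval_scope flag only changes the command that Aggregator.start() issues: when the flag is set, the aggregator is launched with the evaluation task group. A rough usage sketch (the domain name, workspace path, and result file below are hypothetical and not taken from this commit):

# Illustrative sketch of the eval_scope flag added in this commit.
from tests.end_to_end.models.aggregator import Aggregator  # module path as in this commit

agg = Aggregator(
    agg_domain_name="aggregator.example.com",  # hypothetical domain
    workspace_path="/tmp/eval_workspace",      # hypothetical workspace
    eval_scope=True,                           # new flag: run aggregator in evaluation scope
)

# With eval_scope=True, start() runs "fx aggregator start --task_group evaluation"
# instead of the plain "fx aggregator start".
agg.start(res_file="/tmp/aggregator.log")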
14 changes: 12 additions & 2 deletions tests/end_to_end/models/model_owner.py
@@ -122,7 +122,7 @@ def certify_collaborator(self, collaborator_name, zip_name):
             raise e
         return True

-    def modify_plan(self, param_config, plan_path):
+    def modify_plan(self, param_config, plan_path, eval_scope=False):
         """
         Modify the plan to train the model
         Args:
@@ -153,9 +153,19 @@ def modify_plan(self, param_config, plan_path):
             data["network"]["settings"]["require_client_auth"] = param_config.require_client_auth
             data["network"]["settings"]["use_tls"] = param_config.use_tls

+            if eval_scope:
+                # Remove all existing task_groups
+                data['assigner']['settings']['task_groups'] = []
+                # Add new task_groups for evaluation scope with task as aggregated_model_validation
+                new_task_group = {
+                    "name": "evaluation",
+                    "percentage": 1.0,
+                    "tasks": ["aggregated_model_validation"]
+                }
+                data['assigner']['settings']['task_groups'].append(new_task_group)
+
             with open(plan_file, "w+") as write_file:
                 yaml.dump(data, write_file)

             log.info(f"Modified the plan with provided parameters.")
         except Exception as e:
             log.error(f"Failed to modify the plan: {e}")
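
With eval_scope=True, modify_plan rewrites the assigner section of the plan so that the only task group is an evaluation group running aggregated_model_validation. A rough sketch of the resulting structure (only the task_groups content comes from this commit; the surrounding plan keys are omitted for brevity):

# Sketch of the assigner section produced by modify_plan(..., eval_scope=True).
import yaml

plan = {"assigner": {"settings": {"task_groups": []}}}
plan["assigner"]["settings"]["task_groups"].append({
    "name": "evaluation",
    "percentage": 1.0,
    "tasks": ["aggregated_model_validation"],
})

print(yaml.dump(plan, default_flow_style=False))
# assigner:
#   settings:
#     task_groups:
#     - name: evaluation
#       percentage: 1.0
#       tasks:
#       - aggregated_model_validation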