From 7e64b1de6cc2d1a989a02f50ef0121c36a0dd5aa Mon Sep 17 00:00:00 2001 From: Bernd Doser Date: Mon, 14 Oct 2024 11:14:02 +0200 Subject: [PATCH] default highlight --- _quarto.yml | 2 +- flyte.qmd | 23 ++++++++++++++++------- hpc-ai.qmd | 4 ++-- images/flyte-test-setup.svg | 2 +- images/projectx.svg | 2 +- index.qmd | 8 +++++++- streamflow.qmd | 27 ++++++++++++++++++--------- workflows.qmd | 21 ++++++++++++++------- 8 files changed, 60 insertions(+), 29 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 3ac6e0f..83799fc 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -19,6 +19,6 @@ format: logo: images/HITS_RGB_eng.jpg footer: "ML Workflow Orchestration (Bernd Doser, HITS)" slide-number: true - highlight-style: a11y + # highlight-style: a11y # code-block-height: 650px # width: 1300 diff --git a/flyte.qmd b/flyte.qmd index d58f521..8ba70a3 100644 --- a/flyte.qmd +++ b/flyte.qmd @@ -49,18 +49,24 @@ def mean(values: List[float]) -> float: ## Flyte Workflows -- Workflows doesn’t perform computations it’s used to structure tasks +Workflows are used to structure the task execution graph -```python + + +::: {style="font-size: 80%;"} +``` {.python} +@task +def generate_processed_corpus() -> List[List[str]]: @workflow -def nlp_workflow(target_word: str = "computer") -> workflow_outputs: +def nlp_workflow(target_word: str = "computer") -> [Dict[str, float], float, Dict[int, List[str]]]: corpus = generate_processed_corpus() model_wv = train_word2vec_model(training_data=corpus, hyperparams=Word2VecModelHyperparams()) lda_topics = train_lda_model(corpus=corpus, hyperparams=LDAModelHyperparams()) @@ -68,7 +74,11 @@ def nlp_workflow(target_word: str = "computer") -> workflow_outputs: distance = word_movers_distance(model_ser=model_wv.model) return similar_words, distance, lda_topics ``` +::: +::: aside +[Source](https://docs.flyte.org/en/latest/flytesnacks/examples/nlp_processing/word2vec_and_lda.html) +::: ## Containerized Tasks @@ -77,7 +87,7 @@ A Flyte task operates within 
its own container and runs on a Kubernetes pod ![](images/union_tasks.png){fig-align="center"} -## ImageSpec +## Container Image Specifications - Customize the container image without a Dockerfile - Container image is build at registration and pushed to a container registry @@ -101,7 +111,6 @@ custom_image = ImageSpec( - Clone and recover executions -## Flyte Demo +# [Flyte Demo](https://github.com/BerndDoser/flyte-mnist) -- [GitHub repo](https://github.com/BerndDoser/flyte-mnist) -- [PyTorch MNIST Classifier](https://docs.flyte.org/en/latest/flytesnacks/examples/mnist_classifier/pytorch_single_node_and_gpu.html) + diff --git a/hpc-ai.qmd b/hpc-ai.qmd index 4da19d3..fe32195 100644 --- a/hpc-ai.qmd +++ b/hpc-ai.qmd @@ -1,5 +1,5 @@ # Infrastructure for Machine Learning Workflows -## {background-image="images/ai_vs_hpc.png"} +## {background-image="images/ai_vs_hpc.png" transition="fade"} [The background image was generated with FLUX on fal.ai\ using the prompt 'Visualize two worlds showing the difference between HPC and AI infrastructure']{.top_white} @@ -10,7 +10,7 @@ using the prompt 'Visualize two worlds showing the difference between HPC and AI ::: {layout='[1,1]' layout-valign=center} ::: n1 -- SLURM +- Slurm - MPI - File Storage diff --git a/images/flyte-test-setup.svg b/images/flyte-test-setup.svg index 004adb2..80298d8 100644 --- a/images/flyte-test-setup.svg +++ b/images/flyte-test-setup.svg @@ -1,4 +1,4 @@ -
k3s-server (VM)
FlytePropeller
MinIO
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
k3s-agent (GPU)
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
4x A40
\ No newline at end of file +
k3s-server (VM)
FlytePropeller
MinIO
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
k3s-agent (GPU)
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
k3s pods
(Flyte tasks)
4x A40
\ No newline at end of file diff --git a/images/projectx.svg b/images/projectx.svg index 6305036..c287dfb 100644 --- a/images/projectx.svg +++ b/images/projectx.svg @@ -1,4 +1,4 @@ -
Spherinator
Training data
Preprocessing
Engine
(PEST)
Training
PINK
Zerinator
Preprocessed Data
Model Store
(ONNX)
Deployment
Aladin
Jasmin
Prediction
HiPSter
User data
Project X
Spectra
Time series
Data cubes
Point clouds
Images
\ No newline at end of file +
Spherinator
Training Data
Preprocessing
Engine
(PEST)
Training
PINK
Zerinator
Model Store
(ONNX)
Deployment
Aladin
Jasmin
Prediction
HiPSter
User Data
Project X
Preprocessed Data
Spectra
Time Series
Data Cubes
Point Clouds
Images
\ No newline at end of file diff --git a/index.qmd b/index.qmd index 285e638..5848a4d 100644 --- a/index.qmd +++ b/index.qmd @@ -1,9 +1,15 @@ ## Agenda -- General Overview to Workflow Orchestration +- Introduction to Workflow Orchestration + +. . . + - Solutions for Workflow Orchestration - StreamFlow - Flyte + +. . . + - Infrastructure for Machine Learning Workflows - HPC vs AI/ML - File vs Object Storage diff --git a/streamflow.qmd b/streamflow.qmd index f14213d..e12d758 100644 --- a/streamflow.qmd +++ b/streamflow.qmd @@ -10,7 +10,7 @@ - Kubernetes ::: {.callout-note} -[BioExcel Building Blocks](https://bioexcel.eu/biobb-new/) is a **showpiece** using CWL for biomolecular simulation. +[BioExcel Building Blocks](https://bioexcel.eu/biobb-new/) is a great example using CWL for biomolecular simulation. ::: @@ -27,10 +27,13 @@ From [StreamFlow documentation](https://streamflow.di.unito.it/documentation/0.2 ::: {layout='[1,1]'} ::: n1 +::: {style="font-size: 80%;"} ``` {.yaml filename="workflow.cwl"} {{< include code/workflow.cwl >}} ``` ::: +::: + ::: n2 - Declare the workflow in a YAML file - `inputs` @@ -46,16 +49,21 @@ From [StreamFlow documentation](https://streamflow.di.unito.it/documentation/0.2 ::: {layout='[1,1]'} ::: n1 +::: {style="font-size: 80%;"} ``` {.yaml filename="workflow.cwl"} {{< include code/workflow.cwl >}} ``` ::: +::: + ::: n2 +::: {style="font-size: 80%;"} ``` {.yaml filename="train.cwl"} {{< include code/train.cwl >}} ``` ::: ::: +::: ## CWL Implementations in Production @@ -67,11 +75,13 @@ From [CWL Documentation](https://www.commonwl.org/implementations/) ::: -## Visualizing CWL +## Visualizing CWL Graphs ![](images/rRNA_selection_cwl.svg){fig-align="center"} -[Try it out!](https://view.commonwl.org/){top=-100px} +::: aside +From [CWL Viewer](https://view.commonwl.org/) +::: ## StreamFlow Deployment @@ -91,14 +101,13 @@ From [CWL Documentation](https://www.commonwl.org/implementations/) ::: -## StreamFlow: Cross-Facility Federated Learning 
+## StreamFlow: Federated Learning ![](images/streamflow-xffl.webp){fig-align="center"} - -## StreamFlow Demo - -- [GitHub repo to Seminar example](https://github.com/BerndDoser/streamflow-example) -- [Example of the developers of StreamFlow](https://github.com/alpha-unito/streamflow-fl) +::: aside +[GitHub](https://github.com/alpha-unito/streamflow-fl) +::: +# [StreamFlow Demo](https://github.com/BerndDoser/streamflow-example) diff --git a/workflows.qmd b/workflows.qmd index 2448fa8..bd5136e 100644 --- a/workflows.qmd +++ b/workflows.qmd @@ -1,21 +1,24 @@ -## Motivation by [SPACE CoE](https://www.space-coe.eu/) +## Motivated by [SPACE CoE](https://www.space-coe.eu/) ![](images/SPACE_logo.png){.absolute top=-20 right=0 width=200} ::: {.fs34} -**Goal**: Astrophysical Simulations for Exascale Computing\ -**Challenge**: Analyze large-scale simulation data of Petabytes\ +**Goal**: Astrophysical simulations for exascale computing\ +**Challenge**: Analyze large-size simulation data (~ Petabytes)\ **Solution**: Efficient ML-based workflows using dimensionality reduction ::: -::: {layout='[1,2.4,0.7]' layout-valign=center} +::: {layout='[1,2.4,0.7]'} ::: n1 + +**Input** + ![](images/illustris.png) ::: {.fs20} ::: {.callout-note icon=false} -## Simulation Data (~ PetaBytes) +## Simulation Data (~ Petabytes) Time snapshots storing particle positions, velocities, and other properties, e.g. from OpenGADGET (IllustrisTNG) @@ -24,10 +27,14 @@ Time snapshots storing particle positions, velocities, and other properties, e.g ::: ::: n2 +**Workflow** + ![](images/projectx.svg) ::: ::: n3 +**Output** + ![](images/P404_f2.png) ::: {.fs20} @@ -51,14 +58,14 @@ K. L. Polsterer, B. Doser, A. Fehlner and S. Trujillo-Gomez [ADASS (2024)](). - **Reproducibility**: Ensure your experiments are reproducible by tracking the code, data, model, and environment. - **Resource management**: Manage resources efficiently by running tasks in parallel and optimizing resource usage. 
-- **Scalability**: Scale your workflows to handle large datasets and complex pipelines. +- **Scalability**: Scale your workflows to handle large amounts of data and complex pipelines. - **Monitoring**: Track the progress of workflows and monitor their performance and results. - **Collaboration**: Enable collaboration between team members by sharing code, data, and results. ::: -## General Requirements for Workflows +## Requirements for Workflow Orchestration - Define node requirements (e.g. CPU, memory, GPU) - Control runtime environment with containers