From 501a707e95b9680a48cf188eb77180bff13bce7a Mon Sep 17 00:00:00 2001 From: Antongiacomo Polimeno Date: Tue, 14 Nov 2023 14:32:50 +0100 Subject: [PATCH] Cleaning up --- .github/workflows/blank.yml | 3 -- ... Access Control - extension.code-workspace | 3 ++ main.tex | 5 ++- pipeline_instance_example.tex | 12 +++---- pipeline_template.tex | 27 -------------- requirements.tex | 11 +++++- system_model.tex | 35 +++++++++++++++---- 7 files changed, 52 insertions(+), 44 deletions(-) diff --git a/.github/workflows/blank.yml b/.github/workflows/blank.yml index 2d839e6..5a332af 100644 --- a/.github/workflows/blank.yml +++ b/.github/workflows/blank.yml @@ -28,9 +28,6 @@ jobs: - name: Generate PDF document uses: hspaans/latexmk-action@v1 - with: - format: pdf - filename: main.tex - name: Create Release id: create_release diff --git a/Big Data Access Control - extension.code-workspace b/Big Data Access Control - extension.code-workspace index bdeae39..447a65b 100644 --- a/Big Data Access Control - extension.code-workspace +++ b/Big Data Access Control - extension.code-workspace @@ -203,6 +203,9 @@ ".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/_pydevd_frame_eval/pydevd_frame_evaluator.*.so": true, ".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/_pydevd_frame_eval/pydevd_frame_evaluator.*.pyd": true, ".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/_pydevd_frame_eval/pydevd_frame_evaluator.pyx": true + }, + "files.associations": { + "*.tikz": "latex" } } } \ No newline at end of file diff --git a/main.tex b/main.tex index d7ea757..7aa3dfe 100644 --- a/main.tex +++ b/main.tex @@ -66,7 +66,10 @@ \input{introduction} \input{motivations} \input{system_model} -\input{service_composition} +\input{pipeline_template.tex} +\input{pipeline_template_example.tex} +\input{pipeline_instance.tex} +\input{pipeline_instance_example.tex} \input{metrics} \input{experiment} \input{related} diff --git a/pipeline_instance_example.tex 
b/pipeline_instance_example.tex index 6e2a7e9..6744af2 100644 --- a/pipeline_instance_example.tex +++ b/pipeline_instance_example.tex @@ -51,9 +51,9 @@ % \[ \forall S \in \mathrm{S}_{C} \exists \iChartFunction(S) = \mathrm{S}_{1} \] -\begin{figure} - \centering - \includegraphics[width=\columnwidth]{serviceDetail.pdf} - \caption{Service Detail} - \label{fig:service_detail}reinstall remote-ssh -\end{figure} +% \begin{figure} +% \centering +% \includegraphics[width=\columnwidth]{serviceDetail.pdf} +% \caption{Service Detail} +% \label{fig:service_detail}reinstall remote-ssh +% \end{figure} diff --git a/pipeline_template.tex b/pipeline_template.tex index cf45544..bdf74df 100644 --- a/pipeline_template.tex +++ b/pipeline_template.tex @@ -76,33 +76,6 @@ \subsection{Pipeline Template Definition}\label{sec:templatedefinition} - \begin{figure}[ht!] - \centering - \begin{tikzpicture}[scale=0.9] - % Nodes - \node[draw ] (node1) at (0,1) {$\s{r}$}; - \node[draw] (node2) at (0,2){$Data preparation $}; - \node[draw] (node3) at (0,3) {$\timesOperator$}; - \node[draw] (node4) at (-2,4) {$statistics$}; - \node[draw] (node5) at (2,4) {$Machine Learning$}; - \node[draw] (node6) at (0,5) {$\timesOperator$}; - \node[draw] (node65) at (0,6) {$Storage$}; - - \draw[->] (node1) -- (node2); - \draw[->] (node2) -- (node3); - \draw[->] (node3) -- (node4); - \draw[->] (node3) -- (node5); - \draw[->] (node5) -- (node6); - \draw[->] (node4) -- (node6); - \draw[->] (node6) -- (node65); - - - \end{tikzpicture} - \caption{Pipeline Template} - \label{fig:service_composition_template} - \end{figure} - - % \begin{figure}[ht!] % \centering diff --git a/requirements.tex b/requirements.tex index 47fde95..58420bd 100644 --- a/requirements.tex +++ b/requirements.tex @@ -1,5 +1,14 @@ \section{Requirements and System Model}\label{sec:requirements} -Big data is highly dependent on cloud-edge computing, which makes extensive use of multi-tenancy. 
Multi-tenancy permits sharing one instance of infrastructures, platforms or applications by multiple tenants to optimize costs. This leads to common scenarios where a service provider offers subscription-based analytics capabilities in the cloud, or a single data lake is accessed by multiple customers. Thus, it is a common situation to have a big data pipeline where data and services belong to various organizations, posing a serious risk of potential privacy and security violation. In the following of this section, we present our system model (Section \ref{sec:systemmodel}), the requirements driving our work (Section \ref{sec:accesscontrol_req}), and our reference scenario (Section \ref{sec:reference}). +Big data is highly dependent on cloud-edge computing, which makes extensive use of multi-tenancy. +Multi-tenancy permits sharing one instance of infrastructures, +platforms or applications by multiple tenants to optimize costs. +This leads to common scenarios where a service provider offers subscription-based analytics capabilities in the cloud, +or a single data lake is accessed by multiple customers. +Thus, it is a common situation to have a big data pipeline where data and services belong to various organizations, +posing a serious risk of potential privacy and security violations. +In the remainder of this section, we present our system model (Section~\ref{sec:systemmodel}), +the requirements driving our work (Section~\ref{sec:accesscontrol_req}), +and our reference scenario (Section~\ref{sec:reference}). \subsection{System Model}\label{sec:systemmodel} Our system is a coalition of organizations that collaboratively execute a Big Data pipeline where \emph{i)} organizations join without necessarily integrating their cloud-based or on-premises ICT infrastructures, \emph{ii)} collaborative processes are carried out involving multi-party data collection and analytics, iii) the pipeline can be executed in a centralized or distributed deployment. 
diff --git a/system_model.tex b/system_model.tex index 915d1a9..5cb7f5e 100644 --- a/system_model.tex +++ b/system_model.tex @@ -62,12 +62,35 @@ \subsection{Service Pipeline and Reference Scenario}\label{sec:service_definitio We note that the template requires the execution of the entire service within a single country. If the data needs to be transmitted beyond the boundaries of Connecticut, data protection measures must be implemented. -A visual representation of the flow is presented in Figure \ref{fig:service_composition_example}. - -\begin{figure} - \includegraphics[width=0.98\columnwidth]{service_composition_example} - \caption{Service composition example.}\label{fig:service_composition_example} - +A visual representation of the flow is presented in Figure~\ref{fig:reference_scenario}. +\begin{figure}[ht!] + \centering + \begin{tikzpicture}[scale=0.9] + % Nodes + \node[draw ] (node1) at (0,8) {$\s{r}$}; + \node[draw] (node2) at (0,7){Data Preparation }; + \node[draw] (node25) at (0,6){Data Enrichment}; + \node[draw] (node3) at (0,5) {$\timesOperator$}; + \node[draw] (node4) at (-2,4) {Data Analysis}; + \node[draw] (node5) at (2,4) {Machine Learning}; + \node[draw] (node6) at (0,3) {$\timesOperator$}; + \node[draw] (node7) at (-2,2) {Data Storage}; + \node[draw] (node8) at (2,2) {Data Visualization}; + \node[draw] (node9) at (0,1) {$\timesOperator$}; + \draw[->] (node1) -- (node2); + \draw[->] (node2) -- (node25); + \draw[->] (node25) -- (node3); + \draw[->] (node3) -- (node4); + \draw[->] (node3) -- (node5); + \draw[->] (node5) -- (node6); + \draw[->] (node4) -- (node6); + \draw[->] (node6) -- (node7); + \draw[->] (node6) -- (node8); + \draw[->] (node8) -- (node9); + \draw[->] (node7) -- (node9); + \end{tikzpicture} + \caption{Reference Scenario} + \label{fig:reference_scenario} \end{figure} The adopted dataset\footnote{https://data.ct.gov/Public-Safety/Accused-Pre-Trial-Inmates-in-Correctional-Faciliti/b674-jy6w} exhibits a straightforward row-and-column 
structure.