From f0463c1987936de1f2fd48e48979b816dd5e1a51 Mon Sep 17 00:00:00 2001 From: Antongiacomo Polimeno Date: Mon, 20 Nov 2023 12:30:48 +0100 Subject: [PATCH] some minor --- main.tex | 4 +- pipeline_instance_example.tex | 50 +++++++++++++++-- pipeline_template_example.tex | 101 ++++++++++++++++------------------ system_model.tex | 5 +- 4 files changed, 100 insertions(+), 60 deletions(-) diff --git a/main.tex b/main.tex index 347c4ec..173dc88 100644 --- a/main.tex +++ b/main.tex @@ -27,6 +27,7 @@ \usepackage[inline]{enumitem} \usepackage{xcolor} \usepackage{amssymb} +\usepackage{pifont} \graphicspath{{Images/}} \definecolor{commentsColor}{rgb}{0.497495, 0.497587, 0.497464} \definecolor{keywordsColor}{rgb}{0.000000, 0.000000, 0.635294} @@ -34,7 +35,8 @@ \theoremstyle{definition} \newtheorem{definition}{Definition}[section] \newtheorem{example}{Example}[section] - +\newcommand{\xmark}{\ding{55}}% +\newcommand{\cmark}{\ding{51}}% \input{macro} \begin{document} diff --git a/pipeline_instance_example.tex b/pipeline_instance_example.tex index bc5e125..483c48d 100644 --- a/pipeline_instance_example.tex +++ b/pipeline_instance_example.tex @@ -1,15 +1,55 @@ +% \begin{example}\label{ex:instance} + +% As an example, let us consider the pipeline template \tChartFunction in \cref{sec:example}. +% It includes three key stages in our reference scenario: data anonymization (\vi{1}), data enrichment (\vi{2}), and data aggregation (\vi{3}), each stage with its policy $p$. + + + +% The filtering algorithm then returns the set $S'=\{s_1,s_2\}$. +% The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$. + +% The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$. 
+ +% The same logic is applied to the \vi{2} and \vi{3}. + +% \end{example} + + \begin{example}\label{ex:instance} As an example, let us consider the pipeline template \tChartFunction in \cref{sec:example}. - It includes three key stages in our reference scenario: data anonymization (\vi{1}), data enrichment (\vi{2}), and data aggregation (\vi{3}), each stage with its policy $p$. + In particular, we consider a subset of its vertices, namely \vi{4}, \vi{6}, and \vi{7}. + + + \begin{table*} + \def\arraystretch{1.5} + \caption{Instance example}\label{tab:instance_example} + \centering + \begin{tabular}{l|l|l|c|c} + \textbf{Vertex$\rightarrow$Policy} & \textbf{Candidate} & \textbf{Profile} & \textbf{Filtering} & \textbf{Ranking} \\ + \multirow{ 3}{*}{\vi{4} $\rightarrow$ \p{1},\p{2},\p{3} } & $\s{11}$ & service\_owner = ``CT'' & \cmark & 1 \\ + & $\s{12}$ & service\_owner = ``NY'' & \cmark & 2 \\ + & $\s{13}$ & service\_owner = ``CA'' & \cmark & 3 \\ + \hline + \multirow{ 3}{*}{\vi{6} $\rightarrow$ \p{5},\p{6} } & $\s{21}$ & region = ``CA'' & \xmark & -- \\ + & $\s{22}$ & region = ``NY'' & \cmark & 2 \\ + & $\s{23}$ & region = ``CT'' & \cmark & 1 \\ + \hline + \multirow{ 3}{*}{\vi{7} $\rightarrow$ \p{7},\p{8},\p{9} } & $\s{31}$ & region = ``CA'' & \xmark & -- \\ + & $\s{32}$ & region = ``NY'' & \cmark & 2 \\ + & $\s{33}$ & region = ``CT'' & \cmark & 1 \\ + \hline + \end{tabular} + \end{table*} + \begin{table*}[htbp] + \centering - The filtering algorithm then returns the set $S'=\{s_1,s_2\}$. - The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$. + \caption{A test caption} + \label{table2} + \end{table*} - The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$. 
- The same logic is applied to the \vi{2} and \vi{3}. \end{example} \ No newline at end of file diff --git a/pipeline_template_example.tex b/pipeline_template_example.tex index 257d41b..e7f1fa7 100644 --- a/pipeline_template_example.tex +++ b/pipeline_template_example.tex @@ -3,6 +3,40 @@ \subsection{Example}\label{sec:example} \newcommand{\ptwo}{$\langle service\_owner=partner(dataset\_owner) \rangle$} \newcommand{\pthree}{$\langle service\_owner \neq dataset\_owner AND owner \neq partner(dataset\_owner)$} +\begin{table*}[ht!] + \def\arraystretch{1.5} + \centering + \caption{Anonymization policies}\label{tab:anonymization} + + + + \begin{tabular}[t]{c|c|l} + \textbf{Vertex} & \textbf{Policy} & \policy{subject}{object}{action}{environment}{transformation} \\ \hline + \vi{1},\vi{2},\vi{3} & $\p{0}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{0}} \\ + \vi{4},\vi{6} & $\p{1}$ & \policy{\pone}{dataset}{READ}{ANY}{\tp{0}} \\ + \vi{4},\vi{6} & $\p{2}$ & \policy{\ptwo}{dataset}{READ}{ANY}{\tp{1}} \\ + \vi{4},\vi{6} & $\p{3}$ & \policy{\pthree}{dataset}{READ}{ANY}{\tp{2}} \\ + \vi{5} & $\p{4}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{2}} \\ + \vi{7} & $\p{5}$ & \policy{$\langle service\_region=``FACILITY"\rangle$}{dataset}{WRITE}{ANY}{\tp{0}} \\ + \vi{7} & $\p{6}$ & \policy{$\langle service\_region=``\{CT,NY,NH\}"\rangle$}{dataset}{WRITE}{ANY}{\tp{1}} \\ + \vi{8} & $\p{7}$ & \policy{$\langle user\_role=``Connecticut Prison Officer"\rangle$}{dataset} {READ}{ANY}{\tp{0}} \\ + \vi{8} & $\p{8}$ & \policy{$\langle user\_role=``Partner Prison Officer"\rangle$}{dataset} {READ}{ANY}{\tp{1}} \\ + \vi{8} & $\p{9}$ & \policy{$\langle user\_role=``Any"\rangle$}{dataset} {READ}{ANY}{\tp{2}} \\ + \end{tabular} + +\end{table*} +\begin{table*}[ht!] 
+ \def\arraystretch{1.5} + \centering + \caption{Anonymization levels}\label{tab:levels} + \begin{tabular}[t]{c|c|l} + \textbf{\tf{i}} & \textbf{Level} & \textbf{Columns Anonymized} \\\hline + \tp{0} & level0 & $anon(\varnothing)$ \\ + \tp{1} & level1 & $anon(FIRST\_NAME, LAST\_NAME)$ \\ + \tp{2} & level2 & $anon(FIRST\_NAME, LAST\_NAME, IDENTIFIER, AGE)$ \\ + \end{tabular} + +\end{table*} We present an example of pipeline template focusing on policy annotations. The pipeline template consists of five stages, and each stage is annotated with a policy presented in \cref{tab:anonymization}. \hl{Connecticut Prison (CTP) is the service user executing the pipeline. New York Prison and New Hampshire Prison are two partner DOC.}\hl{SPOSTARE NEL SYSTEM MODEL? SI, MA DATA OWNER DIPENDE DAL DATASET, HO MESSO SERVICE USER} We recall that \cref{tab:dataset} shows a sample of our reference dataset. @@ -20,7 +54,7 @@ \subsection{Example}\label{sec:example} The policy annotation \p{0} is linked with an empty transformation. The functional requirement necessitates a URI as input, and the output is the downloaded dataset. -The second stage incorporates a sole vertex, which merges the three datasets obtained from the previous stages and is associated with three policies (\p{1},\p{2},\p{3}). +The second stage consists of a single vertex (\vi{4}), which merges the three datasets obtained from the previous stages and is associated with three policies (\p{1},\p{2},\p{3}). The policies are evaluated during the node execution: %if the service profile matches with the data owner ($owner = ``CTP"$), \p{1} is satisfied and the data is not anonymized (\tf{1}); %if the service profile matches with a partner of the owner ($owner = ``CTP"$), \p{2} is satisfied and the data is partially anonymized (\tf{2}); @@ -28,77 +62,38 @@ \subsection{Example}\label{sec:example} % 2° NODO % %he second vertex is responsible for enriching the data. 
%The service downloads the dataset from partner facilities and enhances the dataset of the Connecticut facility. -if the service is by the data owner (\pone), which means that if the service owner is the same as the dataset owner, the dataset is not anonymized (\tf{0}). -if the service is a partner of the data owner (\ptwo), which means that if the service owner is a partner of the dataset owner, the dataset is anonymized level1 (\tf{1}). -if the service is a third party (\pthree), which means that if the service owner is neither the dataset owner nor a partner of the dataset owner, the dataset is anonymized level2 (\tf{2}). +if the service is provided by the data owner (\pone), that is, the service owner is the same as the dataset owner, the dataset is not anonymized (\tp{0}); +if the service is provided by a partner of the data owner (\ptwo), that is, the service owner is a partner of the dataset owner, the dataset is anonymized level1 (\tp{1}); +if the service is provided by a third party (\pthree), that is, the service owner is neither the dataset owner nor a partner of the dataset owner, the dataset is anonymized level2 (\tp{2}). The functional requirement specifies $n$ datasets as input, and the output is the merged dataset. % 3° NODO % The third stage, is responsible both for data analysis/statistics and machine learning tasks. The stage is composed of two alternative vertices respectively \vi{4}, \vi{5}. Data analytics vertex adopts policies analogous to the second stage. The logic remains consistent: -if the service profile matches with the data owner (\pone), \p{1} is satisfied and the data computation is made on clean data (\tf{0}); -if the service profile matches with a partner of the owner (\ptwo), \p{2} is satisfied and the data computation is made on data anonymized level1 (\tf{1}); -if the service profile doesn't match with a partner nor with the owner (\pthree), \p{3} is satisfied and the data computation is made on data anonymized level2 (\tf{2}). 
+if the service profile matches with the data owner (\pone), \p{1} is satisfied and the data computation is made on clean data (\tp{0}); +if the service profile matches with a partner of the owner (\ptwo), \p{2} is satisfied and the data computation is made on data anonymized level1 (\tp{1}); +if the service profile matches neither the owner nor a partner (\pthree), \p{3} is satisfied and the data computation is made on data anonymized level2 (\tp{2}). The functional requirement specifies a dataset as input, and the output is the computed statistics. % 4° NODO % -Machine Learning vertex adopts always a level2 anonymization (\p(4)) to prevent personal identifiers from entering into the machine learning algorithm/model (\tf{2}). +The Machine Learning vertex always adopts a level2 anonymization (\p{4}) to prevent personal identifiers from entering into the machine learning algorithm/model (\tp{2}). The functional requirement specifies a dataset as input, and the output is the trained model or an inference. % 5° NODO % The fifth stage manages data storage. -If the service is within the facility itself ($\langle service,region=FACILITY"\rangle$), \p{5} is satisfied, resulting in data anonymization level1 (\tf{1}). -Otherwise, if the service is in a partner region ($\langle service,region={CT,NY,NH}"\rangle$), the data undergo anonymization level2 (\tf{2}). +If the service is within the facility itself ($\langle service\_region=``FACILITY"\rangle$), \p{5} is satisfied, resulting in data anonymization level1 (\tp{1}). +Otherwise, if the service is in a partner region ($\langle service\_region=``\{CT,NY,NH\}"\rangle$), the data undergo anonymization level2 (\tp{2}). The functional requirement specifies some\hl{?} data as input, and the output is the URI of the stored data. % 6° NODO % The sixth stage is responsible for data visualization. -As stated in policy annotation \p{6}, if the user is member of the facility itself, the data are anonymized level0 (\tf{0}). 
-If the user is member of a partner facility, the data are anonymized level1 (\tf{2}). -If the user is not member of the facility nor a partner, the data are anonymized level2 (\tf{3}). +As stated in policy annotation \p{7}, if the user is a member of the facility itself, the data are anonymized level0 (\tp{0}). +If the user is a member of a partner facility (\p{8}), the data are anonymized level1 (\tp{1}). +If the user is neither a member of the facility nor of a partner (\p{9}), the data are anonymized level2 (\tp{2}). The functional requirement specifies a dataset as input, and the output is the visualization of the data. %In summary, this section has delineated a comprehensive pipeline template. This illustrative pipeline serves as a blueprint, highlighting the role of policy implementation in safeguarding data protection across diverse operational stages. -\begin{table*}[ht!] - \centering - \caption{Anonymization policies}\label{tab:anonymization} - % \bgroup - \def\arraystretch{1.5} - \begin{tabular}[t]{c|c|l} - \textbf{Vertex} & \textbf{Policy} & \policy{subject}{object}{action}{environment}{transformation} \\ \hline - \vi{1},\vi{2},\vi{3} & $\p{0}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{0}} \\ - \vi{4} & $\p{1}$ & \policy{\pone}{dataset}{READ}{ANY}{\tp{1}} \\ - \vi{4} & $\p{2}$ & \policy{\ptwo}{dataset}{READ}{ANY}{\tp{2}} \\ - \vi{4} & $\p{3}$ & \policy{\pthree}{dataset}{READ}{ANY}{\tp{3}} \\ - \vi{5} & $\p{4}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{3}} \\ - \vi{6} & $\p{5}$ & \policy{$\langle service\_region=``FACILITY"\rangle$}{dataset}{WRITE}{ANY}{\tp{1}} \\ - \vi{6} & $\p{6}$ & \policy{$\langle service\_region=``\{CT,NY,NH\}"\rangle$}{dataset}{WRITE}{ANY}{\tp{2}} \\ - \vi{7} & $\p{7}$ & \policy{$\langle user\_role=``Connecticut Prison Officer"\rangle$}{dataset} {READ}{ANY}{\tp{1}} \\ - \vi{7} & $\p{7}$ & \policy{$\langle user\_role=``Partner Prison Officer"\rangle$}{dataset} {READ}{ANY}{\tp{2}} \\ - \vi{7} & $\p{8}$ & \policy{$\langle user\_role=``Any"\rangle$}{dataset} 
{READ}{ANY}{\tp{3}} \\ - \end{tabular} - \begin{tabular}[t]{c|c|c} - \textbf{\tf{i}} & \textbf{Level} & \textbf{Columns Anonymized} \\\hline - \tp{0} & Level0 & $anon(\varnothing)$ \\ - \tp{1} & level1 & $anon(FIRST\_NAME, LAST\_NAME)$ \\ - \tp{2} & level2 & $anon(FIRST\_NAME, LAST\_NAME, IDENTIFIER, AGE)$ \\ - \end{tabular} - % % \begin{tabular}[t]{ccc} - % % \toprule - % % \textbf{Stage} & \textbf{Policy} & \textbf{Service} \\ - % % \midrule - % % \vi{1} & $p_1$ & $s_1$ \\ - % % \vi{1} & $p_1$ & $s_2$ \\ - % % \vi{2} & $p_2$ & $s_3$ \\ - % % \vi{2} & $p_2$ & $s_4$ \\ - % % \vi{3} & $p_3$ & $s_5$ \\ - % % \vi{3} & $p_3$ & $s_6$ \\ - % % \bottomrule - % % \end{tabular} - % % \hspace{1em} - % \egroup -\end{table*} -\vspace{2em} + \begin{figure}[ht!] \centering diff --git a/system_model.tex b/system_model.tex index 9f5114e..df1f30c 100644 --- a/system_model.tex +++ b/system_model.tex @@ -50,7 +50,10 @@ \subsection{Service Pipeline and Reference Scenario}\label{sec:service_definitio % \hl{QUESTO E' MERGE (M). IO PENSAVO DIVENTASSE UN NODO $v_i$. NEL CASO CAMBIANDO LA DEFINIZIONE 3.1 DOVE NON ESISTONO PIU' I NODI MERGE E JOIN.} \item \emph{Data analysis}, including statistical measures like averages, medians, and clustering-based statistics; \item \emph{Machine learning task}, including training and inference; - \item \emph{Data storage}, including the storage of the results in the corresponding states. Specifically, one copy remains in Connecticut (where sensitive information in the source dataset is not protected), while two additional copies are distributed to New York and New Hampshire (with sensitive information from the source dataset being safeguarded).\hl{SPIEGHIAMO BENE LA PARENTESI} + \item \emph{Data storage}, including the storage of the results in the corresponding states. 
+ Specifically, one copy remains in Connecticut (where sensitive information in the source dataset is not protected), + while two additional copies are distributed to New York and New Hampshire (with sensitive information from the source dataset being safeguarded). + \hl{SPIEGHIAMO BENE LA PARENTESI} \item \emph{Data visualization}, including the visualization of the results.\hl{STORAGE E VISUALIZATION NON LI FACEVAMO ALTERNATIVE CON UN NODO FINE?} \end{enumerate*}