Skip to content

Commit

Permalink
some minor
Browse files Browse the repository at this point in the history
  • Loading branch information
antongiacomo committed Nov 20, 2023
1 parent 35df652 commit f0463c1
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 60 deletions.
4 changes: 3 additions & 1 deletion main.tex
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@
\usepackage[inline]{enumitem}
\usepackage{xcolor}
\usepackage{amssymb}
\usepackage{pifont}
\graphicspath{{Images/}}
\definecolor{commentsColor}{rgb}{0.497495, 0.497587, 0.497464}
\definecolor{keywordsColor}{rgb}{0.000000, 0.000000, 0.635294}
\definecolor{stringColor}{rgb}{0.558215, 0.000000, 0.135316}
\theoremstyle{definition}
\newtheorem{definition}{Definition}[section]
\newtheorem{example}{Example}[section]

\newcommand{\xmark}{\ding{55}}%
\newcommand{\cmark}{\ding{51}}%
\input{macro}

\begin{document}
Expand Down
50 changes: 45 additions & 5 deletions pipeline_instance_example.tex
Original file line number Diff line number Diff line change
@@ -1,15 +1,55 @@
% \begin{example}\label{ex:instance}

% As an example, let us consider the pipeline template \tChartFunction in \cref{sec:example}.
% It includes three key stages in our reference scenario: data anonymization (\vi{1}), data enrichment (\vi{2}), and data aggregation (\vi{3}), each stage with its policy $p$.



% The filtering algorithm then returns the set $S'=\{s_1,s_2\}$.
% The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$.

% The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$.

% The same logic is applied to the \vi{2} and \vi{3}.

% \end{example}


\begin{example}\label{ex:instance}

As an example, let us consider the pipeline template \tChartFunction in \cref{sec:example}.
It includes three key stages in our reference scenario: data anonymization (\vi{1}), data enrichment (\vi{2}), and data aggregation (\vi{3}), each stage with its policy $p$.
In this example, we consider a subset of the vertices, namely \vi{4}, \vi{6} and \vi{7}.


\begin{table*}
\def\arraystretch{1.5}
\caption{Instance example}\label{tab:instance_example}

\centering
\begin{tabular}{l|l|l|c|c}

\textbf{Vertex$\rightarrow$Policy} & \textbf{Candidate} & \textbf{Profile} & \textbf{Filtering} & \textbf{Ranking} \\ \hline
\multirow{3}{*}{\vi{4} $\rightarrow$ \p{1},\p{2},\p{3}} & $\s{11}$ & service\_owner = ``CT'' & \cmark & 1 \\
& $\s{12}$ & service\_owner = ``NY'' & \cmark & 2 \\
& $\s{13}$ & service\_owner = ``CA'' & \cmark & 3 \\
\hline
\multirow{3}{*}{\vi{6} $\rightarrow$ \p{5},\p{6}} & $\s{21}$ & region = ``CA'' & \xmark & -- \\
& $\s{22}$ & region = ``NY'' & \cmark & 2 \\
& $\s{23}$ & region = ``CT'' & \cmark & 1 \\
\hline
\multirow{3}{*}{\vi{7} $\rightarrow$ \p{7},\p{8},\p{9}} & $\s{31}$ & region = ``CA'' & \xmark & -- \\
& $\s{32}$ & region = ``NY'' & \cmark & 2 \\
& $\s{33}$ & region = ``CT'' & \cmark & 1 \\
\hline
\end{tabular}
\end{table*}
The filtering algorithm then returns the set $S'=\{s_1,s_2\}$.
The comparison algorithm is finally applied to $S'$ and returns a ranking of the services according to quality metrics, where $s_1$ is ranked first. $s_1$ is then selected and integrated in $\vii{1}\in \Vp$.

The same logic is applied to \vi{2} and \vi{3}.

\end{example}
101 changes: 48 additions & 53 deletions pipeline_template_example.tex
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,40 @@ \subsection{Example}\label{sec:example}
\newcommand{\ptwo}{$\langle service\_owner=partner(dataset\_owner) \rangle$}
\newcommand{\pthree}{$\langle service\_owner \neq dataset\_owner AND owner \neq partner(dataset\_owner) \rangle$}

\begin{table*}[ht!]
\def\arraystretch{1.5}
\centering
\caption{Anonymization policies}\label{tab:anonymization}



\begin{tabular}[t]{c|c|l}
\textbf{Vertex} & \textbf{Policy} & \policy{subject}{object}{action}{environment}{transformation} \\ \hline
\vi{1},\vi{2},\vi{3} & $\p{0}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{0}} \\
\vi{4},\vi{6} & $\p{1}$ & \policy{\pone}{dataset}{READ}{ANY}{\tp{0}} \\
\vi{4},\vi{6} & $\p{2}$ & \policy{\ptwo}{dataset}{READ}{ANY}{\tp{1}} \\
\vi{4},\vi{6} & $\p{3}$ & \policy{\pthree}{dataset}{READ}{ANY}{\tp{2}} \\
\vi{5} & $\p{4}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{2}} \\
\vi{7} & $\p{5}$ & \policy{$\langle service\_region=``FACILITY"\rangle$}{dataset}{WRITE}{ANY}{\tp{0}} \\
\vi{7} & $\p{6}$ & \policy{$\langle service\_region=``\{CT,NY,NH\}"\rangle$}{dataset}{WRITE}{ANY}{\tp{1}} \\
\vi{8} & $\p{7}$ & \policy{$\langle user\_role=``Connecticut\ Prison\ Officer"\rangle$}{dataset} {READ}{ANY}{\tp{0}} \\
\vi{8} & $\p{8}$ & \policy{$\langle user\_role=``Partner\ Prison\ Officer"\rangle$}{dataset} {READ}{ANY}{\tp{1}} \\
\vi{8} & $\p{9}$ & \policy{$\langle user\_role=``Any"\rangle$}{dataset} {READ}{ANY}{\tp{2}} \\
\end{tabular}

\end{table*}
\begin{table*}[ht!]
\def\arraystretch{1.5}
\centering
\caption{Anonymization levels}\label{tab:levels}
\begin{tabular}[t]{c|c|l}
\textbf{\tf{i}} & \textbf{Level} & \textbf{Columns Anonymized} \\\hline
\tp{0} & level0 & $anon(\varnothing)$ \\
\tp{1} & level1 & $anon(FIRST\_NAME, LAST\_NAME)$ \\
\tp{2} & level2 & $anon(FIRST\_NAME, LAST\_NAME, IDENTIFIER, AGE)$ \\
\end{tabular}

\end{table*}

We present an example of a pipeline template focusing on policy annotations. The pipeline template consists of five stages, and each stage is annotated with a policy presented in \cref{tab:anonymization}. \hl{Connecticut Prison (CTP) is the service user executing the pipeline. New York Prison and New Hampshire Prison are two partner DOC.}\hl{SPOSTARE NEL SYSTEM MODEL? SI, MA DATA OWNER DIPENDE DAL DATASET, HO MESSO SERVICE USER} We recall that \cref{tab:dataset} shows a sample of our reference dataset.

Expand All @@ -20,85 +54,46 @@ \subsection{Example}\label{sec:example}
The policy annotation \p{0} is linked with an empty transformation.
The functional requirement necessitates a URI as input, and the output is the downloaded dataset.

The second stage incorporates a sole vertex, which merges the three datasets obtained from the previous stages and is associated with three policies (\p{1},\p{2},\p{3}).
The second stage incorporates a sole vertex (\vi{4}), which merges the three datasets obtained from the previous stages and is associated with three policies (\p{1},\p{2},\p{3}).
The policies are evaluated during the node execution:
%if the service profile matches with the data owner ($owner = ``CTP"$), \p{1} is satisfied and the data is not anonymized (\tf{1});
%if the service profile matches with a partner of the owner ($owner = ``CTP"$), \p{2} is satisfied and the data is partially anonymized (\tf{2});
%if the service profile doesn't match with a partner nor with the owner ($owner = ``CTP"$), \p{3} is satisfied and the data is fully anonymized (\tf{3}).
% 2° NODO %
%he second vertex is responsible for enriching the data.
%The service downloads the dataset from partner facilities and enhances the dataset of the Connecticut facility.
if the service is by the data owner (\pone), which means that if the service owner is the same as the dataset owner, the dataset is not anonymized (\tf{0}).
if the service is a partner of the data owner (\ptwo), which means that if the service owner is a partner of the dataset owner, the dataset is anonymized level1 (\tf{1}).
if the service is a third party (\pthree), which means that if the service owner is neither the dataset owner nor a partner of the dataset owner, the dataset is anonymized level2 (\tf{2}).
if the service is owned by the data owner (\pone), i.e., the service owner is the same as the dataset owner, the dataset is not anonymized (\tp{0});
if the service is owned by a partner of the data owner (\ptwo), the dataset is anonymized at level1 (\tp{1});
if the service is owned by a third party (\pthree), i.e., the service owner is neither the dataset owner nor one of its partners, the dataset is anonymized at level2 (\tp{2}).
The functional requirement specifies $n$ datasets as input, and the output is the merged dataset.
% 3° NODO %
The third stage is responsible for both data analysis/statistics and machine learning tasks.
The stage is composed of two alternative vertices, \vi{4} and \vi{5}, respectively.

The data analytics vertex adopts policies analogous to those of the second stage. The logic remains consistent:
if the service profile matches with the data owner (\pone), \p{1} is satisfied and the data computation is made on clean data (\tf{0});
if the service profile matches with a partner of the owner (\ptwo), \p{2} is satisfied and the data computation is made on data anonymized level1 (\tf{1});
if the service profile doesn't match with a partner nor with the owner (\pthree), \p{3} is satisfied and the data computation is made on data anonymized level2 (\tf{2}).
if the service profile matches the data owner (\pone), \p{1} is satisfied and the computation is performed on clean data (\tp{0});
if the service profile matches a partner of the owner (\ptwo), \p{2} is satisfied and the computation is performed on data anonymized at level1 (\tp{1});
if the service profile matches neither the owner nor one of its partners (\pthree), \p{3} is satisfied and the computation is performed on data anonymized at level2 (\tp{2}).
The functional requirement specifies a dataset as input, and the output is the computed statistics.
% 4° NODO %
Machine Learning vertex adopts always a level2 anonymization (\p(4)) to prevent personal identifiers from entering into the machine learning algorithm/model (\tf{2}).
The machine learning vertex always adopts a level2 anonymization (\p{4}) to prevent personal identifiers from entering the machine learning algorithm/model (\tp{2}).
The functional requirement specifies a dataset as input, and the output is the trained model or an inference.
% 5° NODO %
The fourth stage manages data storage.
If the service is within the facility itself ($\langle service,region=FACILITY"\rangle$), \p{5} is satisfied, resulting in data anonymization level1 (\tf{1}).
Otherwise, if the service is in a partner region ($\langle service,region={CT,NY,NH}"\rangle$), the data undergo anonymization level2 (\tf{2}).
If the service is within the facility itself ($\langle service\_region=``FACILITY"\rangle$), \p{5} is satisfied, resulting in data anonymization level0 (\tp{0}).
Otherwise, if the service is in a partner region ($\langle service\_region=``\{CT,NY,NH\}"\rangle$), \p{6} is satisfied and the data undergo anonymization level1 (\tp{1}).
The functional requirement specifies some\hl{?} data as input, and the output is the URI of the stored data.
% 6° NODO %
The fifth stage is responsible for data visualization.
As stated in policy annotation \p{6}, if the user is member of the facility itself, the data are anonymized level0 (\tf{0}).
If the user is member of a partner facility, the data are anonymized level1 (\tf{2}).
If the user is not member of the facility nor a partner, the data are anonymized level2 (\tf{3}).
As stated in policy annotation \p{7}, if the user is a member of the facility itself, the data are anonymized at level0 (\tp{0}).
If the user is a member of a partner facility (\p{8}), the data are anonymized at level1 (\tp{1}).
If the user is a member of neither the facility nor a partner (\p{9}), the data are anonymized at level2 (\tp{2}).
The functional requirement specifies a dataset as input, and the output is the visualization of the data.

%In summary, this section has delineated a comprehensive pipeline template. This illustrative pipeline serves as a blueprint, highlighting the role of policy implementation in safeguarding data protection across diverse operational stages.
\begin{table*}[ht!]
\centering
\caption{Anonymization policies}\label{tab:anonymization}
% \bgroup
\def\arraystretch{1.5}

\begin{tabular}[t]{c|c|l}
\textbf{Vertex} & \textbf{Policy} & \policy{subject}{object}{action}{environment}{transformation} \\ \hline
\vi{1},\vi{2},\vi{3} & $\p{0}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{0}} \\
\vi{4} & $\p{1}$ & \policy{\pone}{dataset}{READ}{ANY}{\tp{1}} \\
\vi{4} & $\p{2}$ & \policy{\ptwo}{dataset}{READ}{ANY}{\tp{2}} \\
\vi{4} & $\p{3}$ & \policy{\pthree}{dataset}{READ}{ANY}{\tp{3}} \\
\vi{5} & $\p{4}$ & \policy{ANY}{dataset}{READ}{ANY}{\tp{3}} \\
\vi{6} & $\p{5}$ & \policy{$\langle service\_region=``FACILITY"\rangle$}{dataset}{WRITE}{ANY}{\tp{1}} \\
\vi{6} & $\p{6}$ & \policy{$\langle service\_region=``\{CT,NY,NH\}"\rangle$}{dataset}{WRITE}{ANY}{\tp{2}} \\
\vi{7} & $\p{7}$ & \policy{$\langle user\_role=``Connecticut Prison Officer"\rangle$}{dataset} {READ}{ANY}{\tp{1}} \\
\vi{7} & $\p{7}$ & \policy{$\langle user\_role=``Partner Prison Officer"\rangle$}{dataset} {READ}{ANY}{\tp{2}} \\
\vi{7} & $\p{8}$ & \policy{$\langle user\_role=``Any"\rangle$}{dataset} {READ}{ANY}{\tp{3}} \\
\end{tabular}
\begin{tabular}[t]{c|c|c}
\textbf{\tf{i}} & \textbf{Level} & \textbf{Columns Anonymized} \\\hline
\tp{0} & Level0 & $anon(\varnothing)$ \\
\tp{1} & level1 & $anon(FIRST\_NAME, LAST\_NAME)$ \\
\tp{2} & level2 & $anon(FIRST\_NAME, LAST\_NAME, IDENTIFIER, AGE)$ \\
\end{tabular}
% % \begin{tabular}[t]{ccc}
% % \toprule
% % \textbf{Stage} & \textbf{Policy} & \textbf{Service} \\
% % \midrule
% % \vi{1} & $p_1$ & $s_1$ \\
% % \vi{1} & $p_1$ & $s_2$ \\
% % \vi{2} & $p_2$ & $s_3$ \\
% % \vi{2} & $p_2$ & $s_4$ \\
% % \vi{3} & $p_3$ & $s_5$ \\
% % \vi{3} & $p_3$ & $s_6$ \\
% % \bottomrule
% % \end{tabular}
% % \hspace{1em}
% \egroup
\end{table*}

\vspace{2em}


\begin{figure}[ht!]
\centering
Expand Down
5 changes: 4 additions & 1 deletion system_model.tex
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ \subsection{Service Pipeline and Reference Scenario}\label{sec:service_definitio
% \hl{QUESTO E' MERGE (M). IO PENSAVO DIVENTASSE UN NODO $v_i$. NEL CASO CAMBIANDO LA DEFINIZIONE 3.1 DOVE NON ESISTONO PIU' I NODI MERGE E JOIN.}
\item \emph{Data analysis}, including statistical measures like averages, medians, and clustering-based statistics;
\item \emph{Machine learning task}, including training and inference;
\item \emph{Data storage}, including the storage of the results in the corresponding states. Specifically, one copy remains in Connecticut (where sensitive information in the source dataset is not protected), while two additional copies are distributed to New York and New Hampshire (with sensitive information from the source dataset being safeguarded).\hl{SPIEGHIAMO BENE LA PARENTESI}
\item \emph{Data storage}, including the storage of the results in the corresponding states.
Specifically, one copy remains in Connecticut (where sensitive information in the source dataset is not protected),
while two additional copies are distributed to New York and New Hampshire (with sensitive information from the source dataset being safeguarded).\hl{SPIEGHIAMO BENE LA PARENTESI}
\item \emph{Data visualization}, including the visualization of the results.\hl{STORAGE E VISUALIZATION NON LI FACEVAMO ALTERNATIVE CON UN NODO FINE?}
\end{enumerate*}

Expand Down

0 comments on commit f0463c1

Please sign in to comment.