Commit

added cover letter
antongiacomo committed Oct 31, 2024
1 parent 4b9c3bd commit df555c5
Showing 8 changed files with 24 additions and 140 deletions.
6 changes: 0 additions & 6 deletions major review/macro.tex
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
% please place your own definitions here and don't use \def but
% \newcommand{}{}
\newcommand\CH[1]{\textcolor{blue}{{CHIARA - }{#1}}}
\newcommand\CL[1]{\textcolor{red}{{CLAUDIO - }{#1}}}
\newcommand\MC[1]{\textcolor{green}{{MARCO - }{#1}}}
@@ -48,15 +46,13 @@
\newcommand{\instanceChartAnnotation}{\ensuremath{\myLambda}}
\renewcommand{\P}[1]{\ensuremath{P_{#1}}}
\newcommand{\p}[1]{\ensuremath{p_{#1}}}
% \newcommand{\TF}{\ensuremath{T_{\fChartFunction}}}
\newcommand{\user}{user\,}
\newcommand{\User}{User\,}
\newcommand{\profile}{\emph{prf}}

\newcommand{\fChartFunction}{\ensuremath{\myLambda{}}}

\newcommand{\tChartFunction}{\ensuremath{G^{\myLambda,\myGamma}(\V,\E,\myLambda,\myGamma)}\xspace}
% \newcommand{\iChartFunction}{\ensuremath{\fChartFunction,\myGamma{},\myLambda{}}}
\newcommand{\iChartFunction}{\ensuremath{G'(\V',\E,\myLambda)}\xspace}
\newcommand{\windowsize}{\ensuremath{|w|}}
\newcommand{\bestcombination}{$G^*_w$}
@@ -74,8 +70,6 @@
\newcommand{\pone}{$(service\_owner=dataset\_owner)$}
\newcommand{\ptwo}{$(service\_owner=partner(dataset\_owner))$}
\newcommand{\pthree}{$(service\_owner \neq dataset\_owner \wedge service\_owner \neq partner(dataset\_owner))$}
% \newcommand{\function}{$\instanceChartAnnotation{}$}
% \newcommand{\function}{$\templateChartAnnotation$}
\newcommand{\average}{\textit{average}\xspace}
\newcommand{\wide}{\textit{wide}\xspace}
\newcommand{\problem}{Pipeline Instantiation Process }
10 changes: 2 additions & 8 deletions major review/main.tex
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
\documentclass[sn-mathphys-num,referee]{sn-jnl}
\usepackage{natbib} % For citations

%\usepackage{cite}
\usepackage{natbib}
\usepackage{graphicx}
\usepackage{psfrag}
\usepackage{anyfontsize}
@@ -44,11 +42,7 @@
\begin{document}

\title[Maximizing Data Quality While Ensuring Data Protection in Service-Based Data Pipelines]{Maximizing Data Quality While Ensuring Data Protection in Service-Based Data Pipelines}
% \title[Maximizing Data Quality While Ensuring Data Protection in Service-Based Data Pipelines]{Maximizing Data Quality While Ensuring Data Protection in Service-Based Data Pipelines}
% \title[Service-Based Data Pipelines: Maximizing Data Quality While Ensuring Data Protection Requirements]{Service-Based Data Pipelines: Maximizing Data Quality While Ensuring Data Protection Requirements}
% \title[Service-Based Data Pipelines: Maximizing Data Quality While Ensuring Data Protection]{Service-Based Data Pipelines: Maximizing Data Quality While Ensuring Data Protection}
\keywords{Access Control, Big Data, Data Pipelines, Data Protection, Data Quality, Privacy}
%Data Transformation, Data Ingestion}

\author[1]{\fnm{Antongiacomo} \sur{Polimeno}}\email{[email protected]}
\author[1]{\fnm{Chiara} \sur{Braghin}}\email{[email protected]}
Expand Down Expand Up @@ -101,7 +95,7 @@ \section{Conclusions and Future Work}\label{sec:conclusions}
\input{declarations}

\clearpage
\bibliography{bib_on_BigDataAccessControl} % name your BibTeX data base
\bibliography{bib_on_BigDataAccessControl}

\end{document}

2 changes: 1 addition & 1 deletion major review/metrics.tex
Original file line number Diff line number Diff line change
@@ -88,7 +88,7 @@ \subsection{Heuristic}\label{subsec:heuristics}
At each iteration $i$, a window of size \windowsize\ selects a subset of vertices in the pipeline template $\tChartFunction$, from vertices at depth $i$ to vertices at depth \windowsize$+$$i$$-$1.
Service filtering and selection in \cref{sec:instance} are then executed to maximize quality $Q_w$ in window $w$. The heuristic returns as output the list of services instantiating all vertices at depth $i$. The sliding window $w$ is then shifted by 1 (i.e., $i$$=$$i$$+$$1$) and the filtering and selection process is repeated until \windowsize$+$$i$$-$1 equals the length $l$ (max depth) of $\tChartFunction$, that is, until the sliding window reaches the end of the template. At that point, the heuristic instantiates all remaining vertices and returns the pipeline instance $G'$.
This strategy ensures that only services with low information loss are selected at each step, maximizing the pipeline quality \q.
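The sliding-window heuristic above can be sketched in Python. This is an illustrative reconstruction, not the paper's implementation: the template is modeled as a list of depths, candidate services and the quality function are assumed inputs, and `_all_assignments` enumerates the combinations inside the window exhaustively.

```python
from itertools import product

def _all_assignments(vertices, candidates):
    """Yield every assignment of one candidate service per vertex."""
    pools = [candidates[v] for v in vertices]
    for combo in product(*pools):
        yield dict(zip(vertices, combo))

def instantiate_with_window(template, candidates, window_size, quality):
    """Greedy window-based instantiation (hypothetical sketch).

    template: list of depths, each a list of vertex ids.
    candidates: dict vertex_id -> list of compatible services.
    quality: function(partial_assignment) -> float, higher is better.
    Returns a dict vertex_id -> selected service (the pipeline instance).
    """
    instance = {}
    depth, max_depth = 0, len(template)
    while depth < max_depth:
        # Vertices covered by the window [depth, depth + window_size)
        window = [v for d in template[depth:depth + window_size] for v in d]
        # Pick the assignment maximizing quality Q_w inside the window
        best, best_q = None, float("-inf")
        for assignment in _all_assignments(window, candidates):
            q = quality({**instance, **assignment})
            if q > best_q:
                best, best_q = assignment, q
        if depth + window_size >= max_depth:
            # Window reached the end of the template: commit everything left
            instance.update(best)
            break
        # Otherwise commit only the vertices at the current depth, slide by 1
        for v in template[depth]:
            instance[v] = best[v]
        depth += 1
    return instance
```

With window size 1 this degenerates to a per-depth greedy choice; larger windows trade computation for lookahead, which is the trade-off the heuristic is designed around.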
% \newenvironment{redtext}{\footnotesize \color{gray}}{~~}

\begin{figure}[!t]
\hrule\vspace{3pt}
\begin{tabbing}
60 changes: 5 additions & 55 deletions major review/pipeline_instance.tex
Original file line number Diff line number Diff line change
@@ -46,12 +46,11 @@ \section{Pipeline Instance}\label{sec:instance}
\begin{redtext}4\end{redtext}\com{for} \= each $v$ in $G^{\myLambda,\myGamma}$\\
\begin{redtext}5\end{redtext}\tabone \vii{} = \funcname{Generate\_Vertex($v$)};\\
\begin{redtext}6\end{redtext}\tabone $G'$ = $G'$ $\cup$ \vii;\\
% \begin{redtext}7\end{redtext}\commentall{For each service vertex, filter and select a service}\\
% \begin{redtext}8\end{redtext}\tabone\com{if} \= $v$ is a service vertex:\\

\begin{redtext}7\end{redtext}\tabone $S'$ = \funcname{Filter\_Services($S^c[v]$, $v$.policies)};\\
\begin{redtext}8\end{redtext}\tabone selectedService = \funcname{Select\_A\_Service($S'$)};\\
\begin{redtext}9\end{redtext}\tabone \vii{}.service = selectedService;\\
% \begin{redtext}12\end{redtext}\tabone\com{endif};\\

\begin{redtext}10\end{redtext}\com{endfor};\\
\begin{redtext}11\end{redtext}\com{return} $G'$;\\
\\
@@ -65,11 +64,9 @@ \section{Pipeline Instance}\label{sec:instance}
\begin{redtext}18\end{redtext}\tabone\com{endif};\\
\begin{redtext}18\end{redtext}\com{endfor};\\
\begin{redtext}19\end{redtext}\com{return} $S'$;\\
% \\
% \begin{redtext}27\end{redtext}\funcname{selectService(compatibleServices)}\\
% \begin{redtext}28\end{redtext}\commentall{Select the best service based on a quality metric}\\

\end{tabbing}
% \begin{redtext}29\end{redtext}\tabone\com{return} service with highest quality metric;\\

\end{ourcolor}
\hrule
\vspace{10pt}
@@ -81,51 +78,4 @@
  \item \textit{Selection Algorithm} -- The selection algorithm selects one service $s'_i$ for each set $S'_{i}$ of compatible services, which instantiates the corresponding vertex $\vii{i}$$\in$$\Vp$ {\color{OurColor}(lines 8--9)}. There are many ways of choosing $s'_i$; Section~\ref{sec:heuristics} presents our approach based on the maximization of data \quality \emph{\q}.
\end{enumerate}

When all vertices $\vi{i}$$\in$$V$ in $G^{\myLambda,\myGamma}$ have been visited, the \pipelineInstance $G'$ is generated {\color{OurColor}(line 11)}, with a service instance $s'_i$ for each \vii{i}$\in$\Vp. Vertex \vii{i} is annotated with policies in \P{i} according to \myLambda, because policies in \P{i} are evaluated and enforced at runtime, only when the pipeline instance is triggered and before any service is executed. When policy evaluation returns \emph{true}, data transformation \TP$\in$\P{i} is applied, otherwise a default transformation that removes all data is applied.
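The visit/filter/select loop of the pseudocode (lines 4--11) can be sketched in Python. The data structures are assumptions made for illustration: each template vertex carries a list of policy predicates, candidate services carry a declared quality score, and the OR semantics over policies matches the paper's access-decision rule.

```python
def build_pipeline_instance(template_vertices, candidates, profile):
    """Hypothetical sketch of the instantiation algorithm.

    template_vertices: list of dicts {"id": str, "policies": [predicate]}.
    candidates: dict vertex_id -> list of candidate service dicts.
    profile: subject profile passed to each policy predicate.
    """
    instance = []
    for v in template_vertices:                       # line 4: visit each vertex
        v_prime = {"vertex": v["id"]}                 # line 5: Generate_Vertex
        # line 7: Filter_Services -- keep services satisfying at least one
        # policy (policies in P_i are combined with logical OR)
        compatible = [s for s in candidates[v["id"]]
                      if any(p(s, profile) for p in v["policies"])]
        if not compatible:
            raise ValueError(f"no compatible service for vertex {v['id']}")
        # line 8: Select_A_Service -- here, highest declared quality
        v_prime["service"] = max(compatible, key=lambda s: s["quality"])
        instance.append(v_prime)                      # lines 6, 9
    return instance                                   # line 11
```

The selection criterion on line 8 is a placeholder; the paper's actual criterion, maximization of data quality \q, is the subject of the heuristic in the metrics section.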

% \begin{figure}[!t]
% \centering
% \newcommand{\function}[1]{$\instanceChartAnnotation{}_{#1}$}
% \begin{tikzpicture}[scale=0.7]
% % vertexes
% \node[draw, circle, fill,text=white,minimum size=1 ] (sr) at (0,0) {};

% \node[draw, circle, plus,minimum size=1.5em] (plus) at (1.5,0) {};

% \node[draw, circle] (s2) at (3.5,-2) {$\sii{1}$};
% \node[draw, circle] (s3) at (3.5,0) {$\sii{2}$};
% \node[draw, circle] (s1) at (3.5,2) {$\sii{3}$};

% \node[draw, circle] (s4) at (5,0) {$\sii{4}$};
% \node[draw, circle] (s5) at (6.5,0) {$\sii{5}$};

% \node[draw, circle] (s6) at (8.0,0) {$\sii{6}$};
% \node[draw, circle] (s7) at (9.5,0) {$\sii{7}$};
% % Text on top
% \node[above] at (sr.north) {$\vi{r}$};
% \node[above] at (s1.north) {\function{3}};

% \node[above] at (s2.north) {\function{1}};
% \node[above] at (s3.north) {\function{2}};
% \node[above] at (s4.north) {\function{4}};
% \node[above] at (s5.north) {\function{5}};
% \node[above] at (s6.north) {\function{6}};
% \node[above] at (s7.north) {\function{7}};
% % Connection

% \draw[->] (sr) -- (plus);
% \draw[->] (plus) -- (s1);
% \draw[->] (plus) -- (s2);
% \draw[->] (plus) -- (s3);

% \draw[->] (s1) -- (s4);
% \draw[->] (s2) -- (s4);
% \draw[->] (s3) -- (s4);
% \draw[->] (s4) -- (s5);
% \draw[->] (s5) -- (s6);
% \draw[->] (s6) -- (s7);

% \end{tikzpicture}
% \caption{Service composition instance}
% \label{fig:service_composition_instance}
% \end{figure}
18 changes: 8 additions & 10 deletions major review/pipeline_template.tex
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@ \subsection{Pipeline Template Definition}\label{sec:templatedefinition}

\begin{definition}[Pipeline Template] \label{def:template}
Given a service pipeline G(\V,\E), a Pipeline Template \tChartFunction is a direct acyclic graph extended with two annotation functions:
\begin{enumerate}%[label=\textit{\roman*}]
\begin{enumerate}
\item \emph{Data Protection Annotation} \myLambda that assigns a label \myLambda(\vi{i}) to each vertex $\vi{i}\in\V_S$. Label \myLambda(\vi{i}) corresponds to a set \P{i} of policies $p_j$ to be satisfied by service $s_i$ represented by \vi{i};
\item \emph{Functional Annotation} \myGamma that assigns a label \myGamma(\vi{i}) to each vertex $\vi{i}\in\V_S$. Label \myGamma(\vi{i}) corresponds to the functional description $F_i$ of service $s_i$ represented by \vi{i}.
\end{enumerate}
@@ -27,8 +27,6 @@ \subsection{Pipeline Template Definition}\label{sec:templatedefinition}

We note that, at this stage, the template is not yet linked to any service.
We also note that policies $p_j$$\in$\P{i} in \myLambda(\vi{i}) are combined using logical OR, meaning that the access decision is positive if at least one policy $p_j$ evaluates to \emph{true}.
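The OR combination of the policies annotating a vertex reduces to a one-line check; the sketch below is illustrative (policy predicates and the request structure are assumptions, not the paper's API).

```python
def evaluate_vertex_policies(policies, request):
    """Access decision for one vertex: positive if at least one policy
    p_j in P_i evaluates to true (logical OR over the policy set)."""
    return any(p(request) for p in policies)
```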
% The pipeline template of the service pipeline of \cref{fig:reference_scenario} is depicted in \cref{fig:service_composition_template}.

\subsection{Data Protection Annotation}\label{sec:nonfuncannotation}
Data Protection Annotation \myLambda\ expresses data protection requirements in the form of access control policies. We consider an attribute-based access control model that offers flexible fine-grained authorization and adapts its standard key components to address the unique characteristics of a big data environment. Access requirements are expressed in the form of policy conditions that are defined as follows.

@@ -64,15 +62,15 @@ \subsection{Pipeline Template Definition}\label{sec:templatedefinition}
\item level \emph{l2} (\tp{2}): full anonymization with first name, last name, identifier and age being anonymized.
\end{enumerate*}
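The anonymization levels can be illustrated with a minimal sketch. Field names and the masking token are assumptions; per the text, level \emph{l2} fully anonymizes first name, last name, identifier, and age, while \emph{l0} applies no transformation, and intermediate levels would mask a subset of these fields.

```python
# Fields fully anonymized at level l2, as described in the text.
L2_FIELDS = ("first_name", "last_name", "identifier", "age")

def anonymize(record, level):
    """Apply a level-based anonymization to one record (illustrative)."""
    if level == "l0":
        return dict(record)  # no anonymization
    if level == "l2":
        return {k: ("*" if k in L2_FIELDS else v) for k, v in record.items()}
    raise ValueError(f"unknown anonymization level: {level}")
```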

Access control policies $p_j$$\in$\P{i} annotating a vertex \vi{i} in a pipeline template $G^{\myLambda,\myGamma}$ specify the data protection requirements that a candidate service must fulfill to be selected in the pipeline instance. Section~\ref{sec:instance} describes the selection process and the pipeline instance generation.

\subsection{Functional Annotations}\label{sec:funcannotation}
A proper data management approach must track functional data manipulations across the entire pipeline execution, defining the functional requirements of each service operating on data.
To this aim, each vertex \vi{i}$\in\V_S$ is annotated with a label \myGamma(\vi{i}), corresponding to the functional description $F_i$ of the service $s_i$ represented by \vi{i}.
$F_i$ describes the functional requirements, such as the API, inputs, and expected outputs.
It also specifies a set \TF{} of data transformation functions \tf{i}, which can be triggered during the execution of the corresponding service $s_i$.

Function $\tf{i}$$\in$$\TF{}$ can be:
\begin{enumerate*}[label=\textit{\roman*})]
\item an empty function \tf{\epsilon} that applies no transformation or processing on the data;
\item an additive function \tf{a} that expands the amount of data received, for example, by integrating data from other sources;
58 changes: 7 additions & 51 deletions major review/pipeline_template_example.tex
Original file line number Diff line number Diff line change
@@ -45,16 +45,9 @@
\node[box, right=of s4] (s5) {$\vi{5}$};
\node[box, right=of s5] (s7) {$\vi{6}$};
\node[box, right=of s7] (s8) {$\vi{7}$};
% Text on top

\node[above] at (sr.north) {$\vi{r}$};

% \node[above] at (s1.north) {\tp{3}};
% \node[above] at (s2.north) {\tp{1}};
% \node[above] at (s3.north) {\tp{2}};
% \node[above] at (s4.north) {\tp{4}};
% \node[above] at (s5.north) {\tp{5}};
% \node[above] at (s7.north) {\tp{6}};
% \node[above] at (s8.north) {\tp{7}};

\node[above] at (s1.north) {\function{v1}};
\node[above] at (s2.north) {\function{v2}};
@@ -89,69 +82,32 @@
Let us consider the reference scenario introduced in \cref{sec:service_definition}.
{\color{OurColor}\cref{fig:service_composition_template}(c) presents an example of pipeline template consisting of five stages, each one annotated with a policy in \cref{tab:anonymization}(a) and corresponding data transformations in \cref{tab:anonymization}(b).}

%NODE %
The first stage in \cref{fig:service_composition_template}(c) consists of three parallel vertices \vi{1}, \vi{2}, \vi{3} for data collection.
Data protection annotations \myLambda(\vi{1}), \myLambda(\vi{2}), \myLambda(\vi{3}) refer to policy \p{0} in \cref{fig:service_composition_template}(a) with an empty transformation \tp{0} in \cref{fig:service_composition_template}(b).
Functional requirements \F{1}, \F{2}, \F{3} prescribe a URI as input and the corresponding dataset as output.

The second stage in \cref{fig:service_composition_template}(c) consists of vertex \vi{4}, merging the three datasets obtained at the first stage. Data protection annotation \myLambda(\vi{4}) refers to policies \p{1} and \p{2} in \cref{fig:service_composition_template}(a), which apply different data transformations depending on the relation between the dataset and the service owner.
%NODE %
If the service owner is also the dataset owner (i.e., \pone), the dataset is not anonymized (\tp{0}). If the service owner is a partner of the dataset owner (i.e., \ptwo), the dataset is anonymized at \emph{level1} (\tp{1}). If the service owner has no partner relationship with the dataset owner, no policy applies.
Functional requirement \F{4} prescribes $n$ datasets as input and the merged dataset as output.
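The decision rule that policies \p{1} and \p{2} impose on this stage can be sketched directly. The partner relation is modeled as a set of pairs for illustration; the transformation labels mirror \tp{0} and \tp{1} from the table.

```python
def select_transformation(service_owner, dataset_owner, partners):
    """Pick the data transformation for the merge vertex (illustrative).

    partners: set of (service_owner, dataset_owner) pairs in a
    partnership relation (an assumed encoding).
    """
    if service_owner == dataset_owner:
        return "tp0"   # p1: owners coincide, dataset not anonymized
    if (service_owner, dataset_owner) in partners:
        return "tp1"   # p2: partner relation, level-1 anonymization
    return None        # no policy applies: access is not granted
```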

%NODE %
The third stage in \cref{fig:service_composition_template}(c) consists of vertex \vi{5} for data analysis.
Data protection annotation \myLambda(\vi{5}) refers to policies \p{1} and \p{2} in \cref{fig:service_composition_template}(a), as for the second stage.
Functional requirement \F{5} prescribes a dataset as input and the results of the data analysis as output.

% 4° NODE %
The fourth stage in \cref{fig:service_composition_template}(c) consists of vertex \vi{6}, managing data storage. Data protection annotation \myLambda(\vi{6}) refers to policies \p{3} and \p{4} in \cref{fig:service_composition_template}(a), which apply different data transformations depending on the relation between the dataset and the service region.
If the service region is the dataset origin (condition $(service\_region$$=$$dataset\_origin)$ in \p{3}), the dataset is anonymized at level $l_0$ (\tp{0}).
If the service region is in a partner region (condition ($service\_region$=\{``$NY$",``$NH$"\}) in \p{4}), the dataset is anonymized at level $l_1$ (\tp{1}).
Functional requirement \F{7} prescribes a dataset as input and the URI of the stored data as output.

% 5° NODE %
The last stage in \cref{fig:service_composition_template}(c) consists of vertex \vi{7}, responsible for data visualization.
Data protection annotation \myLambda(\vi{7}) refers to policies \p{5} and \p{6} in \cref{fig:service_composition_template}(a), which anonymize data according to the environment where the service is executed.
A \emph{risky} environment is defined as a region outside the owner or partner facility.
If the environment is risky (\p{5}), the data are anonymized at level $r_0$ (\tp{3}).
If the environment is not risky (\p{6}), the data are anonymized at level $r_1$ (\tp{4}).
Functional requirement \F{8} prescribes a dataset as input and a data visualization interface (possibly in the form of a JSON file) as output.
\end{example}

% \begin{figure}
% \centering
% % \resizebox{\columnwidth}{!}{%
% \begin{tikzpicture}[
% box/.style={draw, circle},
% arrow/.style={->, >=stealth, thick},
% scale=0.9
% ]
% % Nodes for the stages
% \node[box] (v1) {$\vi{1}$};
% \node[box, below=of v1] (v2) {$\vi{2}$};
% \node[box, below=of v2] (v3) {$\vi{3}$};
% \node[box, right=of v2, xshift=1cm] (v4) {$\vi{4}$};
% \node[box, right=of v4] (v5) {$\vi{5}$};
% \node[box, right=of v5] (v6) {$\vi{6}$};
% \node[box, right=of v6] (v7) {$\vi{7}$};

% \node[above] at (v1.north) {(\p{0}, \tp{0})};
% \node[above] at (v2.north) {(\p{0}, \tp{0})};
% \node[above] at (v3.north) {(\p{0}, \tp{0})};
% \node[above] at (v4.north) {(\p{1}, \tp{2})};
% \node[above] at (v5.north) {(\p{1}, \tp{2})};
% \node[above] at (v6.north) {(\p{3}, \tp{4})};
% \node[above] at (v7.north) {(\p{5}, \tp{6})};
% % Arrows connecting the stages
% \draw[arrow] (v1) -- (v4);
% \draw[arrow] (v2) -- (v4);
% \draw[arrow] (v3) -- (v4);
% \draw[arrow] (v4) -- (v5);
% \draw[arrow] (v5) -- (v6);
% \draw[arrow] (v6) -- (v7);

% \end{tikzpicture}
% % }
% \caption{\label{fig:pipeline_template_example}Example of pipeline template}
% \end{figure}
\end{example}