Skip to content

Commit 88cb086

Browse files
committed
minor instance
1 parent 42e91ad commit 88cb086

File tree

7 files changed

+298
-126
lines changed

7 files changed

+298
-126
lines changed

Big Data Access Control - extension.code-workspace

Lines changed: 196 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,201 @@
88
"cSpell.words": [
99
"Jaccard"
1010
],
11-
"window.commandCenter": false
11+
"window.commandCenter": false,
12+
"files.exclude": {
13+
"**/*.aux": true,
14+
"**/*.lof": true,
15+
"**/*.log": true,
16+
"**/*.lot": true,
17+
"**/*.fls": true,
18+
"**/*.out": true,
19+
"**/*.toc": true,
20+
"**/*.fmt": true,
21+
"**/*.fot": true,
22+
"**/*.cb": true,
23+
"**/*.cb2": true,
24+
"**/.*.lb": true,
25+
"**/*.dvi": true,
26+
"**/*.xdv": true,
27+
"**/*-converted-to.*": true,
28+
"**/main.pdf": true,
29+
"**/*.bbl": true,
30+
"**/*.bcf": true,
31+
"**/*.blg": true,
32+
"**/*-blx.aux": true,
33+
"**/*-blx.bib": true,
34+
"**/*.run.xml": true,
35+
"**/*.fdb_latexmk": true,
36+
"**/*.synctex": true,
37+
"**/*.synctex(busy)": true,
38+
"**/*.synctex.gz": true,
39+
"**/*.synctex.gz(busy)": true,
40+
"**/*.pdfsync": true,
41+
"**/latex.out/": true,
42+
"**/*.alg": true,
43+
"**/*.loa": true,
44+
"**/acs-*.bib": true,
45+
"**/*.thm": true,
46+
"**/*.nav": true,
47+
"**/*.pre": true,
48+
"**/*.snm": true,
49+
"**/*.vrb": true,
50+
"**/*.soc": true,
51+
"**/*.cut": true,
52+
"**/*.cpt": true,
53+
"**/*.spl": true,
54+
"**/*.ent": true,
55+
"**/*.lox": true,
56+
"**/*.mf": true,
57+
"**/*.mp": true,
58+
"**/*.t[1-9]": true,
59+
"**/*.t[1-9][0-9]": true,
60+
"**/*.tfm": true,
61+
"**/*.end": true,
62+
"**/*.?end": true,
63+
"**/*.[1-9]": true,
64+
"**/*.[1-9][0-9]": true,
65+
"**/*.[1-9][0-9][0-9]": true,
66+
"**/*.[1-9]R": true,
67+
"**/*.[1-9][0-9]R": true,
68+
"**/*.[1-9][0-9][0-9]R": true,
69+
"**/*.eledsec[1-9]": true,
70+
"**/*.eledsec[1-9]R": true,
71+
"**/*.eledsec[1-9][0-9]": true,
72+
"**/*.eledsec[1-9][0-9]R": true,
73+
"**/*.eledsec[1-9][0-9][0-9]": true,
74+
"**/*.eledsec[1-9][0-9][0-9]R": true,
75+
"**/*.acn": true,
76+
"**/*.acr": true,
77+
"**/*.glg": true,
78+
"**/*.glo": true,
79+
"**/*.gls": true,
80+
"**/*.glsdefs": true,
81+
"**/*.lzo": true,
82+
"**/*.lzs": true,
83+
"**/*.slg": true,
84+
"**/*.slo": true,
85+
"**/*.sls": true,
86+
"**/*.gnuplot": true,
87+
"**/*.table": true,
88+
"**/*-gnuplottex-*": true,
89+
"**/*.gaux": true,
90+
"**/*.glog": true,
91+
"**/*.gtex": true,
92+
"**/*.4ct": true,
93+
"**/*.4tc": true,
94+
"**/*.idv": true,
95+
"**/*.lg": true,
96+
"**/*.trc": true,
97+
"**/*.xref": true,
98+
"**/*.brf": true,
99+
"**/*-concordance.tex": true,
100+
"**/*-tikzDictionary": true,
101+
"**/*.lol": true,
102+
"**/*.ltjruby": true,
103+
"**/*.idx": true,
104+
"**/*.ilg": true,
105+
"**/*.ind": true,
106+
"**/*.maf": true,
107+
"**/*.mlf": true,
108+
"**/*.mlt": true,
109+
"**/*.mtc[0-9]*": true,
110+
"**/*.slf[0-9]*": true,
111+
"**/*.slt[0-9]*": true,
112+
"**/*.stc[0-9]*": true,
113+
"**/_minted*": true,
114+
"**/*.pyg": true,
115+
"**/*.mw": true,
116+
"**/*.newpax": true,
117+
"**/*.nlg": true,
118+
"**/*.nlo": true,
119+
"**/*.nls": true,
120+
"**/*.pax": true,
121+
"**/*.pdfpc": true,
122+
"**/*.sagetex.sage": true,
123+
"**/*.sagetex.py": true,
124+
"**/*.sagetex.scmd": true,
125+
"**/*.wrt": true,
126+
"**/svg-inkscape/": true,
127+
"**/*.sout": true,
128+
"**/*.sympy": true,
129+
"**/sympy-plots-for-*.tex/": true,
130+
"**/*.upa": true,
131+
"**/*.upb": true,
132+
"**/*.pytxcode": true,
133+
"**/pythontex-files-*/": true,
134+
"**/*.listing": true,
135+
"**/*.loe": true,
136+
"**/*.dpth": true,
137+
"**/*.md5": true,
138+
"**/*.auxlock": true,
139+
"**/*.ptc": true,
140+
"**/*.tdo": true,
141+
"**/*.hst": true,
142+
"**/*.ver": true,
143+
"**/*.lod": true,
144+
"**/*.xcp": true,
145+
"**/*.xmpi": true,
146+
"**/*.xdy": true,
147+
"**/*.xyc": true,
148+
"**/*.xyd": true,
149+
"**/*.ttt": true,
150+
"**/*.fff": true,
151+
"**/TSWLatexianTemp*": true,
152+
"**/*.bak": true,
153+
"**/*.sav": true,
154+
"**/.texpadtmp": true,
155+
"**/*.lyx~": true,
156+
"**/*.backup": true,
157+
"**/.*.swp": true,
158+
"**/*~[0-9]*": true,
159+
"**/*.tps": true,
160+
"./auto/*": true,
161+
"**/*.el": true,
162+
"**/*-tags.tex": true,
163+
"**/*.sta": true,
164+
"**/*.lpz": true,
165+
"**/*.xwm": true,
166+
"**/*.vtc": true,
167+
"**/*.glstex": true,
168+
"**/.DS_Store": true,
169+
"**/.AppleDouble": true,
170+
"**/.LSOverride": true,
171+
"**/Icon": true,
172+
"**/._*": true,
173+
"**/.DocumentRevisions-V100": true,
174+
"**/.fseventsd": true,
175+
"**/.Spotlight-V100": true,
176+
"**/.TemporaryItems": true,
177+
"**/.Trashes": true,
178+
"**/.VolumeIcon.icns": true,
179+
"**/.com.apple.timemachine.donotpresent": true,
180+
"**/.AppleDB": true,
181+
"**/.AppleDesktop": true,
182+
"**/Network Trash Folder": true,
183+
"**/Temporary Items": true,
184+
"**/.apdisk": true,
185+
"**/*.icloud": true,
186+
".vscode/*": true,
187+
".vscode/settings.json": false,
188+
".vscode/tasks.json": false,
189+
".vscode/launch.json": false,
190+
".vscode/extensions.json": false,
191+
".vscode/*.code-snippets": false,
192+
"**/.history/": true,
193+
"**/*.vsix": true,
194+
"**/.history": true,
195+
"**/.ionide": true,
196+
"**/.venv/": true,
197+
"*/*.csv": true,
198+
".venv/**/*": true,
199+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/linux_and_mac/attach_x86.dylib": true,
200+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/linux_and_mac/attach_x86_64.dylib": true,
201+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/linux_and_mac/attach_linux_x86.o": true,
202+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd_attach_to_process/linux_and_mac/attach_linux_x86_64.o": true,
203+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/_pydevd_frame_eval/pydevd_frame_evaluator.*.so": true,
204+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/_pydevd_frame_eval/pydevd_frame_evaluator.*.pyd": true,
205+
".venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/_pydevd_frame_eval/pydevd_frame_evaluator.pyx": true
206+
}
12207
}
13208
}

macro.tex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
\newcommand{\T}{\ensuremath{T}}
1717
\newcommand{\TF}[1]{\ensuremath{T^F_{#1}}}
18+
\newcommand{\tf}[1]{\ensuremath{t^f_{#1}}}
1819
\newcommand{\TP}{\ensuremath{T^{P}}}
1920

2021
\newcommand{\G}{\ensuremath{G}}

metrics.tex

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,5 @@
11
\section{Heuristics}\label{sec:coalition}
2-
% Coalition building is a crucial process having direct impact on the quality of the analytics results. %Figure~\ref{fig:smet}
3-
% Figure X shows how data lineage is impacted by the processing lineage and in particular by i) the \textit{coalition agreement} $\textit{CA}_C$ (i.e., the CA-driven transformations adopted for a give coalition) and by ii) the transformation produced by the different jobs (job-specific transformation) part of a given coalition \coalition{}.
4-
% Let us consider job $\job{1}^{\org{1}}$ of %Figure~\ref{fig:smet}
5-
% Figure X it receives as input the data \trans{1}(\dataset{1}) based on the dataset obtained by \dataset{1} after the transformation \trans{1} which is associated to the data lineage by our AC model. It then produce a data that is the job-specific transformation on the input data (i.e., \trans{1}(\dataset{1})) generating \dataset{2}.
6-
% We note that our Big Data Analytics pipeline models includes alternatives allowing different processing lineage (linear independent path in the Big data graph G) doing the same analytics but using different jobs (e.g., a lineage including k-means or a lineage using c-means). This will lead to different job-specific transformation on the data for the same Big Data pipeline.
7-
% In this paper, for the sake of simplicity we i) consider different coalitions for each processing lineage, ii) coalitions made of trustworthy organizations \org{i} providing candidate services for each job and iii) job-specific transformation not influenced by the organizations' behavior.
8-
% In this scenario, since any coalition of a given processing lineage will produce the same job-specific data transformation, the analytics pipeline quality is impacted only by the \textit{coalition agreement} $\textit{CA}_C$ or rather by the transformations \trans{i} imposed by the given coalition \coalition{} on the data lineage.
9-
% In the following we first present metrics to evaluate data quality across the data lineage, and then a set of solutions to build coalitions for given Big Data pipeline ensuring a given data quality.
10-
11-
% %\begin{example}\label{ex:p1j}
12-
% %The choice of the specific deployment has an impact on the way in which the coalition \coalition{} of organizations \org{i} is formed as discussed in the following of this section.
13-
% %Let us consider the following example where we have a pipeline made of just one ingestion job that can be offered by service provider $s_1$ or by the service provider $s_2\] In case the $s_1$ is selected the transformation $T_1$ is triggered according to the authorization $s_1$ has on the data, in this example $s_1$ has full control meaning that transformation $t_1$ is empty. In case the $s_2$ is selected the transformation $T_2$ is triggered according to the authorization $s_2$ has on the data and in this example data labelled as PII are removed.
14-
% %\end{example}
15-
% %Considering the two data lineage generated by the two different coalition in Example\ref{ex:p1j} the one involving $s_2$ produce a significant changes to data compared to the other one. This data changes can have direct impact on the quality of the analytics outcomes, therefore our goal is to build coalitions ensuring specific data quality. This coalition building problem can be assimilated to xxx showing an exponential complexity ...
16-
% %In the following we fist introduce our data quality metrics and then our heuristics to solve the problem of coalition building
17-
18-
% %\subsection{Data Quality metrics}
19-
%\subsection{Coalition Heuristics}
2+
203
\subsection{Metrics}\label{sec:metrics}
214

225
Data quality is a widely studied topic in the database management research community,
@@ -29,7 +12,6 @@ \subsection{Metrics}\label{sec:metrics}
2912

3013

3114
In the following, we present a set of metrics to evaluate the quality of the data at each step of the big data pipeline.
32-
We
3315

3416
The proposed metrics can be classified into two categories, namely quantitative and statistical.
3517
Initially, these metrics are applied to the original dataset (X) without any transformations, and subsequently, they are applied to the transformed dataset (Y).
@@ -48,29 +30,13 @@ \subsubsection{Jaccard coefficient}
4830
Unlike other similarity measures, such as Euclidean distance, the Jaccard coefficient is not affected by the magnitude of the values in the dataset.
4931
This property makes it suitable for datasets with categorical variables or nominal data, where the values do not have a meaningful numerical interpretation.
5032

51-
\subsubsection{Jaccard coefficent with weights} Let us consider two dataset X and Y of the same size.
52-
The Jaccard coefficent is defined as:\[J(X,Y) = \frac{\sum_{i=1}^{n}w_i(x_i \cap y_i)}{\sum_{i=1}^{n}w_i(x_i \cup y_i)}\]
33+
\subsubsection{Jaccard coefficient with weights} Let us consider two datasets X and Y of the same size.
34+
The weighted Jaccard coefficient is defined as:\[J(X,Y) = \frac{\sum_{i=1}^{n}w_i(x_i \cap y_i)}{\sum_{i=1}^{n}w_i(x_i \cup y_i)}\]
5335
It is computed by dividing the weighted cardinality of the intersection of the two sets by the weighted cardinality of their union, where the weights are those assigned to the elements of the sets.
5436
Weights allow for the prioritization of certain features or elements in the datasets.
5537
This approach can be particularly useful when some elements in the dataset have more importance or relevance than others.
5638
By assigning weights to the elements, the weighted Jaccard similarity can account for this importance and provide a more accurate measure of similarity.
5739

58-
% \subsubsection{Kullback-Leibler divergence}
59-
% Let us consider two dataset X and Y of the same size.
60-
% The KL divergence is defined as:\[KL(X,Y) = \sum_{i=1}^{n}x_i \log \frac{x_i}{y_i}\]
61-
% Which is computed by taking the sum of the product of each element in the first dataset and the logarithm of the ratio of the same element in the second dataset.
62-
% The KL divergence is a measure of the difference between two probability distributions and is useful for comparing the dissimilarity of two datasets.
63-
64-
65-
% \subsubsection{Kullback-Leibler divergence with weights} Let us consider two dataset X and Y of the same size. The weighted KL divergence is defined as:
66-
67-
% \[KL(X,Y) = \sum_{i=1}^{n}w_i(x_i \log \frac{x_i}{y_i})\]
68-
69-
% The weighted KL divergence is a variant of the KL divergence that incorporates weights to the elements in the datasets being compared.
70-
% It allows for the prioritization of certain features or elements in the datasets.
71-
% This approach can be particularly useful when some elements in the dataset have more importance or relevance than others.
72-
% By assigning weights to the elements, the weighted KL divergence can account for this importance and provide a more accurate measure of dissimilarity.
73-
7440
\subsubsection{Jensen-Shannon Divergence}
7541

7642
Let us consider two datasets X and Y of the same size. The Jensen-Shannon divergence (JSD) is a symmetrized version of the Kullback-Leibler (KL) divergence and can be used to measure the dissimilarity between the probability distributions of the two datasets.

policy.tex

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)