-
Notifications
You must be signed in to change notification settings - Fork 530
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AI driven Watermarking Technique for Safeguarding Text Integrity
- Loading branch information
1 parent
dbb6614
commit c9e172b
Showing
13 changed files
with
105,432 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
100,348 changes: 100,348 additions & 0 deletions
100,348
papers/atharva_rasane/00_myst_template/BERT_WATER_MARKING.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+119 KB
papers/atharva_rasane/00_myst_template/Distribution_of_average_others.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+101 KB
papers/atharva_rasane/00_myst_template/Distribution_of_highest_ratio.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+69.7 KB
papers/atharva_rasane/00_myst_template/Distribution_of_t-statistics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4,001 changes: 4,001 additions & 0 deletions
4,001
papers/atharva_rasane/00_myst_template/Results.csv
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,238 @@ | ||
@book{book01, | ||
author = "William Shakespeare", | ||
year = "1998", | ||
title = "Romeo and Juliet", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/1513}, | ||
} | ||
|
||
@book{book02, | ||
author = "Herman Melville", | ||
year = "2001", | ||
title = "Moby Dick; Or, The Whale", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/2701}, | ||
} | ||
|
||
@book{book03, | ||
author = "Jane Austen", | ||
year = "1998", | ||
title = "Pride and Prejudice", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/1342}, | ||
} | ||
|
||
@book{book04, | ||
author = "Mary Wollstonecraft Shelley", | ||
year = "1993", | ||
title = "Frankenstein; Or, The Modern Prometheus", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/84}, | ||
} | ||
|
||
@book{book05, | ||
author = "George Eliot", | ||
year = "1994", | ||
title = "Middlemarch", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/145}, | ||
} | ||
|
||
@book{book06, | ||
author = "William Shakespeare", | ||
year = "1994", | ||
title = "The Complete Works of William Shakespeare", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/100}, | ||
} | ||
|
||
@book{book07, | ||
author = "E. M. Forster", | ||
year = "2001", | ||
title = "A Room with a View", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/2641}, | ||
} | ||
|
||
@book{book08, | ||
author = "Louisa May Alcott", | ||
year = "2011", | ||
title = "Little Women; Or, Meg, Jo, Beth, and Amy", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/37106}, | ||
} | ||
|
||
@book{book09, | ||
author = "L. M. Montgomery", | ||
year = "2022", | ||
title = "The Blue Castle", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/67979}, | ||
} | ||
|
||
@book{book10, | ||
author = "Elizabeth Von Arnim", | ||
year = "2005", | ||
title = "The Enchanted April", | ||
address = "USA", | ||
url = {https://www.gutenberg.org/ebooks/16389}, | ||
} | ||
|
||
@article{Atr01, | ||
author = "Kamaruddin, Nurul Shamimi and Kamsin, Amirrudin and Por, Lip Yee and Rahman, Hameedur", | ||
year = "2018", | ||
title = "A Review of Text Watermarking: Theory, Methods, and Applications", | ||
journal = "IEEE Access", | ||
volume = 6, | ||
issue = 3, | ||
pages = {8011--8028}, | ||
doi = {10.1109/ACCESS.2018.2796585} | ||
} | ||
@article{Atr02, | ||
author = "Lancaster, T", | ||
year = "2023", | ||
title = "Artificial intelligence, text generation tools and ChatGPT - does digital watermarking offer a solution?", | ||
journal = "Int J Educ Integr", | ||
volume = 19, | ||
issue = 10, | ||
pages = {8011-8028}, | ||
doi = {https://doi.org/10.1007/s40979-023-00131-6} | ||
} | ||
|
||
@article{Atr03, | ||
author = "Yichao Wu and Zhengyu Jin and Chenxi Shi and Penghao Liang and Tong Zhan", | ||
year = "2024", | ||
title = "Research on the Application of Deep Learning-based BERT Model in Sentiment Analysis", | ||
journal = "ArXiv", | ||
volume = {abs/2403.08217}, | ||
url = {https://api.semanticscholar.org/CorpusID:268379403} | ||
} | ||
|
||
@article{Atr04, | ||
title = {UniSpaCh: A text-based data hiding method using Unicode space characters}, | ||
journal = {Journal of Systems and Software}, | ||
volume = {85}, | ||
number = {5}, | ||
pages = {1075-1082}, | ||
year = {2012}, | ||
issn = {0164-1212}, | ||
doi = {https://doi.org/10.1016/j.jss.2011.12.023}, | ||
url = {https://www.sciencedirect.com/science/article/pii/S0164121211003177}, | ||
author = {Lip Yee Por and KokSheik Wong and Kok Onn Chee}, | ||
keywords = {UniSpaCh, DASH, Data hiding, Unicode character, Space manipulation}, | ||
abstract = {This paper proposes a text-based data hiding method to insert external information into Microsoft Word document. First, the drawback of low embedding efficiency in the existing text-based data hiding methods is addressed, and a simple attack, DASH, is proposed to reveal the information inserted by the existing text-based data hiding methods. Then, a new data hiding method, UniSpaCh, is proposed to counter DASH. The characteristics of Unicode space characters with respect to embedding efficiency and DASH are analyzed, and the selected Unicode space characters are inserted into inter-sentence, inter-word, end-of-line and inter-paragraph spacings to encode external information while improving embedding efficiency and imperceptivity of the embedded information. UniSpaCh is also reversible where the embedded information can be removed to completely reconstruct the original Microsoft Word document. Experiments were carried out to verify the performance of UniSpaCh as well as comparing it to the existing space-manipulating data hiding methods. Results suggest that UniSpaCh offers higher embedding efficiency while exhibiting higher imperceptivity of white space manipulation when compared to the existing methods considered. In the best case scenario, UniSpaCh produces output document of size almost 9 times smaller than that of the existing method.} | ||
} | ||
|
||
@INPROCEEDINGS{Proc01, | ||
author={Jalil, Zunera and Mirza, Anwar M.}, | ||
booktitle={2009 International Conference on Information and Multimedia Technology}, | ||
title={A Review of Digital Watermarking Techniques for Text Documents}, | ||
year={2009}, | ||
volume={}, | ||
number={}, | ||
pages={230-234}, | ||
keywords={Watermarking;Copyright protection;Internet;Cryptography;Steganography;Computer science;Information security;Intellectual property;Data mining;Law;watermarking;copyright protection;information security;text structure}, | ||
doi={10.1109/ICIMT.2009.11}} | ||
|
||
# These references may be helpful: | ||
@inproceedings{jupyter, | ||
abstract = {It is increasingly necessary for researchers in all fields to write computer code, and in order to reproduce research results, it is important that this code is published. We present Jupyter notebooks, a document format for publishing code, results and explanations in a form that is both readable and executable. We discuss various tools and use cases for notebook documents.}, | ||
author = {Kluyver, Thomas and Ragan-Kelley, Benjamin and Pérez, Fernando and Granger, Brian and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica and Grout, Jason and Corlay, Sylvain and Ivanov, Paul and Avila, Damián and Abdalla, Safia and Willing, Carol and {Jupyter development team}}, | ||
editor = {Loizides, Fernando and Schmidt, Birgit}, | ||
location = {Netherlands}, | ||
publisher = {IOS Press}, | ||
url = {https://eprints.soton.ac.uk/403913/}, | ||
booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, | ||
year = {2016}, | ||
pages = {87--90}, | ||
title = {Jupyter Notebooks - a publishing format for reproducible computational workflows}, | ||
} | ||
|
||
@article{matplotlib, | ||
abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.}, | ||
author = {Hunter, J. D.}, | ||
publisher = {IEEE COMPUTER SOC}, | ||
year = {2007}, | ||
doi = {https://doi.org/10.1109/MCSE.2007.55}, | ||
journal = {Computing in Science \& Engineering}, | ||
number = {3}, | ||
pages = {90--95}, | ||
title = {Matplotlib: A 2D graphics environment}, | ||
volume = {9}, | ||
} | ||
|
||
@article{numpy, | ||
author = {Harris, Charles R. and Millman, K. Jarrod and van der Walt, Stéfan J. and Gommers, Ralf and Virtanen, Pauli and Cournapeau, David and Wieser, Eric and Taylor, Julian and Berg, Sebastian and Smith, Nathaniel J. and Kern, Robert and Picus, Matti and Hoyer, Stephan and van Kerkwijk, Marten H. and Brett, Matthew and Haldane, Allan and del Río, Jaime Fernández and Wiebe, Mark and Peterson, Pearu and Gérard-Marchant, Pierre and Sheppard, Kevin and Reddy, Tyler and Weckesser, Warren and Abbasi, Hameer and Gohlke, Christoph and Oliphant, Travis E.}, | ||
publisher = {Springer Science and Business Media {LLC}}, | ||
doi = {https://doi.org/10.1038/s41586-020-2649-2}, | ||
date = {2020-09}, | ||
year = {2020}, | ||
journal = {Nature}, | ||
number = {7825}, | ||
pages = {357--362}, | ||
title = {Array programming with {NumPy}}, | ||
volume = {585}, | ||
} | ||
|
||
@misc{pandas1, | ||
author = {{The Pandas Development Team}}, | ||
title = {pandas-dev/pandas: Pandas}, | ||
month = feb, | ||
year = {2020}, | ||
publisher = {Zenodo}, | ||
version = {latest}, | ||
url = {https://doi.org/10.5281/zenodo.3509134}, | ||
} | ||
|
||
@inproceedings{pandas2, | ||
author = {Wes McKinney}, | ||
title = {{D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython}, | ||
booktitle = {{P}roceedings of the 9th {P}ython in {S}cience {C}onference}, | ||
pages = {56 - 61}, | ||
year = {2010}, | ||
editor = {{S}t\'efan van der {W}alt and {J}arrod {M}illman}, | ||
doi = {https://doi.org/10.25080/Majora-92bf1922-00a}, | ||
} | ||
|
||
@article{scipy, | ||
author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and | ||
Haberland, Matt and Reddy, Tyler and Cournapeau, David and | ||
Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and | ||
Bright, Jonathan and {van der Walt}, St{\'e}fan J. and | ||
Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and | ||
Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and | ||
Kern, Robert and Larson, Eric and Carey, C J and | ||
Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and | ||
{VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and | ||
Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and | ||
Harris, Charles R. and Archibald, Anne M. and | ||
Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and | ||
{van Mulbregt}, Paul and {SciPy 1.0 Contributors}}, | ||
title = {{{SciPy} 1.0: Fundamental Algorithms for Scientific | ||
Computing in Python}}, | ||
journal = {Nature Methods}, | ||
year = {2020}, | ||
volume = {17}, | ||
pages = {261--272}, | ||
adsurl = {https://rdcu.be/b08Wh}, | ||
doi = {https://doi.org/10.1038/s41592-019-0686-2}, | ||
} | ||
|
||
@article{sklearn1, | ||
author = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, | ||
year = {2011}, | ||
journal = {Journal of Machine Learning Research}, | ||
pages = {2825--2830}, | ||
title = {Scikit-learn: Machine Learning in {P}ython}, | ||
volume = {12}, | ||
} | ||
|
||
@inproceedings{sklearn2, | ||
author = {Buitinck, Lars and Louppe, Gilles and Blondel, Mathieu and Pedregosa, Fabian and Mueller, Andreas and Grisel, Olivier and Niculae, Vlad and Prettenhofer, Peter and Gramfort, Alexandre and Grobler, Jaques and Layton, Robert and VanderPlas, Jake and Joly, Arnaud and Holt, Brian and Varoquaux, Gaël}, | ||
booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning}, | ||
year = {2013}, | ||
pages = {108--122}, | ||
title = {{API} design for machine learning software: experiences from the scikit-learn project}, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
version: 1 | ||
project: | ||
# Update this to match `scipy-2024-<folder>` the folder should be `<firstname_surname>` | ||
id: scipy-2024-atharva_rasane | ||
# Ensure your title is the same as in your `main.md` | ||
title: AI driven Watermarking Technique for Safeguarding Text Integrity in the Digital Age | ||
subtitle: | ||
# Authors should have affiliations, emails and ORCIDs if available | ||
authors: | ||
- name: Atharva Rasane | ||
email: [email protected] | ||
affiliations: | ||
- KLE Technology University | ||
keywords: | ||
- physical watermark | ||
- logical watermark | ||
- HuggingFace Transformer Library | ||
- BERT | ||
# Add the abbreviations that you use in your paper here | ||
abbreviations: | ||
BERT: Bidirectional Encoder Representations from Transformers | ||
AI: Artificial Intelligence | ||
NLP: Natural Language Processing | ||
NSP: Next Sentence Prediction | ||
GPU: Graphics Processing Unit | ||
T-statistics: Test Statistics | ||
P-value: Probability Value | ||
DRM: Digital Rights Management | ||
CMS: Content Management Systems | ||
# It is possible to explicitly ignore the `doi-exists` check for certain citation keys | ||
error_rules: | ||
- rule: doi-exists | ||
severity: ignore | ||
keys: | ||
- book01 | ||
- book02 | ||
- book03 | ||
- book04 | ||
- book05 | ||
- book06 | ||
- book07 | ||
- book08 | ||
- book09 | ||
- book10 | ||
- Atr01 | ||
- Atr02 | ||
- Atr03 | ||
# A banner will be generated for you on publication, this is a placeholder | ||
banner: banner.png | ||
# The rest of the information shouldn't be modified | ||
subject: Research Article | ||
open_access: true | ||
license: CC-BY-4.0 | ||
venue: Scipy 2024 | ||
date: 2024-07-10 | ||
site: | ||
template: article-theme |