From aa2af00a7b44edad8b43a082fc0eafd19c1e7e96 Mon Sep 17 00:00:00 2001 From: Adrien Barbaresi Date: Thu, 27 Aug 2020 19:41:19 +0200 Subject: [PATCH] first version of the package --- HISTORY.md | 8 +++++--- MANIFEST.in | 4 +++- setup.py | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index a22fa55..05d0554 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,8 @@ ### 0.1.0 -- first packaged version -- basic URL cleaning and filtering -- sampling by domain name \ No newline at end of file + +- Cleaning and filtering targeting non-spam HTML pages with primarily text +- URL validation +- Sampling by domain name +- Command-line interface (CLI) and Python tool diff --git a/MANIFEST.in b/MANIFEST.in index 622bbd9..32392a3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,4 @@ include CONTRIBUTING.md HISTORY.md README.rst LICENSE tox.ini -include tests/unit_tests.py \ No newline at end of file +include tests/unit_tests.py +include courlan_harns-march.jpg +graft helpers/ diff --git a/setup.py b/setup.py index d23b287..c1012e8 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def readme(): long_description=readme(), classifiers=[ # As from http://pypi.python.org/pypi?%3Aaction=list_classifiers - 'Development Status :: 2 - Pre-Alpha' + 'Development Status :: 2 - Pre-Alpha', #'Development Status :: 3 - Alpha', #'Development Status :: 4 - Beta', #'Development Status :: 5 - Production/Stable',