diff --git a/HISTORY.md b/HISTORY.md index a22fa55..05d0554 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,8 @@ ### 0.1.0 -- first packaged version -- basic URL cleaning and filtering -- sampling by domain name \ No newline at end of file + +- Cleaning and filtering targeting non-spam HTML pages with primarily text +- URL validation +- Sampling by domain name +- Command-line interface (CLI) and Python tool diff --git a/MANIFEST.in b/MANIFEST.in index 622bbd9..32392a3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,4 @@ include CONTRIBUTING.md HISTORY.md README.rst LICENSE tox.ini -include tests/unit_tests.py \ No newline at end of file +include tests/unit_tests.py +include courlan_harns-march.jpg +graft helpers/ diff --git a/setup.py b/setup.py index d23b287..c1012e8 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def readme(): long_description=readme(), classifiers=[ # As from http://pypi.python.org/pypi?%3Aaction=list_classifiers - 'Development Status :: 2 - Pre-Alpha' + 'Development Status :: 2 - Pre-Alpha', #'Development Status :: 3 - Alpha', #'Development Status :: 4 - Beta', #'Development Status :: 5 - Production/Stable',