From fa30f8d8e48aeb4e35f101185953ba5c70e336ec Mon Sep 17 00:00:00 2001 From: Adrien Barbaresi Date: Fri, 19 Feb 2021 18:28:04 +0100 Subject: [PATCH] version bump --- HISTORY.md | 5 +++++ README.rst | 10 ++++++++++ courlan/__init__.py | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 4c1aa18..ba39103 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,11 @@ ## History / Changelog +### 0.3.1 + +- improve filter precision + + ### 0.3.0 - reduced dependencies: replace requests with bare urllib3, and tldextract with tld for Python 3.6 upwards diff --git a/README.rst b/README.rst index 86f249f..c1b0664 100644 --- a/README.rst +++ b/README.rst @@ -229,3 +229,13 @@ This effort is part of methods to derive information from web documents in order - Barbaresi, A. "`Efficient construction of metadata-enhanced web corpora `_", Proceedings of the `10th Web as Corpus Workshop (WAC-X) `_, 2016. Contact: see `homepage `_ or `GitHub `_. + + +Similar work +------------ + +These Python libraries perform similar normalization tasks but don't entail language or content filters: + +- `ural `_ +- `urlnorm `_ +- `yarl `_ diff --git a/courlan/__init__.py b/courlan/__init__.py index 9ecad81..7a97396 100644 --- a/courlan/__init__.py +++ b/courlan/__init__.py @@ -8,7 +8,7 @@ __author__ = 'Adrien Barbaresi' __license__ = 'GNU GPL v3+' __copyright__ = 'Copyright 2020-2021, Adrien Barbaresi' -__version__ = '0.3.0' +__version__ = '0.3.1' # imports