diff --git a/CHANGES.rst b/CHANGES.rst index e5dd4813f..d2e92be3f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -105,7 +105,7 @@ Bug fixes --------- - Stopped dropping trailing slashes in :py:meth:`~yarl.URL.joinpath` -- by :user:`gmacon`. (:issue:`862`, :issue:`866`) -- Started accepting string subclasses in ``__truediv__()`` operations (``URL / segment``) -- by :user:`mjpieters`. (:issue:`871`, :issue:`884`) +- Started accepting string subclasses in :meth:`~yarl.URL.__truediv__` operations (``URL / segment``) -- by :user:`mjpieters`. (:issue:`871`, :issue:`884`) - Fixed the human representation of URLs with square brackets in usernames and passwords -- by :user:`mjpieters`. (:issue:`876`, :issue:`882`) - Updated type hints to include ``URL.missing_port()``, ``URL.__bytes__()`` and the ``encoding`` argument to :py:meth:`~yarl.URL.joinpath` @@ -174,7 +174,7 @@ Contributor-facing changes Bugfixes -------- -- Fix regression with ``__truediv__`` and absolute URLs with empty paths causing the raw path to lack the leading ``/``. +- Fix regression with :meth:`~yarl.URL.__truediv__` and absolute URLs with empty paths causing the raw path to lack the leading ``/``. (`#854 `_) @@ -196,7 +196,7 @@ Features -------- - Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements. (`#704 `_) -- Made ``URL.__truediv__()`` return ``NotImplemented`` if called with an +- Made :meth:`URL.__truediv__() ` return ``NotImplemented`` if called with an unsupported type — by :user:`michaeljpeters`. (`#832 `_) diff --git a/CHANGES/1026.bugfix.rst b/CHANGES/1026.bugfix.rst new file mode 100644 index 000000000..eeae24c2e --- /dev/null +++ b/CHANGES/1026.bugfix.rst @@ -0,0 +1,21 @@ +Joining URLs with empty segments has been changed +to match :rfc:`3986`. + +Previously empty segments would be removed from path, +breaking use-cases such as + +.. code-block:: python + + URL("https://web.archive.org/web/") / "https://github.com/" + +Now :meth:`/ operation ` and :meth:`URL.joinpath() ` +keep empty segments, but do not introduce new empty segments. +e.g. + +.. code-block:: python + + URL("https://example.org/") / "" + +does not introduce an empty segment. + +-- by :user:`commonism` and :user:`youtux` diff --git a/CHANGES/1026.doc.rst b/CHANGES/1026.doc.rst new file mode 100644 index 000000000..daabece4a --- /dev/null +++ b/CHANGES/1026.doc.rst @@ -0,0 +1,2 @@ +The pre-existing :meth:`/ magic method ` +has been documented in the API reference -- by :user:`commonism`. diff --git a/docs/api.rst b/docs/api.rst index 5a4c32596..e8e6eb53a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -844,6 +844,20 @@ The path is encoded if needed. .. versionadded:: 1.9 +.. method:: URL.__truediv__(url) + + Shortcut for :meth:`URL.joinpath` with a single element and ``encoded=False``. + + .. doctest:: + + >>> url = URL('http://example.com/path?arg#frag') / 'to' + >>> url + URL('http://example.com/path/to') + >>> url.parts + ('/', 'path', 'to') + + .. versionadded:: 0.9 + .. method:: URL.join(url) Construct a full (“absolute”) URL by combining a “base URL” diff --git a/tests/test_url.py b/tests/test_url.py index 725e9465b..a8f1351b2 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -799,6 +799,9 @@ def test_div_with_dots(): pytest.param( "/path/", ("to",), "http://example.com/path/to", id="path-with-slash" ), + pytest.param( + "/path", ("",), "http://example.com/path/", id="path-add-trailing-slash" + ), pytest.param( "/path?a=1#frag", ("to",), @@ -806,6 +809,15 @@ def test_div_with_dots(): id="cleanup-query-and-fragment", ), pytest.param("", ("path/",), "http://example.com/path/", id="trailing-slash"), + pytest.param( + "", + ( + "path", + "", + ), + "http://example.com/path/", + id="trailing-slash-empty-string", + ), pytest.param( "", ("path/", "to/"), "http://example.com/path/to/", id="duplicate-slash" ), @@ -827,6 +839,39 @@ def test_joinpath(base, to_join, expected): assert str(url.joinpath(*to_join)) == expected +@pytest.mark.parametrize( + "base,to_join,expected", + [ + pytest.param("path", "a", "path/a", id="default_default"), + pytest.param("path", "./a", "path/a", id="default_relative"), + pytest.param("path/", "a", "path/a", id="empty-segment_default"), + pytest.param("path/", "./a", "path/a", id="empty-segment_relative"), + pytest.param("path", ".//a", "path//a", id="default_empty-segment"), + pytest.param("path/", ".//a", "path//a", id="empty-segment_empty_segment"), + pytest.param("path//", "a", "path//a", id="empty-segments_default"), + pytest.param("path//", "./a", "path//a", id="empty-segments_relative"), + pytest.param("path//", ".//a", "path///a", id="empty-segments_empty-segment"), + pytest.param("path", "a/", "path/a/", id="default_trailing-empty-segment"), + pytest.param("path", "a//", "path/a//", id="default_trailing-empty-segments"), + pytest.param("path", "a//b", "path/a//b", id="default_embedded-empty-segment"), + ], +) +def test_joinpath_empty_segments(base, to_join, expected): + url = URL(f"http://example.com/{base}") + assert ( + f"http://example.com/{expected}" == str(url.joinpath(to_join)) + and str(url / to_join) == f"http://example.com/{expected}" + ) + + +def test_joinpath_single_empty_segments(): + """joining standalone empty segments does not create empty segments""" + a = URL("/1//2///3") + assert a.parts == ("/", "1", "", "2", "", "", "3") + b = URL("scheme://host").joinpath(*a.parts[1:]) + assert b.path == "/1/2/3" + + @pytest.mark.parametrize( "url,to_join,expected", [ diff --git a/yarl/_url.py b/yarl/_url.py index 9cca27ef8..84b9519bd 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -713,28 +713,34 @@ def _validate_authority_uri_abs_path(host, path): "Path in a URL with authority should start with a slash ('/') if set" ) - def _make_child(self, segments, encoded=False): - """add segments to self._val.path, accounting for absolute vs relative paths""" - # keep the trailing slash if the last segment ends with / - parsed = [""] if segments and segments[-1][-1:] == "/" else [] - for seg in reversed(segments): - if not seg: - continue - if seg[0] == "/": + def _make_child(self, paths, encoded=False): + """ + add paths to self._val.path, accounting for absolute vs relative paths, + keep existing, but do not create new, empty segments + """ + parsed = [] + for idx, path in enumerate(reversed(paths)): + # empty segment of last is not removed + last = idx == 0 + if path and path[0] == "/": raise ValueError( - f"Appending path {seg!r} starting from slash is forbidden" - ) - seg = seg if encoded else self._PATH_QUOTER(seg) - if "/" in seg: - parsed += ( - sub for sub in reversed(seg.split("/")) if sub and sub != "." + f"Appending path {path!r} starting from slash is forbidden" ) - elif seg != ".": - parsed.append(seg) + path = path if encoded else self._PATH_QUOTER(path) + segments = [ + segment for segment in reversed(path.split("/")) if segment != "." + ] + if not segments: + continue + # remove trailing empty segment for all but the last path + segment_slice_start = int(not last and segments[0] == "") + parsed += segments[segment_slice_start:] parsed.reverse() - old_path = self._val.path - if old_path: - parsed = [*old_path.rstrip("/").split("/"), *parsed] + + if self._val.path and (old_path_segments := self._val.path.split("/")): + old_path_cutoff = -1 if old_path_segments[-1] == "" else None + parsed = [*old_path_segments[:old_path_cutoff], *parsed] + if self.is_absolute(): parsed = _normalize_path_segments(parsed) if parsed and parsed[0] != "":