Migrate ExtractResult from namedtuple to dataclass (#306)

john-kurkowski · web-flow · commit e739ff873354 · 2023-10-11T01:06:32.000-07:00
diff --git a/README.md b/README.md
@@ -31,20 +31,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
 ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
 ```
 
-`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
-
-```python
->>> ext = tldextract.extract('http://forums.bbc.co.uk')
->>> (ext.subdomain, ext.domain, ext.suffix)
-('forums', 'bbc', 'co.uk')
->>> # rejoin subdomain and domain
->>> '.'.join(ext[:2])
-'forums.bbc'
->>> # a common alias
->>> ext.registered_domain
-'bbc.co.uk'
-```
-
 Note subdomain and suffix are _optional_. Not all URL-like inputs have a
 subdomain or a valid suffix.
 
@@ -59,17 +45,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
 ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
 ```
 
-If you want to rejoin the whole namedtuple, regardless of whether a subdomain
-or suffix were found:
+To rejoin the original hostname, provided it was a valid, registered hostname:
 
 ```python
->>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
->>> # this has unwanted dots
->>> '.'.join(ext[:3])
-'.127.0.0.1.'
->>> # join each part only if it's truthy
->>> '.'.join(part for part in ext[:3] if part)
-'127.0.0.1'
+>>> ext = tldextract.extract('http://forums.bbc.co.uk')
+>>> ext.registered_domain
+'bbc.co.uk'
+>>> ext.fqdn
+'forums.bbc.co.uk'
 ```
 
 By default, this package supports the public ICANN TLDs and their exceptions.
diff --git a/tests/custom_suffix_test.py b/tests/custom_suffix_test.py
@@ -4,6 +4,7 @@
 import tempfile
 
 import tldextract
+from tldextract.tldextract import ExtractResult
 
 FAKE_SUFFIX_LIST_URL = "file://" + os.path.join(
     os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat"
@@ -27,8 +28,8 @@ def test_private_extraction() -> None:
     """Test this library's uncached, offline, private domain extraction."""
     tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])
 
-    assert tld("foo.blogspot.com") == ("foo", "blogspot", "com", False)
-    assert tld("foo.blogspot.com", include_psl_private_domains=True) == (
+    assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False)
+    assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
         "",
         "foo",
         "blogspot.com",
diff --git a/tests/main_test.py b/tests/main_test.py
@@ -412,20 +412,6 @@ def test_ipv4_lookalike() -> None:
     )
 
 
-def test_result_as_dict() -> None:
-    """Test that the result is a namedtuple."""
-    result = extract(
-        "http://admin:password1@www.google.com:666/secret/admin/interface?param1=42"
-    )
-    expected_dict = {
-        "subdomain": "www",
-        "domain": "google",
-        "suffix": "com",
-        "is_private": False,
-    }
-    assert result._asdict() == expected_dict
-
-
 def test_cache_permission(
     mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
 ) -> None:
diff --git a/tldextract/cli.py b/tldextract/cli.py
@@ -88,5 +88,5 @@ def main() -> None:
         sys.exit(1)
 
     for i in args.input:
-        subdomain, domain, suffix, _ = tld_extract(i)
-        print(f"{subdomain} {domain} {suffix}")
+        ext = tld_extract(i)
+        print(f"{ext.subdomain} {ext.domain} {ext.suffix}")
diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py
@@ -13,18 +13,6 @@
     >>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
     ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
 
-`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
-
-    >>> ext = tldextract.extract('http://forums.bbc.co.uk')
-    >>> (ext.subdomain, ext.domain, ext.suffix)
-    ('forums', 'bbc', 'co.uk')
-    >>> # rejoin subdomain and domain
-    >>> '.'.join(ext[:2])
-    'forums.bbc'
-    >>> # a common alias
-    >>> ext.registered_domain
-    'bbc.co.uk'
-
 Note subdomain and suffix are _optional_. Not all URL-like inputs have a
 subdomain or a valid suffix.
 
@@ -37,16 +25,13 @@
     >>> tldextract.extract('http://127.0.0.1:8080/deployed/')
     ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
 
-If you want to rejoin the whole namedtuple, regardless of whether a subdomain
-or suffix were found:
+To rejoin the original hostname, provided it was a valid, registered hostname:
 
-    >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
-    >>> # this has unwanted dots
-    >>> '.'.join(part for part in ext[:3])
-    '.127.0.0.1.'
-    >>> # join part only if truthy
-    >>> '.'.join(part for part in ext[:3] if part)
-    '127.0.0.1'
+    >>> ext = tldextract.extract('http://forums.bbc.co.uk')
+    >>> ext.registered_domain
+    'bbc.co.uk'
+    >>> ext.fqdn
+    'forums.bbc.co.uk'
 """
 
 from __future__ import annotations
@@ -55,10 +40,8 @@
 import os
 import urllib.parse
 from collections.abc import Collection, Sequence
+from dataclasses import dataclass
 from functools import wraps
-from typing import (
-    NamedTuple,
-)
 
 import idna
 
@@ -77,13 +60,17 @@
 )
 
 
-class ExtractResult(NamedTuple):
-    """namedtuple of a URL's subdomain, domain, suffix, and flag that indicates if URL has private suffix."""
+@dataclass(order=True)
+class ExtractResult:
+    """A URL's extracted subdomain, domain, and suffix.
+
+    Also contains metadata, like a flag that indicates if the URL has a private suffix.
+    """
 
     subdomain: str
     domain: str
     suffix: str
-    is_private: bool = False
+    is_private: bool
 
     @property
     def registered_domain(self) -> str:
@@ -110,7 +97,7 @@ def fqdn(self) -> str:
         ''
         """
         if self.suffix and (self.domain or self.is_private):
-            return ".".join(i for i in self[:3] if i)
+            return ".".join(i for i in (self.subdomain, self.domain, self.suffix) if i)
         return ""
 
     @property
@@ -291,7 +278,7 @@ def _extract_netloc(
             and netloc_with_ascii_dots[-1] == "]"
         ):
             if looks_like_ipv6(netloc_with_ascii_dots[1:-1]):
-                return ExtractResult("", netloc_with_ascii_dots, "")
+                return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)
 
         labels = netloc_with_ascii_dots.split(".")