Skip to content

Commit

Permalink
validate netloc with port number
Browse files Browse the repository at this point in the history
  • Loading branch information
naz-theori committed Jun 13, 2024
1 parent 0549988 commit 9530f33
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
11 changes: 7 additions & 4 deletions courlan/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,18 @@
}

# https://github.com/python-validators/validators/blob/master/src/validators/domain.py
-VALID_DOMAIN = re.compile(
+VALID_DOMAIN_PORT = re.compile(
     # First character of the domain
     r"^(?:[a-zA-Z0-9]"
     # Sub domain + hostname
     + r"(?:[a-zA-Z0-9-_]{0,61}[A-Za-z0-9])?\.)"
     # First 61 characters of the gTLD
     + r"+[A-Za-z0-9][A-Za-z0-9-_]{0,61}"
     # Last character of the gTLD
-    + r"[A-Za-z]$",
+    + r"[A-Za-z]"
+    # Port number
+    + r"(\:(6553[0-5]|655[0-2][0-9]|65[0-4][0-9]{2}|"
+    + r"6[0-4][0-9]{3}|[1-5][0-9]{4}|[1-9][0-9]{0,3}))?$",
     re.IGNORECASE,
 )

Expand Down Expand Up @@ -151,9 +154,9 @@ def domain_filter(domain: str) -> bool:
return True

     # malformed domains
-    if not VALID_DOMAIN.match(domain):
+    if not VALID_DOMAIN_PORT.match(domain):
         try:
-            if not VALID_DOMAIN.match(domain.encode("idna").decode("utf-8")):
+            if not VALID_DOMAIN_PORT.match(domain.encode("idna").decode("utf-8")):
                 return False
         except UnicodeError:
             return False
Expand Down
4 changes: 4 additions & 0 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,10 @@ def test_urlcheck():
assert check_url("http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]") is None
assert check_url("http://1:2:3:4:5:6:7:8:9") is None

+    # port
+    assert check_url("http://example.com:80") is not None
+    assert check_url("http://example.com:80:80") is None


def test_domain_filter():
"Test filters related to domain and hostnames."
Expand Down

0 comments on commit 9530f33

Please sign in to comment.