fix: sanitize_url crashes on Python 3.14 strict urlparse (#284)
Python 3.14's urlparse() raises ValueError on URLs with unencoded brackets that look like malformed IPv6 (e.g. `http://[fdaa:x:x:x::x`, found in the llms-full.txt served by docs.openclaw.ai). sanitize_url() called urlparse() BEFORE encoding brackets, so it crashed before it could fix them. Fix: catch ValueError from urlparse, encode ALL brackets, then retry. This is safe because if urlparse rejected the brackets, they are NOT valid IPv6 host literals and should be encoded anyway. Also fixed the Discord e2e tests to skip gracefully on network issues. Fixes #284. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -499,6 +499,10 @@ def sanitize_url(url: str) -> str:
|
||||
such as *httpx* and *urllib3* interpret them as IPv6 address markers and
|
||||
raise ``Invalid IPv6 URL``.
|
||||
|
||||
Python 3.14+ also raises ``ValueError: Invalid IPv6 URL`` from
|
||||
``urlparse()`` itself when brackets appear in the URL, so we must
|
||||
fall back to encoding every bracket whenever ``urlparse`` rejects it.
|
||||
|
||||
This function encodes **only** the path and query — the scheme, host,
|
||||
and fragment are left untouched.
|
||||
|
||||
@@ -508,6 +512,7 @@ def sanitize_url(url: str) -> str:
|
||||
Returns:
|
||||
The URL with ``[`` → ``%5B`` and ``]`` → ``%5D`` in its path/query,
|
||||
or the original URL unchanged when no brackets are present.
|
||||
If the URL is still rejected after encoding, returns the bracket-encoded URL.
|
||||
|
||||
Examples:
|
||||
>>> sanitize_url("https://example.com/api/[v1]/users")
|
||||
@@ -518,9 +523,30 @@ def sanitize_url(url: str) -> str:
|
||||
if "[" not in url and "]" not in url:
|
||||
return url
|
||||
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
# Python 3.14 raises ValueError from urlparse on unencoded brackets
|
||||
# because it tries to parse them as an IPv6 host literal.
|
||||
# We therefore try urlparse first, so that brackets in a legitimate
|
||||
# IPv6 host literal like [::1] are preserved, and fall back on error.
|
||||
try:
|
||||
# Try urlparse first — works if brackets are in a valid position
|
||||
# (e.g., legitimate IPv6 host)
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
parsed = urlparse(url)
|
||||
encoded_path = parsed.path.replace("[", "%5B").replace("]", "%5D")
|
||||
encoded_query = parsed.query.replace("[", "%5B").replace("]", "%5D")
|
||||
return urlunparse(parsed._replace(path=encoded_path, query=encoded_query))
|
||||
parsed = urlparse(url)
|
||||
encoded_path = parsed.path.replace("[", "%5B").replace("]", "%5D")
|
||||
encoded_query = parsed.query.replace("[", "%5B").replace("]", "%5D")
|
||||
return urlunparse(parsed._replace(path=encoded_path, query=encoded_query))
|
||||
except ValueError:
|
||||
# urlparse rejected the URL (Python 3.14+ strict IPv6 validation).
|
||||
# Encode ALL brackets and try again. This is safe because if
|
||||
# urlparse failed, the brackets are NOT valid IPv6 host literals.
|
||||
pre_encoded = url.replace("[", "%5B").replace("]", "%5D")
|
||||
try:
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
parsed = urlparse(pre_encoded)
|
||||
return urlunparse(parsed)
|
||||
except ValueError:
|
||||
# URL is fundamentally malformed — return the pre-encoded
|
||||
# version which is at least safe for HTTP libraries.
|
||||
return pre_encoded
|
||||
|
||||
Reference in New Issue
Block a user