From 1bdeaf65c328dce2ec6396b9af6ec621ae6954fe Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Sat, 14 Feb 2026 01:05:53 +0530 Subject: [PATCH] Percent-encode pipe character in URL paths The | character was not being percent-encoded in path segments, even though it is not a valid pchar per RFC 3986. This caused issues with servers that reject or redirect URLs with unencoded pipes. Now | gets encoded as %7C in paths, matching the behavior of Python's urllib and the requests library. --- httpx/_urlparse.py | 7 +++++-- tests/models/test_url.py | 7 +++++++ tests/models/test_whatwg.py | 6 ++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index bf190fd560..9fadacd8de 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -51,12 +51,15 @@ ) # The path percent-encode set is the query percent-encode set -# and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}). +# and U+003F (?), U+0060 (`), U+007B ({), U+007C (|), and U+007D (}). +# We include U+007C (|) in the encode set to align with RFC 3986 and +# Python's stdlib, since | is not a valid pchar and can cause servers +# to issue redirects or reject requests when left unencoded. PATH_SAFE = "".join( [ chr(i) for i in range(0x20, 0x7F) - if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E) + (0x3F, 0x60, 0x7B, 0x7D) + if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E) + (0x3F, 0x60, 0x7B, 0x7C, 0x7D) ] ) diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 03072e8f5c..0877b4a5c5 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -140,6 +140,13 @@ def test_path_query_fragment(url, raw_path, path, query, fragment): assert url.fragment == fragment +def test_url_pipe_encoding_in_path(): + # The pipe character should be percent-encoded in paths per RFC 3986. 
+ url = httpx.URL("https://example.com/path|segment") + assert url.raw_path == b"/path%7Csegment" + assert url.path == "/path|segment" + + def test_url_query_encoding(): url = httpx.URL("https://www.example.com/?a=b c&d=e/f") assert url.raw_path == b"/?a=b%20c&d=e/f" diff --git a/tests/models/test_whatwg.py b/tests/models/test_whatwg.py index 14af682586..22d113a0e8 100644 --- a/tests/models/test_whatwg.py +++ b/tests/models/test_whatwg.py @@ -27,6 +27,12 @@ def test_urlparse(test_case): # Anyone know what's going on here? return + # We percent-encode "|" in paths (unlike WHATWG), to align with RFC 3986 + # and Python's stdlib. The pipe character is not a valid pchar and some + # servers reject or redirect URLs containing an unencoded "|". + if "|" in test_case.get("pathname", ""): + return + p = urlparse(test_case["href"]) # Test cases include the protocol with the trailing ":"