Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions httpx/_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,15 @@
)

# The path percent-encode set is the query percent-encode set
# and U+003F (?), U+0060 (`), U+007B ({), U+007C (|), and U+007D (}).
# We include U+007C (|) in the encode set to align with RFC 3986 and
# Python's stdlib, since | is not a valid pchar and can cause servers
# to issue redirects or reject requests when left unencoded.
#
# PATH_SAFE is the complement of that encode set within printable ASCII
# (0x20-0x7E): every character listed here is *not* in the encode set.
PATH_SAFE = "".join(
    [
        chr(i)
        for i in range(0x20, 0x7F)
        # First tuple: the query percent-encode set (space, ", #, <, >).
        # Second tuple: the path-specific additions (?, `, {, |, }).
        if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E) + (0x3F, 0x60, 0x7B, 0x7C, 0x7D)
    ]
)

Expand Down
7 changes: 7 additions & 0 deletions tests/models/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ def test_path_query_fragment(url, raw_path, path, query, fragment):
assert url.fragment == fragment


def test_url_pipe_encoding_in_path():
    """Check that a literal "|" in a URL path is percent-encoded on the wire."""
    # The pipe character should be percent-encoded in paths per RFC 3986.
    url = httpx.URL("https://example.com/path|segment")
    # The raw (encoded) path carries the escape sequence for U+007C...
    assert url.raw_path == b"/path%7Csegment"
    # ...while the decoded path still exposes the original character.
    assert url.path == "/path|segment"


def test_url_query_encoding():
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
assert url.raw_path == b"/?a=b%20c&d=e/f"
Expand Down
6 changes: 6 additions & 0 deletions tests/models/test_whatwg.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def test_urlparse(test_case):
# Anyone know what's going on here?
return

# We percent-encode "|" in paths (unlike WHATWG), to align with RFC 3986
# and Python's stdlib. The pipe character is not a valid pchar and some
# servers reject or redirect URLs containing an unencoded "|".
if "|" in test_case.get("pathname", ""):
return

p = urlparse(test_case["href"])

# Test cases include the protocol with the trailing ":"
Expand Down