|
from __future__ import annotations |
|
|
|
from collections.abc import Sequence |
|
from string import ascii_letters, digits, hexdigits |
|
from urllib.parse import quote as encode_uri_component |
|
|
|
# Characters that are never percent-encoded, whatever exclusion set is in use.
ASCII_LETTERS_AND_DIGITS = ascii_letters + digits

# Default ``exclude`` argument of ``encode``: these ASCII characters are
# passed through unencoded in addition to letters and digits.
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
# Narrower exclusion set: the default set without ";/?:@&=+$,#".
# NOTE(review): not referenced in this part of the file; presumably meant for
# encoding a single URI component -- confirm against callers.
ENCODE_COMPONENT_CHARS = "-_.!~*'()"

# Memo of lookup tables built by ``get_encode_cache``, keyed by the
# ``exclude`` string. Each value is a 128-entry list indexed by ASCII code.
encode_cache: dict[str, list[str]] = {}
|
|
|
|
|
|
|
|
|
def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII encoding table for the given exclusion set.

    The table has 128 entries indexed by code point. An entry is the
    character itself when it is an ASCII letter or digit or appears in
    ``exclude``; otherwise it is the upper-case percent escape ``%XX``.

    Tables are memoized in the module-level ``encode_cache`` dict, keyed by
    ``exclude``, so repeated calls with the same string return the same list.

    Characters of ``exclude`` with a code point of 128 or above have no slot
    in the table and are skipped.
    """
    try:
        return encode_cache[exclude]
    except KeyError:
        pass

    # Letters and digits always pass through; everything else starts out as
    # a two-digit, upper-case percent escape.
    cache = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{code:02X}"
        for code, ch in enumerate(map(chr, range(128)))
    ]

    for ch in exclude:
        code = ord(ch)
        # The table only covers ASCII; indexing past it would raise.
        if code < 128:
            cache[code] = ch

    # Register the table only once it is complete, so a failure part-way
    # through can never leave a half-built table behind for later calls.
    encode_cache[exclude] = cache
    return cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode the unsafe characters of ``string``.

    Args:
        string: The text to encode.
        exclude: ASCII characters to leave unencoded, in addition to ASCII
            letters and digits, which are always left as-is.
        keep_escaped: When true, a ``%`` followed by two hexadecimal digits
            is treated as an existing escape sequence and copied unchanged
            instead of having its ``%`` encoded again.

    Returns:
        The encoded string. ASCII characters are mapped through the table
        from ``get_encode_cache(exclude)``; other characters are
        percent-encoded by ``urllib.parse.quote`` (imported here as
        ``encode_uri_component``). A high surrogate directly followed by a
        low surrogate is encoded as the single code point the pair stands
        for, and any other surrogate becomes ``%EF%BF%BD`` (the encoded
        U+FFFD replacement character).
    """
    cache = get_encode_cache(exclude)

    # Collect the pieces and join once instead of growing a string in a loop.
    parts: list[str] = []
    length = len(string)
    i = 0
    while i < length:
        ch = string[i]
        code = ord(ch)

        # 0x25 is "%". ``i + 2 < length`` guarantees both hex digits exist.
        if keep_escaped and code == 0x25 and i + 2 < length:
            if all(c in hexdigits for c in string[i + 1 : i + 3]):
                parts.append(string[i : i + 3])
                i += 3
                continue

        if code < 128:
            parts.append(cache[code])
            i += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            if code <= 0xDBFF and i + 1 < length:
                next_code = ord(string[i + 1])
                if 0xDC00 <= next_code <= 0xDFFF:
                    # Surrogates cannot be encoded as UTF-8 on their own, so
                    # handing the raw pair to ``quote`` raises
                    # UnicodeEncodeError. Combine the pair into the
                    # supplementary code point it represents first.
                    combined = (
                        0x10000 + ((code - 0xD800) << 10) + (next_code - 0xDC00)
                    )
                    parts.append(encode_uri_component(chr(combined)))
                    i += 2
                    continue
            # Unpaired surrogate: substitute the encoded replacement character.
            parts.append("%EF%BF%BD")
            i += 1
            continue

        parts.append(encode_uri_component(ch))
        i += 1

    return "".join(parts)
|
|