Spaces:

Steveeeeeeen
/

Open_ASR_Leaderboard

Running

App Files Files Community

Open_ASR_Leaderboard/.venv/lib/python3.10/site-packages/mdurl/_decode.py

Steveeeeeeen HF staff

Upload folder using huggingface_hub

9f0d781 verified 21 days ago

raw

history blame

3 kB

	from __future__ import annotations

	from collections.abc import Sequence
	import functools
	import re

	# Default value of decode()'s `exclude` argument: escapes of these ASCII
	# characters are kept percent-encoded instead of being decoded.
	DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
	# Empty exclusion set: passing it as `exclude` keeps no character encoded.
	# NOTE(review): presumably meant for decoding a single URL component; it is
	# not referenced elsewhere in this module - confirm against callers.
	DECODE_COMPONENT_CHARS = ""

	# Memo of the lookup tables built by get_decode_cache(), keyed by the
	# `exclude` string each table was built for.
	decode_cache: dict[str, list[str]] = {}


	def get_decode_cache(exclude: str) -> Sequence[str]:
	    """Return the ASCII lookup table used to decode single-byte escapes.

	    The table has 128 entries. Entry ``i`` is ``chr(i)``, except for the
	    ASCII characters listed in ``exclude``: their entry is the character's
	    own percent-encoded form with uppercase hex digits (e.g. ``"%2F"`` for
	    ``"/"``), so decoding leaves them encoded.

	    Characters in ``exclude`` whose code point is 128 or above are ignored:
	    the table only has slots for code points 0-127, so there is nothing to
	    override for them.

	    Tables are memoized in ``decode_cache``; repeated calls with the same
	    ``exclude`` return the same list object, which callers must not mutate.
	    """
	    cached = decode_cache.get(exclude)
	    if cached is not None:
	        return cached

	    cache = [chr(code) for code in range(128)]
	    for ch in exclude:
	        ch_code = ord(ch)
	        # Guard against non-ASCII input, which has no slot in the table and
	        # would otherwise raise IndexError.
	        if ch_code < 128:
	            cache[ch_code] = f"%{ch_code:02X}"

	    # Register the table only once it is complete, so a half-built table can
	    # never be served from the memo.
	    decode_cache[exclude] = cache
	    return cache


	def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
	    """Decode the percent-encoded escapes in ``string``.

	    Every run of consecutive ``%XX`` escapes (hex digits in either case) is
	    handed to ``repl_func_with_cache`` and replaced by its result. Escapes
	    of ASCII characters listed in ``exclude`` stay percent-encoded, written
	    with uppercase hex digits. Text outside the escape runs is returned
	    unchanged.
	    """
	    lookup = get_decode_cache(exclude)

	    def _replace(found: re.Match) -> str:
	        # Bind the lookup table for this call's `exclude` set.
	        return repl_func_with_cache(found, cache=lookup)

	    escape_runs = re.compile(r"(%[a-f0-9]{2})+", flags=re.IGNORECASE)
	    return escape_runs.sub(_replace, string)


	def _utf8_sequence_length(lead: int) -> int:
	    """Return the UTF-8 sequence length announced by ``lead``, or 0 if none."""
	    if (lead & 0xE0) == 0xC0:  # 110xxxxx
	        return 2
	    if (lead & 0xF0) == 0xE0:  # 1110xxxx
	        return 3
	    if (lead & 0xF8) == 0xF0:  # 11110xxx
	        return 4
	    return 0


	def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
	    """Decode one matched run of ``%XX`` escapes into text.

	    Parameters:
	        match: A match whose whole text is a run of three-character
	            ``%XX`` escapes, as produced by the pattern used in ``decode``.
	        cache: Lookup table indexed by byte value for bytes below 0x80;
	            its entry is emitted verbatim for each such byte.

	    Returns:
	        The decoded text. Bytes below 0x80 are replaced by ``cache[byte]``.
	        A lead byte followed by the right number of continuation bytes
	        (``10xxxxxx``) is decoded as UTF-8; if that decoding fails, one
	        U+FFFD is emitted per byte of the sequence. Any other byte (stray
	        continuation byte, truncated sequence, invalid lead byte) yields a
	        single U+FFFD and scanning resumes at the next escape.
	    """
	    seq = match.group()
	    # Each escape is exactly three characters; turn the run into byte values.
	    octets = [int(seq[k + 1 : k + 3], 16) for k in range(0, len(seq), 3)]
	    total = len(octets)

	    pieces: list[str] = []
	    pos = 0
	    while pos < total:
	        lead = octets[pos]

	        if lead < 0x80:
	            pieces.append(cache[lead])
	            pos += 1
	            continue

	        width = _utf8_sequence_length(lead)
	        # Only attempt a multi-byte decode when the whole sequence fits in
	        # the run and every trailing byte is a continuation byte.
	        if width and pos + width <= total:
	            chunk = bytes(octets[pos : pos + width])
	            if all((byte & 0xC0) == 0x80 for byte in chunk[1:]):
	                try:
	                    pieces.append(chunk.decode())
	                except UnicodeDecodeError:
	                    # Well-formed shape but not valid UTF-8 (decode() raised):
	                    # emit one replacement character per consumed byte.
	                    pieces.append("\ufffd" * width)
	                pos += width
	                continue

	        # Not decodable as the start of a sequence: replace this byte only.
	        pieces.append("\ufffd")
	        pos += 1

	    return "".join(pieces)