gpt-academic / multi_language.py
qingxu99's picture
rt
8d52819
raw
history blame
7.02 kB
import os
import functools
import re
import pickle
import time
CACHE_FOLDER = "gpt_log"
if not os.path.exists(CACHE_FOLDER):
os.makedirs(CACHE_FOLDER)
def lru_file_cache(maxsize=128, ttl=None, filename=None):
"""
Decorator that caches a function's return value after being called with given arguments.
It uses a Least Recently Used (LRU) cache strategy to limit the size of the cache.
maxsize: Maximum size of the cache. Defaults to 128.
ttl: Time-to-Live of the cache. If a value hasn't been accessed for `ttl` seconds, it will be evicted from the cache.
filename: Name of the file to store the cache in. If not supplied, the function name + ".cache" will be used.
"""
cache_path = os.path.join(CACHE_FOLDER, f"{filename}.cache") if filename is not None else None
def decorator_function(func):
cache = {}
_cache_info = {
"hits": 0,
"misses": 0,
"maxsize": maxsize,
"currsize": 0,
"ttl": ttl,
"filename": cache_path,
}
@functools.wraps(func)
def wrapper_function(*args, **kwargs):
key = str((args, frozenset(kwargs)))
if key in cache:
if _cache_info["ttl"] is None or (cache[key][1] + _cache_info["ttl"]) >= time.time():
_cache_info["hits"] += 1
print(f'Warning, reading cache, last read {(time.time()-cache[key][1])//60} minutes ago'); time.sleep(2)
cache[key][1] = time.time()
return cache[key][0]
else:
del cache[key]
result = func(*args, **kwargs)
cache[key] = [result, time.time()]
_cache_info["misses"] += 1
_cache_info["currsize"] += 1
if _cache_info["currsize"] > _cache_info["maxsize"]:
oldest_key = None
for k in cache:
if oldest_key is None:
oldest_key = k
elif cache[k][1] < cache[oldest_key][1]:
oldest_key = k
del cache[oldest_key]
_cache_info["currsize"] -= 1
if cache_path is not None:
with open(cache_path, "wb") as f:
pickle.dump(cache, f)
return result
def cache_info():
return _cache_info
wrapper_function.cache_info = cache_info
if cache_path is not None and os.path.exists(cache_path):
with open(cache_path, "rb") as f:
cache = pickle.load(f)
_cache_info["currsize"] = len(cache)
return wrapper_function
return decorator_function
def contains_chinese(string):
"""
Returns True if the given string contains Chinese characters, False otherwise.
"""
chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
return chinese_regex.search(string) is not None
def extract_chinese_characters(file_path):
syntax = []
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
import ast
root = ast.parse(content)
for node in ast.walk(root):
if isinstance(node, ast.Name):
if contains_chinese(node.id):
print(node.id)
syntax.append(node)
return syntax
def extract_chinese_characters_from_directory(directory_path):
chinese_characters = []
for root, dirs, files in os.walk(directory_path):
for file in files:
if file.endswith('.py'):
file_path = os.path.join(root, file)
chinese_characters.extend(extract_chinese_characters(file_path))
return chinese_characters
directory_path = './'
chinese_characters = extract_chinese_characters_from_directory(directory_path)
word_to_translate = {}
for d in chinese_characters:
word_to_translate[d['word']] = "TRANS"
def break_dictionary(d, n):
items = list(d.items())
num_dicts = (len(items) + n - 1) // n
return [{k: v for k, v in items[i*n:(i+1)*n]} for i in range(num_dicts)]
N_EACH_REQ = 50
word_to_translate_split = break_dictionary(word_to_translate, N_EACH_REQ)
LANG = "English"
@lru_file_cache(maxsize=10, ttl=1e40, filename="translation_cache")
def trans(words):
# from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
# from toolbox import get_conf, ChatBotWithCookies
# proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
# get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
# llm_kwargs = {
# 'api_key': API_KEY,
# 'llm_model': LLM_MODEL,
# 'top_p':1.0,
# 'max_length': None,
# 'temperature':0.0,
# }
# plugin_kwargs = {}
# chatbot = ChatBotWithCookies(llm_kwargs)
# history = []
# for gpt_say in request_gpt_model_in_new_thread_with_ui_alive(
# inputs=words, inputs_show_user=words,
# llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
# sys_prompt=f"Translate following words to {LANG}, replace `TRANS` with translated result."
# ):
# gpt_say = gpt_say[1][0][1]
# return gpt_say
return '{}'
translated_result = {}
for d in word_to_translate_split:
res = trans(str(d))
try:
# convert translated result back to python dictionary
res_dict = eval(res)
except:
print('Unexpected output.')
translated_result.update(res_dict)
print('All Chinese characters:', chinese_characters)
# =================== create copy =====================
def copy_source_code():
"""
一键更新协议:备份和下载
"""
from toolbox import get_conf
import shutil
import os
import requests
import zipfile
try: shutil.rmtree(f'./multi-language/{LANG}/')
except: pass
os.makedirs(f'./multi-language', exist_ok=True)
backup_dir = f'./multi-language/{LANG}/'
shutil.copytree('./', backup_dir, ignore=lambda x, y: ['multi-language', 'gpt_log', '.git', 'private_upload'])
copy_source_code()
for d in chinese_characters:
d['file'] = f'./multi-language/{LANG}/' + d['file']
if d['word'] in translated_result:
d['trans'] = translated_result[d['word']]
else:
d['trans'] = None
chinese_characters = sorted(chinese_characters, key=lambda x: len(x['word']), reverse=True)
for d in chinese_characters:
if d['trans'] is None:
continue
with open(d['file'], 'r', encoding='utf-8') as f:
content = f.read()
content.replace(d['word'], d['trans'])
substring = d['trans']
substring_start_index = content.find(substring)
substring_end_index = substring_start_index + len(substring) - 1
if content[substring_start_index].isalpha() or content[substring_start_index].isdigit():
content = content[:substring_start_index+1]