"""Gradio app: translate modern Chinese into Classical Chinese, sentence by sentence."""
import os
import re

import gradio as gr
from huggingface_hub import InferenceClient

# Fine-tuned Xunzi-Qwen2 model for modern-to-ancient Chinese text generation.
hugging_face_model_path = "cofeg/Finetuned-Xunzi-Qwen2-1.5B-for-ancient-text-generation"
client = InferenceClient(model=hugging_face_model_path, token=os.getenv('HUGGING_FACE_TOKEN'))

# Sentinel token emitted by the model at the end of a generation; filtered from output.
END_OF_TEXT = "<|endoftext|>"


def split_and_generate(modern_text):
    """Translate *modern_text* into Classical Chinese, yielding partial results.

    The model was fine-tuned on sentence pairs, so the input is split on
    Chinese terminal punctuation (。!?) and translated one sentence at a
    time. Yields the accumulated translation after each streamed token so
    the Gradio UI can update live.

    :param modern_text: modern Chinese text to translate
    :yields: the translation produced so far (a growing string)
    """
    # Each match keeps the sentence together with its terminal punctuation.
    sentences = re.findall(r'[^。!?]*[。!?]', modern_text)
    # Bug fix: text after the last terminal punctuation mark used to be
    # silently dropped. The findall matches are contiguous from the start
    # of the string, so everything past their combined length is the tail.
    tail = modern_text[len("".join(sentences)):]
    if tail.strip():
        sentences.append(tail)
    # If no sentences were found at all, treat the entire input as one sentence.
    if not sentences:
        sentences = [modern_text]

    responses = ""
    for sentence in sentences:
        # `prompt` (not `input`) to avoid shadowing the builtin.
        prompt = "现代文:" + sentence + " 古文:"
        for token in client.text_generation(prompt, max_new_tokens=128, stream=True):
            if token != END_OF_TEXT:
                responses += token
                yield responses


demo = gr.Interface(
    fn=split_and_generate,
    inputs=[gr.Textbox(label="现代文", lines=10)],
    outputs=[gr.Textbox(label="古文", lines=10)],
    title="现代文转古文大模型",
    description="请在左边对话框输入你要转换的现代文并点击“Submit”按钮,右边的对话框将显示转换后的古文。首次使用较慢,后面很快。\n一个句子不要太长,如果文本很长,可多分几个句子,模型会逐句转化。\n详情请访问本项目[GitHub主页](https://github.com/JianXiao2021/ancient_text_generation_LLM)。",
)
demo.launch()