import os
from huggingface_hub import InferenceClient
import gradio as gr
import re
# Hub repository id that is handed to the inference client as its model.
hugging_face_model_path = "cofeg/Finetuned-Xunzi-Qwen2-1.5B-for-ancient-text-generation"

# Shared inference client used by the request handler below. The access token
# is read from the HUGGING_FACE_TOKEN environment variable; it is None when
# the variable is not set.
client = InferenceClient(
    token=os.environ.get('HUGGING_FACE_TOKEN'),
    model=hugging_face_model_path,
)
# A sentence is a run of non-terminator characters followed by one or more
# terminators, or a trailing run with no terminator at all. Terminators are the
# ideographic full stop, ASCII "!" / "?", and their full-width forms
# (U+FF01 / U+FF1F, written as escapes so the pattern survives normalisation).
_SENTENCE_PATTERN = re.compile(
    r"[^。!?\uff01\uff1f]*[。!?\uff01\uff1f]+|[^。!?\uff01\uff1f]+"
)

# Marker token that is filtered out of the streamed output.
_END_OF_TEXT = "<|endoftext|>"


def _split_sentences(text):
    """Return the non-blank sentences of *text*, each stripped of whitespace."""
    fragments = (piece.strip() for piece in _SENTENCE_PATTERN.findall(text))
    return [piece for piece in fragments if piece]


def split_and_generate(modern_text):
    """Convert modern text sentence by sentence, streaming the result.

    The input is split into sentences and each sentence is sent to the
    module-level ``client`` as its own prompt. Trailing text that has no
    terminating punctuation is kept as a final sentence instead of being
    dropped, and consecutive terminators (e.g. "?!") stay attached to the
    sentence they end.

    Args:
        modern_text: The text to convert. ``None``, empty, or whitespace-only
            input produces a single empty string without calling the client.

    Yields:
        The accumulated output for all sentences so far, re-yielded each time
        a new token arrives so the caller can display partial progress.
    """
    sentences = _split_sentences(modern_text or "")
    if not sentences:
        # Nothing to translate: emit an empty result rather than sending a
        # blank prompt to the inference endpoint.
        yield ""
        return

    responses = ""
    for sentence in sentences:
        # Prompt layout is kept exactly as before; each sentence is sent on
        # its own.
        prompt = "现代文:" + sentence + " 古文:"
        for token in client.text_generation(prompt, max_new_tokens=128, stream=True):
            if token != _END_OF_TEXT:
                responses += token
                yield responses
# Web UI: one input textbox for the modern text and one output textbox for the
# generated text, wired to split_and_generate (a generator, so it yields
# progressively longer partial results).
demo = gr.Interface(
    fn=split_and_generate,
    inputs=[gr.Textbox(label="现代文", lines=10)],
    outputs=[gr.Textbox(label="古文", lines=10)],
    title="现代文转古文大模型",
    # A plain double-quoted literal cannot span physical lines, so the
    # description is assembled from adjacent literals with explicit newlines
    # where the original line breaks were.
    description=(
        "请在左边对话框输入你要转换的现代文并点击“Submit”按钮,右边的对话框将显示转换后的古文。首次使用较慢,后面很快。\n"
        "一个句子不要太长,如果文本很长,可多分几个句子,模型会逐句转化。\n"
        "详情请访问本项目[GitHub主页](https://github.com/JianXiao2021/ancient_text_generation_LLM)。"
    ),
)

# Start the Gradio server as soon as the module is executed.
demo.launch()