apin / web.py_v2
sanbo
update sth. at 2024-11-15 14:22:58
f1b8715
raw
history blame
1.36 kB
import gradio as gr
from huggingface_hub import InferenceClient
# Trailing end-of-turn markers that can leak into the streamed text when the
# backend does not flag them as special tokens.
_STOP_MARKERS = ("<|assistant|>", "</s>", "<|end|>", "<|endoftext|>")


def _strip_stop_markers(text):
    """Return *text* with every trailing stop marker removed."""
    # str.endswith accepts a tuple, so one call checks all markers at once.
    while text.endswith(_STOP_MARKERS):
        for marker in _STOP_MARKERS:
            if text.endswith(marker):
                text = text[: -len(marker)]
                break
    return text


def generate_response(user_prompt, system_instructions="You are a helpful assistant.", *, client=None):
    """Generate a reply to *user_prompt* by streaming from a text-generation client.

    Args:
        user_prompt: The user's question, inserted into the prompt verbatim.
        system_instructions: System text placed at the start of the prompt.
        client: Optional object exposing ``text_generation(prompt, **kwargs)``
            that yields items with a ``token.text`` attribute. When omitted,
            an ``InferenceClient`` for ``microsoft/Phi-3-mini-4k-instruct``
            is created for this call.

    Returns:
        The generated text with special tokens and trailing stop markers
        removed. If anything fails, the exception message is returned as the
        result instead of being raised, so the UI can display it.
    """
    try:
        if client is None:
            client = InferenceClient("microsoft/Phi-3-mini-4k-instruct")
        # NOTE(review): this bracketed layout is not the chat format shown on
        # the Phi-3 model card; kept as-is because changing it alters output.
        formatted_prompt = f"[SYSTEM] {system_instructions}[QUESTION]{user_prompt}[ANSWER]"
        stream = client.text_generation(
            formatted_prompt,
            max_new_tokens=100,
            do_sample=True,
            stream=True,
            details=True,
            return_full_text=False,
        )
        # Skip tokens the backend marks as special (end-of-turn and similar)
        # so they never reach the user; tokens without the flag are kept.
        pieces = [
            response.token.text
            for response in stream
            if not getattr(response.token, "special", False)
        ]
        return _strip_stop_markers("".join(pieces))
    except Exception as e:
        # Deliberate best-effort: surface the failure text in the output box.
        return str(e)
# Build the Gradio interface: a prompt box, a trigger button, and a box that
# shows the generated text. Components render in the order they are created.
with gr.Blocks() as demo:
    gr.Markdown("## AI Text Generation")
    user_input = gr.Textbox(label="Enter your prompt")
    generate_button = gr.Button("Generate")
    output_display = gr.Textbox(label="Generated Response")

    # Clicking the button sends the prompt text to generate_response and
    # writes its return value into the response box.
    generate_button.click(fn=generate_response, inputs=[user_input], outputs=output_display)

# Start the Gradio app on port 7860.
demo.launch(server_port=7860)