import gradio as gr | |
from huggingface_hub import InferenceClient | |
def generate_response(
    user_prompt,
    system_instructions="You are a helpful assistant.",
    *,
    model="microsoft/Phi-3-mini-4k-instruct",
    max_new_tokens=100,
):
    """Generate a reply to *user_prompt* from a hosted text-generation model.

    The prompt is wrapped as ``[SYSTEM] ...[QUESTION]...[ANSWER]``, sent through
    ``InferenceClient.text_generation`` in streaming mode, and the streamed
    token texts are concatenated into the reply.

    Args:
        user_prompt: The text entered by the user. A missing or blank prompt
            is answered locally without contacting the model.
        system_instructions: Instruction text placed in the ``[SYSTEM]`` slot.
        model: Identifier passed to ``InferenceClient``.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``text_generation``.

    Returns:
        The generated text as a string. If anything raises while talking to
        the model, the exception message is returned instead, so this function
        does not propagate errors from the generation call.
    """
    # Nothing to ask the model: answer locally instead of spending a remote call.
    if user_prompt is None or not str(user_prompt).strip():
        return "Please enter a prompt."

    # End-of-turn markers that may be left at the end of a completion.
    trailing_markers = ("<|assistant|>", "</s>")

    try:
        client = InferenceClient(model)
        # NOTE(review): the bracketed layout is kept exactly as before; instruct
        # models usually expect their own chat template -- confirm against the
        # model card before changing it.
        formatted_prompt = f"[SYSTEM] {system_instructions}[QUESTION]{user_prompt}[ANSWER]"
        stream = client.text_generation(
            formatted_prompt,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            stream=True,
            details=True,
            return_full_text=False,
        )
        # Skip tokens the backend flags as special (end-of-sequence and the
        # like) so control markers do not leak into the visible reply.
        pieces = [
            chunk.token.text
            for chunk in stream
            if not getattr(chunk.token, "special", False)
        ]
        output = "".join(pieces)
        # Fallback for backends that do not flag the marker as special: strip
        # at most one trailing marker, sized from the marker itself.
        for marker in trailing_markers:
            if output.endswith(marker):
                output = output[: -len(marker)]
                break
        return output
    except Exception as e:
        # Surface the failure as text so the UI output box shows what went wrong.
        return str(e)
# Build the interface with Gradio.
with gr.Blocks() as demo:
    gr.Markdown("## AI Text Generation")

    # Prompt box, trigger button, and the box that displays the reply.
    user_input = gr.Textbox(label="Enter your prompt")
    generate_button = gr.Button("Generate")
    output_display = gr.Textbox(label="Generated Response")

    # Only the prompt is wired in, so generate_response runs with its
    # default system instructions.
    generate_button.click(fn=generate_response, inputs=[user_input], outputs=output_display)

# Launch the Gradio app.
demo.launch(server_port=7860)