import gradio as gr
from huggingface_hub import InferenceClient


def generate_response(user_prompt, system_instructions="You are a helpful assistant."):
    """Stream a completion for *user_prompt* from Phi-3-mini and return the text.

    Args:
        user_prompt: The user's question/instruction.
        system_instructions: System prompt prepended to the request.

    Returns:
        The accumulated generated text with trailing special tokens stripped,
        or the stringified exception message if the request fails (so the
        Gradio UI shows the error instead of crashing).
    """
    try:
        client = InferenceClient("microsoft/Phi-3-mini-4k-instruct")
        generate_kwargs = dict(
            max_new_tokens=100,
            do_sample=True,
        )
        formatted_prompt = f"[SYSTEM] {system_instructions}[QUESTION]{user_prompt}[ANSWER]"
        stream = client.text_generation(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
            details=True,
            return_full_text=False,
        )
        output = ""
        for response in stream:
            output += response.token.text

        # Strip a trailing special token once streaming is done.
        # BUG FIX: the original second branch was `output.endswith("")` — always
        # True — so it unconditionally chopped 4 characters off the result.
        # The 4-char slice indicates the end-of-sequence token "</s>" was meant.
        for stop_token in ("<|assistant|>", "</s>"):
            if output.endswith(stop_token):
                output = output[: -len(stop_token)]
                break
        return output
    except Exception as e:
        # Demo-level handling: surface the failure message in the output box
        # rather than raising inside the Gradio callback.
        return str(e)


# Build the UI with Gradio
with gr.Blocks() as demo:
    gr.Markdown("## AI Text Generation")
    user_input = gr.Textbox(label="Enter your prompt")
    generate_button = gr.Button("Generate")
    output_display = gr.Textbox(label="Generated Response")

    generate_button.click(
        fn=generate_response,
        inputs=[user_input],
        outputs=output_display,
    )

# Launch the Gradio app
demo.launch(server_port=7860)