# Qwen2-Audio-Instruct chat demo (Gradio app for Hugging Face Spaces).
import gradio as gr
import modelscope_studio as mgr
from http import HTTPStatus
import os
from dashscope import MultiModalConversation
import dashscope

# DashScope API key is read from the environment (stored as HF_TOKEN on Spaces).
YOUR_API_TOKEN = os.getenv('HF_TOKEN')
dashscope.api_key = YOUR_API_TOKEN
def add_text(chatbot, task_history, input):
    """Record a user turn (text and/or audio files) in both histories.

    Args:
        chatbot: UI transcript; a [user_message, None] pair is appended.
        task_history: model-facing history of {"role", "content"} dicts.
        input: multimodal input value exposing ``.text`` and ``.files``
            (each file exposing ``.path``).

    Returns:
        (chatbot, task_history, None) — the trailing None clears the input box.
    """
    content = []
    # Audio parts first, then the text part, matching the DashScope content format.
    if input.files:
        for uploaded in input.files:
            content.append({'audio': uploaded.path})
    if input.text:
        content.append({'text': input.text})
    task_history.append({"role": "user", "content": content})
    chatbot.append([{
        "text": input.text,
        "files": input.files,
    }, None])
    return chatbot, task_history, None
# def add_mic(chatbot, task_history, mic):
#     """Add audio to the chat history."""
#     task_history.append({"role": "user", "content": [{"audio": mic}]})
#     chatbot.append((f"[Audio input: {mic}]", None))
#     return chatbot, task_history
def add_file(chatbot, task_history, audio_file):
    """Append an uploaded audio file to both the UI and model histories."""
    path = audio_file.name
    task_history.append({"role": "user", "content": [{"audio": path}]})
    chatbot.append((f"[Audio file: {path}]", None))
    return chatbot, task_history
def reset_user_input():
    """Clear the user input box.

    Uses the component-agnostic ``gr.update`` instead of the
    ``gr.Textbox.update`` classmethod, which was removed in Gradio 4.x
    (and the input here is an ``mgr.MultimodalInput``, not a Textbox).
    """
    return gr.update(value='')
def reset_state(task_history):
    """Clear the conversation: return an empty transcript and empty history."""
    empty_chatbot, empty_history = [], []
    return empty_chatbot, empty_history
def regenerate(chatbot, task_history):
    """Drop the last assistant turn, if any, and ask the model again."""
    ends_with_assistant = bool(task_history) and task_history[-1]['role'] == 'assistant'
    if ends_with_assistant:
        task_history.pop()
        chatbot.pop()
    # Nothing left to regenerate from — return the (possibly trimmed) state.
    if not task_history:
        return chatbot, task_history
    return predict(chatbot, task_history)
def predict(chatbot, task_history):
    """Send the accumulated history to the model and append its reply.

    On HTTP failure the error is shown in the chatbot instead of raising;
    task_history is only extended on success.
    """
    response = MultiModalConversation.call(model='qwen2-audio-instruct',
                                           messages=task_history)
    if response.status_code != HTTPStatus.OK:
        error_message = f"Failed to get a response: {response.code} - {response.message}"
        chatbot.append((None, error_message))  # Add the error message to chatbot
        return chatbot, task_history

    message = response.output.choices[0].message
    output_text = message.content
    # The content may arrive as a list of parts or a single dict; pull out the text.
    if isinstance(output_text, list):
        output_text = next((part.get('text') for part in output_text if 'text' in part), '')
    elif isinstance(output_text, dict):
        output_text = output_text.get('text', '')
    task_history.append({'role': message.role,
                         'content': [{'text': output_text}]})
    chatbot.append((None, output_text))  # Add the response to chatbot
    return chatbot, task_history
# ---- Gradio UI: layout and event wiring (runs at import time) ----
with gr.Blocks() as demo:
    # Header banner: logo, title, and model-card / GitHub links.
    gr.Markdown("""<p align="center"><img src="https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png" style="height: 80px"/><p>""")  ## todo
    gr.Markdown("""<center><font size=8>Qwen2-Audio-Instruct Bot</center>""")
    gr.Markdown(
        """\
<center><font size=3>This WebUI is based on Qwen2-Audio-Instruct, developed by Alibaba Cloud. \
(本WebUI基于Qwen2-Audio-Instruct打造,实现聊天机器人功能。)</center>""")
    gr.Markdown("""\
<center><font size=4>Qwen2-Audio <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B">🤖 </a>
| <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B">🤗</a>&nbsp |
Qwen2-Audio-Instruct <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct">🤖 </a> |
<a href="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct">🤗</a>&nbsp |
&nbsp<a href="https://github.com/QwenLM/Qwen2-Audio">Github</a></center>""")
    # Multimodal transcript widget (renders text plus attached audio files).
    chatbot = mgr.Chatbot(label='Qwen2-Audio-7B-Instruct', elem_classes="control-height", height=750)
    # query = gr.Textbox(lines=2, label='Input')
    # mic = gr.Audio(source="microphone", type="filepath")
    # Combined text + microphone/upload input box.
    user_input = mgr.MultimodalInput(
        interactive=True,
        sources=['microphone', 'upload'],
        submit_button_props=dict(value="🚀 Submit (发送)"),
        upload_button_props=dict(value="📁 Upload (上传文件)", show_progress=True),
    )
    # Model-facing conversation history: list of {"role", "content"} dicts.
    task_history = gr.State([])
    with gr.Row():
        empty_bin = gr.Button("🧹 Clear History (清除历史)")
        # submit_btn = gr.Button("🚀 Submit (发送)")
        regen_btn = gr.Button("🤔️ Regenerate (重试)")
        # addfile_btn = gr.UploadButton("📁 Upload (上传文件)", file_types=["audio"])
    # mic.change(add_mic, [chatbot, task_history, mic], [chatbot, task_history])
    # submit_btn.click(add_text, [chatbot, task_history, query], [chatbot, task_history]).then(
    #     predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
    # )
    # submit_btn.click(reset_user_input, [], [query])
    # Submit: record the user turn (add_text also clears the input via its
    # third output), then chain into the model call.
    user_input.submit(fn=add_text,
                      inputs=[chatbot, task_history, user_input],
                      outputs=[chatbot, task_history, user_input], concurrency_limit = 40).then(
        predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
    )
    empty_bin.click(reset_state, outputs=[chatbot, task_history], show_progress=True, concurrency_limit = 40)
    regen_btn.click(regenerate, [chatbot, task_history], [chatbot, task_history], show_progress=True, concurrency_limit = 40)
    # addfile_btn.upload(add_file, [chatbot, task_history, addfile_btn], [chatbot, task_history], show_progress=True)

# Serve with a request queue on port 7860, bound to all interfaces.
demo.queue().launch(
    share=False,
    inbrowser=True,
    server_port=7860,
    server_name="0.0.0.0",
    max_threads=40
)