# Spaces: Sleeping (hosting-page header captured along with the source)
import asyncio
import os

import gradio as gr
from openai import AsyncAssistantEventHandler
from openai import AsyncOpenAI
# Set the keys.
# Every setting is read from the environment; os.getenv returns None for a
# variable that is not set.

# API client used for the OpenAI requests made in this file.
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Identifier of the assistant that streamed runs are started with.
assistantID = os.getenv("OPENAI_ASSISTANT_ID")

# Credentials handed to the Gradio login when the app is launched.
username = os.getenv("YOUR_ID")
password = os.getenv("YOUR_PASSWORD")
# Static HTML shown on the page: title above the form, description below the
# title, and article below the form.
# NOTE(review): the non-ASCII characters in the title and description look like
# mis-decoded emoji; they are kept as found - restore them from the original
# file if it is available.
mytitle = "<h1 align=center>RTL AI News Reader : What happened in the country π±πΊ and in the world π ?</h1>"

mydescription = """
<h3 align='center'>Which topic interests you : πΆ ππ»ββοΈ π π π π½οΈ π π βοΈ π©Ί </h3>
<table width=100%>
<tr>
<th width=50% bgcolor="Moccasin">Ask your questions in english or another language :</th>
<th bgcolor="Khaki">Response from the OpenAI File-Search Assistant :</th>
</tr>
</table>
"""

myarticle = """
<h3>Background :</h3>
<p>This HuggingFace Space demo was created by <a href="https://github.com/mbarnig">Marco Barnig</a>. As an artificial intelligence,
the <a href="https://platform.openai.com/docs/models">OpenAI model</a> gpt-4o-mini-2024-07-18 is used via API,
which can utilize up to 128,000 tokens as context, provide an answer to a question with a maximum of 16,384 tokens,
and process up to 200,000 tokens per minute (TPM). All english content from RTL.lu from the beginning up to September 2024 has been split into 16 JSON files
and uploaded to a Vector Store by the OpenAI File-Search Assistant "RTL English News Reader."
Each file contains fewer than 5 million tokens, which is an upper limit for the AI model. It is possible to upload up to 10,000 files to an OpenAI Assistant.
The responses of the examples are cached and therefore displayed without delay.</p>
"""
# Three-line text box in which the user types the question.
myinput = gr.Textbox(lines=3, label=" What would you like to know ?")

# Sample question(s) passed to the interface as `examples`.
myexamples = [
    "What happened in 2014 ?",
]
class EventHandler(AsyncAssistantEventHandler):
    """Accumulate the output of one streamed assistant run as plain text.

    Each callback appends to ``response_text``; the code that drives the
    stream reads that attribute to obtain the reply received so far.
    """

    def __init__(self) -> None:
        super().__init__()
        # Everything received so far, in arrival order.
        self.response_text = ""
        # Id of the tool call whose deltas are currently arriving; used to
        # write the "[Tool Call Delta]" marker only once per tool call.
        self.current_tool_call = None

    async def on_text_created(self, text) -> None:
        """Called when a text content part starts; appends nothing.

        The text itself is collected in ``on_text_delta``. Appending
        ``str(text)`` here would add the string form of the object rather
        than message text.
        """

    async def on_text_delta(self, delta, snapshot):
        """Append the newly received piece of message text, if any."""
        # Skip deltas without a value instead of appending the word "None".
        if delta.value:
            self.response_text += delta.value

    async def on_text_done(self, text):
        """Called when a text content part is complete; nothing to do."""

    async def on_tool_call_created(self, tool_call):
        """Record that the assistant started a tool call of the given type."""
        self.response_text += f"\n[Tool Call]: {tool_call.type}\n"

    async def on_tool_call_delta(self, delta, snapshot):
        """Record incremental tool-call output.

        A marker line is written the first time a delta for a new tool call
        id is seen. For ``code_interpreter`` deltas the input and any
        ``logs`` outputs are appended as well.
        """
        if snapshot.id != self.current_tool_call:
            self.current_tool_call = snapshot.id
            self.response_text += f"\n[Tool Call Delta]: {delta.type}\n"

        if delta.type != "code_interpreter":
            return

        interpreter = delta.code_interpreter
        if interpreter is None:
            # A delta may carry no code-interpreter payload at all.
            return

        if interpreter.input:
            self.response_text += str(interpreter.input)
        if interpreter.outputs:
            self.response_text += "\n\n[Output]:\n"
            for output in interpreter.outputs:
                if output.type == "logs":
                    self.response_text += f"\n{output.logs}"

    async def on_tool_call_done(self, text):
        """Called when a tool call is complete; nothing to do."""
# Initialize session variables.
# "thread_id" stays None until a thread has been created.
# NOTE: this is a single module-level dict, so the thread id stored here is
# shared by every request handled by this process.
session_data = {"assistant_id": assistantID, "thread_id": None}
async def initialize_thread():
    """Create a new thread and remember its id in ``session_data``."""
    # Create a Thread
    thread = await client.beta.threads.create()
    # Store thread ID in session_data for later use
    session_data["thread_id"] = thread.id
async def generate_response(user_input):
    """Send ``user_input`` to the assistant and stream the growing reply.

    The message is added to the thread recorded in ``session_data`` and a
    streamed run is started with the assistant recorded there.

    Args:
        user_input: Text of the user's question.

    Yields:
        The text accumulated by the event handler so far, each time it has
        changed since the previous yield (the first event always yields).
    """
    assistant_id = session_data["assistant_id"]
    thread_id = session_data["thread_id"]

    # Add a Message to the Thread; the returned message object is not needed.
    await client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=user_input,
    )

    # Create and Stream a Run
    # NOTE(review): `instructions` presumably replaces the instructions
    # configured on the assistant for this run - confirm that is intended.
    event_handler = EventHandler()
    async with client.beta.threads.runs.stream(
        thread_id=thread_id,
        assistant_id=assistant_id,
        instructions="Please assist the user with their query.",
        event_handler=event_handler,
    ) as stream:
        # Events already arrive incrementally, so no artificial delay is
        # added; text is forwarded only when an event actually changed it.
        last_sent = None
        async for _ in stream:
            current = event_handler.response_text
            if current != last_sent:
                last_sent = current
                yield current
# Gradio interface function (generator)
async def gradio_chat_interface(user_input):
    """Handle one submitted question and stream the reply.

    Args:
        user_input: Text of the user's question.

    Yields:
        Each partial reply produced by ``generate_response``.
    """
    # No event-loop setup is needed: this coroutine only ever executes inside
    # a running loop, so asyncio.get_running_loop() could never raise here.

    # Initialize the thread if not already done
    if session_data["thread_id"] is None:
        await initialize_thread()

    # Generate and yield responses
    async for response in generate_response(user_input):
        yield response
# Set up Gradio interface with streaming
interface = gr.Interface(
    fn=gradio_chat_interface,
    inputs=myinput,
    outputs="markdown",
    title=mytitle,
    description=mydescription,
    article=myarticle,
    live=False,
    allow_flagging="never",
    examples=myexamples,
)

# Launch the Gradio app behind a login; the message is shown on the login
# page. The adjacent literals below concatenate to one unchanged string.
interface.launch(
    auth=(username, password),
    auth_message=(
        "<h1>RTL AI News Reader</h1>"
        "<p>This HuggingFace Space is a prototype and is not yet accessible to everyone. "
        "The project is based on a file search assistant using OpenAI's API and employs the GPT-4o-mini model. "
        "You need to use a Chrome browser. "
        "AI specialists interested in accessing it can request a username and password by contacting [email protected].</p>"
    ),
)