Spaces:
Runtime error
Runtime error
File size: 2,903 Bytes
45d60f0 fd397ae 43a032b fd397ae 45d60f0 fd397ae 45d60f0 fd397ae af0f40b fd397ae ccfa62c fd397ae 1e4339b fd397ae 45d60f0 fd397ae 45d60f0 fd397ae 45d60f0 ef90a85 45d60f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
from threading import Thread
from typing import Iterator
import gradio as gr
import spaces
import torch
from openai import OpenAI, APIError
# OpenAI-compatible client pointed at a Hugging Face Inference Endpoint (TGI).
# NOTE(security): the API key was hard-coded in source; read it from the
# environment instead, keeping the original placeholder as the fallback so
# behavior is unchanged when the variable is unset.
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.getenv("HF_TOKEN", "hf_XXXXX"),
)
# Markdown blurb for the demo header. NOTE(review): not referenced anywhere
# in the visible file — confirm whether it should be rendered in the UI.
DESCRIPTION = """
Llama3-TenyxChat-70B is part of the TenyxChat series, models trained to function as useful assistants.
The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details available at our model page.
"""
# Markdown license notice. NOTE(review): also not referenced in the visible file.
LICENSE = """
This demo is governed by the license available [here.](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)"""
@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
    """Stream an assistant reply for *message* given prior (user, assistant) turns.

    Yields progressively longer prefixes of the reply, which is the Gradio
    streaming convention. On API failure, yields a single user-facing error
    string instead of raising.
    """
    # Rebuild the full conversation: fixed system prompt, then history, then
    # the new user message.
    conversation = [{"role": "system", "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company."}]
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})
    try:
        response = client.chat.completions.create(
            model="tgi",  # TGI endpoints accept any model name; routing is by URL
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            temperature=0.1,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # BUG FIX: the final streamed chunk (and role-only deltas) carry
            # delta.content == None; "".join would raise TypeError on it.
            # Also guard against an empty choices list.
            delta = chunk.choices[0].delta.content if chunk.choices else None
            if delta is not None:
                outputs.append(delta)
                yield "".join(outputs)
    except APIError as e:
        # Surface a friendly message in the chat instead of crashing the UI.
        print(f"Error: {e}")
        yield "API error. The model is currently unavailable/down. Please try again later."
# Chat UI wired to the streaming generator above. The commented-out
# "additional_inputs" scaffolding (system prompt / max-tokens sliders,
# referencing undefined MAX_MAX_NEW_TOKENS constants) was dead code and
# has been removed.
demo = gr.ChatInterface(
    fn=generate,
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)
# (Removed a commented-out gr.Blocks layout that referenced an undefined
# `chat_interface` variable — dead code.)
if __name__ == "__main__":
    # Cap the request queue so the Space sheds load instead of piling up.
    demo.queue(max_size=4).launch()
|