"""Gradio chat demo that streams completions from the Featherless API.

The model pre-selected in the UI is derived from the HTTP ``Referer`` header:
a visitor arriving from a Hugging Face model page gets that model when it is
listed in ``model-cache.json``; everyone else gets a fixed default.
"""
import functools
import json
import os

import gradio as gr
from openai import OpenAI

api_key = os.environ.get('FEATHERLESS_API_KEY')
client = OpenAI(
    base_url="https://api.featherless.ai/v1",
    api_key=api_key
)


def respond(message, history, model):
    """Stream the assistant's reply to ``message`` from ``model``.

    Args:
        message: The user's latest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        model: Model identifier forwarded to the chat-completions endpoint.

    Yields:
        The reply text accumulated so far, once per chunk that carries content.
    """
    messages = []
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
    )

    partial_message = ""
    for chunk in response:
        # Defensive: a streamed chunk may arrive with an empty `choices`
        # list; indexing [0] on it would raise IndexError mid-stream.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message


# Read through context managers so the file handles are closed promptly.
with open('./logo.svg', encoding='utf-8') as f_logo:
    logo = f_logo.read()

with open('./model-cache.json', 'r', encoding='utf-8') as f_model_cache:
    # Mapping of model class name -> list of model ids.
    model_cache = json.load(f_model_cache)

# Model classes that are never offered in the dropdown.
_EXCLUDED_MODEL_CLASSES = ('llama3-70b-8k', 'qwen2-72b-lc')

_HF_URL_PREFIX = "https://huggingface.co/"


def build_model_choices():
    """Return ``(label, model_id)`` pairs for every offered model.

    Labels have the form ``"<model_id> (<model_class>)"``. Model classes listed
    in ``_EXCLUDED_MODEL_CLASSES`` are left out.
    """
    return [
        (f"{model_id} ({model_class})", model_id)
        for model_class, model_ids in model_cache.items()
        if model_class not in _EXCLUDED_MODEL_CLASSES
        for model_id in model_ids
    ]


model_choices = build_model_choices()


def _is_supported(model_id):
    """Return True when ``model_id`` appears under any class in the cache."""
    return any(model_id in model_ids for model_ids in model_cache.values())


def initial_model(referer=None):
    """Pick the model to pre-select based on the request's referer.

    Args:
        referer: Value of the HTTP ``Referer`` header, or ``None``.

    Returns:
        A model id: a fixed model for the local dev server, the model named in
        a Hugging Face URL when it is in the model cache, otherwise the
        default model.
    """
    print(f"initial_model({referer})")

    if referer == 'http://127.0.0.1:7860/':
        return 'Sao10K/L3-70B-Euryale-v2.1'

    if referer and referer.startswith(_HF_URL_PREFIX):
        remainder = referer[len(_HF_URL_PREFIX):]
        # Also accept model-page URLs that carry a query string, fragment,
        # trailing slash or sub-page (e.g. ".../org/name/tree/main") by
        # reducing them to their first two path segments.
        path = remainder.split("?", 1)[0].split("#", 1)[0].strip("/")
        trimmed = "/".join(path.split("/")[:2])
        # The raw remainder is tried first so exact matches behave as before.
        for possible_model in (remainder, trimmed):
            if possible_model and _is_supported(possible_model):
                return possible_model

    return 'anakin87/yo-Llama-3-8B-Instruct'


def update_initial_model_choice(request: gr.Request):
    """Page-load handler: choose the initial model from the referer header."""
    return initial_model(request.headers.get('referer'))


title_text = "HuggingFace's missing inference widget"

# `title` must be passed by keyword: the first positional parameter of
# gr.Blocks is `theme`, so a positional title is treated as a theme name.
with gr.Blocks(title=title_text, css='.logo-mark { fill: #ffe184; }') as demo:
    # f-string so that {logo} is replaced by the SVG markup loaded above.
    gr.HTML(f"""
Test any <=15B LLM from the hub.
Inference by {logo}
""")

    # NOTE(review): `model_selector` was referenced by `demo.load` but never
    # defined in the file, and `respond` / `model_choices` were never attached
    # to a component. The dropdown and chat interface below are the minimal
    # wiring implied by those names - confirm label and layout.
    # `allow_custom_value` is set because `initial_model` can return ids that
    # `build_model_choices` filters out of the choice list.
    model_selector = gr.Dropdown(
        label="Model",
        choices=model_choices,
        allow_custom_value=True,
    )
    gr.ChatInterface(respond, additional_inputs=[model_selector])

    # Event listeners have to be registered inside the Blocks context.
    demo.load(update_initial_model_choice, outputs=model_selector)

demo.launch()