Spaces:
Running
Running
:lock: don't accept inference requests for models not on the list
Browse files
This is Gradio API hardening. We're not opening inference for larger models.
app.py
CHANGED
@@ -16,41 +16,6 @@ client = OpenAI(
|
|
16 |
api_key=api_key
|
17 |
)
|
18 |
|
19 |
-
REFLECTION_SYSTEM_PROMPT = """You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""
|
20 |
-
|
21 |
-
def respond(message, history, model):
|
22 |
-
history_openai_format = []
|
23 |
-
for human, assistant in history:
|
24 |
-
history_openai_format.append({"role": "user", "content": human })
|
25 |
-
history_openai_format.append({"role": "assistant", "content":assistant})
|
26 |
-
history_openai_format.append({"role": "user", "content": message})
|
27 |
-
|
28 |
-
if model == "mattshumer/Reflection-Llama-3.1-70B":
|
29 |
-
history_openai_format = [
|
30 |
-
{"role": "system", "content": REFLECTION_SYSTEM_PROMPT},
|
31 |
-
*history_openai_format
|
32 |
-
]
|
33 |
-
|
34 |
-
response = client.chat.completions.create(
|
35 |
-
model=model,
|
36 |
-
messages= history_openai_format,
|
37 |
-
temperature=1.0,
|
38 |
-
stream=True,
|
39 |
-
max_tokens=2000,
|
40 |
-
extra_headers={
|
41 |
-
'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
|
42 |
-
'X-Title': "HF's missing inference widget"
|
43 |
-
}
|
44 |
-
)
|
45 |
-
|
46 |
-
partial_message = ""
|
47 |
-
for chunk in response:
|
48 |
-
if chunk.choices[0].delta.content is not None:
|
49 |
-
content = chunk.choices[0].delta.content
|
50 |
-
escaped_content = html.escape(content)
|
51 |
-
partial_message += escaped_content
|
52 |
-
yield partial_message
|
53 |
-
|
54 |
with open('./model-cache.json', 'r') as f_model_cache:
|
55 |
model_cache = json.load(f_model_cache)
|
56 |
model_class_from_model_id = { model_id: model_class for model_class, model_ids in model_cache.items() for model_id in model_ids }
|
@@ -95,8 +60,13 @@ def build_model_choices():
|
|
95 |
all_choices += [ (f"{model_id}, {model_class_from_model_id[model_id]}", model_id) for model_id in bigger_whitelisted_models ]
|
96 |
|
97 |
return all_choices
|
98 |
-
|
99 |
model_choices = build_model_choices()
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# let's use a random but different model each day.
|
102 |
key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
|
@@ -121,6 +91,46 @@ initial_model = o.choice(model_choices)[1]
|
|
121 |
# o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
|
122 |
# return o.choice(model_choices)[1]
|
123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
logo = open('./logo.svg').read()
|
125 |
logo_small = open('./logo-small.svg').read()
|
126 |
title_text="HuggingFace's missing inference widget"
|
|
|
16 |
api_key=api_key
|
17 |
)
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Load the cached model catalogue and build a reverse index so each
# model id can be labelled with its model class in the picker.
with open('./model-cache.json', 'r') as f_model_cache:
    model_cache = json.load(f_model_cache)
model_class_from_model_id = {
    model_id: model_class
    for model_class, model_ids in model_cache.items()
    for model_id in model_ids
}
|
|
60 |
all_choices += [ (f"{model_id}, {model_class_from_model_id[model_id]}", model_id) for model_id in bigger_whitelisted_models ]
|
61 |
|
62 |
return all_choices
|
|
|
63 |
model_choices = build_model_choices()
|
64 |
def model_in_list(model, choices=None):
    """Return True iff *model* is one of the allowed model choices.

    Args:
        model: Model id to look up (e.g. "org/model-name").
        choices: Optional iterable of (label, model_id) pairs to search;
            defaults to the module-level `model_choices` whitelist.

    Returns:
        bool: whether the model id appears in the choices.
    """
    if choices is None:
        choices = model_choices
    # Only the model id (second element) matters; the label is display-only.
    # NOTE: renamed the loop variable — the original `id` shadowed the builtin.
    return any(model_id == model for _, model_id in choices)
70 |
|
71 |
# let's use a random but different model each day.
|
72 |
key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
|
|
|
91 |
# o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
|
92 |
# return o.choice(model_choices)[1]
|
93 |
|
94 |
REFLECTION_SYSTEM_PROMPT = """You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""


def respond(message, history, model):
    """Stream a chat completion for *message*, given the prior turn *history*.

    Args:
        message: The latest user message.
        history: List of (user, assistant) message pairs from earlier turns,
            as supplied by the gradio chat interface.
        model: Model id to run; must pass the `model_in_list` whitelist.

    Yields:
        str: the HTML-escaped assistant response accumulated so far,
        re-yielded once per streamed chunk.

    Raises:
        RuntimeError: if *model* is not on the space's allowed list
            (API hardening — inference is not open for larger models).
    """
    # Insist that the model is in model_choices; reject everything else so
    # the gradio API cannot be used to reach arbitrary catalogue models.
    if not model_in_list(model):
        raise RuntimeError(f"{model} is not supported in this hf space. Visit https://featherless.ai to see and use the complete model catalogue")

    # Re-shape gradio's (user, assistant) pair history into OpenAI messages.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # The Reflection model is tuned to expect its dedicated system prompt.
    if model == "mattshumer/Reflection-Llama-3.1-70B":
        history_openai_format = [
            {"role": "system", "content": REFLECTION_SYSTEM_PROMPT},
            *history_openai_format,
        ]

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
        extra_headers={
            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
            'X-Title': "HF's missing inference widget",
        },
    )

    # Escape each streamed chunk so model output cannot inject HTML/markup
    # into the chat UI; yield the running (escaped) transcript every chunk.
    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message += html.escape(chunk.choices[0].delta.content)
            yield partial_message
134 |
logo = open('./logo.svg').read()
|
135 |
logo_small = open('./logo-small.svg').read()
|
136 |
title_text="HuggingFace's missing inference widget"
|