Spaces:
Running
Running
:lock: don't accept inference requests for models not on the list
Browse files
This is Gradio API hardening. We're not opening inference for larger models.
app.py
CHANGED
@@ -16,41 +16,6 @@ client = OpenAI(
|
|
16 |
api_key=api_key
|
17 |
)
|
18 |
|
19 |
-
REFLECTION_SYSTEM_PROMPT = """You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""
|
20 |
-
|
21 |
-
def respond(message, history, model):
|
22 |
-
history_openai_format = []
|
23 |
-
for human, assistant in history:
|
24 |
-
history_openai_format.append({"role": "user", "content": human })
|
25 |
-
history_openai_format.append({"role": "assistant", "content":assistant})
|
26 |
-
history_openai_format.append({"role": "user", "content": message})
|
27 |
-
|
28 |
-
if model == "mattshumer/Reflection-Llama-3.1-70B":
|
29 |
-
history_openai_format = [
|
30 |
-
{"role": "system", "content": REFLECTION_SYSTEM_PROMPT},
|
31 |
-
*history_openai_format
|
32 |
-
]
|
33 |
-
|
34 |
-
response = client.chat.completions.create(
|
35 |
-
model=model,
|
36 |
-
messages= history_openai_format,
|
37 |
-
temperature=1.0,
|
38 |
-
stream=True,
|
39 |
-
max_tokens=2000,
|
40 |
-
extra_headers={
|
41 |
-
'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
|
42 |
-
'X-Title': "HF's missing inference widget"
|
43 |
-
}
|
44 |
-
)
|
45 |
-
|
46 |
-
partial_message = ""
|
47 |
-
for chunk in response:
|
48 |
-
if chunk.choices[0].delta.content is not None:
|
49 |
-
content = chunk.choices[0].delta.content
|
50 |
-
escaped_content = html.escape(content)
|
51 |
-
partial_message += escaped_content
|
52 |
-
yield partial_message
|
53 |
-
|
54 |
with open('./model-cache.json', 'r') as f_model_cache:
|
55 |
model_cache = json.load(f_model_cache)
|
56 |
model_class_from_model_id = { model_id: model_class for model_class, model_ids in model_cache.items() for model_id in model_ids }
|
@@ -95,8 +60,13 @@ def build_model_choices():
|
|
95 |
all_choices += [ (f"{model_id}, {model_class_from_model_id[model_id]}", model_id) for model_id in bigger_whitelisted_models ]
|
96 |
|
97 |
return all_choices
|
98 |
-
|
99 |
model_choices = build_model_choices()
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
# let's use a random but different model each day.
|
102 |
key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
|
@@ -121,6 +91,46 @@ initial_model = o.choice(model_choices)[1]
|
|
121 |
# o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
|
122 |
# return o.choice(model_choices)[1]
|
123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
logo = open('./logo.svg').read()
|
125 |
logo_small = open('./logo-small.svg').read()
|
126 |
title_text="HuggingFace's missing inference widget"
|
|
|
16 |
api_key=api_key
|
17 |
)
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Load the cached model catalogue and build a reverse index so each
# model id can be labelled with its model class in the picker.
with open('./model-cache.json', 'r') as f_model_cache:
    model_cache = json.load(f_model_cache)
model_class_from_model_id = {
    model_id: model_class
    for model_class, model_ids in model_cache.items()
    for model_id in model_ids
}
|
|
60 |
all_choices += [ (f"{model_id}, {model_class_from_model_id[model_id]}", model_id) for model_id in bigger_whitelisted_models ]
|
61 |
|
62 |
return all_choices
|
|
|
63 |
model_choices = build_model_choices()
|
64 |
def model_in_list(model, choices=None):
    """Return True iff *model* is one of the allowed model choices.

    Args:
        model: Model id to look up (e.g. "org/model-name").
        choices: Optional iterable of (label, model_id) pairs to search;
            defaults to the module-level `model_choices` whitelist.

    Returns:
        bool: whether the model id appears in the choices.
    """
    if choices is None:
        choices = model_choices
    # Only the model id (second element) matters; the label is display-only.
    # NOTE: renamed the loop variable — the original `id` shadowed the builtin.
    return any(model_id == model for _, model_id in choices)
70 |
|
71 |
# let's use a random but different model each day.
|
72 |
key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
|
|
|
91 |
# o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
|
92 |
# return o.choice(model_choices)[1]
|
93 |
|
94 |
REFLECTION_SYSTEM_PROMPT = """You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""


def respond(message, history, model):
    """Stream a chat completion for *message*, given the prior turn *history*.

    Args:
        message: The latest user message.
        history: List of (user, assistant) message pairs from earlier turns,
            as supplied by the gradio chat interface.
        model: Model id to run; must pass the `model_in_list` whitelist.

    Yields:
        str: the HTML-escaped assistant response accumulated so far,
        re-yielded once per streamed chunk.

    Raises:
        RuntimeError: if *model* is not on the space's allowed list
            (API hardening — inference is not open for larger models).
    """
    # Insist that the model is in model_choices; reject everything else so
    # the gradio API cannot be used to reach arbitrary catalogue models.
    if not model_in_list(model):
        raise RuntimeError(f"{model} is not supported in this hf space. Visit https://featherless.ai to see and use the complete model catalogue")

    # Re-shape gradio's (user, assistant) pair history into OpenAI messages.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # The Reflection model is tuned to expect its dedicated system prompt.
    if model == "mattshumer/Reflection-Llama-3.1-70B":
        history_openai_format = [
            {"role": "system", "content": REFLECTION_SYSTEM_PROMPT},
            *history_openai_format,
        ]

    response = client.chat.completions.create(
        model=model,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
        max_tokens=2000,
        extra_headers={
            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
            'X-Title': "HF's missing inference widget",
        },
    )

    # Escape each streamed chunk so model output cannot inject HTML/markup
    # into the chat UI; yield the running (escaped) transcript every chunk.
    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message += html.escape(chunk.choices[0].delta.content)
            yield partial_message
134 |
logo = open('./logo.svg').read()
|
135 |
logo_small = open('./logo-small.svg').read()
|
136 |
title_text="HuggingFace's missing inference widget"
|