wxgeorge committed on
Commit a9b1f7f (1 parent: 674f62d)

:lock: don't accept inference requests for models not on the list


This is Gradio API hardening: we're not opening inference for larger models, so inference requests for models not on the list are now rejected.
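For concreteness, here is a minimal, self-contained sketch (not part of the commit) of the guard this change introduces. model_in_list is copied from the diff below; the model_choices entries and the stubbed respond body are placeholders, since in app.py model_choices comes from build_model_choices() and respond streams completions from the API:

# Hypothetical stand-ins for the real catalogue built by build_model_choices().
model_choices = [
    ("mistralai/Mistral-7B-Instruct-v0.2, mistral", "mistralai/Mistral-7B-Instruct-v0.2"),
    ("meta-llama/Meta-Llama-3.1-8B-Instruct, llama3", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
]

# As in the commit: scan the (label, id) pairs, matching on id only.
def model_in_list(model):
    for label, id in model_choices:
        if id == model:
            return True
    return False

# Stubbed respond: the guard is the point here; the real body streams
# chunks from client.chat.completions.create(...).
def respond(message, history, model):
    if not model_in_list(model):
        raise RuntimeError(f"{model} is not supported in this hf space.")
    yield f"(stream of tokens from {model} would follow)"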

Files changed (1)
1. app.py +46 -36
app.py CHANGED
@@ -16,41 +16,6 @@ client = OpenAI(
     api_key=api_key
 )
 
-REFLECTION_SYSTEM_PROMPT = """You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""
-
-def respond(message, history, model):
-    history_openai_format = []
-    for human, assistant in history:
-        history_openai_format.append({"role": "user", "content": human})
-        history_openai_format.append({"role": "assistant", "content": assistant})
-    history_openai_format.append({"role": "user", "content": message})
-
-    if model == "mattshumer/Reflection-Llama-3.1-70B":
-        history_openai_format = [
-            {"role": "system", "content": REFLECTION_SYSTEM_PROMPT},
-            *history_openai_format
-        ]
-
-    response = client.chat.completions.create(
-        model=model,
-        messages=history_openai_format,
-        temperature=1.0,
-        stream=True,
-        max_tokens=2000,
-        extra_headers={
-            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
-            'X-Title': "HF's missing inference widget"
-        }
-    )
-
-    partial_message = ""
-    for chunk in response:
-        if chunk.choices[0].delta.content is not None:
-            content = chunk.choices[0].delta.content
-            escaped_content = html.escape(content)
-            partial_message += escaped_content
-            yield partial_message
-
 with open('./model-cache.json', 'r') as f_model_cache:
     model_cache = json.load(f_model_cache)
 model_class_from_model_id = { model_id: model_class for model_class, model_ids in model_cache.items() for model_id in model_ids }
@@ -95,8 +60,13 @@ def build_model_choices():
     all_choices += [ (f"{model_id}, {model_class_from_model_id[model_id]}", model_id) for model_id in bigger_whitelisted_models ]
 
     return all_choices
-
 model_choices = build_model_choices()
+def model_in_list(model):
+    for label, id in model_choices:
+        if id == model:
+            return True
+
+    return False
 
 # let's use a random but different model each day.
 key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
@@ -121,6 +91,46 @@ initial_model = o.choice(model_choices)[1]
 # o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
 # return o.choice(model_choices)[1]
 
+
+REFLECTION_SYSTEM_PROMPT = """You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""
+
+def respond(message, history, model):
+    # insist that the requested model is in model_choices
+    if not model_in_list(model):
+        raise RuntimeError(f"{model} is not supported in this hf space. Visit https://featherless.ai to see and use the complete model catalogue")
+
+    history_openai_format = []
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human})
+        history_openai_format.append({"role": "assistant", "content": assistant})
+    history_openai_format.append({"role": "user", "content": message})
+
+    if model == "mattshumer/Reflection-Llama-3.1-70B":
+        history_openai_format = [
+            {"role": "system", "content": REFLECTION_SYSTEM_PROMPT},
+            *history_openai_format
+        ]
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=history_openai_format,
+        temperature=1.0,
+        stream=True,
+        max_tokens=2000,
+        extra_headers={
+            'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
+            'X-Title': "HF's missing inference widget"
+        }
+    )
+
+    partial_message = ""
+    for chunk in response:
+        if chunk.choices[0].delta.content is not None:
+            content = chunk.choices[0].delta.content
+            escaped_content = html.escape(content)
+            partial_message += escaped_content
+            yield partial_message
+
 logo = open('./logo.svg').read()
 logo_small = open('./logo-small.svg').read()
 title_text="HuggingFace's missing inference widget"
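One subtlety, since respond is a generator: the RuntimeError surfaces when the stream is first iterated (i.e. when Gradio pulls the first chunk), not when respond() is called. Exercising the sketch above, with a made-up unlisted model id:

# Whitelisted model: streaming proceeds as before.
print(next(respond("hi", [], "mistralai/Mistral-7B-Instruct-v0.2")))

# Unlisted model: creating the generator does not raise...
gen = respond("hi", [], "some-org/Some-405B-Model")
try:
    next(gen)  # ...but the first pull does.
except RuntimeError as err:
    print(err)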