wxgeorge committed on
Commit
30bad6e
1 Parent(s): 68492c3

:sparkles: support mattshumer's Reflection

Browse files
Files changed (2) hide show
  1. app.py +67 -32
  2. model-cache.json +1 -0
app.py CHANGED
@@ -5,6 +5,9 @@ import json
5
  import functools
6
  import random
7
  import datetime
 
 
 
8
 
9
  api_key = os.environ.get('FEATHERLESS_API_KEY')
10
  client = OpenAI(
@@ -18,24 +21,50 @@ def respond(message, history, model):
18
  history_openai_format.append({"role": "user", "content": human })
19
  history_openai_format.append({"role": "assistant", "content":assistant})
20
  history_openai_format.append({"role": "user", "content": message})
21
-
22
- response = client.chat.completions.create(
23
- model=model,
24
- messages= history_openai_format,
25
- temperature=1.0,
26
- stream=True,
27
- max_tokens=2000,
28
- extra_headers={
29
- 'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
30
- 'X-Title': "HF's missing inference widget"
31
- }
32
- )
33
 
34
- partial_message = ""
35
- for chunk in response:
36
- if chunk.choices[0].delta.content is not None:
37
- partial_message = partial_message + chunk.choices[0].delta.content
38
- yield partial_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  logo = open('./logo.svg').read()
41
 
@@ -69,26 +98,32 @@ def build_model_choices():
69
  continue
70
  all_choices += [ (f"{model_id} ({model_class})", model_id) for model_id in model_cache[model_class] ]
71
 
 
 
 
 
 
72
  return all_choices
73
 
74
  model_choices = build_model_choices()
75
 
76
  def initial_model(referer=None):
77
-
78
- if referer == 'http://127.0.0.1:7860/':
79
- return 'Sao10K/Venomia-1.1-m7'
80
-
81
- if referer and referer.startswith("https://huggingface.co/"):
82
- possible_model = referer[23:]
83
- full_model_list = functools.reduce(lambda x,y: x+y, model_cache.values(), [])
84
- model_is_supported = possible_model in full_model_list
85
- if model_is_supported:
86
- return possible_model
87
-
88
- # let's use a random but different model each day.
89
- key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
90
- o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
91
- return o.choice(model_choices)[1]
 
92
 
93
  title_text="HuggingFace's missing inference widget"
94
  css = """
 
5
  import functools
6
  import random
7
  import datetime
8
+ from transformers import AutoTokenizer
9
+
10
+ reflection_tokenizer = AutoTokenizer.from_pretrained("mattshumer/Reflection-Llama-3.1-70B")
11
 
12
  api_key = os.environ.get('FEATHERLESS_API_KEY')
13
  client = OpenAI(
 
21
  history_openai_format.append({"role": "user", "content": human })
22
  history_openai_format.append({"role": "assistant", "content":assistant})
23
  history_openai_format.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ if model == "mattshumer/Reflection-Llama-3.1-70B":
26
+ # chat/completions not working for this model;
27
+ # apply chat template locally
28
+ response = client.completions.create(
29
+ model=model,
30
+ prompt=reflection_tokenizer.apply_chat_template(history_openai_format, tokenize=False),
31
+ temperature=1.0,
32
+ stream=True,
33
+ max_tokens=2000,
34
+ extra_headers={
35
+ 'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
36
+ 'X-Title': "HF's missing inference widget"
37
+ }
38
+ )
39
+
40
+ # debugger_ran = False
41
+ partial_message = ""
42
+ for chunk in response:
43
+ # if not debugger_ran:
44
+ # import code
45
+ # code.InteractiveConsole(locals=locals()).interact()
46
+ # debugger_ran = True
47
+ if chunk.choices[0].text is not None:
48
+ partial_message = partial_message + chunk.choices[0].text
49
+ yield partial_message
50
+ else:
51
+ response = client.chat.completions.create(
52
+ model=model,
53
+ messages= history_openai_format,
54
+ temperature=1.0,
55
+ stream=True,
56
+ max_tokens=2000,
57
+ extra_headers={
58
+ 'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/try-this-model',
59
+ 'X-Title': "HF's missing inference widget"
60
+ }
61
+ )
62
+
63
+ partial_message = ""
64
+ for chunk in response:
65
+ if chunk.choices[0].delta.content is not None:
66
+ partial_message = partial_message + chunk.choices[0].delta.content
67
+ yield partial_message
68
 
69
  logo = open('./logo.svg').read()
70
 
 
98
  continue
99
  all_choices += [ (f"{model_id} ({model_class})", model_id) for model_id in model_cache[model_class] ]
100
 
101
+ # and add one more ...
102
+ model_class = "llama3-70b-8k"
103
+ model_id = "mattshumer/Reflection-Llama-3.1-70B"
104
+ all_choices += [(f"{model_id} ({model_class})", model_id)]
105
+
106
  return all_choices
107
 
108
  model_choices = build_model_choices()
109
 
110
  def initial_model(referer=None):
111
+ return "mattshumer/Reflection-Llama-3.1-70B"
112
+
113
+ # if referer == 'http://127.0.0.1:7860/':
114
+ # return 'Sao10K/Venomia-1.1-m7'
115
+
116
+ # if referer and referer.startswith("https://huggingface.co/"):
117
+ # possible_model = referer[23:]
118
+ # full_model_list = functools.reduce(lambda x,y: x+y, model_cache.values(), [])
119
+ # model_is_supported = possible_model in full_model_list
120
+ # if model_is_supported:
121
+ # return possible_model
122
+
123
+ # # let's use a random but different model each day.
124
+ # key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e')
125
+ # o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}")
126
+ # return o.choice(model_choices)[1]
127
 
128
  title_text="HuggingFace's missing inference widget"
129
  css = """
model-cache.json CHANGED
@@ -515,6 +515,7 @@
515
  "jondurbin/airoboros-70b-3.3",
516
  "jondurbin/airoboros-dpo-70b-3.3",
517
  "m42-health/Llama3-Med42-70B",
 
518
  "meta-llama/Meta-Llama-3-70B-Instruct",
519
  "meta-llama/Meta-Llama-3.1-70B-Instruct",
520
  "migtissera/Llama-3-70B-Synthia-v3.5",
 
515
  "jondurbin/airoboros-70b-3.3",
516
  "jondurbin/airoboros-dpo-70b-3.3",
517
  "m42-health/Llama3-Med42-70B",
518
+ "mattshumer/Reflection-LLama-3.1-70B",
519
  "meta-llama/Meta-Llama-3-70B-Instruct",
520
  "meta-llama/Meta-Llama-3.1-70B-Instruct",
521
  "migtissera/Llama-3-70B-Synthia-v3.5",