cetusian committed
Commit 09630a2
1 Parent(s): 5ffb836

Update app.py

Files changed (1)
  1. app.py +37 -90
app.py CHANGED
@@ -1,18 +1,18 @@
  import os
  import gradio as gr
- from huggingface_hub import login, InferenceClient
+ from huggingface_hub import login
+ from huggingface_hub import InferenceClient
  import spaces

- # Authenticate with Hugging Face API key
+ # Retrieve API key and authenticate
  api_key = os.getenv("LLAMA")
  login(api_key)

- # Initialize InferenceClients for multiple models
- client1 = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
- client2 = InferenceClient("bigscience/bloom")
+ # Initialize InferenceClient for the Llama model
+ client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")

  @spaces.GPU
- def compare_models(
+ def respond(
      message,
      history: list[dict],
      system_message,
@@ -22,26 +22,17 @@ def compare_models(
  ):
      # Start with the system message
      messages = [{"role": "system", "content": system_message}]
-     messages += history  # Add conversation history
-     messages.append({"role": "user", "content": message})  # Add user message
+
+     # Add the conversation history
+     messages += history

-     # Fetch responses from both models
-     response1 = ""
-     response2 = ""
+     # Add the latest user message
+     messages.append({"role": "user", "content": message})
+
+     response = ""

-     # Stream responses for Model 1
-     for message in client1.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-         response1 += token
-
-     # Stream responses for Model 2
-     for message in client2.chat_completion(
+     # Send the conversation to the model and stream the response
+     for message in client.chat_completion(
          messages,
          max_tokens=max_tokens,
          stream=True,
@@ -49,73 +40,29 @@
          top_p=top_p,
      ):
          token = message.choices[0].delta.content
-         response2 += token
-
-     # Return responses side-by-side
-     return response1, response2
-
- def handle_vote(vote, current_votes):
-     """Handle user votes."""
-     current_votes[vote] += 1
-     return f"Model 1: {current_votes['model1']} votes | Model 2: {current_votes['model2']} votes"
-
- # Initialize voting state
- votes = {"model1": 0, "model2": 0}
-
- # Create Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("# AI Model Comparison Tool")
-     with gr.Row():
-         system_message = gr.Textbox(
-             value="You are a helpful assistant specializing in tech-related topics.",
-             label="System message",
-         )
-         max_tokens = gr.Slider(
-             minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
-         )
-         temperature = gr.Slider(
-             minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
-         )
-         top_p = gr.Slider(
-             minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"
-         )
-
-     with gr.Row():
-         message = gr.Textbox(label="Enter your message")
-
-     compare_btn = gr.Button("Compare Models")
-
-     with gr.Row():
-         response1 = gr.Textbox(label="Response from Model 1")
-         response2 = gr.Textbox(label="Response from Model 2")
-
-     with gr.Row():
-         vote_model1 = gr.Button("Vote for Model 1")
-         vote_model2 = gr.Button("Vote for Model 2")
-
-     vote_status = gr.Textbox(
-         value=f"Model 1: {votes['model1']} votes | Model 2: {votes['model2']} votes",
-         label="Voting Results",
-     )
-
-     # Link components
-     compare_btn.click(
-         compare_models,
-         inputs=[message, [], system_message, max_tokens, temperature, top_p],
-         outputs=[response1, response2],
-     )
-
-     vote_model1.click(
-         handle_vote,
-         inputs=["model1", votes],
-         outputs=vote_status,
-     )
-
-     vote_model2.click(
-         handle_vote,
-         inputs=["model2", votes],
-         outputs=vote_status,
-     )
+         response += token
+         yield response
+
+ # Initialize the Gradio ChatInterface with the new format
+ demo = gr.ChatInterface(
+     respond,
+     type="messages",  # Use the OpenAI-style format
+     additional_inputs=[
+         gr.Textbox(
+             value="You are a helpful Customer Support assistant that specializes in the low-code software company: 'Plant an App' and tech-related topics.",
+             label="System message"
+         ),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p (nucleus sampling)"
+         ),
+     ],
+ )

  if __name__ == "__main__":
      demo.launch()
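
A note on the streaming loop: with stream=True, InferenceClient.chat_completion yields chunks whose delta.content can be None (the final chunk, for instance), so response += token can raise a TypeError. A minimal standalone sketch of the same pattern with that guard, assuming an HF_TOKEN environment variable in place of the Space's LLAMA secret:

import os
from huggingface_hub import InferenceClient

# Same model as the Space; HF_TOKEN is an assumption for local runs.
client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct", token=os.getenv("HF_TOKEN"))

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one sentence."},
]

response = ""
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    token = chunk.choices[0].delta.content
    # Some stream chunks carry no content; guard against None before appending.
    response += token or ""
    print(response)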
 
1
  import os
2
  import gradio as gr
3
+ from huggingface_hub import login
4
+ from huggingface_hub import InferenceClient
5
  import spaces
6
 
7
+ # Retrieve API key and authenticate
8
  api_key = os.getenv("LLAMA")
9
  login(api_key)
10
 
11
+ # Initialize InferenceClient for the Llama model
12
+ client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")
 
13
 
14
  @spaces.GPU
15
+ def respond(
16
  message,
17
  history: list[dict],
18
  system_message,
 
22
  ):
23
  # Start with the system message
24
  messages = [{"role": "system", "content": system_message}]
 
 
25
 
26
+ # Add the conversation history
27
+ messages += history
 
28
 
29
+ # Add the latest user message
30
+ messages.append({"role": "user", "content": message})
31
+
32
+ response = ""
 
 
 
 
 
 
33
 
34
+ # Send the conversation to the model and stream the response
35
+ for message in client.chat_completion(
36
  messages,
37
  max_tokens=max_tokens,
38
  stream=True,
 
40
  top_p=top_p,
41
  ):
42
  token = message.choices[0].delta.content
43
+ response += token
44
+ yield response
45
+
46
+ # Initialize the Gradio ChatInterface with the new format
47
+ demo = gr.ChatInterface(
48
+ respond,
49
+ type="messages", # Use the OpenAI-style format
50
+ additional_inputs=[
51
+ gr.Textbox(
52
+ value="You are a helpful Customer Support assistant that specializes in the low-code software company: 'Plant an App' and tech-related topics.",
53
+ label="System message"
54
+ ),
55
+ gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
56
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
57
+ gr.Slider(
58
+ minimum=0.1,
59
+ maximum=1.0,
60
+ value=0.95,
61
+ step=0.05,
62
+ label="Top-p (nucleus sampling)"
63
+ ),
64
+ ],
65
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  if __name__ == "__main__":
68
  demo.launch()
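Because the interface is created with type="messages", gr.ChatInterface passes history as a list of OpenAI-style {"role": ..., "content": ...} dicts, which is why messages += history works unchanged in respond. A quick way to exercise the generator without launching the UI; a hypothetical smoke test, assuming the LLAMA secret is set so login() and the inference call succeed (the spaces.GPU decorator is a no-op outside a Space):

# Hypothetical inputs; values mirror the defaults wired into the interface.
history = [
    {"role": "user", "content": "What is Plant an App?"},
    {"role": "assistant", "content": "Plant an App is a low-code development platform."},
]

for partial in respond(
    "How do I contact support?",                       # message
    history,                                           # history: list[dict]
    "You are a helpful Customer Support assistant.",   # system_message
    512,                                               # max_tokens
    0.7,                                               # temperature
    0.95,                                              # top_p
):
    print(partial)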