David committed on
Commit
e23537b
1 Parent(s): ca5f876
Files changed (1) hide show
  1. app.py +94 -23
app.py CHANGED
@@ -2,19 +2,75 @@ import gradio as gr
2
  import requests
3
  import os
4
  import json
5
- import sseclient
 
 
 
 
 
6
 
7
  # Set up the API endpoint and key
8
- API_URL = os.getenv("RUNPOD_API_URL")
9
  API_KEY = os.getenv("RUNPOD_API_KEY")
 
10
 
11
  headers = {
12
  "Authorization": f"Bearer {API_KEY}",
13
  "Content-Type": "application/json"
14
  }
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Fixed system prompt
17
- SYSTEM_PROMPT = "You an advanced artificial intelligence system, capable of <thinking> <reflection> and you output a brief and small to the point <output>."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def stream_response(message, history, max_tokens, temperature, top_p):
20
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
@@ -31,43 +87,58 @@ def stream_response(message, history, max_tokens, temperature, top_p):
31
  "max_tokens": max_tokens,
32
  "temperature": temperature,
33
  "top_p": top_p,
34
- "stream": True
 
35
  }
36
 
 
 
 
37
  try:
38
  response = requests.post(API_URL, headers=headers, json=data, stream=True)
39
- response.raise_for_status()
40
- client = sseclient.SSEClient(response)
41
 
42
- full_response = ""
43
- for event in client.events():
44
- if event.data != "[DONE]":
45
- try:
46
- chunk = json.loads(event.data)
47
- if 'choices' in chunk and len(chunk['choices']) > 0:
48
- content = chunk['choices'][0]['delta'].get('content', '')
49
- full_response += content
50
- # Replace < and > with their HTML entities
51
- display_content = content.replace('<', '&lt;').replace('>', '&gt;')
52
- yield display_content
53
- except json.JSONDecodeError:
54
- print(f"Failed to decode JSON: {event.data}")
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  except requests.exceptions.RequestException as e:
 
 
57
  yield f"Error: {str(e)}"
58
  except Exception as e:
 
 
59
  yield f"Unexpected error: {str(e)}"
60
 
61
  demo = gr.ChatInterface(
62
  stream_response,
63
  additional_inputs=[
64
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
65
- gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
66
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
67
  ],
68
  )
69
 
70
  if __name__ == "__main__":
71
- print(f"Starting application with API URL: {API_URL}")
72
- print(f"Using system prompt: {SYSTEM_PROMPT}")
 
73
  demo.launch()
 
2
  import requests
3
  import os
4
  import json
5
+ import traceback
6
+ import sys
7
+ import re
8
+
9
+ # Enable or disable tracing
10
+ ENABLE_TRACING = False
11
 
12
  # Set up the API endpoint and key
13
+ API_BASE_URL = os.getenv("RUNPOD_API_URL")
14
  API_KEY = os.getenv("RUNPOD_API_KEY")
15
+ API_URL = f"{API_BASE_URL}/chat/completions"
16
 
17
  headers = {
18
  "Authorization": f"Bearer {API_KEY}",
19
  "Content-Type": "application/json"
20
  }
21
 
22
+ import re
23
+
24
def style_xml_content(text):
    """Convert model output into HTML that is safe to show in the chat.

    All angle brackets are escaped so that tags are displayed literally.
    Complete ``<thinking>...</thinking>`` and ``<reflection>...</reflection>``
    sections are then turned into open ``<details>`` elements whose content
    is styled (bold italic for thinking, bold underline for reflection).

    Args:
        text: The raw text accumulated from the model so far.

    Returns:
        The escaped and styled text, with the ``<|im_start|>`` marker removed.
    """
    # Markup wrapped around the content of each tag that gets special styling.
    wrappers = {
        'thinking': ('<i><b>', '</b></i>'),
        'reflection': ('<u><b>', '</b></u>'),
    }

    def replace_content(match):
        tag = match.group(1)
        content = match.group(2)
        open_markup, close_markup = wrappers[tag]
        return (
            f'<details open><summary>&lt;{tag}&gt;</summary>'
            f'{open_markup}{content}{close_markup}'
            f'<br>&lt;/{tag}&gt;</details>'
        )

    # First, escape all < and > characters
    text = text.replace('<', '&lt;').replace('>', '&gt;')

    # Then, unescape the specific tags we want to process
    for tag in wrappers:
        text = text.replace(f'&lt;{tag}&gt;', f'<{tag}>')
        text = text.replace(f'&lt;/{tag}&gt;', f'</{tag}>')

    # Apply styling to content inside complete tag pairs. Only the two tags
    # unescaped above can still contain a raw '<', so the pattern names them.
    styled_text = re.sub(
        r'<(thinking|reflection)>(.*?)</\1>',
        replace_content,
        text,
        flags=re.DOTALL,
    )

    # A tag without its partner (the closing tag has not streamed in yet, the
    # reply was truncated, or the tag was nested inside another section) would
    # otherwise be emitted as a raw, unknown HTML element. Escape it again so
    # it is displayed as literal text like every other tag.
    styled_text = re.sub(
        r'<(/?(?:thinking|reflection))>', r'&lt;\1&gt;', styled_text
    )

    # Remove blacklisted text
    styled_text = styled_text.replace("&lt;|im_start|&gt;", "")

    return styled_text
53
+
54
  # Fixed system prompt
55
+ SYSTEM_PROMPT = "You an advanced artificial intelligence system, capable of <thinking> and then creating a length <reflection>, where you ask if you were wrong? And then you correct yourself. Always use <reflection></reflection> unless it is a trivial or wikipedia question. Finally you output a brief and small to the point <output>."
56
+
57
def debug_print(*args, **kwargs):
    """Write a trace message to stderr, but only while tracing is enabled.

    Accepts the same positional and keyword arguments as ``print``; the
    output stream is always ``sys.stderr``.
    """
    if not ENABLE_TRACING:
        return
    print(*args, file=sys.stderr, **kwargs)
60
+
61
def parse_sse(data):
    """Decode one line of a server-sent-event stream into its JSON payload.

    Args:
        data: One raw line from the response stream, as bytes or str.

    Returns:
        The decoded JSON value, or None when the line is empty, is the
        ``[DONE]`` sentinel, or does not contain valid JSON.
    """
    if not data:
        return None

    # Lines read from the HTTP response are bytes; accept text as well.
    if isinstance(data, (bytes, bytearray)):
        data = data.decode('utf-8')
    data = data.strip()
    debug_print(f"Raw SSE data: {data}")

    if data.startswith('data:'):
        # The space after the colon is optional in the event-stream format,
        # so strip the field name first and any padding afterwards.
        data = data[len('data:'):].lstrip()

    if data == '[DONE]':
        return None

    try:
        return json.loads(data)
    except json.JSONDecodeError:
        debug_print(f"Failed to parse SSE data: {data}")
        return None
74
 
75
  def stream_response(message, history, max_tokens, temperature, top_p):
76
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
87
  "max_tokens": max_tokens,
88
  "temperature": temperature,
89
  "top_p": top_p,
90
+ "stream": True,
91
+ "stop": ["</output>"] # Add stop sequence
92
  }
93
 
94
+ debug_print(f"Sending request to API: {API_URL}")
95
+ debug_print(f"Request data: {json.dumps(data, indent=2)}")
96
+
97
  try:
98
  response = requests.post(API_URL, headers=headers, json=data, stream=True)
99
+ debug_print(f"Response status code: {response.status_code}")
100
+ debug_print(f"Response headers: {response.headers}")
101
 
102
+ response.raise_for_status()
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ accumulated_content = ""
105
+ for line in response.iter_lines():
106
+ if line:
107
+ debug_print(f"Received line: {line}")
108
+ parsed = parse_sse(line)
109
+ if parsed:
110
+ debug_print(f"Parsed SSE data: {parsed}")
111
+ if 'choices' in parsed and len(parsed['choices']) > 0:
112
+ content = parsed['choices'][0]['delta'].get('content', '')
113
+ if content:
114
+ accumulated_content += content
115
+ styled_content = style_xml_content(accumulated_content)
116
+ yield styled_content
117
+
118
+ # Check if we've reached the stop sequence
119
+ if accumulated_content.endswith("</output>"):
120
+ break
121
+
122
  except requests.exceptions.RequestException as e:
123
+ debug_print(f"Request exception: {str(e)}")
124
+ debug_print(f"Request exception traceback: {traceback.format_exc()}")
125
  yield f"Error: {str(e)}"
126
  except Exception as e:
127
+ debug_print(f"Unexpected error: {str(e)}")
128
+ debug_print(f"Error traceback: {traceback.format_exc()}")
129
  yield f"Unexpected error: {str(e)}"
130
 
131
  demo = gr.ChatInterface(
132
  stream_response,
133
  additional_inputs=[
134
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
135
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1, label="Temperature"),
136
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.83, step=0.05, label="Top-p (nucleus sampling)"),
137
  ],
138
  )
139
 
140
  if __name__ == "__main__":
141
+ debug_print(f"Starting application with API URL: {API_URL}")
142
+ debug_print(f"Using system prompt: {SYSTEM_PROMPT}")
143
+ debug_print(f"Tracing enabled: {ENABLE_TRACING}")
144
  demo.launch()