seawolf2357
committed on
Commit
•
6ab04f4
1
Parent(s):
728fb17
Update app.py
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ def respond(
|
|
18 |
message,
|
19 |
history: list[tuple[str, str]],
|
20 |
system_message="AI Assistant Role",
|
21 |
-
max_tokens=
|
22 |
temperature=0.7,
|
23 |
top_p=0.95,
|
24 |
):
|
@@ -30,7 +30,6 @@ def respond(
|
|
30 |
memory.append((message, None))
|
31 |
|
32 |
messages = [{"role": "system", "content": full_system_message}]
|
33 |
-
|
34 |
# 메모리에서 대화 기록을 가져와 메시지 목록에 추가
|
35 |
for val in memory:
|
36 |
if val[0]:
|
@@ -42,7 +41,6 @@ def respond(
|
|
42 |
"Authorization": f"Bearer {TOKEN}",
|
43 |
"Content-Type": "application/json"
|
44 |
}
|
45 |
-
|
46 |
payload = {
|
47 |
"model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
48 |
"max_tokens": max_tokens,
|
@@ -50,7 +48,6 @@ def respond(
|
|
50 |
"top_p": top_p,
|
51 |
"messages": messages
|
52 |
}
|
53 |
-
|
54 |
response = requests.post("https://api-inference.huggingface.co/v1/chat/completions", headers=headers, json=payload, stream=True)
|
55 |
|
56 |
# Stream 방식으로 데이터를 출력
|
@@ -79,9 +76,8 @@ demo = gr.ChatInterface(
|
|
79 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
80 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
81 |
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
|
82 |
-
]
|
83 |
-
streaming=True # 스트리밍 모드 활성화
|
84 |
)
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
-
demo.queue().launch(max_threads=20)
|
|
|
18 |
message,
|
19 |
history: list[tuple[str, str]],
|
20 |
system_message="AI Assistant Role",
|
21 |
+
max_tokens=512,
|
22 |
temperature=0.7,
|
23 |
top_p=0.95,
|
24 |
):
|
|
|
30 |
memory.append((message, None))
|
31 |
|
32 |
messages = [{"role": "system", "content": full_system_message}]
|
|
|
33 |
# 메모리에서 대화 기록을 가져와 메시지 목록에 추가
|
34 |
for val in memory:
|
35 |
if val[0]:
|
|
|
41 |
"Authorization": f"Bearer {TOKEN}",
|
42 |
"Content-Type": "application/json"
|
43 |
}
|
|
|
44 |
payload = {
|
45 |
"model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
46 |
"max_tokens": max_tokens,
|
|
|
48 |
"top_p": top_p,
|
49 |
"messages": messages
|
50 |
}
|
|
|
51 |
response = requests.post("https://api-inference.huggingface.co/v1/chat/completions", headers=headers, json=payload, stream=True)
|
52 |
|
53 |
# Stream 방식으로 데이터를 출력
|
|
|
76 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
77 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
78 |
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
|
79 |
+
]
|
|
|
80 |
)
|
81 |
|
82 |
if __name__ == "__main__":
|
83 |
+
demo.queue().launch(max_threads=20)
|