keitokei1994 committed
Commit
ff4f294
1 Parent(s): 810766c

Update app.py

Files changed (1)
  1. app.py +125 -255
app.py CHANGED
@@ -1,270 +1,140 @@
  import spaces
  import gradio as gr
  from huggingface_hub import hf_hub_download
- from llama_cpp import Llama

- REPO_ID = "keitokei1994/shisa-v1-qwen2-7b-GGUF"
- MODEL_NAME = "shisa-v1-qwen2-7b.Q8_0.gguf"
- MAX_CONTEXT_LENGTH = 32768
- CUDA = True
- SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
- TOKEN_STOP = ["<|eot_id|>"]
- SYS_MSG = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSYSTEM_PROMPT<|eot_id|>\n"
- USER_PROMPT = (
-     "<|start_header_id|>user<|end_header_id|>\n\nUSER_PROMPT<|eot_id|>\n"
  )
- ASSIS_PROMPT = "<|start_header_id|>assistant<|end_header_id|>\n\n"
- END_ASSIS_PREVIOUS_RESPONSE = "<|eot_id|>\n"
-
- TASK_PROMPT = {
-     "Assistant": SYSTEM_PROMPT,
-     "Translate": "You are an expert translator. Translate the following text into English.",
-     "Summarization": "Summarizing information is my specialty. Let me know what you'd like summarized.",
-     "Grammar correction": "Grammar is my forte! Feel free to share the text you'd like me to proofread and correct.",
-     "Stable diffusion prompt generator": "You are a stable diffusion prompt generator. Break down the user's text and create a more elaborate prompt.",
-     "Play Trivia": "Engage the user in a trivia game on various topics.",
-     "Share Fun Facts": "Share interesting and fun facts on various topics.",
-     "Explain code": "You are an expert programmer guiding someone through a piece of code step by step, explaining each line and its function in detail.",
-     "Paraphrase Master": "You have the knack for transforming complex or verbose text into simpler, clearer language while retaining the original meaning and essence.",
-     "Recommend Movies": "Recommend movies based on the user's preferences.",
-     "Offer Motivational Quotes": "Offer motivational quotes to inspire the user.",
-     "Recommend Books": "Recommend books based on the user's favorite genres or interests.",
-     "Philosophical discussion": "Engage the user in a philosophical discussion",
-     "Music recommendation": "Tune time! What kind of music are you in the mood for? I'll find the perfect song for you.",
-     "Generate a Joke": "Generate a witty joke suitable for a stand-up comedy routine.",
-     "Roleplay as a Detective": "Roleplay as a detective interrogating a suspect in a murder case.",
-     "Act as a News Reporter": "Act as a news reporter covering breaking news about an alien invasion.",
-     "Play as a Space Explorer": "Play as a space explorer encountering a new alien civilization.",
-     "Be a Medieval Knight": "Imagine yourself as a medieval knight embarking on a quest to rescue a princess.",
-     "Act as a Superhero": "Act as a superhero saving a city from a supervillain's evil plot.",
-     "Play as a Pirate Captain": "Play as a pirate captain searching for buried treasure on a remote island.",
-     "Be a Famous Celebrity": "Imagine yourself as a famous celebrity attending a glamorous red-carpet event.",
-     "Design a New Invention": "Imagine you're an inventor tasked with designing a revolutionary new invention that will change the world.",
-     "Act as a Time Traveler": "You've just discovered time travel! Describe your adventures as you journey through different eras.",
-     "Play as a Magical Girl": "You are a magical girl with extraordinary powers, battling dark forces to protect your city and friends.",
-     "Act as a Shonen Protagonist": "You are a determined and spirited shonen protagonist on a quest for strength, friendship, and victory.",
-     "Roleplay as a Tsundere Character": "You are a tsundere character, initially cold and aloof but gradually warming up to others through unexpected acts of kindness.",
- }
-
- css = ".gradio-container {background-image: url('file=./assets/background.png'); background-size: cover; background-position: center; background-repeat: no-repeat;}"
-
-
- class ChatLLM:
-     def __init__(self, config_model):
-         self.llm = None
-         self.config_model = config_model
-         # self.load_cpp_model()
-
-     def load_cpp_model(self):
-         self.llm = Llama(**self.config_model)
-
-     def apply_chat_template(
-         self,
-         history,
-         system_message,
-     ):
-         history = history or []
-
-         messages = SYS_MSG.replace("SYSTEM_PROMPT", system_message.strip())
-         for msg in history:
-             messages += (
-                 USER_PROMPT.replace("USER_PROMPT", msg[0]) + ASSIS_PROMPT + msg[1]
-             )
-             messages += END_ASSIS_PREVIOUS_RESPONSE if msg[1] else ""
-
-         print(messages)
-
-         # messages = messages[:-1]
-
-         return messages
-
-     @spaces.GPU(duration=120)
-     def response(
-         self,
-         history,
-         system_message,
-         max_tokens,
-         temperature,
-         top_p,
-         top_k,
-         repeat_penalty,
-     ):
-
-         messages = self.apply_chat_template(history, system_message)
-
-         history[-1][1] = ""
-
-         if not self.llm:
-             print("Loading model")
-             self.load_cpp_model()
-
-         for output in self.llm(
-             messages,
-             echo=False,
-             stream=True,
-             max_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             top_k=top_k,
-             repeat_penalty=repeat_penalty,
-             stop=TOKEN_STOP,
-         ):
-             answer = output["choices"][0]["text"]
-             history[-1][1] += answer
-             # stream the response
-             yield history, history
-
-
- def user(message, history):
-     history = history or []
-     # Append the user's message to the conversation history
-     history.append([message, ""])
-     return "", history


- def clear_chat(chat_history_state, chat_message):
-     chat_history_state = []
-     chat_message = ""
-     return chat_history_state, chat_message


- def gui(llm_chat):
-     with gr.Blocks(theme="NoCrypt/miku", css=css) as app:
-         gr.Markdown("# shisa-v1-qwen2-7b.Q8_0.gguf")
-         gr.Markdown(
-             f"""
-             ### This demo utilizes the repository ID {REPO_ID} with the model {MODEL_NAME}, powered by the LLaMA.cpp backend.
-             """
  )
-         with gr.Row():
-             with gr.Column(scale=2):
-                 chatbot = gr.Chatbot(
-                     label="Chat",
-                     height=700,
-                     avatar_images=(
-                         "assets/avatar_user.jpeg",
-                         "assets/avatar_llama.jpeg",
-                     ),
-                 )
-             with gr.Column(scale=1):
-                 with gr.Row():
-                     message = gr.Textbox(
-                         label="Message",
-                         placeholder="Ask me anything.",
-                         lines=3,
-                     )
-                 with gr.Row():
-                     submit = gr.Button(value="Send message", variant="primary")
-                     clear = gr.Button(value="New chat", variant="primary")
-                     stop = gr.Button(value="Stop", variant="secondary")
-
-                 with gr.Accordion("Contextual Prompt Editor"):
-                     default_task = "Assistant"
-                     task_prompts_gui = gr.Dropdown(
-                         TASK_PROMPT,
-                         value=default_task,
-                         label="Prompt selector",
-                         visible=True,
-                         interactive=True,
-                     )
-                     system_msg = gr.Textbox(
-                         TASK_PROMPT[default_task],
-                         label="System Message",
-                         placeholder="system prompt",
-                         lines=4,
-                     )
-
-                     def task_selector(choice):
-                         return gr.update(value=TASK_PROMPT[choice])
-
-                     task_prompts_gui.change(
-                         task_selector,
-                         [task_prompts_gui],
-                         [system_msg],
-                     )
-
-                 with gr.Accordion("Advanced settings", open=False):
-                     with gr.Column():
-                         max_tokens = gr.Slider(
-                             20, 4096, label="Max Tokens", step=20, value=400
-                         )
-                         temperature = gr.Slider(
-                             0.2, 2.0, label="Temperature", step=0.1, value=0.8
-                         )
-                         top_p = gr.Slider(
-                             0.0, 1.0, label="Top P", step=0.05, value=0.95
-                         )
-                         top_k = gr.Slider(
-                             0, 100, label="Top K", step=1, value=40
-                         )
-                         repeat_penalty = gr.Slider(
-                             0.0,
-                             2.0,
-                             label="Repetition Penalty",
-                             step=0.1,
-                             value=1.1,
-                         )

-         chat_history_state = gr.State()
-         clear.click(
-             clear_chat,
-             inputs=[chat_history_state, message],
-             outputs=[chat_history_state, message],
-             queue=False,
-         )
-         clear.click(lambda: None, None, chatbot, queue=False)

-         submit_click_event = submit.click(
-             fn=user,
-             inputs=[message, chat_history_state],
-             outputs=[message, chat_history_state],
-             queue=True,
-         ).then(
-             fn=llm_chat.response,
-             inputs=[
-                 chat_history_state,
-                 system_msg,
-                 max_tokens,
-                 temperature,
-                 top_p,
-                 top_k,
-                 repeat_penalty,
-             ],
-             outputs=[chatbot, chat_history_state],
-             queue=True,
-         )
-         stop.click(
-             fn=None,
-             inputs=None,
-             outputs=None,
-             cancels=[submit_click_event],
-             queue=False,
-         )
-     return app


  if __name__ == "__main__":
-
-     model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME)
-
-     config_model = {
-         "model_path": model_path,
-         "n_ctx": MAX_CONTEXT_LENGTH,
-         "n_gpu_layers": -1 if CUDA else 0,
-         "flash_attn": True,
-     }
-
-     llm_chat = ChatLLM(config_model)
-
-     app = gui(llm_chat)
-
-     app.queue(default_concurrency_limit=40)
-
-     app.launch(
-         max_threads=40,
-         share=False,
-         show_error=True,
-         quiet=False,
-         debug=True,
-         allowed_paths=["./assets/"],
-     )
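For context on what this commit removes: the old app assembled Llama-3-style prompt strings by hand, splicing the system message and each turn into the template constants via str.replace. The following is a minimal sketch (not part of the commit) of the string the removed apply_chat_template returned for one in-flight turn:

# --- Sketch only, not part of the diff: output of the removed apply_chat_template ---
SYS_MSG = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSYSTEM_PROMPT<|eot_id|>\n"
USER_PROMPT = "<|start_header_id|>user<|end_header_id|>\n\nUSER_PROMPT<|eot_id|>\n"
ASSIS_PROMPT = "<|start_header_id|>assistant<|end_header_id|>\n\n"
END_ASSIS_PREVIOUS_RESPONSE = "<|eot_id|>\n"

history = [["Hello!", ""]]  # user() appends the in-flight turn with an empty reply
prompt = SYS_MSG.replace("SYSTEM_PROMPT", "You are a helpful assistant.")
for msg in history:
    prompt += USER_PROMPT.replace("USER_PROMPT", msg[0]) + ASSIS_PROMPT + msg[1]
    prompt += END_ASSIS_PREVIOUS_RESPONSE if msg[1] else ""  # close only finished replies

print(prompt)
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
# You are a helpful assistant.<|eot_id|>
# <|start_header_id|>user<|end_header_id|>
#
# Hello!<|eot_id|>
# <|start_header_id|>assistant<|end_header_id|>

The assistant header is deliberately left open so the model writes the reply. The new code below drops this manual templating and delegates formatting to llama_cpp_agent's MessagesFormatterType.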
 
  import spaces
+ import json
+ import subprocess
+ from llama_cpp import Llama
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
+ from llama_cpp_agent.chat_history import BasicChatHistory
+ from llama_cpp_agent.chat_history.messages import Roles
  import gradio as gr
  from huggingface_hub import hf_hub_download

+ # Download the model
+ hf_hub_download(
+     repo_id="bartowski/gemma-2-27b-it-GGUF",
+     filename="gemma-2-27b-it-Q4_K_M.gguf",
+     local_dir="./models"
  )

+ # Inference function
+ @spaces.GPU(duration=120)
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     model,
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+     top_k,
+     repeat_penalty,
+ ):
+     chat_template = MessagesFormatterType.GEMMA_2
+
+     llm = Llama(
+         model_path=f"models/{model}",
+         flash_attn=True,
+         n_gpu_layers=81,
+         n_batch=1024,
+         n_ctx=8192,
+     )
+     provider = LlamaCppPythonProvider(llm)

+     agent = LlamaCppAgent(
+         provider,
+         system_prompt=f"{system_message}",
+         predefined_messages_formatter_type=chat_template,
+         debug_output=True
+     )

+     settings = provider.get_provider_default_settings()
+     settings.temperature = temperature
+     settings.top_k = top_k
+     settings.top_p = top_p
+     settings.max_tokens = max_tokens
+     settings.repeat_penalty = repeat_penalty
+     settings.stream = True
+
+     messages = BasicChatHistory()
+
+     for msn in history:
+         user = {
+             'role': Roles.user,
+             'content': msn[0]
+         }
+         assistant = {
+             'role': Roles.assistant,
+             'content': msn[1]
+         }
+         messages.add_message(user)
+         messages.add_message(assistant)
+
+     stream = agent.get_chat_response(
+         message,
+         llm_sampling_settings=settings,
+         chat_history=messages,
+         returns_streaming_generator=True,
+         print_output=False
+     )

+     outputs = ""
+     for output in stream:
+         outputs += output
+         yield outputs
+
+ # Create the Gradio interface
+ def create_interface(model_name, description):
+     return gr.ChatInterface(
+         respond,
+         additional_inputs=[
+             gr.Textbox(value=model_name, label="Model", interactive=False),
+             gr.Textbox(value="You are a helpful assistant.", label="System message"),
+             gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
+             gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+             gr.Slider(
+                 minimum=0.1,
+                 maximum=1.0,
+                 value=0.95,
+                 step=0.05,
+                 label="Top-p",
+             ),
+             gr.Slider(
+                 minimum=0,
+                 maximum=100,
+                 value=40,
+                 step=1,
+                 label="Top-k",
+             ),
+             gr.Slider(
+                 minimum=0.0,
+                 maximum=2.0,
+                 value=1.1,
+                 step=0.1,
+                 label="Repetition penalty",
+             ),
+         ],
+         retry_btn="Retry",
+         undo_btn="Undo",
+         clear_btn="Clear",
+         submit_btn="Send",
+         title=f"{model_name}",
+         description=description,
+         chatbot=gr.Chatbot(
+             scale=1,
+             likeable=False,
+             show_copy_button=True
  )
+     )

+ # Create only the gemma-2-27b-it-Q4_K_M interface
+ description = """<p align="center">gemma-2-27b-it-Q4_K_M</p>"""
+ interface = create_interface('gemma-2-27b-it-Q4_K_M.gguf', description)

+ # Display the single interface with Gradio Blocks
+ demo = gr.Blocks()

+ with demo:
+     interface.render()

  if __name__ == "__main__":
+     demo.launch()
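
Since the new respond() is an ordinary Python generator, it can be smoke-tested without the UI. A hedged sketch, assuming the GGUF above has already been downloaded to ./models, that llama-cpp-python was built with GPU support, and that spaces.GPU degrades to a passthrough outside a ZeroGPU Space; the model argument is just the filename that respond() joins onto models/:

# --- Sketch only, not part of the diff: driving respond() directly ---
reply = ""
for partial in respond(
    message="Hello!",
    history=[],                             # no previous turns
    model="gemma-2-27b-it-Q4_K_M.gguf",     # filename under ./models
    system_message="You are a helpful assistant.",
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repeat_penalty=1.1,
):
    reply = partial  # each yield is the full reply accumulated so far
print(reply)

Each yielded value is cumulative (outputs += output), which matches what gr.ChatInterface expects from a streaming callback.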