0xSynapse committed on
Commit 652f69c
1 Parent(s): f0d45d5

Upload 2 files

Files changed (2)
  1. app.py +438 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,438 @@
+ """Run codes."""
+ # pylint: disable=line-too-long, broad-exception-caught, invalid-name, missing-function-docstring, too-many-instance-attributes, missing-class-docstring
+ # ruff: noqa: E501
+ import gc
+ import os
+ import platform
+ import random
+ import time
+ from dataclasses import asdict, dataclass
+ from pathlib import Path
+ from fpdf import FPDF
+
+ # from types import SimpleNamespace
+ import gradio as gr
+ import psutil
+ from about_time import about_time
+ from ctransformers import AutoModelForCausalLM
+ from dl_hf_model import dl_hf_model
+ from loguru import logger
+
+
+
+
+ # url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/blob/main/llama-2-13b-chat.ggmlv3.q2_K.bin"
+ # url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q2_K.bin"  # 2.87G
+ url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/blob/main/llama-2-7b-chat.ggmlv3.q4_K_M.bin"  # 2.87G
+
+
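+ # A series of candidate prompt templates follows; each assignment overwrites
+ # the previous one, so only the last "[INST] <<SYS>> ... {question} [/INST]"
+ # template is actually used by generate() below.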
+ prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+ ### Instruction: {user_prompt}
+
+ ### Response:
+ """
+
+ prompt_template = """System: You are a helpful,
+ respectful and honest assistant. Always answer as
+ helpfully as possible, while being safe. Your answers
+ should not include any harmful, unethical, racist,
+ sexist, toxic, dangerous, or illegal content. Please
+ ensure that your responses are socially unbiased and
+ positive in nature. If a question does not make any
+ sense, or is not factually coherent, explain why instead
+ of answering something not correct. If you don't know
+ the answer to a question, please don't share false
+ information.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """System: You are a helpful assistant.
+ User: {prompt}
+ Assistant: """
+
+ prompt_template = """Question: {question}
+ Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Think step by step.
+ <</SYS>>
+
+ What NFL team won the Super Bowl in the year Justin Bieber was born?
+ [/INST]"""
+
+ prompt_template = """[INST] <<SYS>>
+ You are an unhelpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+
+ {question} [/INST]
+ """
+
+ prompt_template = """[INST] <<SYS>>
+ You are a helpful assistant.
+ <</SYS>>
+
+ {question} [/INST]
+ """
+
+ _ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+ stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+ logger.debug(f"{stop_string=}")
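+ # stop_string is the second-to-last non-empty line of the final template with
+ # a trailing colon ("<</SYS>>:"); it is only logged for now, since the `stop`
+ # field of GenerationConfig below is commented out.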
+
+ _ = psutil.cpu_count(logical=False) - 1
+ cpu_count: int = int(_) if _ else 1
+ logger.debug(f"{cpu_count=}")
+
+ LLM = None
+ gc.collect()
+
+ try:
+     model_loc, file_size = dl_hf_model(url)
+ except Exception as exc_:
+     logger.error(exc_)
+     raise SystemExit(1) from exc_
+
+ LLM = AutoModelForCausalLM.from_pretrained(
+     model_loc,
+     model_type="llama",
+     # threads=cpu_count,
+ )
+
+ logger.info(f"done load llm {model_loc=} {file_size=}G")
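+ # The quantized GGML weights are downloaded and loaded once at import time;
+ # this single LLM instance is shared by every request the app serves.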
+
+ os.environ["TZ"] = "Asia/Shanghai"
+ try:
+     time.tzset()  # type: ignore # pylint: disable=no-member
+ except Exception:
+     # Windows
+     logger.warning("Windows, can't run time.tzset()")
+
+ _ = """
+ ns = SimpleNamespace(
+     response="",
+     generator=(_ for _ in []),
+ )
+ # """
+
+
+
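+ # GenerationConfig collects the sampling/decoding options that generate()
+ # expands with asdict() and forwards to the ctransformers llama model.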
+ @dataclass
+ class GenerationConfig:
+     temperature: float = 0.7
+     top_k: int = 50
+     top_p: float = 0.9
+     repetition_penalty: float = 1.0
+     max_new_tokens: int = 512
+     seed: int = 42
+     reset: bool = False
+     stream: bool = True
+     # threads: int = cpu_count
+     # stop: list[str] = field(default_factory=lambda: [stop_string])
+
+
+ def generate(
+     question: str,
+     llm=LLM,
+     config: GenerationConfig = GenerationConfig(),
+ ):
+     """Run model inference; returns a generator when streaming is enabled."""
+     # _ = prompt_template.format(question=question)
+     # print(_)
+
+     prompt = prompt_template.format(question=question)
+
+     return llm(
+         prompt,
+         **asdict(config),
+     )
+
+
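+ # Example (sketch): with the default config (stream=True), generate() yields
+ # tokens one at a time:
+ #     for tok in generate("What is the capital of India?"):
+ #         print(tok, end="", flush=True)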
+ logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+ def user(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     history.append([user_message, None])
+     return user_message, history  # keep user_message
+
+
+ def user1(user_message, history):
+     # return user_message, history + [[user_message, None]]
+     history.append([user_message, None])
+     return "", history  # clear user_message
+
+
+ def bot_(history):
+     user_message = history[-1][0]
+     resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+     bot_message = user_message + ": " + resp
+     history[-1][1] = ""
+     for character in bot_message:
+         history[-1][1] += character
+         time.sleep(0.02)
+         yield history
+
+     history[-1][1] = resp
+     yield history
+
+
+ def bot(history):
+     user_message = history[-1][0]
+     response = []
+
+     logger.debug(f"{user_message=}")
+
+     with about_time() as atime:  # type: ignore
+         flag = 1
+         prefix = ""
+         then = time.time()
+
+         logger.debug("about to generate")
+
+         config = GenerationConfig(reset=True)
+         for elm in generate(user_message, config=config):
+             if flag == 1:
+                 logger.debug("in the loop")
+                 prefix = f"({time.time() - then:.2f}s) "
+                 flag = 0
+                 print(prefix, end="", flush=True)
+                 logger.debug(f"{prefix=}")
+             print(elm, end="", flush=True)
+             # logger.debug(f"{elm}")
+
+             response.append(elm)
+             history[-1][1] = prefix + "".join(response)
+             yield history
+
+     _ = (
+         f"(time elapsed: {atime.duration_human}, "  # type: ignore
+         f"{atime.duration/len(''.join(response)):.2f}s/char)"  # type: ignore
+     )
+
+     print(response)
+
+     history[-1][1] = "".join(response)
+     generate_pdf(history, "conversation.pdf")
+
+     yield history
+
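+ # generate_pdf() renders the whole chat history into a PDF via fpdf. The core
+ # "Arial" font in classic fpdf only covers latin-1, so messages containing
+ # emoji or other non-latin characters may fail to encode unless a Unicode TTF
+ # font is registered with pdf.add_font() instead.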
+ def generate_pdf(bot, file_path):
+     """Write the chat history (list of [user, bot] pairs) to a PDF file."""
+     if bot is None:
+         return
+
+     pdf = FPDF()
+     pdf.add_page()
+     pdf.set_font("Arial", size=10)
+     pdf.set_display_mode(zoom="default", layout="default")
+
+     for user_message, response in bot:
+         # w=0 lets each cell span to the right margin; str() guards against
+         # entries whose response is still None.
+         pdf.multi_cell(w=0, h=6, txt="User: " + str(user_message), border=0, align="L", fill=False)
+         pdf.multi_cell(w=0, h=6, txt="Bot: " + str(response), border=0, align="L", fill=False)
+
+     pdf.output(file_path)
+
+
+ def predict_api(prompt):
+     logger.debug(f"{prompt=}")
+     try:
+         # user_prompt = prompt
+         config = GenerationConfig(
+             temperature=0.2,
+             top_k=10,
+             top_p=0.9,
+             repetition_penalty=1.0,
+             max_new_tokens=512,  # adjust as needed
+             seed=42,
+             reset=True,  # reset history (cache)
+             stream=False,
+             # threads=cpu_count,
+             # stop=prompt_prefix[1:2],
+         )
+
+         response = generate(
+             prompt,
+             config=config,
+         )
+
+         logger.debug(f"api: {response=}")
+     except Exception as exc:
+         logger.error(exc)
+         response = f"{exc=}"
+     # bot = {"inputs": [response]}
+     # bot = [(prompt, response)]
+
+     return response
+
+
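+ # predict_api is wired to a hidden "Go" button below with api_name="api", so
+ # it can also be called through the Gradio client (URL is a placeholder):
+ #     from gradio_client import Client
+ #     client = Client("https://<your-space-url>")
+ #     print(client.predict("Hello", api_name="/api"))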
+ css = """
+     .importantButton {
+         background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
+         border: none !important;
+     }
+     .importantButton:hover {
+         background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
+         border: none !important;
+     }
+     .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+     .xsmall {font-size: x-small;}
+ """
+ etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+ examples_list = [
+     ["What is the capital of India"],
+     ["How to play Chess? Provide detailed steps."],
+     ["If it takes 10 hours to dry 10 clothes, assuming all the clothes are hung together at the same time for drying, then how long will it take to dry a cloth?"],
+     ["is infinity + 1 bigger than infinity?"],
+     ["Explain the plot of Oppenheimer 2023 movie in a sentence."],
+     ["How long does it take to become proficient in French, and what are the best methods for retaining information?"],
+     ["What are some common mistakes to avoid when writing code?"],
+     ["Build a prompt to generate a beautiful portrait of a horse"],
+     ["Suggest four metaphors to describe the benefits of AI"],
+     ["Write most important points of Bhagavad Gita"],
+     ["Write a summary Why is it so hard to understand women"],
+
+ ]
+
+ logger.info("start block")
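+ # UI layout: a chat window with a message box and Submit/Stop/Clear buttons,
+ # a "Download Conversation (PDF)" button, a hidden Advanced Options row, an
+ # Example Inputs accordion, a Disclaimer accordion, and a hidden accordion
+ # exposing predict_api.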
+
+ with gr.Blocks(
+     title="LlamaGPT🧠",
+     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+     css=css,
+ ) as block:
+     # buff_var = gr.State("")
+     with gr.Accordion("LlamaGPT🧠", open=False):
+
+         gr.Markdown(
+             f"""<div style="text-align: center;">
+             <h5>Gradio Demo for Meta's Llama 2 7B-chat</h5><br>
+             A few example prompts are provided below to test the model; you should also try prompts of your own.
+             </div>""",
+             elem_classes="xsmall",
+         )
+
+     # chatbot = gr.Chatbot().style(height=700)  # 500
+     chatbot = gr.Chatbot(height=500)
+
+     # buff = gr.Textbox(show_label=False, visible=True)
+
+     with gr.Row():
+         with gr.Column(scale=5):
+             msg = gr.Textbox(
+                 label="Chat Message Box",
+                 placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                 show_label=False,
+                 # container=False,
+                 lines=6,
+                 max_lines=30,
+                 show_copy_button=True,
+                 # ).style(container=False)
+             )
+         with gr.Column(scale=1, min_width=50):
+             with gr.Row():
+                 submit = gr.Button("Submit", elem_classes="xsmall")
+                 stop = gr.Button("Stop", visible=True)
+                 clear = gr.Button("Clear History", visible=True)
+
+     download_button = gr.Button("Download Conversation (PDF)", elem_classes="xsmall")
+     # generate_pdf expects (history, file_path); the PDF is written server-side
+     # to conversation.pdf, the same file bot() refreshes after each turn.
+     download_button.click(
+         lambda history: generate_pdf(history, "conversation.pdf"),
+         inputs=[chatbot],
+         outputs=None,
+     )
+     with gr.Row(visible=False):
+         with gr.Accordion("Advanced Options:", open=False):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     system = gr.Textbox(
+                         label="System Prompt",
+                         value=prompt_template,
+                         show_label=False,
+                         container=False,
+                         # ).style(container=False)
+                     )
+                 with gr.Column():
+                     with gr.Row():
+                         change = gr.Button("Change System Prompt")
+                         reset = gr.Button("Reset System Prompt")
+
+     with gr.Accordion("Example Inputs", open=True):
+         examples = gr.Examples(
+             examples=examples_list,
+             inputs=[msg],
+             examples_per_page=40,
+         )
+
+     # with gr.Row():
+     with gr.Accordion("Disclaimer", open=False):
+         _ = Path(model_loc).name
+         gr.Markdown(
+             f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+             f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+             "biased, or otherwise offensive outputs.",
+             elem_classes=["disclaimer"],
+         )
+
+     msg_submit_event = msg.submit(
+         # fn=conversation.user_turn,
+         fn=user,
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     submit_click_event = submit.click(
+         # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+         fn=user1,  # clear msg
+         inputs=[msg, chatbot],
+         outputs=[msg, chatbot],
+         queue=True,
+         # queue=False,
+         show_progress="full",
+         # api_name=None,
+     ).then(bot, chatbot, chatbot, queue=True)
+     stop.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         cancels=[msg_submit_event, submit_click_event],
+         queue=False,
+     )
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+         input_text = gr.Text()
+         api_btn = gr.Button("Go", variant="primary")
+         out_text = gr.Text()
+
+     api_btn.click(
+         predict_api,
+         input_text,
+         out_text,
+         api_name="api",
+     )
+
+     # block.load(update_buff, [], buff, every=1)
+     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+ # concurrency_count=5, max_size=20
+ # max_size=36, concurrency_count=14
+ # CPU cpu_count=2 16G, model 7G
+ # CPU UPGRADE cpu_count=8 32G, model 7G
+
+ # does not work
+ _ = """
+ # _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+ # concurrency_count = max(_, 1)
+ if psutil.cpu_count(logical=False) >= 8:
+     # concurrency_count = max(int(32 / file_size) - 1, 1)
+ else:
+     # concurrency_count = max(int(16 / file_size) - 1, 1)
+ # """
+
+ concurrency_count = 1
+ logger.info(f"{concurrency_count=}")
+
+ block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ ctransformers  # ==0.2.10 0.2.13
+ transformers  # ==4.30.2
+ huggingface_hub
+ gradio
+ loguru
+ about-time
+ psutil
+ dl-hf-model
+ fpdf
+ pathlib  # note: pathlib ships with Python 3, so this entry is likely unnecessary