jeduardogruiz committed on
Commit
e39a2da
1 Parent(s): 33a234a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -155
app.py CHANGED
@@ -1,159 +1,8 @@
1
- import spaces
2
- import json
3
- import subprocess
4
- from llama_cpp import Llama
5
- from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
6
- from llama_cpp_agent.providers import LlamaCppPythonProvider
7
- from llama_cpp_agent.chat_history import BasicChatHistory
8
- from llama_cpp_agent.chat_history.messages import Roles
9
  import gradio as gr
10
- from huggingface_hub import hf_hub_download
11
 
12
- hf_hub_download(
13
- repo_id="bartowski/gemma-2-9b-it-GGUF",
14
- filename="gemma-2-9b-it-Q5_K_M.gguf",
15
- local_dir="./models"
16
- )
17
 
18
-
19
-
20
- hf_hub_download(
21
- repo_id="bartowski/gemma-2-27b-it-GGUF",
22
- filename="gemma-2-27b-it-Q5_K_M.gguf",
23
- local_dir="./models"
24
- )
25
-
26
-
27
- llm = None
28
- llm_model = None
29
-
30
- @spaces.GPU(duration=120)
31
- def respond(
32
- message,
33
- history: list[tuple[str, str]],
34
- model,
35
- system_message,
36
- max_tokens,
37
- temperature,
38
- top_p,
39
- top_k,
40
- repeat_penalty,
41
- ):
42
- chat_template = MessagesFormatterType.GEMMA_2
43
-
44
- global llm
45
- global llm_model
46
-
47
- if llm is None or llm_model != model:
48
- llm = Llama(
49
- model_path=f"models/{model}",
50
- flash_attn=True,
51
- n_gpu_layers=81,
52
- n_batch=1024,
53
- n_ctx=8192,
54
- )
55
- llm_model = model
56
-
57
- provider = LlamaCppPythonProvider(llm)
58
-
59
- agent = LlamaCppAgent(
60
- provider,
61
- system_prompt=f"{system_message}",
62
- predefined_messages_formatter_type=chat_template,
63
- debug_output=True
64
- )
65
-
66
- settings = provider.get_provider_default_settings()
67
- settings.temperature = temperature
68
- settings.top_k = top_k
69
- settings.top_p = top_p
70
- settings.max_tokens = max_tokens
71
- settings.repeat_penalty = repeat_penalty
72
- settings.stream = True
73
-
74
- messages = BasicChatHistory()
75
-
76
- for msn in history:
77
- user = {
78
- 'role': Roles.user,
79
- 'content': msn[0]
80
- }
81
- assistant = {
82
- 'role': Roles.assistant,
83
- 'content': msn[1]
84
- }
85
- messages.add_message(user)
86
- messages.add_message(assistant)
87
 
88
- stream = agent.get_chat_response(
89
- message,
90
- llm_sampling_settings=settings,
91
- chat_history=messages,
92
- returns_streaming_generator=True,
93
- print_output=False
94
- )
95
-
96
- outputs = ""
97
- for output in stream:
98
- outputs += output
99
- yield outputs
100
-
101
- description = """<p align="center">Defaults to 27B it (you can switch to 9b it from additional inputs)</p>
102
- <p><center>
103
- <a href="https://huggingface.co/google/gemma-2-27b-it" target="_blank">[27B it Model]</a>
104
- <a href="https://huggingface.co/google/gemma-2-9b-it" target="_blank">[9B it Model]</a>
105
- <a href="https://huggingface.co/bartowski/gemma-2-27b-it-GGUF" target="_blank">[27B it Model GGUF]</a>
106
- <a href="https://huggingface.co/bartowski/gemma-2-9b-it-GGUF" target="_blank">[9B it Model GGUF]</a>
107
- </center></p>
108
- """
109
-
110
- demo = gr.ChatInterface(
111
- respond,
112
- additional_inputs=[
113
- gr.Dropdown([
114
- 'gemma-2-9b-it-Q5_K_M.gguf',
115
- 'gemma-2-27b-it-Q5_K_M.gguf'
116
- ],
117
- value="gemma-2-27b-it-Q5_K_M.gguf",
118
- label="Model"
119
- ),
120
- gr.Textbox(value="You are a helpful assistant.", label="System message"),
121
- gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
122
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
123
- gr.Slider(
124
- minimum=0.1,
125
- maximum=1.0,
126
- value=0.95,
127
- step=0.05,
128
- label="Top-p",
129
- ),
130
- gr.Slider(
131
- minimum=0,
132
- maximum=100,
133
- value=40,
134
- step=1,
135
- label="Top-k",
136
- ),
137
- gr.Slider(
138
- minimum=0.0,
139
- maximum=2.0,
140
- value=1.1,
141
- step=0.1,
142
- label="Repetition penalty",
143
- ),
144
- ],
145
- retry_btn="Retry",
146
- undo_btn="Undo",
147
- clear_btn="Clear",
148
- submit_btn="Send",
149
- title="Chat with Gemma 2 using llama.cpp",
150
- description=description,
151
- chatbot=gr.Chatbot(
152
- scale=1,
153
- likeable=False,
154
- show_copy_button=True
155
- )
156
- )
157
-
158
- if __name__ == "__main__":
159
- demo.launch()
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
 
3
+ def greet(name):
4
+ return "Hello " + name + "!"
 
 
 
5
 
6
+ demo = gr.Interface(fn=greet, inputs="textbox", outputs="textbox")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ demo.launch(share=True) # Share your demo with just 1 extra parameter 🚀