Sean-Case
committed on
Commit
•
9aef340
1
Parent(s):
febdc08
Setting gpu_layer slider visibility to false to prevent accidental misuse
Browse files
- .gitignore +1 -0
- app.py +4 -4
.gitignore
CHANGED
@@ -3,5 +3,6 @@
|
|
3 |
*.pdf
|
4 |
*.spec
|
5 |
*.toc
|
|
|
6 |
build/*
|
7 |
dist/*
|
|
|
3 |
*.pdf
|
4 |
*.spec
|
5 |
*.toc
|
6 |
+
bootstrapper.py
|
7 |
build/*
|
8 |
dist/*
|
app.py
CHANGED
@@ -83,7 +83,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
|
|
83 |
if model_type == "Orca Mini":
|
84 |
|
85 |
gpu_config.update_gpu(gpu_layers)
|
86 |
-
cpu_config.update_gpu(
|
87 |
|
88 |
print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
|
89 |
|
@@ -91,9 +91,9 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
|
|
91 |
print(vars(cpu_config))
|
92 |
|
93 |
try:
|
94 |
-
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(
|
95 |
except:
|
96 |
-
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(
|
97 |
|
98 |
tokenizer = []
|
99 |
|
@@ -220,7 +220,7 @@ with block:
|
|
220 |
|
221 |
with gr.Tab("Advanced features"):
|
222 |
model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
|
223 |
-
gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0)
|
224 |
|
225 |
gr.HTML(
|
226 |
"<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
|
|
|
83 |
if model_type == "Orca Mini":
|
84 |
|
85 |
gpu_config.update_gpu(gpu_layers)
|
86 |
+
cpu_config.update_gpu(0)
|
87 |
|
88 |
print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
|
89 |
|
|
|
91 |
print(vars(cpu_config))
|
92 |
|
93 |
try:
|
94 |
+
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
|
95 |
except:
|
96 |
+
model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
|
97 |
|
98 |
tokenizer = []
|
99 |
|
|
|
220 |
|
221 |
with gr.Tab("Advanced features"):
|
222 |
model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
|
223 |
+
gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0, visible=False)
|
224 |
|
225 |
gr.HTML(
|
226 |
"<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
|