cuda
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ DESCRIPTION = """
|
|
15 |
"""
|
16 |
|
17 |
tokenizer = AutoTokenizer.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
|
18 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="
|
19 |
|
20 |
def format_prompt(user_message, system_message="You are an expert developer in all programming languages. Help me with my code. Answer any questions I have with code examples."):
|
21 |
prompt = f"<|im_start|>assistant\n{system_message}<|im_end|>\n<|im_start|>\nuser\n{user_message}<|im_end|>\nassistant\n"
|
@@ -59,8 +59,8 @@ with gr.Blocks() as demo:
|
|
59 |
submit_button = gr.Button('Submit', variant='primary')
|
60 |
|
61 |
with gr.Accordion(label='Advanced options', open=False):
|
62 |
-
max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=
|
63 |
-
temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1
|
64 |
top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
|
65 |
top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
|
66 |
do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)
|
|
|
15 |
"""
|
16 |
|
17 |
# Load the tokenizer for `model_id`.
# Fix: `device_map` is a model-loading argument (accelerate weight placement);
# tokenizers run on CPU and `AutoTokenizer.from_pretrained` silently ignores it,
# so passing it here only misleads readers — dropped.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
18 |
+
# Load the causal-LM weights in bfloat16 onto a single CUDA device.
# NOTE(review): `trust_remote_code=True` executes Python shipped with the model
# repo — only acceptable for a trusted `model_id`. Assumes a CUDA GPU is
# available at startup (no CPU fallback) — TODO confirm.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", torch_dtype=torch.bfloat16, trust_remote_code=True)
|
19 |
|
20 |
# Build the chat prompt string fed to the model (function continues beyond this
# diff hunk; only the visible prefix is annotated here).
def format_prompt(user_message, system_message="You are an expert developer in all programming languages. Help me with my code. Answer any questions I have with code examples."):
|
21 |
# NOTE(review): this template looks malformed for ChatML-style models — the
# system message is emitted inside an `assistant` turn (expected `system`),
# there is a stray "<|im_start|>\n" before "user", and the trailing "assistant"
# turn has no "<|im_start|>" tag. Verify against the model's chat template
# (e.g. tokenizer.apply_chat_template) before changing, since the string is
# runtime behavior.
prompt = f"<|im_start|>assistant\n{system_message}<|im_end|>\n<|im_start|>\nuser\n{user_message}<|im_end|>\nassistant\n"
|
|
|
59 |
# Primary action button that triggers generation (wired up outside this hunk).
submit_button = gr.Button('Submit', variant='primary')
|
60 |
|
61 |
# Collapsed panel grouping the sampling hyper-parameter controls below.
with gr.Accordion(label='Advanced options', open=False):
|
62 |
+
# Upper bound on tokens generated per request (default 512).
# NOTE(review): maximum=55000 far exceeds most models' context windows —
# confirm the intended ceiling against the loaded model's max length.
max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=512)
|
63 |
+
# Softmax temperature; the new default 0.1 biases toward near-greedy,
# low-variance output.
temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=0.1)
|
64 |
# Nucleus-sampling cutoff: keep the smallest token set whose cumulative
# probability reaches top_p.
top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
|
65 |
# Restrict sampling to the top_k most probable tokens at each step.
top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=40)
|
66 |
# NOTE(review): label says "Disable for faster inference" while the variable
# name suggests this toggles `do_sample`, and the default is True — the
# label/value semantics look inverted. Verify against how this value is passed
# to generate() before relying on the label.
do_sample_checkbox = gr.Checkbox(label='Disable for faster inference', value=True)
|