# Hugging Face Space: Gemma-2-9b text-generation demo.
# NOTE(review): the Space's status page reported "Runtime error" at capture time.
import gradio as gr
import spaces
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GemmaForCausalLM,
    pipeline,
)
# 4-bit quantization keeps the 9B-parameter model inside a single-GPU
# memory budget.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b")
# NOTE(review): google/gemma-2-9b is a Gemma-2 checkpoint (Gemma2Config);
# loading it through the Gemma-1 class GemmaForCausalLM raises a
# config/weight mismatch at startup — the likely "Runtime error" above.
# AutoModelForCausalLM resolves the correct architecture from the
# checkpoint's own config.
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-9b",
    quantization_config=quantization_config,
)

# Generation-length bounds shared by generate() and the UI slider.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
@spaces.GPU  # `spaces` was imported but unused; without this decorator a ZeroGPU Space never attaches a GPU, so .to("cuda") fails per request.
def generate(
    message: str,
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> str:
    """Generate a continuation of *message* with the quantized Gemma model.

    Args:
        message: Prompt text to continue.
        max_new_tokens: Upper bound on generated tokens (clamped to
            MAX_MAX_NEW_TOKENS).
        temperature: Sampling temperature; higher is more random.
        top_p: Nucleus-sampling cumulative-probability cutoff.
        top_k: Number of highest-probability tokens kept at each step.
        repetition_penalty: Penalty > 1.0 discourages repeated tokens.

    Returns:
        The decoded generation (prompt included), special tokens stripped.
    """
    # Clamp so direct API calls can't request unbounded generation beyond
    # what the UI slider advertises.
    max_new_tokens = min(int(max_new_tokens), MAX_MAX_NEW_TOKENS)
    inputs = tokenizer(message, return_tensors="pt").to("cuda")
    # do_sample=True is required for temperature/top_p/top_k to take effect;
    # without it, generate() decodes greedily and silently ignores them.
    outputs = model.generate(
        **inputs,
        do_sample=True,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Wire generate() to a simple form UI. Slider bounds mirror the ranges
# generate() accepts; defaults match its keyword defaults.
gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(label="Prompt"),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.6,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        ),
        gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            value=1.2,
        ),
    ],
    outputs="text",
    examples=[['Write me a poem about Machine Learning.']],
).launch()