Spaces:
Running
on
Zero
Running
on
Zero
tonic
committed on
Commit
•
574defd
1
Parent(s):
ad35440
add zero gpu support
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
|
36 |
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
37 |
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
38 |
# model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config).to("cuda")
|
39 |
-
|
40 |
def generate_text(prompt, temperature, max_length):
|
41 |
inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
|
42 |
outputs = model.generate(inputs, max_length=max_length, top_p=0.9, temperature=temperature)
|
|
|
36 |
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
37 |
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
38 |
# model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config).to("cuda")
|
39 |
+
@spaces.GPU
|
40 |
def generate_text(prompt, temperature, max_length):
|
41 |
inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
|
42 |
outputs = model.generate(inputs, max_length=max_length, top_p=0.9, temperature=temperature)
|