
Commit 2f65e28 (1 parent: cd08250), committed by Tonic

Update app.py

Files changed (1):
app.py: +15 -3
app.py CHANGED
@@ -21,9 +21,21 @@ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
 model = AutoModelForCausalLM.from_pretrained( model_path, quantization_config=quantization_config)
 
 @spaces.GPU
-def generate_text(prompt, temperature, max_length):
-    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
-    outputs = model.generate(inputs, max_length=max_length, top_p=0.9, temperature=temperature, do_sample=True)
+def generate_text(prompt, temperature=0.9, max_length=1200):
+    # Encode the inputs
+    inputs = tokenizer.encode(prompt, return_tensors="pt")
+    attention_mask = torch.ones(inputs.shape, dtype=torch.long)
+    inputs = inputs.to("cuda")
+    attention_mask = attention_mask.to("cuda")
+    outputs = model.generate(
+        inputs,
+        attention_mask=attention_mask,
+        max_length=max_length,
+        top_p=0.9,
+        temperature=temperature,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id
+    )
     return tokenizer.decode(outputs[0])
 
 def gradio_app():
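
For readers skimming the hunk: the updated generate_text gains default arguments (temperature=0.9, max_length=1200), builds an explicit attention mask with torch.ones (so app.py is assumed to import torch elsewhere), moves both tensors to CUDA, and passes pad_token_id=tokenizer.eos_token_id, the usual way to avoid the transformers warning about an unset attention mask and pad token during open-ended generation. A minimal usage sketch follows; the prompts and parameter values are illustrative only and assume the tokenizer and 8-bit model loaded above on a CUDA-capable GPU:

# Illustrative sketch only: the prompts and values below are made up, not taken from the Space.
# Assumes app.py has already loaded `tokenizer` and `model` (8-bit, on CUDA) and imports torch,
# since the updated generate_text calls torch.ones().
text_with_defaults = generate_text("Tell me about ZeroGPU Spaces.")  # uses temperature=0.9, max_length=1200
text_custom = generate_text(
    "Write a haiku about 8-bit quantization.",
    temperature=0.7,
    max_length=200,
)
print(text_with_defaults)
print(text_custom)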