Update app.py
app.py
CHANGED
@@ -25,7 +25,7 @@ if not torch.cuda.is_available():
 
 
 if torch.cuda.is_available():
-    model_id = "
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.padding_side = 'right'
@@ -61,6 +61,11 @@
     input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
     gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
+
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
 
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
@@ -71,8 +76,9 @@
         top_p=top_p,
         #top_k=top_k,
         temperature=temperature,
-        eos_token_id=
-
+        eos_token_id=terminators,
+        #eos_token_id=tokenizer.eos_token_id,
+        #pad_token_id=tokenizer.pad_token_id,
         #num_beams=1,
         #repetition_penalty=1.2,
     )
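For context on what this commit changes: Llama-3 Instruct models end each assistant turn with the special `<|eot_id|>` token rather than the tokenizer's default EOS token, so `generate()` must treat both ids as stop tokens or it will run past the end of the reply. Below is a minimal sketch of how the edited lines fit together outside Gradio; the prompt, `max_new_tokens`, and sampling values are illustrative placeholders, not the Space's actual settings.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # gated repo: requires an accepted license and HF token
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)

# Llama-3 Instruct closes each assistant turn with <|eot_id|>, not the plain
# EOS token, so generation should stop on whichever of the two appears first.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

messages = [{"role": "user", "content": "Hello!"}]  # illustrative input
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=256,
    do_sample=True,
    top_p=0.9,
    temperature=0.6,
    eos_token_id=terminators,  # generate() accepts a single id or a list of ids
)
# generate() blocks until done, so the Space runs it in a background thread
# and yields partial text from the streamer as tokens arrive.
Thread(target=model.generate, kwargs=generate_kwargs).start()
for text in streamer:
    print(text, end="", flush=True)

Note that `eos_token_id=terminators` passes a list: generation halts as soon as any listed id is produced, which is why the commented-out single-id form was replaced.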
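A side note on the unchanged loading line: `load_in_4bit=True` quantizes the weights through bitsandbytes at load time. Recent transformers versions express the same thing via an explicit `BitsAndBytesConfig`; a sketch of the equivalent call, with assumed (not confirmed) config values:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# More explicit form of load_in_4bit=True; the compute dtype shown here is a
# common default and an assumption, not something this Space configures.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    device_map="auto",
    quantization_config=bnb_config,
)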