reach-vb HF staff committed on
Commit
625f637
1 Parent(s): 2479537
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -5,11 +5,12 @@ from typing import Iterator
5
  import gradio as gr
6
  import spaces
7
  import torch
8
- from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
9
 
10
  DESCRIPTION = """\
11
- # Gemma 2 2B IT
12
 
 
13
  Gemma 2 is Google's latest iteration of open LLMs.
14
  This is a demo of [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it), fine-tuned for instruction following.
15
  For more details, please check [our post](https://huggingface.co/blog/gemma2).
@@ -23,14 +24,13 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
23
 
24
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
25
 
26
- model_id = "google/gemma-2-2b-it"
27
- tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
28
  model = AutoModelForCausalLM.from_pretrained(
29
  model_id,
30
  device_map="auto",
31
  torch_dtype=torch.bfloat16,
32
  )
33
- model.config.sliding_window = 4096
34
  model.eval()
35
 
36
 
 
5
  import gradio as gr
6
  import spaces
7
  import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
 
10
  DESCRIPTION = """\
11
+ # L3.2 3B Instruct
12
 
13
+ Need to Update the below text
14
  Gemma 2 is Google's latest iteration of open LLMs.
15
  This is a demo of [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it), fine-tuned for instruction following.
16
  For more details, please check [our post](https://huggingface.co/blog/gemma2).
 
24
 
25
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
26
 
27
+ model_id = "nltpt/Llama-3.2-3B-Instruct"
28
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
29
  model = AutoModelForCausalLM.from_pretrained(
30
  model_id,
31
  device_map="auto",
32
  torch_dtype=torch.bfloat16,
33
  )
 
34
  model.eval()
35
 
36