affandes committed on
Commit
d86eb18
1 Parent(s): 85e8e21

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -0
app.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ import gradio as gr
3
+
4
# Checkpoint identifier handed to both `from_pretrained` calls below.
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
# Target device for the model and its input tensors: "cuda" or "cpu".
device = "cpu"

# Load the tokenizer and the causal LM once at import time, then move the
# model to the selected device so every request reuses the same instances.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
model = model.to(device)
8
+
9
def predict(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: Text of the latest user message.
        history: Earlier turns as a list of ``{"role": ..., "content": ...}``
            dicts. The list is not modified.

    Returns:
        The text of the newly generated assistant turn, without chat-template
        control tokens.
    """
    # Build a new list instead of appending to `history`, which is owned by
    # the caller and must not be mutated behind its back.
    messages = [*history, {"role": "user", "content": message}]

    # `add_generation_prompt=True` ends the prompt with the assistant header
    # so the model continues as the assistant. Tokenizing in the same call
    # avoids re-encoding already-templated text, and `return_dict=True` also
    # yields the attention mask that `generate` expects.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
    )

    # Decode only the tokens produced after the prompt. This is robust where
    # splitting the full decoded text on literal chat markers is not: that
    # approach can return an earlier turn or text typed by the user.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
17
+
18
# Chat UI that calls `predict` for each user message; history is exchanged
# in the "messages" format (a list of role/content dicts).
demo = gr.ChatInterface(fn=predict, type="messages")

# Start the Gradio app.
demo.launch()