vilarin committed
Commit c4592e6
1 Parent(s): c73bd69

Update app.py

Files changed (1)
  1. app.py +14 -34
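
This change swaps the hand-rolled streaming path (apply_chat_template plus a TextIteratorStreamer driven on a background thread) for the stream_chat helper that ships with InternLM's remote modeling code, and records the supported checkpoints in MODEL_LIST.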
app.py CHANGED
@@ -1,12 +1,11 @@
 import os
-import threading as Thread
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 
-
+MODEL_LIST = ["internlm/internlm2_5-7b-chat", "internlm/internlm2_5-7b-chat-1m"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = os.environ.get("MODEL_ID", None)
 MODEL_NAME = MODEL_ID.split("/")[-1]
@@ -44,40 +43,21 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = model.eval()
 
 @spaces.GPU()
-def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-    conversation = []
-    for prompt, answer in history:
-        conversation.extend([
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
-        ])
-    conversation.append({"role": "user", "content": message})
-
-    print(f"Conversation is -\n{conversation}")
-
-    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
-
-    streamer = TextIteratorStreamer(tokenizer, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces':False,})
-
-    generate_kwargs = dict(
-        input_ids=input_ids,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=penalty,
-        do_sample=True,
-        temperature=temperature,
-        eos_token_id = [2,92542],
-    )
-
-    thread = Thread(target=model.generate, kwargs=generate_kwargs)
-    thread.start()
-
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
+def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+    print(history)
+
+    # stream_chat is a generator yielding (response, history) tuples; greedy decode when temperature is 0.
+    for resp, hist in model.stream_chat(
+        tokenizer,
+        query=message,
+        history=history,
+        max_new_tokens=max_new_tokens,
+        do_sample=False if temperature == 0 else True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+    ):
+        yield resp
 
 
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
 
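
The stream_chat call in the new handler relies on the generator API defined in InternLM's trust_remote_code modeling files. A minimal standalone sketch of the same pattern outside Gradio, following InternLM's published usage (the checkpoint choice, dtype, and prompt below are illustrative, not part of this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "internlm/internlm2_5-7b-chat"  # first entry of MODEL_LIST above

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,  # stream_chat is defined in the repo's modeling code
).eval()

history = []
printed = 0
# Each yield carries the full response generated so far plus the updated
# history, so printing only the new suffix produces the streaming effect.
for response, history in model.stream_chat(tokenizer, "Hello!", history=history):
    print(response[printed:], end="", flush=True)
    printed = len(response)
print()

Feeding the final history back into the next stream_chat call is what preserves conversation state, which is what the Gradio handler does with the Chatbot's (user, assistant) pairs.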