nisten committed
Commit 3802faf
1 Parent(s): eb50195

Update app.py

Files changed (1)
app.py +25 -14
app.py CHANGED
@@ -6,12 +6,18 @@ import subprocess
 import sys
 
 # Force install the specific transformers version from the GitHub PR
-subprocess.check_call([sys.executable, "-m", "pip", "install", "-upgrade" "--force-reinstall", "torch", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
+subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
 
-model_name = "allenai/OLMoE-1B-7B-0924"
+model_name = "allenai/OLMoE-1B-7B-0924-Instruct"
 
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype="auto").cuda().eval()
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# Wrap model loading in a try-except block to handle potential errors
+try:
+    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).cuda().eval()
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+except Exception as e:
+    print(f"Error loading model: {e}")
+    model = None
+    tokenizer = None
 
 system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                  "who is stuck inside a step function machine and remembers and counts everything he says "
@@ -24,23 +30,27 @@ prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
 def generate_response(message, history):
+    if model is None or tokenizer is None:
+        return "Model or tokenizer not loaded properly. Please check the logs."
+
     full_prompt = f"{system_prompt}\n{user_prompt}{message}{prompt_suffix}{assistant_prompt}"
 
     inputs = tokenizer(full_prompt, return_tensors="pt").to("cuda:0")
-    generate_ids = model.generate(
-        **inputs,
-        max_new_tokens=1000,
-        do_sample=True,
-        temperature=0.7,
-        eos_token_id=tokenizer.eos_token_id,
-    )
+    with torch.no_grad():
+        generate_ids = model.generate(
+            **inputs,
+            max_new_tokens=1000,
+            do_sample=True,
+            temperature=0.7,
+            eos_token_id=tokenizer.eos_token_id,
+        )
     response = tokenizer.batch_decode(generate_ids[:, inputs['input_ids'].shape[1]:],
                                       skip_special_tokens=True,
                                       clean_up_tokenization_spaces=False)[0]
     return response.strip()
 
 with gr.Blocks() as demo:
-    gr.Markdown("#Karpathy Chatbot")
+    gr.Markdown("# Pissed Off Karpathy Chatbot")
     chatbot = gr.Chatbot()
     msg = gr.Textbox()
     clear = gr.Button("Clear")
@@ -59,5 +69,6 @@ with gr.Blocks() as demo:
     )
     clear.click(lambda: None, None, chatbot, queue=False)
 
-demo.queue(api_open=False)
-demo.launch(debug=True, show_api=False)
+if __name__ == "__main__":
+    demo.queue(api_open=False)
+    demo.launch(debug=True, show_api=False)
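The new generation path wraps model.generate in torch.no_grad() (no autograd graph is needed at inference time) and decodes only the newly generated tokens by slicing the prompt length off generate_ids. A minimal runnable sketch of the same pattern, using a tiny placeholder checkpoint (sshleifer/tiny-gpt2 here is an assumption for illustration) rather than the OLMoE model from this commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model for illustration; app.py loads allenai/OLMoE-1B-7B-0924-Instruct.
model_name = "sshleifer/tiny-gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer("Hello there", return_tensors="pt")

# Inference only: skip autograd bookkeeping to save memory.
with torch.no_grad():
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=20,
        do_sample=True,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
    )

# generate() returns prompt + continuation; slice off the prompt tokens so
# only the model's reply is decoded, as app.py does with
# generate_ids[:, inputs['input_ids'].shape[1]:].
new_tokens = generate_ids[:, inputs["input_ids"].shape[1]:]
print(tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip())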
 
 
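For context, full_prompt stitches together template pieces defined outside the hunks shown above; only prompt_suffix = "<|end|>\n" is visible, in the second hunk header. A sketch of how a <|user|>/<|assistant|> style template of this shape typically assembles; the user_prompt and assistant_prompt values below are assumptions, not taken from this commit:

# Assumed marker values; only prompt_suffix appears in the diff.
system_prompt = "Adopt the persona of hilariously pissed off Andrej Karpathy ..."
user_prompt = "<|user|>\n"            # assumption
assistant_prompt = "<|assistant|>\n"  # assumption
prompt_suffix = "<|end|>\n"           # shown in the hunk header

message = "Why is my loss NaN?"
full_prompt = f"{system_prompt}\n{user_prompt}{message}{prompt_suffix}{assistant_prompt}"
print(full_prompt)

Ending the string with assistant_prompt leaves the model positioned to write the assistant turn, and generation stops once it emits the token matching eos_token_id.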