Update app.py
app.py CHANGED
@@ -6,12 +6,18 @@ import subprocess
 import sys
 
 # Force install the specific transformers version from the GitHub PR
-subprocess.check_call([sys.executable, "-m", "pip", "install", "
+subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
 
-model_name = "allenai/OLMoE-1B-7B-0924"
+model_name = "allenai/OLMoE-1B-7B-0924-Instruct"
 
-model
-
+# Wrap model loading in a try-except block to handle potential errors
+try:
+    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).cuda().eval()
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+except Exception as e:
+    print(f"Error loading model: {e}")
+    model = None
+    tokenizer = None
 
 system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                  "who is stuck inside a step function machine and remembers and counts everything he says "
@@ -24,23 +30,27 @@ prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
 def generate_response(message, history):
+    if model is None or tokenizer is None:
+        return "Model or tokenizer not loaded properly. Please check the logs."
+
     full_prompt = f"{system_prompt}\n{user_prompt}{message}{prompt_suffix}{assistant_prompt}"
 
     inputs = tokenizer(full_prompt, return_tensors="pt").to("cuda:0")
-
-
-
-
-
-
-
+    with torch.no_grad():
+        generate_ids = model.generate(
+            **inputs,
+            max_new_tokens=1000,
+            do_sample=True,
+            temperature=0.7,
+            eos_token_id=tokenizer.eos_token_id,
+        )
     response = tokenizer.batch_decode(generate_ids[:, inputs['input_ids'].shape[1]:],
                                       skip_special_tokens=True,
                                       clean_up_tokenization_spaces=False)[0]
     return response.strip()
 
 with gr.Blocks() as demo:
-    gr.Markdown("#Karpathy Chatbot")
+    gr.Markdown("# Pissed Off Karpathy Chatbot")
     chatbot = gr.Chatbot()
     msg = gr.Textbox()
     clear = gr.Button("Clear")
@@ -59,5 +69,6 @@ with gr.Blocks() as demo:
     )
     clear.click(lambda: None, None, chatbot, queue=False)
 
-
-    demo.
+if __name__ == "__main__":
+    demo.queue(api_open=False)
+    demo.launch(debug=True, show_api=False)