5to9 committed on
Commit 75b1a69
1 Parent(s): 2569b24

0.11 simplifying wo pharia

Files changed (1): app.py (+14, -25)
app.py CHANGED
@@ -8,13 +8,15 @@ import os
 
 from threading import Thread
 
+# Status: Breaks during generation
+
 logging.basicConfig(level=logging.DEBUG)
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 login(token=HF_TOKEN)
 
 models_available = [
-    "Aleph-Alpha/Pharia-1-LLM-7B-control-hf",
+    "NousResearch/Meta-Llama-3.1-8B-Instruct",
     "mistralai/Mistral-7B-Instruct-v0.3",
 ]
 
@@ -58,7 +60,6 @@ def load_model_a(model_id):
         device_map="auto",
         trust_remote_code=True,
     ).eval()
-    model_a.tie_weights()
     return gr.update(label=model_id)
 
 def load_model_b(model_id):
@@ -97,29 +98,17 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
     new_messages_a = system_prompt_list + chat_history_a + input_text_list
     new_messages_b = system_prompt_list + chat_history_b + input_text_list
 
-    if "Pharia" in model_id_a:
-        logging.debug("***** Model a is Pharia based, applying own template")
-        formatted_message_a = apply_chat_template(new_messages_a, add_generation_prompt=True)
-        logging.debug(f"***** formatted message is {formatted_message_a}")
-        input_ids_a = tokenizer_b(formatted_message_a, return_tensors="pt").input_ids.to(model_a.device)
-    else:
-        input_ids_a = tokenizer_a.apply_chat_template(
-            new_messages_a,
-            add_generation_prompt=True,
-            return_tensors="pt"
-        ).to(model_a.device)
-
-    if "Pharia" in model_id_b:
-        logging.debug("model b is Pharia based, applying own template")
-        formatted_message_b = apply_chat_template(new_messages_a, add_generation_prompt=True)
-        logging.debug(f"***** formatted message is {formatted_message_b}")
-        input_ids_b = tokenizer_b(formatted_message_b, return_tensors="pt").input_ids.to(model_b.device)
-    else:
-        input_ids_b = tokenizer_b.apply_chat_template(
-            new_messages_b,
-            add_generation_prompt=True,
-            return_tensors="pt"
-        ).to(model_b.device)
+    input_ids_a = tokenizer_a.apply_chat_template(
+        new_messages_a,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model_a.device)
+
+    input_ids_b = tokenizer_b.apply_chat_template(
+        new_messages_b,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model_b.device)
 
     generation_kwargs_a = dict(
         input_ids=input_ids_a,
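The simplified path drops the hand-rolled Pharia formatter and relies entirely on each tokenizer's built-in chat template. A minimal standalone sketch of that call (the model choice and message content here are illustrative, not taken from app.py; gated checkpoints may additionally need the HF_TOKEN login the app performs):

from transformers import AutoTokenizer

# Illustrative model; app.py loads whichever entry of models_available is selected.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")

messages = [
    {"role": "user", "content": "Hello, who are you?"},
]

# apply_chat_template wraps the messages in the model's own prompt format and,
# with add_generation_prompt=True, appends the assistant-turn marker so the
# model continues as the assistant. return_tensors="pt" returns input_ids directly.
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
)
print(input_ids.shape)  # e.g. torch.Size([1, N])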
 
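The diff cuts off before generation_kwargs_a is fully defined, but the `from threading import Thread` import together with a kwargs dict is the usual sign of the transformers TextIteratorStreamer pattern for streaming tokens into a Gradio chatbot. A sketch of that pattern for a single model, assuming standard transformers APIs; none of the names below come from app.py beyond what the diff shows:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "mistralai/Mistral-7B-Instruct-v0.3"  # placeholder; any chat model works
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto").eval()

input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Summarize the plot of Hamlet in one line."}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

# skip_prompt=True streams only newly generated tokens, not the prompt itself.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=256,
)

# generate() blocks, so it runs in a worker thread; the main thread consumes
# the streamer iterator and can push partial text to the UI as it arrives.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

partial = ""
for chunk in streamer:
    partial += chunk
thread.join()
print(partial)

If the "# Status: Breaks during generation" note refers to this stage, the device placement of input_ids and the tokenizer passed to the streamer are the usual first things to check.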