Llama-3.2-1b-CPU

Running

App Files Community

KingNish committed on Sep 26

Commit

ebdfef4

•

1 Parent(s): b47259c

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -38

app.py CHANGED Viewed

@@ -9,10 +9,8 @@ from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
-# Global variables to store the model and agent
 llm = None
 llm_model = None
-agent = None
 # Download the new model
 hf_hub_download(
@@ -24,31 +22,6 @@ hf_hub_download(
 def get_messages_formatter_type(model_name):
     return MessagesFormatterType.LLAMA_3
-def load_model(model_path):
-    global llm
-    global llm_model
-    if llm is None or llm_model != model_path:
-        llm = Llama(
-            model_path=model_path,
-            n_gpu_layers=0,  # Adjust based on your GPU
-            n_batch=32398,     # Adjust based on your RAM
-            n_ctx=512,      # Adjust based on your RAM and desired context length
-        )
-        llm_model = model_path
-    return llm
-def load_agent(llm, system_message, chat_template):
-    global agent
-    if agent is None:
-        provider = LlamaCppPythonProvider(llm)
-        agent = LlamaCppAgent(
-            provider,
-            system_prompt=system_message,
-            predefined_messages_formatter_type=chat_template,
-            debug_output=True
-        )
-    return agent
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -61,13 +34,29 @@ def respond(
     repeat_penalty,
 ):
     global llm
-    global agent
     chat_template = get_messages_formatter_type(model)
-    llm = load_model(f"models/{model}")
-    agent = load_agent(llm, system_message, chat_template)
-    settings = agent.provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
@@ -129,9 +118,19 @@ demo = gr.ChatInterface(
             value="llama-3.2-1b-instruct-q4_k_m.gguf",
             label="Model"
         ),
-        gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta.""", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=2.0,
@@ -182,11 +181,11 @@ demo = gr.ChatInterface(
         ["Explain the difference between machine learning and deep learning."],
         ["Summarize the key points of climate change and its global impact."],
         ["Explain quantum computing to a 10-year-old."],
-        ["Design a step-by-Step Meal Plan for Weight Loss and Muscle Gain."],
     ],
     cache_examples=False,
-    autofocus = False,
-    concurrency_limit = None
 )
 if __name__ == "__main__":

 import gradio as gr
 from huggingface_hub import hf_hub_download
 llm = None
 llm_model = None
 # Download the new model
 hf_hub_download(
 def get_messages_formatter_type(model_name):
     return MessagesFormatterType.LLAMA_3
 def respond(
     message,
     history: list[tuple[str, str]],
     repeat_penalty,
 ):
     global llm
+    global llm_model
     chat_template = get_messages_formatter_type(model)
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            n_gpu_layers=0,  # Adjust based on your GPU
+            n_batch=32398,     # Adjust based on your RAM
+            n_ctx=512,      # Adjust based on your RAM and desired context length
+        )
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=f"{system_message}",
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+    settings = provider.get_provider_default_settings()
     settings.temperature = temperature
     settings.top_k = top_k
     settings.top_p = top_p
             value="llama-3.2-1b-instruct-q4_k_m.gguf",
             label="Model"
         ),
+        gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
+1. Complex reasoning and problem-solving
+2. Multilingual understanding and generation
+3. Creative and analytical writing
+4. Code understanding and generation
+5. Task decomposition and step-by-step guidance
+6. Summarization and information extraction
+Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
+""", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=2.0,
         ["Explain the difference between machine learning and deep learning."],
         ["Summarize the key points of climate change and its global impact."],
         ["Explain quantum computing to a 10-year-old."],
+        ["Design a step-by-step meal plan for someone trying to lose weight and build muscle."]
     ],
     cache_examples=False,
+    autofocus=False,
+    concurrency_limit=None
 )
 if __name__ == "__main__":