ruslanmv committed on
Commit
161002f
1 Parent(s): abc8993

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -1,9 +1,16 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
-
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5
  import torch
 
 
 
 
6
 
 
 
 
 
7
  # Define BitsAndBytesConfig
8
  bnb_config = BitsAndBytesConfig(load_in_4bit=True,
9
  bnb_4bit_quant_type="nf4",
@@ -19,7 +26,7 @@ model = AutoModelForCausalLM.from_pretrained(model_name, config=bnb_config)
19
  # Ensure model is on the correct device
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
  model.to(device)
22
-
23
  # Define the respond function
24
  def respond(
25
  message,
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
  import torch
5
+ import spaces
6
+ import os
7
+ IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
8
+ IS_SPACE = os.environ.get("SPACE_ID", None) is not None
9
 
10
+ device = "cuda" if torch.cuda.is_available() else "cpu"
11
+ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
12
+ print(f"Using device: {device}")
13
+ print(f"low memory: {LOW_MEMORY}")
14
  # Define BitsAndBytesConfig
15
  bnb_config = BitsAndBytesConfig(load_in_4bit=True,
16
  bnb_4bit_quant_type="nf4",
 
26
  # Ensure model is on the correct device
27
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
  model.to(device)
29
+ @spaces.GPU
30
  # Define the respond function
31
  def respond(
32
  message,