ruslanmv committed on
Commit
161002f
1 Parent(s): abc8993

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -1,9 +1,16 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
-
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5
  import torch
 
 
 
 
6
 
 
 
 
 
7
  # Define BitsAndBytesConfig
8
  bnb_config = BitsAndBytesConfig(load_in_4bit=True,
9
  bnb_4bit_quant_type="nf4",
@@ -19,7 +26,7 @@ model = AutoModelForCausalLM.from_pretrained(model_name, config=bnb_config)
19
  # Ensure model is on the correct device
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
  model.to(device)
22
-
23
  # Define the respond function
24
  def respond(
25
  message,
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
  import torch
5
+ import spaces
6
+ import os
7
+ IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
8
+ IS_SPACE = os.environ.get("SPACE_ID", None) is not None
9
 
10
+ device = "cuda" if torch.cuda.is_available() else "cpu"
11
+ LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
12
+ print(f"Using device: {device}")
13
+ print(f"low memory: {LOW_MEMORY}")
14
  # Define BitsAndBytesConfig
15
  bnb_config = BitsAndBytesConfig(load_in_4bit=True,
16
  bnb_4bit_quant_type="nf4",
 
26
  # Ensure model is on the correct device
27
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
  model.to(device)
29
+ @spaces.GPU
30
  # Define the respond function
31
  def respond(
32
  message,