Spaces:

kietnt0603
/

ChatGLM4CS313

Sleeping

App Files Community

kietnt0603 committed on May 30

Commit

22eb5b6

•

1 Parent(s): 3c317fb

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -26

app.py CHANGED Viewed

@@ -3,10 +3,10 @@ import os
 import torch
 from datasets import DatasetDict, Dataset
 from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    BitsAndBytesConfig,
-    logging
 )
 logging.set_verbosity_error()
@@ -30,28 +30,29 @@ bnb_4bit_quant_type = 'nf4'
 use_nested_quant = False
 # device mapping
-device_map = {"": 0}
 compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
 bnb_config = BitsAndBytesConfig(
-    load_in_4bit=use_4bit,
-    bnb_4bit_quant_type=bnb_4bit_quant_type,
-    bnb_4bit_compute_dtype=compute_dtype,
-    bnb_4bit_use_double_quant=use_nested_quant,
 )
 if compute_dtype == torch.float16 and use_4bit:
-    major, _ = torch.cuda.get_device_capability()
-    if major >= 8:
-        print('='*80)
-        print('Your GPU supports bfloat16, you can accelerate using the argument --fp16')
-        print('='*80)
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    trust_remote_code=True,
-    quantization_config=bnb_config,
-    device_map=device_map,
 )
 model.config.use_cache = False
 model.config.pretraining_tp = 1
@@ -72,13 +73,13 @@ history = []
 # If the user has submitted input
 if st.button("Send"):
-    # Generate the chatbot's response
-    response, history = model.chat(tokenizer, user_input, history=history)
-    # Add the response to the conversation history
-    conversation_history.append(f"Bot: {response}")
-    # Update the conversation text
-    conversation_text.markdown("**Conversation:**\n")
-    for message in conversation_history:
-        conversation_text.markdown(f"- {message}")

 import torch
 from datasets import DatasetDict, Dataset
 from transformers import (
+  AutoModelForCausalLM,
+  AutoTokenizer,
+  BitsAndBytesConfig,
+  logging
 )
 logging.set_verbosity_error()
 use_nested_quant = False
 # device mapping
+device = torch.device("cpu")  # Set device to CPU
+device_map = {"": -1}  # Use -1 for CPU in bnb_config
 compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
 bnb_config = BitsAndBytesConfig(
+  load_in_4bit=use_4bit,
+  bnb_4bit_quant_type=bnb_4bit_quant_type,
+  bnb_4bit_compute_dtype=compute_dtype,
+  bnb_4bit_use_double_quant=use_nested_quant,
 )
 if compute_dtype == torch.float16 and use_4bit:
+  major, _ = torch.cuda.get_device_capability()
+  if major >= 8:
+    print('='*80)
+    print('Your GPU supports bfloat16, you can accelerate using the argument --fp16')
+    print('='*80)
 model = AutoModelForCausalLM.from_pretrained(
+  model_name,
+  trust_remote_code=True,
+  quantization_config=bnb_config,
+  device_map=device_map,
 )
 model.config.use_cache = False
 model.config.pretraining_tp = 1
 # If the user has submitted input
 if st.button("Send"):
+  # Generate the chatbot's response
+  response, history = model.chat(tokenizer, user_input, history=history)
+  # Add the response to the conversation history
+  conversation_history.append(f"Bot: {response}")
+  # Update the conversation text
+  conversation_text.markdown("**Conversation:**\n")
+  for message in conversation_history:
+    conversation_text.markdown(f"- {message}")