mohamedalcafory committed on
Commit
c19cdb4
1 Parent(s): 7db38a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -26,14 +26,24 @@ print('Retriever loaded successfully')
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# AutoModel returns the bare transformer backbone (no language-model head);
# a causal-LM head is required for text generation, so load via
# AutoModelForCausalLM. This also removes the duplicate AutoTokenizer import.
from transformers import AutoTokenizer, AutoModelForCausalLM

# Base checkpoint: 4-bit bitsandbytes-quantized Llama 3.1 8B Instruct.
base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# PEFT adapter fine-tuned on PubMed data, stacked on top of the base model.
adapter_path = "mohamedalcafory/PubMed_Llama3.1_Based_model"
model.load_adapter(adapter_path)

print(f'Model loaded successfully: {model}')
 
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Base checkpoint: 4-bit bitsandbytes-quantized Llama 3.1 8B Instruct.
base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load in 4-bit and allow modules that do not fit on the GPU to run in
# fp32 on the CPU.
# FIX: BitsAndBytesConfig has no `load_in_8bit_fp32_cpu_offload` parameter —
# that kwarg was silently ignored, so the intended CPU offload never took
# effect. The correct keyword is `llm_int8_enable_fp32_cpu_offload`, which
# (despite the int8 name) also enables fp32 CPU offload for 4-bit loading.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",           # let accelerate place layers across GPU/CPU
    trust_remote_code=True,      # some repos ship custom modeling code
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        llm_int8_enable_fp32_cpu_offload=True,
    ),
)

# PEFT adapter fine-tuned on PubMed data, stacked on top of the base model.
adapter_path = "mohamedalcafory/PubMed_Llama3.1_Based_model"
model.load_adapter(adapter_path)

print(f'Model loaded successfully: {model}')