Space status: Runtime error
mohamedalcafory committed • c19cdb4
1 Parent(s): 7db38a1
Update app.py
app.py CHANGED
@@ -26,14 +26,24 @@ print('Retriever loaded successfully')
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
-base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-[two removed lines; content not recoverable from the rendered diff]
+# Load in 4-bit with CPU offload using quantization_config
+# Removed load_in_4bit as it's redundant when using quantization_config
+model = AutoModelForCausalLM.from_pretrained(
+    base_model_name,
+    device_map="auto",
+    trust_remote_code=True,  # Required for some models
+    quantization_config=BitsAndBytesConfig(
+        load_in_4bit=True,  # Specify 4-bit quantization within BitsAndBytesConfig
+        load_in_8bit_fp32_cpu_offload=True  # Enable CPU offload
+    )
+)
+adapter_path = "mohamedalcafory/PubMed_Llama3.1_Based_model"
 model.load_adapter(adapter_path)
+
 # tokenizer = AutoTokenizer.from_pretrained("mohamedalcafory/PubMed_Llama3.1_Based_model")
 # model = AutoModelForCausalLM.from_pretrained("mohamedalcafory/PubMed_Llama3.1_Based_model")
 print(f'Model loaded successfully: {model}')
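Note on this change: `load_in_8bit_fp32_cpu_offload` is not a `BitsAndBytesConfig` parameter in transformers; the CPU-offload flag is named `llm_int8_enable_fp32_cpu_offload`, so the intended offload is most likely never enabled (recent transformers versions typically warn about unused kwargs rather than raising). That mismatch is a plausible contributor to the Space's runtime-error status, though the page shows no traceback. A minimal corrected sketch of the loading step, assuming recent transformers, bitsandbytes, and peft releases (the fp16 compute dtype is an assumption, not from the commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
adapter_path = "mohamedalcafory/PubMed_Llama3.1_Based_model"

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# llm_int8_enable_fp32_cpu_offload is the actual transformers flag that lets
# modules placed on CPU by the device map run in fp32 alongside 4-bit GPU layers.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption: fp16 compute, not in the commit
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",               # accelerate decides the GPU/CPU split
    quantization_config=bnb_config,
)
model.load_adapter(adapter_path)     # requires peft to be installed

One further caveat: the unsloth checkpoint is already stored in bnb 4-bit and ships with its own saved quantization config, so passing a second `quantization_config` at load time may be reconciled against the saved one (transformers warns in that case), which is worth checking when debugging the error.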