Update model.py
model.py
CHANGED
```diff
@@ -1,25 +1,45 @@
 from threading import Thread
 from typing import Iterator
-
 import torch
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import os
+import transformers
+from torch import cuda, bfloat16
+from peft import PeftModel, PeftConfig

 token = os.environ.get("HF_API_TOKEN")

-
+base_model_id = 'meta-llama/Llama-2-7b-chat-hf'

-
-
-
-
+device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+
+bnb_config = transformers.BitsAndBytesConfig(
+    llm_int8_enable_fp32_cpu_offload=True
+)
+
+model_config = transformers.AutoConfig.from_pretrained(
+    base_model_id,
+    use_auth_token=token
+)
+
+model = transformers.AutoModelForCausalLM.from_pretrained(
+    base_model_id,
+    trust_remote_code=True,
+    config=model_config,
+    quantization_config=bnb_config,
+    device_map='auto',
+    use_auth_token=token
+)
+
+config = PeftConfig.from_pretrained("Ashishkr/llama-2-medical-consultation")
+model = PeftModel.from_pretrained(model, "Ashishkr/llama-2-medical-consultation").to(device)

-
+model.eval()

-
-
-
-
+tokenizer = transformers.AutoTokenizer.from_pretrained(
+    base_model_id,
+    use_auth_token=token
+)


 def get_prompt(message: str, chat_history: list[tuple[str, str]],
```
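A note on the quantization settings above: as committed, `bnb_config` only enables fp32 CPU offload and sets neither `load_in_8bit` nor `load_in_4bit`, so the base model loads unquantized, and the `bfloat16` import goes unused. If quantized loading is the intent, a config along these lines would activate it (a sketch, assuming 4-bit NF4 with bfloat16 compute; not part of this commit):

```python
# Sketch only (not in this commit): 4-bit NF4 quantization with
# bfloat16 compute, using the `bfloat16` already imported from torch.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits on load
    bnb_4bit_quant_type='nf4',              # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_compute_dtype=bfloat16,        # run matmuls in bfloat16
    llm_int8_enable_fp32_cpu_offload=True,  # keep the offload option from the commit
)
```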
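For context, the file imports `Thread`, `Iterator`, and `TextIteratorStreamer`, but this hunk doesn't show their use. A minimal sketch of streaming generation with the model and tokenizer loaded above (the helper name, prompt handling, and sampling parameters are illustrative assumptions, not code from this repo):

```python
# Sketch only: streaming generation with the objects defined above.
# `stream_response` and its defaults are hypothetical, not from this repo.
def stream_response(prompt: str, max_new_tokens: int = 256) -> Iterator[str]:
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15.0
    )
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
    )
    # model.generate() blocks, so run it on a worker thread and yield
    # decoded text chunks from the streamer as they arrive.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    yield from streamer
```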