Spaces: Runtime error
itzjunayed committed
Commit 92bf885 • Parent(s): a6eaa57
updated

app.py CHANGED
@@ -1,57 +1,36 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer,
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
-
-model = AutoModelForCausalLM.from_pretrained(
-
-#
-
-    messages,
-    tokenize=False,
-    add_generation_prompt=True
-)
-
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids("<|im_end|>"),
-    tokenizer.convert_tokens_to_ids("<|eot_id|>")  # safer to have this too
-]
-
-outputs = pipeline(
-    prompt,
-    max_new_tokens=2048,
-    eos_token_id=terminators,
-    do_sample=True,
-    temperature=0.6,
-    top_p=0.95,
-)
-print(outputs[0]["generated_text"][len(prompt):])
+model_name = "ruslanmv/Medical-Llama3-8B"
+device_map = 'auto'
+bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
+model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, trust_remote_code=True, use_cache=False, device_map=device_map)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+tokenizer.pad_token = tokenizer.eos_token
+
+def askme(question):
+    sys_message = '''
+    You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
+    provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
+    '''
+    # Create messages structured for the chat template
+    messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": question}]
+
+    # Applying chat template
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
+
+    # Extract and return the generated text, removing the prompt
+    response_text = tokenizer.batch_decode(outputs)[0].strip()
+    answer = response_text.split('<|im_start|>assistant')[-1].strip()
+    return answer
+# Example usage
+# - Context: First describe your problem.
+# - Question: Then make the question.
+
+question = '''I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
+increased sensitivity to cold, and dry, itchy skin.
+Could these symptoms be related to hypothyroidism?
+If so, what steps should I take to get a proper diagnosis and discuss treatment options?'''
+
+print(askme(question))
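One plausible source of the Space's "Runtime error" status: the committed askme() moves inputs to "cuda" unconditionally, and 4-bit bitsandbytes loading also generally assumes a CUDA GPU, so the script crashes on CPU-only Spaces hardware. Below is a minimal device-agnostic sketch of the same load-and-generate flow; the CPU fallback and the dropped quantization config are assumptions of mine, not part of the commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "ruslanmv/Medical-Llama3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Assumption: skip BitsAndBytesConfig entirely, since 4-bit bitsandbytes
# loading typically requires CUDA; use fp16 on GPU and fp32 on CPU instead.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # lets accelerate place weights on GPU or CPU
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)

prompt = "..."  # built with tokenizer.apply_chat_template as in the commit
# model.device reflects where device_map actually placed the weights, so
# this runs on both GPU and CPU-only hardware instead of hard-coding "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])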
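The commit also drops the old terminators list, so generate() now stops only on the tokenizer's default EOS token; with a ChatML-style template the model may emit <|im_end|> and keep sampling, which is presumably why the response is split on '<|im_start|>assistant' afterwards. A sketch of carrying the removed stopping logic over to model.generate() follows; it assumes tokenizer, model, and inputs exist as in the commit, and filters out tokens the tokenizer does not actually define.

# Rebuild the removed terminators list; for fast tokenizers,
# convert_tokens_to_ids() typically returns None for tokens missing
# from the vocab, so those entries are filtered out.
terminators = [
    tok_id
    for tok_id in (
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|im_end|>"),
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    )
    if tok_id is not None
]

# generate() accepts a list of stop ids via eos_token_id, so generation
# halts on whichever terminator the model emits first.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    eos_token_id=terminators,
    use_cache=True,
)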