itzjunayed committed on
Commit 92bf885
1 Parent(s): a6eaa57
Files changed (1)
  1. app.py +35 -56
app.py CHANGED
@@ -1,57 +1,36 @@
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
- from transformers import pipeline
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch
- 
- model_id = "MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.2"
- 
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     torch_dtype=torch.bfloat16,
-     device_map="auto",
-     trust_remote_code=True,
-     # attn_implementation="flash_attention_2"
- )
- 
- tokenizer = AutoTokenizer.from_pretrained(
-     model_id,
-     trust_remote_code=True
- )
- 
- streamer = TextStreamer(tokenizer)
- 
- pipeline = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     model_kwargs={"torch_dtype": torch.bfloat16},
-     streamer=streamer
- )
- 
- # Then you can use the pipeline to generate text.
- 
- messages = [
-     {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
-     {"role": "user", "content": "Who are you?"},
- ]
- 
- prompt = tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True
- )
- 
- terminators = [
-     tokenizer.eos_token_id,
-     tokenizer.convert_tokens_to_ids("<|im_end|>"),
-     tokenizer.convert_tokens_to_ids("<|eot_id|>")  # safer to have this too
- ]
- 
- outputs = pipeline(
-     prompt,
-     max_new_tokens=2048,
-     eos_token_id=terminators,
-     do_sample=True,
-     temperature=0.6,
-     top_p=0.95,
- )
- print(outputs[0]["generated_text"][len(prompt):])
+ model_name = "ruslanmv/Medical-Llama3-8B"
+ device_map = 'auto'
+ bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
+ model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, trust_remote_code=True, use_cache=False, device_map=device_map)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+ 
+ def askme(question):
+     sys_message = '''
+     You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
+     provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
+     '''
+     # Create messages structured for the chat template
+     messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": question}]
+ 
+     # Applying chat template
+     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+     outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
+ 
+     # Extract and return the generated text, removing the prompt
+     response_text = tokenizer.batch_decode(outputs)[0].strip()
+     answer = response_text.split('<|im_start|>assistant')[-1].strip()
+     return answer
+ # Example usage
+ # - Context: First describe your problem.
+ # - Question: Then make the question.
+ 
+ question = '''I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
+ increased sensitivity to cold, and dry, itchy skin.
+ Could these symptoms be related to hypothyroidism?
+ If so, what steps should I take to get a proper diagnosis and discuss treatment options?'''
+ 
+ print(askme(question))
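Note on the new askme: it recovers the answer by splitting the decoded text on the <|im_start|>assistant tag, which assumes a ChatML-style chat template, and it hard-codes the "cuda" device. The sketch below is not part of this commit; it is a minimal, hedged alternative (the name askme_alt is hypothetical) that reuses the model and tokenizer loaded in app.py and decodes only the newly generated tokens, so it does not depend on any template marker:

    # Sketch only, not part of this commit: template-agnostic answer extraction.
    # Assumes `model` and `tokenizer` are already loaded as in app.py above.
    def askme_alt(question, sys_message):
        messages = [{"role": "system", "content": sys_message},
                    {"role": "user", "content": question}]
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Follow the model's device placement instead of hard-coding "cuda".
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
        # Keep only the tokens generated after the prompt, then decode them.
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()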