Spaces:
Runtime error
artintel235 committed
Commit • 627141f
1 Parent(s): 6ec444c
Delete app.py
app.py
DELETED
@@ -1,62 +0,0 @@

import torch
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# config = PeftConfig.from_pretrained("/content/llama-2-7b-medichat")

# Load the 8-bit base model and attach the medichat LoRA adapter.
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    return_dict=True,
    load_in_8bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
model = PeftModel.from_pretrained(model, "maxspin/medichat")


def query_handling(query, conversation):
    # Reset the conversation once the user says thanks; otherwise keep it.
    # ("thank you" also matches "thank you very much".)
    if "thanks" in query.lower() or "thank you" in query.lower():
        conversation = ""
    return conversation


def process_response(input_string):
    # Find the indices of the first [INST] and the last [/INST].
    start_index = input_string.find("[INST]")
    end_index = input_string.rfind("[/INST]")

    if start_index != -1 and end_index != -1:
        # Drop everything from the first [INST] through the last [/INST],
        # i.e. the echoed prompt history, keeping only the new reply.
        inst_substring = input_string[start_index:end_index + len("[/INST]")]
        cleaned_string = input_string.replace(inst_substring, "")
    else:
        # If [INST] or [/INST] is not found, keep the original string.
        cleaned_string = input_string

    # Strip the special tokens <s> and </s> and any stray markers.
    cleaned_string = (
        cleaned_string.replace("<s>", "")
        .replace("</s>", "")
        .replace("[INST]", "")
        .replace("[/INST]", "")
    )
    return cleaned_string


conversation = ""


def predict(prompt):
    global conversation
    conversation = conversation + f"[INST]{prompt}[/INST]"
    input_sequence = "<s>" + conversation
    batch = tokenizer(input_sequence, return_tensors="pt").to("cuda")
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=4000)
    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    response = process_response(response)
    conversation += response
    conversation = query_handling(prompt, conversation)
    return response


iface = gr.Interface(
    fn=predict,
    inputs="text",   # accepts a single text input
    outputs="text",  # returns a single text response
)

# Launch the app; this must come after iface is defined.
iface.launch()
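
For reference, the standalone snippet below replays the conversation bookkeeping from predict and process_response over two turns. It is a minimal sketch with no model loaded; the prompt and reply strings are invented for illustration only.

# Standalone replay of the conversation bookkeeping in app.py.
# No model is loaded; the fake_reply strings are invented stand-ins
# for the newly generated text.

def process_response(input_string):
    # Same logic as process_response in app.py above.
    start = input_string.find("[INST]")
    end = input_string.rfind("[/INST]")
    if start != -1 and end != -1:
        echoed = input_string[start:end + len("[/INST]")]
        input_string = input_string.replace(echoed, "")
    return (input_string.replace("<s>", "").replace("</s>", "")
            .replace("[INST]", "").replace("[/INST]", ""))

conversation = ""
turns = [
    ("Hi, I have a headache.", "How long has it lasted?"),
    ("About two days.", "Consider seeing a doctor if it persists."),
]
for prompt, fake_reply in turns:
    conversation += f"[INST]{prompt}[/INST]"
    # The model would be fed "<s>" + conversation and echo the whole
    # history back, so the decoded text is history plus the new reply;
    # process_response strips the echoed [INST]...[/INST] span.
    decoded = conversation + fake_reply
    response = process_response(decoded)
    conversation += response
    print(response)
# Prints:
#   How long has it lasted?
#   Consider seeing a doctor if it persists.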