Spaces:
Sleeping
Sleeping
jed-tiotuico
committed on
Commit
•
34c50f2
1
Parent(s):
49fb5e7
added printer generated question
Browse files- app.py +72 -0
- seed_tasks.jsonl +0 -0
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import datetime
|
2 |
from google.protobuf import message
|
3 |
import torch
|
|
|
4 |
import time
|
5 |
import threading
|
6 |
import streamlit as st
|
@@ -23,6 +24,55 @@ if device == "cpu":
|
|
23 |
# check if mps is available
|
24 |
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def get_model_tokenizer(sota_model_name):
|
27 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
28 |
model_name = "jed-tiotuico/twitter-llama",
|
@@ -293,6 +343,28 @@ if st.button("your website is straight up garbage. how do you sell high end tech
|
|
293 |
)
|
294 |
)
|
295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
# - Generate Customer Tweet
|
297 |
if st.button("Generate Customer Message using Few Shots"):
|
298 |
model, tokenizer = get_mistral_model_tokenizer(sota_model_name)
|
|
|
1 |
import datetime
|
2 |
from google.protobuf import message
|
3 |
import torch
|
4 |
+
import json
|
5 |
import time
|
6 |
import threading
|
7 |
import streamlit as st
|
|
|
24 |
# check if mps is available
|
25 |
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
26 |
|
27 |
+
# Catalogue of HP printer models substituted for the <|hp-printer|>
# placeholder when composing synthetic support questions.
printer_models = [
    "HP Smart Tank 750", "HP LaserJet Pro", "HP LaserJet 4100",
    "HP LaserJet 4000", "HP Photosmart C4635", "HP OfficeJet Pro 9015",
    "HP Envy 6055", "HP DeskJet 3755", "HP Color LaserJet MFP M283fdw",
    "HP DesignJet T630", "HP PageWide Pro 477dw",
    "HP LaserJet Enterprise M506", "HP OfficeJet 5255",
    "HP Envy Photo 7855", "HP LaserJet Pro M404dn",
    "HP DeskJet Plus 4155", "HP LaserJet Enterprise MFP M528f",
    "HP Neverstop Laser 1001nw", "HP Tango X",
    "HP Color LaserJet Pro M255dw", "HP Smart Tank Plus 651",
    "HP LaserJet Pro MFP M428fdw", "HP OfficeJet Pro 8035",
    "HP Envy 6075", "HP DeskJet 2622", "HP LaserJet Pro M15w",
]
|
55 |
+
|
56 |
+
def generate_printer_prompt(prompt_instructions, models=None):
    """Build a few-shot prompt asking an LLM for a printer-support question.

    Each seed instruction is whitespace-normalized, has a trailing colon
    stripped, and has any ``<|hp-printer|>`` placeholder replaced with one
    randomly chosen printer model, then is appended as a ``Q:`` example.

    Args:
        prompt_instructions: iterable of seed task/question strings.
        models: optional sequence of printer-model names to substitute for
            the placeholder; defaults to the module-level ``printer_models``.

    Returns:
        The assembled prompt string, ending with "Q: (your task/question)".
    """
    if models is None:
        models = printer_models
    prompt = """
Come up with a printer related task or question that a person might ask for support.
no further text/explanation, no additional information.
Ensure the tasks/questions should follow the same style and complexity
Examples:
"""
    # NOTE: plain `for` — the original enumerate() index was never used.
    for instruction in prompt_instructions:
        # Collapse internal whitespace runs and drop a trailing colon.
        instruction = re.sub(r"\s+", " ", instruction).strip().rstrip(":")
        # One model is drawn per instruction and replaces every occurrence
        # of the placeholder. str.replace is used instead of re.sub: the
        # placeholder is a literal token, so no regex escaping is needed
        # in either the pattern or the replacement.
        instruction = instruction.replace("<|hp-printer|>", random.choice(models))
        prompt += f"Q: {instruction}\n\n"
    prompt += "Now it's your turn, come up with a printer task/question that a person might ask for support.\n"
    prompt += "Q: (your task/question)"
    return prompt
|
75 |
+
|
76 |
def get_model_tokenizer(sota_model_name):
|
77 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
78 |
model_name = "jed-tiotuico/twitter-llama",
|
|
|
343 |
)
|
344 |
)
|
345 |
|
346 |
+
if st.button("Generate printer task/question"):
    # Load seed printer-support tasks (one JSON object per line).
    # `with` ensures the file handle is closed — the original left it open.
    with open("printer-seed.jsonl", "r") as seed_file:
        seed_instructions = [json.loads(line)["text"] for line in seed_file]
    # Draw a random few-shot sample and shuffle it for prompt variety.
    prompt_instructions = random.sample(seed_instructions, num_prompt_instructions)
    random.shuffle(prompt_instructions)
    customer_msg = generate_printer_prompt(prompt_instructions)
    st.session_state["user_msg_as_prompt"] = customer_msg
    write_user_chat_message(user_chat, customer_msg)
    model, tokenizer = get_model_tokenizer(sota_model_name)
    input_text = alpaca_input_text_format.format(customer_msg)
    st.write(f"```\n{input_text}```")
    # Stream the model's generation into the assistant chat bubble.
    assistant_chat.write_stream(
        stream_generation(
            input_text,
            show_prompt=False,
            tokenizer=tokenizer,
            model=model,
            temperature=0.5,
        )
    )
|
367 |
+
|
368 |
# - Generate Customer Tweet
|
369 |
if st.button("Generate Customer Message using Few Shots"):
|
370 |
model, tokenizer = get_mistral_model_tokenizer(sota_model_name)
|
seed_tasks.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|