Spaces:
Sleeping
Sleeping
File size: 19,117 Bytes
ffa493c 34c50f2 ffa493c d47ab3c ffa493c d47ab3c ffa493c 92ad22c ffa493c 34c50f2 ffa493c d47ab3c ffa493c d47ab3c ffa493c 49fb5e7 ffa493c 01c683d d47ab3c ffa493c d47ab3c ffa493c 595c6e5 4696f14 595c6e5 ffa493c 49fb5e7 ffa493c 34c50f2 f08a09a 34c50f2 ffa493c 49fb5e7 ffa493c 01c683d 595c6e5 01c683d 49fb5e7 01c683d 595c6e5 01c683d ffa493c 01c683d 92ad22c 01c683d 974629b 01c683d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 |
import datetime
from google.protobuf import message
import torch
import json
import time
import threading
import streamlit as st
import random
from typing import Iterable
from unsloth import FastLanguageModel
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, PreTrainedTokenizerFast
from datetime import datetime
from threading import Thread
# Hugging Face model identifiers used by the app.
fine_tuned_model_name = "jed-tiotuico/twitter-llama"
sota_model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
# fine_tuned_model_name = "MBZUAI/LaMini-GPT-124M"
# sota_model_name = "MBZUAI/LaMini-GPT-124M"

# Prompt template; `{}` is filled with the customer message via str.format.
alpaca_input_text_format = "</s>### Instruction:\n{}\n\n### Response:\n"

# Prefer CUDA; otherwise fall back to Apple's MPS backend, then to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Compare on `.type`: `device` is a torch.device object, not a string, so
# testing it against the literal "cpu" does not select the fallback branch.
if device.type == "cpu":
    # check if mps is available
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
# Printer model names; one is substituted for the "<|hp-printer|>"
# placeholder when building printer prompts.
printer_models = [
    "HP Smart Tank 750",
    "HP LaserJet Pro",
    "HP LaserJet 4100",
    "HP LaserJet 4000",
    "HP Photosmart C4635",
    "HP OfficeJet Pro 9015",
    "HP Envy 6055",
    "HP DeskJet 3755",
    "HP Color LaserJet MFP M283fdw",
    "HP DesignJet T630",
    "HP PageWide Pro 477dw",
    "HP LaserJet Enterprise M506",
    "HP OfficeJet 5255",
    "HP Envy Photo 7855",
    "HP LaserJet Pro M404dn",
    "HP DeskJet Plus 4155",
    "HP LaserJet Enterprise MFP M528f",
    "HP Neverstop Laser 1001nw",
    "HP Tango X",
    "HP Color LaserJet Pro M255dw",
    "HP Smart Tank Plus 651",
    "HP LaserJet Pro MFP M428fdw",
    "HP OfficeJet Pro 8035",
    "HP Envy 6075",
    "HP DeskJet 2622",
    "HP LaserJet Pro M15w",
]
def generate_printer_prompt(prompt_instructions):
    """Encode multiple prompt instructions into a single string.

    Each instruction has its whitespace runs collapsed to single spaces, is
    stripped, loses any trailing colons, and is appended as a ``Q: ...``
    example. Every ``<|hp-printer|>`` placeholder in an instruction is
    replaced by one randomly chosen entry of ``printer_models`` (a model is
    only drawn for instructions that actually contain the placeholder).

    Args:
        prompt_instructions: iterable of example instruction strings.

    Returns:
        str: the instruction header, the ``Q:`` examples, and a closing
        request for one new task/question.
    """
    # The module's import block does not import `re`, so bring it into scope
    # here; without it the first instruction raises NameError.
    import re

    placeholder = "<|hp-printer|>"
    parts = ["""
Come up with a printer related task or question that a person might ask for support.
no further text/explanation, no additional information.
Ensure the tasks/questions should follow the same style and complexity
Examples:
"""]
    for instruction in prompt_instructions:
        instruction = re.sub(r"\s+", " ", instruction).strip().rstrip(":")
        if placeholder in instruction:
            # pick one random printer model to replace the placeholder
            printer_model = random.choice(printer_models)
            # Literal replacement: the model name is never parsed as a
            # regex replacement template.
            instruction = instruction.replace(placeholder, printer_model)
        parts.append(f"Q: {instruction}\n\n")
    parts.append("Now it's your turn, come up with a printer task/question that a person might ask for support.\n")
    parts.append("Q: (your task/question)")
    return "".join(parts)
def get_model_tokenizer(sota_model_name):
    """Load the "jed-tiotuico/twitter-llama" model and tokenizer for inference.

    Args:
        sota_model_name: accepted for interface compatibility but not read;
            the checkpoint name is fixed inside this function.

    Returns:
        tuple: ``(model, tokenizer)`` as produced by
        ``FastLanguageModel.from_pretrained``, after the model has been
        passed to ``FastLanguageModel.for_inference``.
    """
    load_options = {
        "model_name": "jed-tiotuico/twitter-llama",
        "max_seq_length": 200,
        "dtype": None,
        "load_in_4bit": True,
        "cache_dir": "/data/.cache/hf-models",
        # Access token is read from the Streamlit secrets store.
        "token": st.secrets["HF_TOKEN"],
    }
    model, tokenizer = FastLanguageModel.from_pretrained(**load_options)
    FastLanguageModel.for_inference(model)
    return model, tokenizer
def write_user_chat_message(user_chat, customer_msg):
    """Write ``customer_msg`` into the user chat container.

    Args:
        user_chat: chat container exposing ``write``; when ``None`` a new
            ``st.chat_message("user")`` container is created.
        customer_msg: text to display. Nothing is written when it is empty
            or ``None``.
    """
    if not customer_msg:
        return
    # Identity check: None is a singleton and must not be tested with `==`.
    if user_chat is None:
        user_chat = st.chat_message("user")
    user_chat.write(customer_msg)
def write_stream_user_chat_message(user_chat, model, token, prompt):
    """Stream a generated customer message into the user chat container.

    Args:
        user_chat: chat container exposing ``write_stream``; when ``None`` a
            new ``st.chat_message("user")`` container is created.
        model: model forwarded to ``stream_generation``.
        token: tokenizer forwarded to ``stream_generation`` (the parameter
            name is kept for backward compatibility).
        prompt: prompt text to generate from.

    Returns:
        Whatever ``user_chat.write_stream`` returns for the streamed
        generation, or ``None`` when ``prompt`` is empty.
    """
    if not prompt:
        # Nothing to generate; return explicitly instead of reading an
        # unassigned local.
        return None
    if user_chat is None:
        user_chat = st.chat_message("user")
    # Use the tokenizer that was passed in rather than a module-level
    # `tokenizer` that may not exist or may belong to another model.
    return user_chat.write_stream(
        stream_generation(
            prompt,
            show_prompt=False,
            tokenizer=token,
            model=model,
            temperature=0.5,
        )
    )
def get_mistral_model_tokenizer(sota_model_name):
    """Load a 4-bit Mistral instruct model and its tokenizer for inference.

    Args:
        sota_model_name: checkpoint name to load. When empty or ``None`` the
            previous hard-coded default
            ``"unsloth/mistral-7b-instruct-v0.2-bnb-4bit"`` is used.

    Returns:
        tuple: ``(model, tokenizer)`` as produced by
        ``FastLanguageModel.from_pretrained``, after the model has been
        passed to ``FastLanguageModel.for_inference``.
    """
    default_model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
    max_seq_length = 2048
    dtype = torch.float16
    load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
    model, tokenizer = FastLanguageModel.from_pretrained(
        # Honour the caller's choice instead of silently ignoring it.
        model_name=sota_model_name or default_model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        cache_dir="/data/.cache/hf-models",
    )
    FastLanguageModel.for_inference(model)
    return model, tokenizer
class DeckPicker:
    """Hand out items one at a time from a shuffled copy of a sequence.

    The copy is shuffled on construction and shuffled again each time the
    end of the deck is passed.
    """

    def __init__(self, items):
        # Untouched snapshot of the input order.
        self.original_items = items[:]
        # Working copy; the caller's sequence is never shuffled.
        self.items = items[:]
        random.shuffle(self.items)
        # -1 so that the first pick() lands on position 0.
        self.index = -1

    def pick(self):
        """Pick the next item from the deck. If all items have been picked, reshuffle."""
        position = self.index + 1
        if position >= len(self.items):
            # Deck exhausted: restart from the top of a freshly shuffled deck.
            position = 0
            random.shuffle(self.items)
        self.index = position
        return self.items[position]

    def get_state(self):
        """Return the current state of the deck and the last picked index."""
        return self.items, self.index
# Word pools for the few-shot customer-message prompt: one noun, one verb
# and one adjective are drawn from these lists.
nouns = [
    "service", "issue", "account", "support", "problem",
    "help", "team", "request", "response", "email",
    "ticket", "update", "error", "system", "connection",
    "downtime", "billing", "charge", "refund", "password",
    "outage", "agent", "feature", "access", "status",
    "interface", "network", "subscription", "upgrade", "notification",
    "data", "server", "log", "message", "renewal",
    "setup", "security", "feedback", "confirmation", "printer",
]
verbs = [
    "have", "print", "need", "help", "update",
    "resolve", "access", "contact", "receive", "reset",
    "support", "experience", "report", "request", "process",
    "check", "confirm", "explain", "manage", "handle",
    "disconnect", "renew", "change", "fix", "cancel",
    "complete", "notify", "respond", "fail", "restore",
    "review", "escalate", "submit", "configure", "troubleshoot",
    "log", "operate", "suspend", "pay", "adjust",
]
adjectives = [
    "quick", "immediate", "urgent", "unable", "detailed",
    "frequent", "technical", "possible", "slow", "helpful",
    "unresponsive", "secure", "successful", "necessary", "available",
    "scheduled", "regular", "interrupted", "automatic", "manual",
    "last", "online", "offline", "new", "current",
    "prior", "due", "related", "temporary", "permanent",
    "next", "previous", "complicated", "easy", "difficult",
    "major", "minor", "alternative", "additional", "expired",
]
def create_few_shots(noun_picker, verb_picker, adjective_picker):
    """Build the few-shot prompt asking for one negative customer tweet.

    Args:
        noun_picker: object whose ``pick()`` returns the noun to require.
        verb_picker: object whose ``pick()`` returns the verb to require.
        adjective_picker: object whose ``pick()`` returns the adjective to
            require.

    Returns:
        str: the prompt text with the three picked words interpolated.
    """
    # Draw in noun, verb, adjective order (tuple elements evaluate left to right).
    noun, verb, adjective = (
        noun_picker.pick(),
        verb_picker.pick(),
        adjective_picker.pick(),
    )
    return f"""
Write a short realistic customer support tweet message by a customer for another company.
Avoid adding hashtags or mentions in the message.
Ensure that the sentiment is negative.
Ensure that the word count is around 15 to 25 words.
Ensure the message contains the noun: {noun}, verb: {verb}, and adjective: {adjective}.
Example of return messages 5/5:
1/5: your website is straight up garbage. how do you sell high end technology but you cant get a website right?
2/5: my phone is all static during calls and when i plug in headphones any audio still comes thru the speaks wtf
3/5: hi, i'm having trouble logging into my groceries account it keeps refreshing back to the log in page, any ideas?
4/5: please check you dms asap if you're really about customer service. 2 weeks since my accident and nothing.
5/5: I'm extremely disappointed with your service. You charged me for a temporary solution, and there's no adjustment in sight.
Now it's your turn, ensure to only generate one message
1/1:
"""
# Page title, one-line tagline and usage instructions.
st.header("ReplyCaddy")
st.write(
    "AI-powered customer support assistant. "
    "Reduces anxiety when responding to customer support on social media."
)
st.markdown(
    """
Instructions:
1. Click the Generate Customer Message using Few Shots button to generate a custom message
2. Then click Generate Polite and Friendly Response
3. Or Enter a custom message in the text box and click Generate Polite and Friendly Response
"""
)
# image https://github.com/unslothai/unsloth/blob/main/images/made%20with%20unsloth.png?raw=true
# st.write("Made with [Unsloth](https://github.com/unslothai/unsloth/blob/main/images/made%20with%20unsloth.png?raw=true")
def stream_generation(
    prompt: str,
    tokenizer: PreTrainedTokenizerFast,
    model: AutoModelForCausalLM,
    max_new_tokens: int = 2048,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 100,
    repetition_penalty: float = 1.1,
    penalty_alpha: float = 0.25,
    no_repeat_ngram_size: int = 3,
    show_prompt: bool = False,
) -> Iterable[str]:
    """
    Stream the generation of a prompt.

    ``model.generate`` runs in a background thread while the decoded text
    chunks are yielded from a ``TextIteratorStreamer`` as they arrive.

    Args:
        prompt (str): the prompt
        tokenizer (PreTrainedTokenizerFast): the tokenizer
        model (AutoModelForCausalLM): the model
        max_new_tokens (int, optional): the maximum number of tokens to generate. Defaults to 2048.
        temperature (float, optional): the temperature of the generation. Defaults to 0.7.
        top_p (float, optional): the top-p value of the generation. Defaults to 0.9.
        top_k (int, optional): the top-k value of the generation. Defaults to 100.
        repetition_penalty (float, optional): the repetition penalty of the generation. Defaults to 1.1.
        penalty_alpha (float, optional): the penalty alpha of the generation. Defaults to 0.25.
        no_repeat_ngram_size (int, optional): the no repeat ngram size of the generation. Defaults to 3.
        show_prompt (bool, optional): whether to show the prompt or not. Defaults to False.

    Yields:
        str: the generated text, chunk by chunk. Chunks that contain a
        blacklisted token (currently ``"<|url|>"``) are dropped entirely.
    """
    # init the streaming object with tokenizer
    # skip_prompt = not show_prompt, skip_special_tokens = True
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=not show_prompt, skip_special_tokens=True)  # type: ignore

    # Tokenize once and move the whole encoding to the target device so the
    # attention mask can be forwarded together with the input ids.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    # setup kwargs for generation
    generation_kwargs = dict(
        input_ids=encoded["input_ids"],
        # None (generate's own default) when the tokenizer returns no mask.
        attention_mask=encoded.get("attention_mask"),
        streamer=streamer,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        penalty_alpha=penalty_alpha,
        no_repeat_ngram_size=no_repeat_ngram_size,
        max_new_tokens=max_new_tokens,
    )

    # start the generation in a separate thread
    generation_thread = threading.Thread(
        target=model.generate, kwargs=generation_kwargs  # type: ignore
    )
    generation_thread.start()

    blacklisted_tokens = ["<|url|>"]
    for new_text in streamer:
        # filter out blacklisted tokens
        if any(token in new_text for token in blacklisted_tokens):
            continue
        yield new_text

    # wait for the generation to finish
    generation_thread.join()
# Module-level placeholders, all starting out as None.
twitter_llama_model = twitter_llama_tokenizer = streamer = None
# define state and the chat messages
def init_session_states(assistant_chat, user_chat):
    """Ensure ``st.session_state["user_msg_as_prompt"]`` exists.

    The key is set to ``""`` only when it is missing; an existing value is
    left alone. Both parameters are accepted but not read.
    """
    if "user_msg_as_prompt" in st.session_state:
        return
    st.session_state["user_msg_as_prompt"] = ""
# The user bubble is only created once "user_msg_as_prompt" exists in the
# session state (i.e. not on the first run of a session).
user_chat = (
    st.chat_message("user")
    if "user_msg_as_prompt" in st.session_state
    else None
)
assistant_chat = st.chat_message("assistant")

# NOTE(review): the source lost its indentation; the greeting is assumed to
# belong inside this `if`, so it is shown once per session -- confirm.
if "greet" not in st.session_state:
    st.session_state["greet"] = False
    greeting_text = "Hello! I'm here to help. Copy and paste your customer's message, or generate using AI."
    assistant_chat.write(greeting_text)

init_session_states(assistant_chat, user_chat)
# Generate Response Tweet
# The button is only rendered when a user chat container exists.
if user_chat and st.button("Generate Polite and Friendly Response"):
    if "user_msg_as_prompt" in st.session_state:
        customer_msg = st.session_state["user_msg_as_prompt"]
        if customer_msg:
            write_user_chat_message(user_chat, customer_msg)
            model, tokenizer = get_model_tokenizer(sota_model_name)
            input_text = alpaca_input_text_format.format(customer_msg)
            # input_text embeds user-supplied text, so render it without
            # unsafe_allow_html (matches the other handlers in this file).
            st.markdown(f"""```\n{input_text}```""")
            response_tweet = assistant_chat.write_stream(
                stream_generation(
                    input_text,
                    show_prompt=False,
                    tokenizer=tokenizer,
                    model=model,
                    temperature=0.5,
                )
            )
        else:
            st.error("Please enter a customer message, or generate one for the ai to respond")
# below ui prompt
# - examples
# st.markdown("<b>Example:</b>", unsafe_allow_html=True)
# The example tweet doubles as the button label and as the customer message.
example_tweet = "your website is straight up garbage. how do you sell high end technology but you cant get a website right?"
if st.button(example_tweet):
    customer_msg = example_tweet
    st.session_state["user_msg_as_prompt"] = customer_msg
    write_user_chat_message(user_chat, customer_msg)
    model, tokenizer = get_model_tokenizer(sota_model_name)
    input_text = alpaca_input_text_format.format(customer_msg)
    # Show the exact prompt sent to the model.
    st.write(f"```\n{input_text}```")
    reply_stream = stream_generation(
        input_text,
        show_prompt=False,
        tokenizer=tokenizer,
        model=model,
        temperature=0.5,
    )
    assistant_chat.write_stream(reply_stream)
if st.button("Generate printer task/question"):
    num_prompt_instructions = 8
    # Read the seed tasks through a context manager so the file handle is
    # closed, and skip blank lines that would make json.loads fail.
    with open("/data/printer-seed.jsonl", "r", encoding="utf-8") as seed_file:
        seed_tasks = [json.loads(line) for line in seed_file if line.strip()]
    seed_instructions = [task["text"] for task in seed_tasks]
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of available seeds.
    sample_size = min(num_prompt_instructions, len(seed_instructions))
    prompt_instructions = random.sample(seed_instructions, sample_size)
    random.shuffle(prompt_instructions)
    customer_msg = generate_printer_prompt(prompt_instructions)
    st.session_state["user_msg_as_prompt"] = customer_msg
    write_user_chat_message(user_chat, customer_msg)
    model, tokenizer = get_model_tokenizer(sota_model_name)
    input_text = alpaca_input_text_format.format(customer_msg)
    # Show the exact prompt sent to the model.
    st.write(f"```\n{input_text}```")
    assistant_chat.write_stream(
        stream_generation(
            input_text,
            show_prompt=False,
            tokenizer=tokenizer,
            model=model,
            temperature=0.5,
        )
    )
# - Generate Customer Tweet
if st.button("Generate Customer Message using Few Shots"):
    model, tokenizer = get_mistral_model_tokenizer(sota_model_name)
    # Keep one deck per word list in the session state. Rebuilding the
    # pickers on every click would reshuffle each time, so DeckPicker's
    # cycle through every word before repeating would never take effect.
    for state_key, words in (
        ("noun_picker", nouns),
        ("verb_picker", verbs),
        ("adjective_picker", adjectives),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = DeckPicker(words)
    noun_picker = st.session_state["noun_picker"]
    verb_picker = st.session_state["verb_picker"]
    adjective_picker = st.session_state["adjective_picker"]
    few_shots = create_few_shots(noun_picker, verb_picker, adjective_picker)
    few_shot_prompt = f"<s>[INST]{few_shots}[/INST]\n"
    st.markdown("Prompt:")
    st.markdown(f"""```\n{few_shot_prompt}```""", unsafe_allow_html=True)
    new_customer_msg = write_stream_user_chat_message(user_chat, model, tokenizer, few_shot_prompt)
    # Remember the generated message so a response can be generated for it.
    st.session_state["user_msg_as_prompt"] = new_customer_msg
# main ui prompt
# - text box
# - submit
with st.form(key="my_form"):
    customer_msg = st.text_area("Customer Message")
    # Echo the typed message into the user chat. This runs on every rerun,
    # including the one triggered by the submit button, so the submit branch
    # below does not write the message a second time.
    write_user_chat_message(user_chat, customer_msg)
    if st.form_submit_button("Submit and Generate Response"):
        if customer_msg:
            st.session_state["user_msg_as_prompt"] = customer_msg
            model, tokenizer = get_model_tokenizer(sota_model_name)
            input_text = alpaca_input_text_format.format(customer_msg)
            # Show the exact prompt sent to the model.
            st.write(f"```\n{input_text}```")
            assistant_chat.write_stream(
                stream_generation(
                    input_text,
                    show_prompt=False,
                    tokenizer=tokenizer,
                    model=model,
                    temperature=0.5,
                )
            )
        else:
            # Same guard and message as the response-button handler: do not
            # generate from an empty instruction.
            st.error("Please enter a customer message, or generate one for the ai to respond")
# Footer: separator, heading, then one markdown element per acknowledgement.
st.markdown("------------")
st.markdown("<p>Thanks to:</p>", unsafe_allow_html=True)
acknowledgements = (
    """Unsloth https://github.com/unslothai check out the [wiki](https://github.com/unslothai/unsloth/wiki)""",
    """Georgi Gerganov's ggml https://github.com/ggerganov/ggml""",
    """Meta's Llama https://github.com/meta-llama""",
    """Mistral AI - https://github.com/mistralai""",
    """Zhang Peiyuan's TinyLlama https://github.com/jzhang38/TinyLlama""",
    """Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois,
Xuechen Li, Carlos Guestrin, Percy Liang, Tatsunori B. Hashimoto
- [Alpaca: A Strong, Replicable Instruction-Following Model](https://crfm.stanford.edu/2023/03/13/alpaca.html)""",
)
for acknowledgement in acknowledgements:
    st.markdown(acknowledgement)
# Diagnostics: GPU name and memory (only when CUDA is present), then versions.
# Querying device 0 without a CUDA device raises, so guard on availability
# instead of running unconditionally.
if torch.cuda.is_available():
    gpu_stats = torch.cuda.get_device_properties(0)
    # Bytes -> GiB.
    max_memory = gpu_stats.total_memory / 1024 ** 3
    start_gpu_memory = torch.cuda.memory_reserved(0) / 1024 ** 3
    st.write(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
    st.write(f"{start_gpu_memory} GB of memory reserved.")
st.write("Packages:")
st.write(f"pytorch: {torch.__version__}")
# Project write-up rendered at the bottom of the page as markdown: an
# overview, the problems with existing solutions, and sample outputs.
st.markdown("""
## Overview
Small business owners, app developers, and freelance talent professionals need more time and money to compete for social media presence. Social media apps provide excellent reach on X, FaceBook, and Instagram, so customers sometimes request initial support from those. The correct response has always been to redirect them to the proper support funnels, which then we run a process of account validation, issue classification, and prioritization.
However, composing the right tone for the first response message has been challenging and time-consuming, especially when the business owner has yet to experience customer support in public. One has to match the right tone and clarity and instruct the customer to redirect to the proper support channels.
By providing diverse responses, we can ensure each support message feels unique and tailored to the customer's message, avoiding the impersonal feel of a canned response.
## Problems with existing solutions
We prompted ChatGPT to respond an irate, emotionally charged, and informal tone to match generation criteria which we will provide below.
"You are a customer support representative. compose a customer response to this tweet:
Your website is straight up garbage. how do you sell high end technology but you cant get a website right?"
[GenerationCriteria]
"Thank you for reaching out and sharing your feedback. We apologize for the trouble you're experiencing with our website. Please DM us or contact our support team at [support link] so we can help resolve any issues you're encountering."
First, the response is wordily composed and does not contain a request to DM; second, it has an apology and its context. Third, from our experience, ChatGPT values politeness more than a human would.
As a business, we want our brands to hold a high standard for these responses.
We present ReplyCaddy, an AI-powered first-response text message generator that will help us compose the right first-response message that composes personal messages and matches the customer's tone.
We tested the prompt above to ReplyCaddy, and it generated these examples:
"hi! let's talk about it."
"we'd love to help. we're here to help!"
"we understand that you are not happy with the website. please send us an email at <|url|>"
""")
|