# Page residue from the Hugging Face Spaces listing this file was copied from:
# Spaces: Sleeping
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration | |
import torch | |
import gradio as gr | |
from datasets import load_dataset | |
# PersistDataset ----- | |
import os | |
import csv | |
from gradio import inputs, outputs | |
import huggingface_hub | |
from huggingface_hub import Repository, hf_hub_download, upload_file | |
from datetime import datetime | |
# FastAPI is where it's at: share your app, share your API
import fastapi | |
from typing import List, Dict | |
import httpx | |
import pandas as pd | |
import datasets as ds | |
# When True, every chat exchange is appended to a CSV file and returned
# to the UI as a dataframe.
UseMemory = True
# Hugging Face access token read from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
def SaveResult(text, outputfileName):
    """Append one line of text to a log file, creating the file if needed.

    Newlines inside ``text`` are replaced by spaces so that each call adds
    exactly one line. When the file does not exist yet, the column header
    ``time, message, text`` is written first.

    Args:
        text: String to record.
        outputfileName: Path of the file to append to.

    Returns:
        None.
    """
    savePath = outputfileName
    print("Saving: " + text + " to " + savePath)
    # Check before opening: append mode creates the file, which would hide
    # the fact that the header has not been written yet.
    is_new_file = not os.path.exists(savePath)
    with open(savePath, "a", encoding="utf-8") as f:
        if is_new_file:
            # One time only, to give the CSV file its column headers.
            f.write("time, message, text\n")
        f.write(text.replace("\n", " "))
        f.write("\n")
def store_message(name: str, message: str, outputfileName: str) -> pd.DataFrame:
    """Append a timestamped row to the CSV memory file and return its contents.

    If the file does not exist it is created with the header line
    ``time, message, text``. A data row is written only when both ``name``
    and ``message`` are non-empty; the row holds the current timestamp, the
    stripped ``message`` and the stripped ``name``, in that order.

    NOTE: the header labels the third column `` text`` while the rows store
    ``name`` there. The header is kept as-is so files written earlier stay
    readable with the same column names.

    Args:
        name: Value stored in the third column.
        message: Value stored in the second column.
        outputfileName: Path of the CSV file.

    Returns:
        The whole file as a DataFrame, sorted by its first column
        (the timestamp) in descending order.
    """
    savePath = outputfileName
    is_new_file = not os.path.exists(savePath)
    has_row = bool(name and message)

    if is_new_file or has_row:
        # newline="" is what the csv module requires so the writer controls
        # line endings itself; utf-8 matches what pd.read_csv expects.
        with open(savePath, "a", newline="", encoding="utf-8") as csvfile:
            if is_new_file:
                # One time only, to give the CSV file its column headers.
                csvfile.write("time, message, text\n")
            if has_row:
                writer = csv.DictWriter(
                    csvfile, fieldnames=["time", "message", "name"]
                )
                writer.writerow(
                    {
                        "time": str(datetime.now()),
                        "message": message.strip(),
                        "name": name.strip(),
                    }
                )

    df = pd.read_csv(savePath)
    return df.sort_values(df.columns[0], ascending=False)
# Load the pretrained BlenderBot checkpoint and its matching tokenizer once,
# at module import, so every chat() call reuses the same objects.
mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)
def take_last_tokens(inputs, note_history, history, max_tokens=128):
    """Trim the encoded conversation to its last ``max_tokens`` tokens.

    When ``inputs['input_ids']`` has more than ``max_tokens`` columns, the
    ``input_ids`` and ``attention_mask`` entries of ``inputs`` are replaced
    in place by the last ``max_tokens`` positions of their first row. In the
    same case the first two ``'</s> <s>'``-separated segments are dropped
    from ``note_history[0]`` and the first item is dropped from ``history``.
    Shorter inputs are returned untouched.

    Args:
        inputs: Mapping with 2-D tensors under 'input_ids' and
            'attention_mask'.
        note_history: Single-element list holding the joined conversation.
        history: List of conversation turns.
        max_tokens: Maximum number of token positions to keep; must be
            positive. Defaults to 128, the previously hard-coded limit.

    Returns:
        The tuple ``(inputs, note_history, history)``.
    """
    if inputs['input_ids'].shape[1] > max_tokens:
        # [:1] keeps only the first sequence and preserves the 2-D shape;
        # clone() keeps the result independent of the original tensor.
        inputs['input_ids'] = inputs['input_ids'][:1, -max_tokens:].clone()
        inputs['attention_mask'] = inputs['attention_mask'][:1, -max_tokens:].clone()
        note_history = ['</s> <s>'.join(note_history[0].split('</s> <s>')[2:])]
        history = history[1:]
    return inputs, note_history, history
def add_note_to_history(note, note_history):
    """Return the conversation with ``note`` appended, joined into one string.

    The entries of ``note_history`` followed by ``note`` are joined with the
    ``'</s> <s>'`` separator. The caller's list is not modified.

    Args:
        note: Text to add at the end of the conversation.
        note_history: List of strings making up the conversation so far.

    Returns:
        A single-element list containing the joined conversation.
    """
    return ['</s> <s>'.join([*note_history, note])]
# Display strings for the app.
title = "💬ChatBack🧠💾"
description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
Current Best SOTA Chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F """
def get_base(filename):
    """Return the path of ``filename`` inside this script's directory.

    The directory and the resulting path are printed for debugging.

    Args:
        filename: File name to place under the script directory.

    Returns:
        The script directory joined with ``filename``.
    """
    basedir = os.path.dirname(__file__)
    print(basedir)
    # os.path.join inserts the platform's separator; plain concatenation
    # glued the file name straight onto the directory name.
    loadPath = os.path.join(basedir, filename)
    print(loadPath)
    return loadPath
def chat(message, history):
    """Generate a reply to ``message`` and optionally record the exchange.

    The previous turns in ``history`` and the new message are joined with
    the ``'</s> <s>'`` separator, tokenized, trimmed by take_last_tokens and
    passed to the model. The last two segments of the resulting conversation
    are appended to ``history`` as a pair.

    Args:
        message: The user's new message.
        history: List of 2-item turns from earlier calls, or a falsy value
            for a fresh conversation.

    Returns:
        A tuple ``(history, df, basedir)``: the updated history, the stored
        messages as a DataFrame, and the path produced by get_base for the
        memory file. When UseMemory is off, ``df`` is an empty DataFrame and
        ``basedir`` is None.
    """
    history = history or []
    if history:
        history_useful = ['</s> <s>'.join([str(a[0])+'</s> <s>'+str(a[1]) for a in history])]
    else:
        history_useful = []
    history_useful = add_note_to_history(message, history_useful)
    # Named `encoded` so the module-level `inputs` import is not shadowed.
    encoded = tokenizer(history_useful, return_tensors="pt")
    encoded, history_useful, history = take_last_tokens(encoded, history_useful, history)
    reply_ids = model.generate(**encoded)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    history_useful = add_note_to_history(response, history_useful)
    list_history = history_useful[0].split('</s> <s>')
    history.append((list_history[-2], list_history[-1]))

    # Defaults so the return statement is valid when UseMemory is off.
    df = pd.DataFrame()
    basedir = None
    if UseMemory:
        # File name chosen to test first-time file creation.
        outputfileName = 'ChatbotMemory3.csv'
        # store_message's `name` slot receives the user message and its
        # `message` slot receives the model reply.
        df = store_message(message, response, outputfileName)  # Save to dataset
        basedir = get_base(outputfileName)
    return history, df, basedir
# Build the UI: a text box and button, the stored-messages table, and a file
# output; then start the app.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>🍰Gradio chatbot backed by dataframe CSV memory🎨</center></h1>")
    with gr.Row():
        # `value` is the Textbox parameter for the initial content.
        t1 = gr.Textbox(lines=1, value="", label="Chat Text:")
        b1 = gr.Button("Respond and Retrieve Messages")
    with gr.Row():  # inputs and buttons
        # Conversation state handed to chat() as `history` and updated from
        # its first return value.
        s1 = gr.State([])
        df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour="paginate")
    with gr.Row():  # inputs and buttons
        file = gr.File(label="File")
        s2 = gr.Markdown()
    b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1, file])
demo.launch(debug=True, show_error=True)