Spaces:
Runtime error
Runtime error
File size: 6,167 Bytes
c18db37 2ef4006 c18db37 08af166 6266cf4 5455896 ff0ccdb 8c67835 6266cf4 de6d7ec 8c67835 68decb1 efe1021 9b06b1e a1b669a d434e57 85064b1 6481f63 9b06b1e f240a0c 9b06b1e 8f99b37 9b06b1e 8c67835 9b06b1e 08af166 5455896 6481f63 85064b1 c18db37 d434e57 c18db37 c60c8cf 48295f3 c60c8cf c18db37 dd5e8e8 f60697c c18db37 d434e57 c18db37 85064b1 8c67835 20415a9 8c67835 117b6a7 8c67835 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
import torch
import gradio as gr
from datasets import load_dataset
# PersistDataset -----
import os
import csv
from gradio import inputs, outputs
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
#fastapi is where its at: share your app, share your api
import fastapi
from typing import List, Dict
import httpx
import pandas as pd
import datasets as ds
# -------------------------------------------- For Memory - you will need to set up a dataset and HF_TOKEN ---------
# Set UseMemory to False to run the chatbot without the persistent dataset.
UseMemory = True
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/ChatbotMemory.csv"
DATASET_REPO_ID = "awacke1/ChatbotMemory.csv"
DATA_FILENAME = "ChatbotMemory.csv"
# Local working directory that holds the clone of the dataset repository.
DATA_DIRNAME = "data"
DATA_FILE = os.path.join(DATA_DIRNAME, DATA_FILENAME)
# Token read from the environment; passed to the clone below for authentication.
HF_TOKEN = os.environ.get("HF_TOKEN")
if UseMemory:
    # The previous hf_hub_download() call referenced an undefined DATA_DIRNAME,
    # so it always raised NameError, which a bare `except:` reported as
    # "file not found".  It is intentionally not revived: writing a file into
    # the directory before cloning would leave a non-empty, non-git folder
    # there.  The clone below is what provides DATA_FILE.
    repo = Repository(
        local_dir=DATA_DIRNAME, clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
    )
def get_df(name: str):
    """Load the ``train`` split of the dataset identified by *name*.

    Args:
        name: Dataset identifier handed to ``load_dataset``.

    Returns:
        Whatever ``load_dataset(name, split="train")`` returns.
    """
    # The original passed the builtin ``str`` here instead of the argument.
    return load_dataset(name, split="train")
def store_message(name: str, message: str):
    """Append one row to the local memory CSV and push the clone to the Hub.

    Nothing is written or pushed when either argument is empty.

    Args:
        name: Value stored (stripped) in the ``name`` column.
        message: Value stored (stripped) in the ``message`` column.

    Returns:
        Always the empty string.
    """
    if not (name and message):
        return ""
    row = {
        "time": str(datetime.now()),
        "message": message.strip(),
        "name": name.strip(),
    }
    # newline="" lets the csv module control line endings itself; utf-8 keeps
    # non-ASCII chat text (emoji etc.) writable regardless of the locale.
    with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["time", "message", "name"])
        writer.writerow(row)
    # Publish the appended row through the module-level repository clone.
    repo.push_to_hub()
    return ""
# ----------------------------------------------- For Memory
# Checkpoint name shared by the tokenizer and the generation model.
mname = "facebook/blenderbot-400M-distill"
# Both objects are loaded once at import time and reused by every request.
tokenizer = BlenderbotTokenizer.from_pretrained(mname)
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
def take_last_tokens(inputs, note_history, history, max_tokens=128):
    """Keep only the most recent ``max_tokens`` tokens of an encoded prompt.

    When the encoded prompt is longer than ``max_tokens``, the token tensors
    are cut to their last ``max_tokens`` positions, the first two
    separator-delimited segments are dropped from the prompt text, and the
    oldest entry is dropped from ``history``.  Shorter prompts are returned
    unchanged.

    Args:
        inputs: Mapping with ``input_ids`` and ``attention_mask`` tensors that
            are indexed as (row, token).  Updated in place when trimming.
        note_history: One-element list holding the prompt text, with segments
            joined by ``'</s> <s>'``.
        history: List of earlier turns.
        max_tokens: Maximum number of trailing tokens to keep (default 128).

    Returns:
        Tuple ``(inputs, note_history, history)``.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")
    if inputs['input_ids'].shape[1] > max_tokens:
        for key in ('input_ids', 'attention_mask'):
            # Only the first row is kept, as a fresh (1, max_tokens) tensor.
            inputs[key] = inputs[key][:1, -max_tokens:].clone()
        segments = note_history[0].split('</s> <s>')
        note_history = ['</s> <s>'.join(segments[2:])]
        history = history[1:]
    return inputs, note_history, history
def add_note_to_history(note, note_history):
    """Return the history text with *note* appended as the newest segment.

    Args:
        note: Text to add.
        note_history: List of existing text segments (may be empty).  It is
            not modified.

    Returns:
        A new one-element list whose string is every segment of
        ``note_history`` followed by ``note``, joined with ``'</s> <s>'``.
    """
    # Build a new sequence instead of appending to the caller's list.
    return ['</s> <s>'.join([*note_history, note])]
# Display strings for the app.
# NOTE(review): in the visible source, `title` is referenced only by the
# commented-out gr.Interface call and `description` is not referenced at all —
# confirm whether the Blocks UI should show them.
title = "💬ChatBack🧠💾"
description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
Current Best SOTA Chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F """
def chat(message, history):
    """Generate a reply to *message* and return the updated conversation.

    The earlier turns and the new message are joined with ``'</s> <s>'``,
    tokenized, trimmed by ``take_last_tokens`` and passed to the model.  When
    ``UseMemory`` is enabled, the exchange is saved with ``store_message`` and
    the memory dataset is reloaded so it can be displayed.

    Args:
        message: Text entered by the user.
        history: List of ``(user_text, bot_text)`` pairs from earlier turns,
            or a falsy value on the first call.

    Returns:
        Tuple ``(history, dataframe)``: the history including the new turn,
        and the memory dataset sorted by ``time`` (newest first), or an empty
        DataFrame when ``UseMemory`` is off.
    """
    history = history or []
    if history:
        history_useful = [
            '</s> <s>'.join(str(a[0]) + '</s> <s>' + str(a[1]) for a in history)
        ]
    else:
        history_useful = []
    history_useful = add_note_to_history(message, history_useful)

    # Named `encoded` so the module-level `inputs` import is not shadowed.
    encoded = tokenizer(history_useful, return_tensors="pt")
    encoded, history_useful, history = take_last_tokens(encoded, history_useful, history)
    reply_ids = model.generate(**encoded)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

    # Record the turn from the actual message and reply.  Re-splitting the
    # trimmed prompt text could yield an empty user turn when trimming removed
    # every segment of it.
    history.append((message, response))

    memory_df = pd.DataFrame()
    if UseMemory:
        # Requires the dataset repository and HF_TOKEN configured at module level.
        store_message(message, response)
        memory = ds.load_dataset(DATASET_REPO_ID)
        memory_df = memory["train"].to_pandas()
        memory_df = memory_df.sort_values(by="time", ascending=False)
    return history, memory_df
#gr.Interface(
# fn=chat,
# theme="huggingface",
# css=".footer {display:none !important}",
# inputs=["text", "state"],
# #outputs=["chatbot", "state", "text"],
# outputs=["chatbot", "state", "dataframe"],
# title=title,
# allow_flagging="never",
# description=f"Gradio chatbot backed by memory in a dataset repository.",
# article=f"The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer Code and datasets on chat are here hf tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest"
# ).launch(debug=True)
# Blocks UI: a text box and a send button feed `chat`; the conversation is kept
# in a State component and the memory dataset is shown in a Dataframe.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>🍰Gradio chatbot backed by memory in a dataset repository.🎨</center></h1>")
    with gr.Row():
        # `value` sets the initial text; the older `default` keyword is not
        # part of the Blocks component signature.
        t1 = gr.Textbox(lines=1, value="", label="Chat Text:")
        b1 = gr.Button("Send Message")
    with gr.Row():  # inputs and buttons
        s1 = gr.State([])
        s2 = gr.Markdown()
    with gr.Row():
        df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour="paginate")
    # chat(message, history) -> (history, dataframe)
    b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1])
demo.launch(debug=True, show_error=True)