Spaces:
Runtime error
Runtime error
File size: 12,194 Bytes
4451326 d8b9f2a 4451326 d8b9f2a 4451326 d8b9f2a 4451326 4ce11e4 4451326 4ce11e4 4451326 4ce11e4 4451326 4ce11e4 4451326 4ce11e4 4451326 d8b9f2a 4451326 d8b9f2a 4ce11e4 d8b9f2a 4ce11e4 d8b9f2a 4ce11e4 d8b9f2a 68cbcae d8b9f2a 4451326 68cbcae d8b9f2a 68cbcae 4451326 d8b9f2a 4451326 d8b9f2a 4ce11e4 d8b9f2a 4451326 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 |
import gradio as gr
import os
import json
import requests
#Chatbot2
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
import torch
from datasets import load_dataset
# PersistDataset -----
import os
import csv
from gradio import inputs, outputs
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
import fastapi
from typing import List, Dict
import httpx
import pandas as pd
import datasets as ds
# Chatbot2 constants
# HTML banner rendered at the top of the Gradio page.
title = """<h1 align="center">💬ChatGPT ChatBack🧠💾</h1>"""
# When true, chat() persists every exchange through store_message().
UseMemory=True
# Hugging Face token; None when the environment variable is not set.
HF_TOKEN=os.environ.get("HF_TOKEN")
# ChatGPT info
# Endpoint that predict() POSTs chat-completion requests to.
API_URL = "https://api.openai.com/v1/chat/completions" #os.getenv("API_URL") + "/generate_stream"
# The bearer token sent to the OpenAI endpoint is read from the HF_TOKEN
# environment variable (the Space secret); a missing variable raises KeyError
# at import time.
# NOTE(review): the OpenAI key and the Hugging Face token share one variable
# name here - confirm that the HF_TOKEN secret really holds an OpenAI API key.
OPENAI_API_KEY= os.environ["HF_TOKEN"]
# Keys for Open AI ChatGPT API usage are created from here: https://platform.openai.com/account/api-keys
# Markdown shown at the bottom of the page via gr.Markdown(description).
description = """
Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
## ChatGPT Datasets 📚
- WebText
- Common Crawl
- BooksCorpus
- English Wikipedia
- Toronto Books Corpus
- OpenWebText
## ChatGPT Datasets - Details 📚
- **WebText:** A dataset of web pages crawled from domains on the Alexa top 5,000 list. This dataset was used to pretrain GPT-2.
- [WebText: A Large-Scale Unsupervised Text Corpus by Radford et al.](https://paperswithcode.com/dataset/webtext)
- **Common Crawl:** A dataset of web pages from a variety of domains, which is updated regularly. This dataset was used to pretrain GPT-3.
- [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/common-crawl) by Brown et al.
- **BooksCorpus:** A dataset of over 11,000 books from a variety of genres.
- [Scalable Methods for 8 Billion Token Language Modeling](https://paperswithcode.com/dataset/bookcorpus) by Zhu et al.
- **English Wikipedia:** A dump of the English-language Wikipedia as of 2018, with articles from 2001-2017.
- [Improving Language Understanding by Generative Pre-Training](https://huggingface.co/spaces/awacke1/WikipediaUltimateAISearch?logs=build) Space for Wikipedia Search
- **Toronto Books Corpus:** A dataset of over 7,000 books from a variety of genres, collected by the University of Toronto.
- [Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond](https://paperswithcode.com/dataset/bookcorpus) by Schwenk and Douze.
- **OpenWebText:** A dataset of web pages that were filtered to remove content that was likely to be low-quality or spammy. This dataset was used to pretrain GPT-3.
- [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/openwebtext) by Brown et al.
"""
#ChatGPT predict
def _build_chat_messages(inputs, chat_counter, chatbot):
    """Return the OpenAI ``messages`` list for the current turn."""
    if chat_counter == 0:
        # First turn: there are no earlier exchanges to replay.
        return [{"role": "user", "content": f"{inputs}"}]
    messages = []
    for exchange in chatbot:
        # Each chatbot entry is a (user text, assistant text) pair.
        messages.append({"role": "user", "content": exchange[0]})
        messages.append({"role": "assistant", "content": exchange[1]})
    messages.append({"role": "user", "content": inputs})
    return messages


def _delta_content(raw_line):
    """Return the text carried by one streamed line, or None if it has none."""
    prefix = "data: "
    line = raw_line.decode()
    if not line.startswith(prefix):
        # Blank keep-alive lines and anything that is not an event payload.
        return None
    data = line[len(prefix):]
    if data == "[DONE]":
        return None
    choices = json.loads(data).get("choices") or []
    if not choices:
        return None
    # Deltas that only carry a role (or nothing) have no "content" key.
    return choices[0].get("delta", {}).get("content")


def predict(inputs, top_p, temperature, chat_counter, chatbot=None, history=None):  # repetition_penalty, top_k
    """Stream a gpt-3.5-turbo reply to ``inputs``, yielding the growing chat.

    Args:
        inputs: The user's new message.
        top_p: Nucleus-sampling value sent to the API.
        temperature: Sampling temperature sent to the API.
        chat_counter: Number of turns already taken; 0 means first turn, in
            which case ``chatbot`` is not replayed to the model.
        chatbot: Earlier (user, assistant) pairs to replay as context.
        history: Flat list alternating user and assistant texts. It is
            extended in place with this turn.

    Yields:
        ``(chat, history, chat_counter)`` after every received text fragment,
        where ``chat`` is ``history`` regrouped into (user, assistant) tuples
        and ``chat_counter`` is already incremented for this turn.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # Fresh lists per call: a shared default list would leak state, because
    # history is mutated below.
    chatbot = [] if chatbot is None else chatbot
    history = [] if history is None else history

    # 1. Set up a payload. The sampling parameters come from the caller on
    #    every turn, including the first one.
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": _build_chat_messages(inputs, chat_counter, chatbot),
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }

    # 2. Define your headers and add a key from https://platform.openai.com/account/api-keys
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }

    # 3. Advance the turn counter.
    print(f"chat_counter - {chat_counter}")
    chat_counter += 1

    # 4. POST it to OPENAI API
    history.append(inputs)
    print(f"payload is - {payload}")
    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
    # Fail with the HTTP status instead of a JSON parsing error further down.
    response.raise_for_status()

    # 5. Iterate through response lines and structure readable response
    # TODO - make this parse out markdown so we can have similar interface
    token_counter = 0
    partial_words = ""
    lines = response.iter_lines()
    next(lines, None)  # Skipping first chunk
    for raw_line in lines:
        piece = _delta_content(raw_line)
        if piece is None:
            continue
        partial_words += piece
        if token_counter == 0:
            history.append(" " + partial_words)
        else:
            history[-1] = partial_words
        token_counter += 1
        # Regroup the flat history into (user, assistant) tuples.
        chat = list(zip(history[::2], history[1::2]))
        yield chat, history, chat_counter  # resembles {chatbot: chat, state: history}
def take_last_tokens(inputs, note_history, history, max_tokens=128):
    """Trim a tokenized conversation that is longer than ``max_tokens``.

    When ``inputs['input_ids']`` has more than ``max_tokens`` columns, only
    the last ``max_tokens`` entries of row 0 are kept for ``input_ids`` and
    ``attention_mask``, the first two ``'</s> <s>'``-separated turns are
    removed from ``note_history[0]``, and the first entry of ``history`` is
    dropped. Otherwise everything is returned unchanged.

    Args:
        inputs: Mapping with ``'input_ids'`` and ``'attention_mask'`` tensors;
            it is updated in place when trimming happens.
        note_history: One-element list holding the joined conversation text.
        history: List of earlier exchanges.
        max_tokens: Largest number of tokens to keep (default 128).

    Returns:
        The tuple ``(inputs, note_history, history)``.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        # A slice of [-0:] would silently keep everything.
        raise ValueError("max_tokens must be at least 1")
    if inputs['input_ids'].shape[1] > max_tokens:
        for key in ('input_ids', 'attention_mask'):
            # Rebuild a one-row tensor from the tail of row 0.
            inputs[key] = torch.tensor([inputs[key][0][-max_tokens:].tolist()])
        turns = note_history[0].split('</s> <s>')
        note_history = ['</s> <s>'.join(turns[2:])]
        history = history[1:]
    return inputs, note_history, history
def add_note_to_history(note, note_history):
    """Append ``note`` to ``note_history`` and return the joined text in a list.

    The list passed in is extended in place; the returned value is a new
    one-element list whose item is every note joined with ``'</s> <s>'``.
    Runs synchronously, so the caller has the result as soon as it returns.
    """
    separator = '</s> <s>'
    note_history.append(note)
    joined = separator.join(note_history)
    return [joined]
# ChatGPT clear
def reset_textbox():
    """Return a Gradio update that sets the bound component's value to ''."""
    cleared = gr.update(value='')
    return cleared
#Chatbot2 Save Results
def SaveResult(text, outputfileName):
    """Append ``text`` as one line to ``outputfileName``.

    Newlines inside ``text`` are replaced by spaces so that the entry stays on
    a single line. If the file does not exist yet, or exists but is empty, the
    column-header line is written first.

    Args:
        text: The string to store.
        outputfileName: Path of the file to append to.
    """
    print("Saving: " + text + " to " + outputfileName)
    needs_header = (
        not os.path.exists(outputfileName)
        or os.path.getsize(outputfileName) == 0
    )
    # Explicit UTF-8 so non-ASCII text does not depend on the locale encoding.
    with open(outputfileName, "a", encoding="utf-8") as f:
        if needs_header:
            f.write("time, message, text\n")  # one time only to get column headers for CSV file
        f.write(text.replace("\n", " ") + "\n")
#Chatbot2 Store Message
def store_message(name: str, message: str, outputfileName: str):
    """Append one row to the CSV memory file and return its contents.

    A row ``(current time, message, name)`` is written only when both ``name``
    and ``message`` are non-empty; both are stripped of surrounding
    whitespace. If the file is missing or empty, the header line is written
    first so that ``pd.read_csv`` always finds column labels.

    NOTE(review): the header labels ("time, message, text") do not match the
    row field names ("time", "message", "name"); the header text is left
    unchanged here so that files written earlier keep the same layout.

    Args:
        name: Third column of the stored row.
        message: Second column of the stored row.
        outputfileName: Path of the CSV file.

    Returns:
        A DataFrame of the whole file, sorted by its first column, descending.
    """
    has_row = bool(name and message)
    needs_header = (
        not os.path.exists(outputfileName)
        or os.path.getsize(outputfileName) == 0
    )
    if needs_header or has_row:
        # newline="" is what the csv module expects for files it writes to;
        # UTF-8 matches the default encoding pd.read_csv uses below.
        with open(outputfileName, "a", encoding="utf-8", newline="") as csvfile:
            if needs_header:
                csvfile.write("time, message, text\n")  # one time only to get column headers for CSV file
            if has_row:
                writer = csv.DictWriter(csvfile, fieldnames=["time", "message", "name"])
                writer.writerow(
                    {"time": str(datetime.now()), "message": message.strip(), "name": name.strip()}
                )
    df = pd.read_csv(outputfileName)
    return df.sort_values(df.columns[0], ascending=False)
#Chatbot2 get base directory of saves
def get_base(filename):
    """Return the path of ``filename`` inside this script's directory.

    The directory and the file name are joined with a path separator.
    Leading separators on ``filename`` are ignored, so ``"/x.csv"`` and
    ``"x.csv"`` give the same result. Both the directory and the final path
    are printed.

    Args:
        filename: File name relative to the script directory.

    Returns:
        The joined path as a string.
    """
    basedir = os.path.dirname(__file__)
    print(basedir)
    # Plain string concatenation would glue the name onto the directory name.
    separators = os.sep + (os.altsep or "")
    loadPath = os.path.join(basedir, filename.lstrip(separators))
    print(loadPath)
    return loadPath
#Chatbot2 - History
def chat(message, history):
    """Generate a model reply to ``message`` and optionally persist the exchange.

    The earlier exchanges and the new message are joined with ``'</s> <s>'``,
    tokenized, trimmed by take_last_tokens(), and passed to ``model.generate``.
    The decoded reply is appended to ``history`` as a (message, reply) tuple.
    When ``UseMemory`` is true the exchange is also written with
    store_message() and the file location is looked up with get_base().

    NOTE(review): ``tokenizer`` and ``model`` are not defined in the visible
    part of this file; they must exist at module level before this runs.

    Args:
        message: The user's new message.
        history: List of earlier (message, reply) pairs, or a falsy value.

    Returns:
        ``(history, df, basedir)``: the updated history, the stored-messages
        DataFrame (empty when ``UseMemory`` is false) and the memory-file path
        (None when ``UseMemory`` is false).
    """
    history = history or []
    if history:
        history_useful = ['</s> <s>'.join(str(a[0]) + '</s> <s>' + str(a[1]) for a in history)]
    else:
        history_useful = []
    history_useful = add_note_to_history(message, history_useful)
    inputs = tokenizer(history_useful, return_tensors="pt")
    inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
    reply_ids = model.generate(**inputs)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    history_useful = add_note_to_history(response, history_useful)
    list_history = history_useful[0].split('</s> <s>')
    history.append((list_history[-2], list_history[-1]))
    df = pd.DataFrame()
    # Bound up front so the return below works when memory is switched off.
    basedir = None
    if UseMemory:
        outputfileName = 'ChatbotMemory3.csv'  # Test first time file create
        df = store_message(message, response, outputfileName)  # Save to dataset
        basedir = get_base(outputfileName)
    return history, df, basedir
# 6. Use Gradio to pull it all together
# One Blocks page holding two separate chat panels: the first is wired to
# chat() and shows the stored messages, the second is wired to predict().
with gr.Blocks(css = """#col_container {width: 1000px; margin-left: auto; margin-right: auto;} #chatbot {height: 520px; overflow: auto;}""") as demo:
    gr.HTML(title)
    # Chat bot memory - dataframe
    gr.Markdown("<h1><center>🍰Gradio chatbot backed by dataframe CSV memory🎨</center></h1>")
    with gr.Row():
        t1 = gr.Textbox(lines=1, default="", label="Chat Text:")
        b1 = gr.Button("🍰 Respond and Retrieve Messages")
    with gr.Row(): # inputs and buttons
        # s1 carries the chat() history between clicks.
        s1 = gr.State([])
        df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate")
    with gr.Row(): # inputs and buttons
        file = gr.File(label="File")
        # s2 is created but not referenced by any event below.
        s2 = gr.Markdown()
    # chat() returns (history, DataFrame, file path), matching these outputs.
    b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1, file])
    with gr.Column(elem_id = "col_container"):
        chatbot = gr.Chatbot(elem_id='chatbot')
        # NOTE: this rebinds `inputs`, which the import section also binds
        # via `from gradio import inputs, outputs`.
        inputs = gr.Textbox(placeholder= "There is only one real true reward in life and this is existence or nonexistence. Everything else is a corollary.", label= "Type an input and press Enter") #t
        # state carries the predict() history between calls.
        state = gr.State([])
        gpt = gr.Button()
        with gr.Accordion("Parameters", open=False):
            top_p = gr.Slider( minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)",)
            # NOTE(review): the slider allows values up to 5.0 - confirm the
            # range the OpenAI API accepts for temperature.
            temperature = gr.Slider( minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature",)
            # Hidden turn counter that predict() reads and returns incremented.
            chat_counter = gr.Number(value=0, visible=False, precision=0)
    # Pressing Enter and clicking the button both run predict(); each also
    # clears the input box through reset_textbox().
    inputs.submit( predict, [inputs, top_p, temperature,chat_counter, chatbot, state], [chatbot, state, chat_counter],)
    gpt.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter],)
    gpt.click(reset_textbox, [], [inputs])
    inputs.submit(reset_textbox, [], [inputs])
    # Show ChatGPT Datasets information
    gr.Markdown(description)
# Kickoff
demo.queue().launch(debug=True)
|