awacke1 committed on
Commit d207399 · 1 Parent(s): f192b5f

Update backupapp.py

Files changed (1)
  1. backupapp.py +427 -194
backupapp.py CHANGED
@@ -1,209 +1,442 @@
- import gradio as gr
- import os
- import json
  import requests

- # Streaming endpoint
- API_URL = "https://api.openai.com/v1/chat/completions"  # os.getenv("API_URL") + "/generate_stream"
- OPENAI_API_KEY = os.environ["HF_TOKEN"]  # Add a token to this Space, then copy it to the repository secret in the Space's settings panel; os.environ reads it from there.
- # Keys for OpenAI ChatGPT API usage are created at https://platform.openai.com/account/api-keys
-
- def predict(inputs, top_p, temperature, chat_counter, chatbot=[], history=[]):  # repetition_penalty, top_k
-
-     # 1. Set up a payload
-     payload = {
-         "model": "gpt-3.5-turbo",
-         "messages": [{"role": "user", "content": f"{inputs}"}],
-         "temperature": 1.0,
-         "top_p": 1.0,
-         "n": 1,
-         "stream": True,
-         "presence_penalty": 0,
-         "frequency_penalty": 0,
-     }
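-     # Note: this initial payload pins temperature and top_p to 1.0; the
-     # slider values are only applied once chat_counter is nonzero below.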
-
-     # 2. Define your headers and add a key from https://platform.openai.com/account/api-keys
      headers = {
-         "Content-Type": "application/json",
-         "Authorization": f"Bearer {OPENAI_API_KEY}"
      }
-
-     # 3. Rebuild the running message history (alternating user/assistant turns) so the model keeps conversational memory
-     print(f"chat_counter - {chat_counter}")
-     if chat_counter != 0:
-         messages = []
-         for data in chatbot:
-             temp1 = {}
-             temp1["role"] = "user"
-             temp1["content"] = data[0]
-             temp2 = {}
-             temp2["role"] = "assistant"
-             temp2["content"] = data[1]
-             messages.append(temp1)
-             messages.append(temp2)
-         temp3 = {}
-         temp3["role"] = "user"
-         temp3["content"] = inputs
-         messages.append(temp3)
-         payload = {
-             "model": "gpt-3.5-turbo",
-             "messages": messages,
-             "temperature": temperature,
-             "top_p": top_p,
-             "n": 1,
-             "stream": True,
-             "presence_penalty": 0,
-             "frequency_penalty": 0,
-         }
-         chat_counter += 1
-
-     # 4. POST it to the OpenAI API
-     history.append(inputs)
-     print(f"payload is - {payload}")
-     response = requests.post(API_URL, headers=headers, json=payload, stream=True)
-     token_counter = 0
-     partial_words = ""
-
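-     # Each streamed line is a server-sent event of the form "data: {json}";
-     # chunk[6:] below strips the "data: " prefix before parsing the JSON delta.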
-     # 5. Iterate through response lines and structure a readable response
-     counter = 0
-     for chunk in response.iter_lines():
-         if counter == 0:
-             counter += 1
-             continue
-         if chunk.decode():
-             chunk = chunk.decode()
-             if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
-                 partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
-                 if token_counter == 0:
-                     history.append(" " + partial_words)
-                 else:
-                     history[-1] = partial_words
-                 chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]  # convert history to a list of (user, bot) tuples
-                 token_counter += 1
-                 yield chat, history, chat_counter
-
-
- def reset_textbox():
-     return gr.update(value='')
-
-
- # Episodic and Semantic IO
- def list_files(file_path):
-     import os
-     icon_csv = "📄 "
-     icon_txt = "📑 "
-     current_directory = os.getcwd()
-     file_list = []
-     for filename in os.listdir(current_directory):
-         if filename.endswith(".csv"):
-             file_list.append(icon_csv + filename)
-         elif filename.endswith(".txt"):
-             file_list.append(icon_txt + filename)
-     if file_list:
-         return "\n".join(file_list)
      else:
-         return "No .csv or .txt files found in the current directory."
-
- # Function to read a file
- def read_file(file_path):
-     try:
-         with open(file_path, "r") as file:
-             contents = file.read()
-         return f"{contents}"
-         #return f"Contents of {file_path}:\n{contents}"
-     except FileNotFoundError:
-         return "File not found."
-
- # Function to delete a file
- def delete_file(file_path):
-     try:
-         import os
-         os.remove(file_path)
-         return f"{file_path} has been deleted."
-     except FileNotFoundError:
-         return "File not found."
-
- # Function to write to a file
- def write_file(file_path, content):
-     try:
-         with open(file_path, "w") as file:
-             file.write(content)
-         return f"Successfully written to {file_path}."
-     except:
-         return "Error occurred while writing to file."
-
- # Function to append to a file
- def append_file(file_path, content):
-     try:
-         with open(file_path, "a") as file:
-             file.write(content)
-         return f"Successfully appended to {file_path}."
-     except:
-         return "Error occurred while appending to file."
-
-
- title = """<h1 align="center">Memory Chat Story Generator ChatGPT</h1>"""
- description = """
- ## ChatGPT Datasets 📚
- - WebText
- - Common Crawl
- - BooksCorpus
- - English Wikipedia
- - Toronto Books Corpus
- - OpenWebText
- ## ChatGPT Datasets - Details 📚
- - **WebText:** A dataset of web pages crawled from domains on the Alexa top 5,000 list. This dataset was used to pretrain GPT-2.
-   - [WebText: A Large-Scale Unsupervised Text Corpus by Radford et al.](https://paperswithcode.com/dataset/webtext)
- - **Common Crawl:** A dataset of web pages from a variety of domains, which is updated regularly. This dataset was used to pretrain GPT-3.
-   - [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/common-crawl) by Brown et al.
- - **BooksCorpus:** A dataset of over 11,000 books from a variety of genres.
-   - [Scalable Methods for 8 Billion Token Language Modeling](https://paperswithcode.com/dataset/bookcorpus) by Zhu et al.
- - **English Wikipedia:** A dump of the English-language Wikipedia as of 2018, with articles from 2001-2017.
-   - [Improving Language Understanding by Generative Pre-Training](https://huggingface.co/spaces/awacke1/WikipediaUltimateAISearch?logs=build) Space for Wikipedia Search
- - **Toronto Books Corpus:** A dataset of over 7,000 books from a variety of genres, collected by the University of Toronto.
-   - [Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond](https://paperswithcode.com/dataset/bookcorpus) by Schwenk and Douze.
- - **OpenWebText:** A dataset of web pages that were filtered to remove content that was likely to be low-quality or spammy. This open recreation of WebText was used to pretrain GPT-2-scale models.
-   - [Language Models are Few-Shot Learners](https://paperswithcode.com/dataset/openwebtext) by Brown et al.
- """
-
- # 6. Use Gradio to pull it all together
- with gr.Blocks(css="""#col_container {width: 1400px; margin-left: auto; margin-right: auto;} #chatbot {height: 600px; overflow: auto;}""") as demo:
-     gr.HTML(title)
-     with gr.Column(elem_id="col_container"):
-         inputs = gr.Textbox(placeholder="Hi there!", label="Type an input and press Enter")
-         chatbot = gr.Chatbot(elem_id='chatbot')
-         state = gr.State([])
-         b1 = gr.Button()
-         with gr.Accordion("Parameters", open=False):
-             top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)")
-             temperature = gr.Slider(minimum=0, maximum=5.0, value=1.0, step=0.1, interactive=True, label="Temperature")
-             chat_counter = gr.Number(value=0, visible=True, precision=0)

-     # Episodic/Semantic IO
-     fileName = gr.Textbox(label="Filename")
-     fileContent = gr.TextArea(label="File Content")
-     completedMessage = gr.Textbox(label="Completed")
-     label = gr.Label()
-     with gr.Row():
-         listFiles = gr.Button("📄 List File(s)")
-         readFile = gr.Button("📖 Read File")
-         saveFile = gr.Button("💾 Save File")
-         deleteFile = gr.Button("🗑️ Delete File")
-         appendFile = gr.Button("➕ Append File")
-     listFiles.click(list_files, inputs=fileName, outputs=fileContent)
-     readFile.click(read_file, inputs=fileName, outputs=fileContent)
-     saveFile.click(write_file, inputs=[fileName, fileContent], outputs=completedMessage)
-     deleteFile.click(delete_file, inputs=fileName, outputs=completedMessage)
-     appendFile.click(append_file, inputs=[fileName, fileContent], outputs=completedMessage)
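-     # predict is a generator: each yield pushes a partial chat history into
-     # the Chatbot component, so the reply streams in as tokens arrive.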
-
-     inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter])
-     b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state], [chatbot, state, chat_counter])
-     b1.click(reset_textbox, [], [inputs])
-     inputs.submit(reset_textbox, [], [inputs])
-     gr.Markdown(description)
-
- demo.queue().launch(debug=True)
+ import streamlit as st
+ import openai
+ import os
+ import base64
+ import glob
+ import json
+ import mistune
+ import pytz
+ import math
  import requests
+ import time
+ import re
+ import textract

+ from datetime import datetime
+ from openai import ChatCompletion
+ from xml.etree import ElementTree as ET
+ from bs4 import BeautifulSoup
+ from collections import deque
+ from audio_recorder_streamlit import audio_recorder
+
+ from dotenv import load_dotenv
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from templates import css, bot_template, user_template
+
+
+ def generate_filename(prompt, file_type):
+     central = pytz.timezone('US/Central')
+     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")  # date and time as MMDD_HHMM
+     safe_prompt = "".join(x for x in prompt if x.isalnum())[:90]  # strip non-alphanumerics and limit file name size
+     return f"{safe_date_time}_{safe_prompt}.{file_type}"  # return a safe file name
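+ # e.g. generate_filename("What is attention?", "md") -> "0615_1432_Whatisattention.md" (illustrative; timestamp varies)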

+
+ def transcribe_audio(openai_key, file_path, model):
+     OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
      headers = {
+         "Authorization": f"Bearer {openai_key}",
      }
+     with open(file_path, 'rb') as f:
+         data = {'file': f}
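+         # The transcription endpoint expects multipart/form-data: the audio
+         # bytes under 'file' and the model name as a form field, not JSON.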
+         response = requests.post(OPENAI_API_URL, headers=headers, files=data, data={'model': model})
+     if response.status_code == 200:
+         st.write(response.json())
+         chatResponse = chat_with_model(response.json().get('text'), '')
+         transcript = response.json().get('text')
+         #st.write('Responses:')
+         #st.write(chatResponse)
+         filename = generate_filename(transcript, 'txt')
+         create_file(filename, transcript, chatResponse)
+         return transcript
+     else:
+         st.write(response.json())
+         st.error("Error in API call.")
+         return None

+ def save_and_play_audio(audio_recorder):
+     audio_bytes = audio_recorder()
+     if audio_bytes:
+         filename = generate_filename("Recording", "wav")
+         with open(filename, 'wb') as f:
+             f.write(audio_bytes)
+         st.audio(audio_bytes, format="audio/wav")
+         return filename
+     return None

+ def create_file(filename, prompt, response):
+     if filename.endswith(".txt"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt}\n{response}")
+     elif filename.endswith(".htm"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt} {response}")
+     elif filename.endswith(".md"):
+         with open(filename, 'w') as file:
+             file.write(f"{prompt}\n\n{response}")
+
+ def truncate_document(document, length):
+     return document[:length]
+
+ def divide_document(document, max_length):
+     return [document[i:i+max_length] for i in range(0, len(document), max_length)]
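+ # e.g. divide_document("abcdef", 4) -> ["abcd", "ef"]; sections may split mid-sentence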
+
+ def get_table_download_link(file_path):
+     with open(file_path, 'r') as file:
+         try:
+             data = file.read()
+         except:
+             st.write('')
+             return file_path
+     b64 = base64.b64encode(data.encode()).decode()
+     file_name = os.path.basename(file_path)
+     ext = os.path.splitext(file_name)[1]  # get the file extension
+     if ext == '.txt':
+         mime_type = 'text/plain'
+     elif ext == '.py':
+         mime_type = 'text/plain'
+     elif ext == '.xlsx':
+         mime_type = 'text/plain'
+     elif ext == '.csv':
+         mime_type = 'text/plain'
+     elif ext == '.htm':
+         mime_type = 'text/html'
+     elif ext == '.md':
+         mime_type = 'text/markdown'
+     else:
+         mime_type = 'application/octet-stream'  # general binary data type
+     href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
+     return href

+ def CompressXML(xml_text):
+     root = ET.fromstring(xml_text)
+     # ElementTree elements have no .parent attribute, so remove matching
+     # children through their parent rather than via elem.parent.
+     for parent in root.iter():
+         for child in list(parent):
+             if isinstance(child.tag, str) and 'Comment' in child.tag:
+                 parent.remove(child)
+     return ET.tostring(root, encoding='unicode', method="xml")

+ def read_file_content(file, max_length):
+     if file.type == "application/json":
+         content = json.load(file)
+         return str(content)
+     elif file.type == "text/html" or file.type == "text/htm":
+         content = BeautifulSoup(file, "html.parser")
+         return content.text
+     elif file.type == "application/xml" or file.type == "text/xml":
+         tree = ET.parse(file)
+         root = tree.getroot()
+         xml = CompressXML(ET.tostring(root, encoding='unicode'))
+         return xml
+     elif file.type == "text/markdown" or file.type == "text/md":
+         md = mistune.create_markdown()
+         content = md(file.read().decode())
+         return content
+     elif file.type == "text/plain":
+         return file.getvalue().decode()
+     else:
+         return ""
+
+ def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
+     model = model_choice
+     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(document_section) > 0:
+         conversation.append({'role': 'assistant', 'content': document_section})
+
+     start_time = time.time()
+     report = []
+     res_box = st.empty()
+     collected_chunks = []
+     collected_messages = []
+
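+     # With stream=True each chunk carries a partial 'delta'; joining the
+     # deltas in arrival order reconstructs the full reply incrementally.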
+     for chunk in openai.ChatCompletion.create(
+             model=model,  # honor the caller's model_choice instead of hard-coding 'gpt-3.5-turbo'
+             messages=conversation,
+             temperature=0.5,
+             stream=True):
+         collected_chunks.append(chunk)  # save the event response
+         chunk_message = chunk['choices'][0]['delta']  # extract the message
+         collected_messages.append(chunk_message)  # save the message
+
+         content = chunk["choices"][0].get("delta", {}).get("content")
+         try:
+             report.append(content)
+             if len(content) > 0:
+                 result = "".join(report).strip()
+                 res_box.markdown(f'*{result}*')
+         except:
+             st.write(' ')
+
+     full_reply_content = ''.join([m.get('content', '') for m in collected_messages])
+     st.write("Elapsed time:")
+     st.write(time.time() - start_time)
+     return full_reply_content

+ def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
+     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
+     conversation.append({'role': 'user', 'content': prompt})
+     if len(file_content) > 0:
+         conversation.append({'role': 'assistant', 'content': file_content})
+     response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
+     return response['choices'][0]['message']['content']
+
+ def extract_mime_type(file):
+     # Check if the input is a string
+     if isinstance(file, str):
+         pattern = r"type='(.*?)'"
+         match = re.search(pattern, file)
+         if match:
+             return match.group(1)
+         else:
+             raise ValueError(f"Unable to extract MIME type from {file}")
+     # Otherwise assume it's a Streamlit UploadedFile, which exposes .type
+     # (the module is imported as st, so the original `streamlit.UploadedFile` check would raise NameError)
+     elif hasattr(file, 'type'):
+         return file.type
      else:
+         raise TypeError("Input should be a string or a Streamlit UploadedFile object")

+ from io import BytesIO
+ import re
+
+ def extract_file_extension(file):
+     # get the file name directly from the UploadedFile object
+     file_name = file.name
+     pattern = r".*?\.(.*?)$"
+     match = re.search(pattern, file_name)
+     if match:
+         return match.group(1)
+     else:
+         raise ValueError(f"Unable to extract file extension from {file_name}")
+
+ def pdf2txt(docs):
+     text = ""
+     for file in docs:
+         file_extension = extract_file_extension(file)
+         # print the file extension
+         st.write(f"File type extension: {file_extension}")
+
+         # read the file according to its extension
+         try:
+             if file_extension.lower() in ['py', 'txt', 'html', 'htm', 'xml', 'json']:
+                 text += file.getvalue().decode('utf-8')
+             elif file_extension.lower() == 'pdf':
+                 from PyPDF2 import PdfReader
+                 pdf = PdfReader(BytesIO(file.getvalue()))
+                 for page in range(len(pdf.pages)):
+                     text += pdf.pages[page].extract_text()  # new PyPDF2 syntax
+         except Exception as e:
+             st.write(f"Error processing file {file.name}: {e}")
+
+     return text

+ def pdf2txt_old(pdf_docs):
+     st.write(pdf_docs)
+     for file in pdf_docs:
+         mime_type = extract_mime_type(file)
+         st.write(f"MIME type of file: {mime_type}")

+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text += page.extract_text()
+     return text
+
+ def txt2chunks(text):
+     text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
+     return text_splitter.split_text(text)
+
+ def vector_store(text_chunks):
+     key = os.getenv('OPENAI_API_KEY')
+     embeddings = OpenAIEmbeddings(openai_api_key=key)
+     return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+
+ def get_chain(vectorstore):
+     llm = ChatOpenAI()
+     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+     return ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
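+ # The chain retrieves matching chunks from the FAISS index for each question
+ # and feeds them to the chat model; ConversationBufferMemory carries prior
+ # turns under the 'chat_history' key.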
+
+ def process_user_input(user_question):
+     response = st.session_state.conversation({'question': user_question})
+     st.session_state.chat_history = response['chat_history']
+     for i, message in enumerate(st.session_state.chat_history):
+         template = user_template if i % 2 == 0 else bot_template
+         st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+         # Save file output from PDF query results
+         filename = generate_filename(user_question, 'txt')
+         create_file(filename, user_question, message.content)
+
+     #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+ def divide_prompt(prompt, max_length):
+     words = prompt.split()
+     chunks = []
+     current_chunk = []
+     current_length = 0
+     for word in words:
+         if len(word) + current_length <= max_length:
+             current_length += len(word) + 1  # add 1 to account for the space
+             current_chunk.append(word)
+         else:
+             chunks.append(' '.join(current_chunk))
+             current_chunk = [word]
+             current_length = len(word)
+     chunks.append(' '.join(current_chunk))  # append the final chunk
+     return chunks
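+ # e.g. divide_prompt("a bb ccc", 4) -> ["a bb", "ccc"]; splits on word boundaries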

+ def main():
+     # Sidebar and global
+     openai.api_key = os.getenv('OPENAI_API_KEY')
+     st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
+
+     # File type for output, model choice
+     menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
+     choice = st.sidebar.selectbox("Output File Type:", menu)
+     model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
+
+     # Audio, transcribe, GPT:
+     filename = save_and_play_audio(audio_recorder)
+     if filename is not None:
+         transcription = transcribe_audio(openai.api_key, filename, "whisper-1")
+         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+         filename = None  # transcription is finished; reuse the saved transcript next time
+
+     # Prompt interfaces
+     user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
+
+     # File section interface for prompts against large documents as context
+     collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
+     with collength:
+         max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
+     with colupload:
+         uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])
+
+     # Document section chat
+     document_sections = deque()
+     document_responses = {}
+     if uploaded_file is not None:
+         file_content = read_file_content(uploaded_file, max_length)
+         document_sections.extend(divide_document(file_content, max_length))
+         if len(document_sections) > 0:
+             if st.button("👁️ View Upload"):
+                 st.markdown("**Sections of the uploaded file:**")
+                 for i, section in enumerate(list(document_sections)):
+                     st.markdown(f"**Section {i+1}**\n{section}")
+             st.markdown("**Chat with the model:**")
+             for i, section in enumerate(list(document_sections)):
+                 if i in document_responses:
+                     st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
+                 else:
+                     if st.button(f"Chat about Section {i+1}"):
+                         st.write('Reasoning with your inputs...')
+                         response = chat_with_model(user_prompt, section, model_choice)
+                         st.write('Response:')
+                         st.write(response)
+                         document_responses[i] = response
+                         filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
+                         create_file(filename, user_prompt, response)
+                         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     if st.button('💬 Chat'):
+         st.write('Reasoning with your inputs...')
+
+         #response = chat_with_model(user_prompt, ''.join(list(document_sections)), model_choice)
+
+         # Divide the user_prompt into smaller sections
+         user_prompt_sections = divide_prompt(user_prompt, max_length)
+         full_response = ''
+         for prompt_section in user_prompt_sections:
+             # Process each section with the model
+             response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
+             full_response += response + '\n'  # combine the responses
+
+         response = full_response
+         st.write('Response:')
+         st.write(response)
+
+         filename = generate_filename(user_prompt, choice)
+         create_file(filename, user_prompt, response)
+         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+     all_files = glob.glob("*.*")
+     all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
+     all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type, then file name, descending
+
+     # Sidebar of files
+     file_contents = ''
+     next_action = ''
+     for file in all_files:
+         col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])  # adjust the ratio as needed
+         with col1:
+             if st.button("🌐", key="md_" + file):  # markdown-render button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'md'
+         with col2:
+             st.markdown(get_table_download_link(file), unsafe_allow_html=True)
+         with col3:
+             if st.button("📂", key="open_" + file):  # open button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'open'
+         with col4:
+             if st.button("🔍", key="read_" + file):  # search button
+                 with open(file, 'r') as f:
+                     file_contents = f.read()
+                 next_action = 'search'
+         with col5:
+             if st.button("🗑", key="delete_" + file):
+                 os.remove(file)
+                 st.experimental_rerun()
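+     # The emoji buttons only stage an action; Streamlit reruns the script on
+     # each click, so the block below executes it once file_contents is loaded.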
+
+     if len(file_contents) > 0:
+         if next_action == 'open':
+             file_content_area = st.text_area("File Contents:", file_contents, height=500)
+         if next_action == 'md':
+             st.markdown(file_contents)
+         if next_action == 'search':
+             file_content_area = st.text_area("File Contents:", file_contents, height=500)
+             st.write('Reasoning with your inputs...')
+             response = chat_with_model(user_prompt, file_contents, model_choice)
+             filename = generate_filename(file_contents, choice)
+             create_file(filename, file_contents, response)
+
+             st.experimental_rerun()
+             #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
+
+ # Load environment variables before main() runs so OPENAI_API_KEY is available
+ load_dotenv()
+
+ if __name__ == "__main__":
+     main()
+
+ st.write(css, unsafe_allow_html=True)
+
+ st.header("Chat with documents :books:")
+ user_question = st.text_input("Ask a question about your documents:")
+ if user_question:
+     process_user_input(user_question)
+
+ with st.sidebar:
+     st.subheader("Your documents")
+     docs = st.file_uploader("import documents", accept_multiple_files=True)
+     with st.spinner("Processing"):
+         raw = pdf2txt(docs)
+         if len(raw) > 0:
+             length = str(len(raw))
+             text_chunks = txt2chunks(raw)
+             vectorstore = vector_store(text_chunks)
+             st.session_state.conversation = get_chain(vectorstore)
+             st.markdown('# AI Search Index of Length: ' + length + ' Created.')  # add timing
+             filename = generate_filename(raw, 'txt')
+             create_file(filename, raw, '')