Spaces:

allinaigc
/

coder001

Running

App Files Files Community

allinaigc committed on Sep 24, 2023

Commit

a44d8a9

1 Parent(s): 7723f88

Upload 6 files

Browse files

Files changed (3) hide show

app.py +312 -100
localKB_construct copy.py +101 -0
save_database_info.py +47 -0

app.py CHANGED Viewed

@@ -16,12 +16,9 @@
         # credentials["usernames"].update({un:user_dict})
         credentials["usernames"].update({un: user_dict})
 '''
-# TODO：1. Chinese display isssue. 2. account system.
-from dotenv import load_dotenv  # pip3 install python-dotenv
 import database as db
 from deta import Deta  # pip3 install deta
 import requests
@@ -31,7 +28,6 @@ from codeinterpreterapi import CodeInterpreterSession
 import openai
 import os
 import matplotlib.pyplot as plt
-import xlrd
 import pandas as pd
 # import csv
 import tempfile
@@ -44,14 +40,21 @@ from time import sleep
 import streamlit_authenticator as stauth
 import database as db  # python文件同目录下的.py程序，直接导入。
 import deta
 os.environ["OPENAI_API_KEY"] = os.environ['user_token']
 openai.api_key = os.environ['user_token']
-bing_search_api_key = os.environ['bing_api_key']
-bing_search_endpoint = 'https://api.bing.microsoft.com/v7.0/search'
 # os.environ["VERBOSE"] = "True"  # 可以看到具体的错误？
-# # #* 如果碰到接口问题，可以启用如下设置。
 # openai.proxy = {
 #     "http": "http://127.0.0.1:7890",
 #             "https": "http://127.0.0.1:7890"
@@ -72,40 +75,80 @@ if reset_button:
     st.session_state.messages = []
     message_placeholder = st.empty()
-# with tab2:
-def upload_file(uploaded_file):
-    if uploaded_file is not None:
-        filename = uploaded_file.name
-        st.write(filename)  # print out the whole file name to validate.
-        try:
-            if '.csv' in filename:
-                csv_file = pd.read_csv(uploaded_file)
-                csv_file.to_csv('./upload.csv', encoding='utf-8', index=False)
-                st.write(csv_file[:3])  # 这里只是显示文件，后面需要定位文件所在的绝对路径。
-            else:
-                xls_file = pd.read_excel(uploaded_file)
-                xls_file.to_csv('./upload.csv', index=False)
-                st.write(xls_file[:3])
-        except Exception as e:
-            st.write(e)
-        uploaded_file_name = "File_provided"
-        temp_dir = tempfile.TemporaryDirectory()
-        # ! working.
-        uploaded_file_path = pathlib.Path(temp_dir.name) / uploaded_file_name
-        # with open('./upload.csv', 'wb') as output_temporary_file:
-        with open(f'./{name}_upload.csv', 'wb') as output_temporary_file:
-            # print(f'./{name}_upload.csv')
-            # ! 必须用这种格式读入内容，然后才可以写入temporary文件夹中。
-            # output_temporary_file.write(uploaded_file.getvalue())
-            output_temporary_file.write(uploaded_file.getvalue())
-            # st.write(uploaded_file_path)  # * 可以查看文件是否真实存在，然后是否可以
-        # st.write('Now file saved successfully.')
-        return None
 def search(query):
@@ -129,17 +172,16 @@ def search(query):
 # openai.api_key = st.secrets["OPENAI_API_KEY"]
 async def text_mode():
     # Set a default model
     if "openai_model" not in st.session_state:
         st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
     if radio_1 == 'GPT-3.5':
         # print('----------'*5)
-        # print('radio_1: GPT-3.5 starts!')
         st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
     else:
-        # print('radio_1: GPT-4.0 starts!')
         st.session_state["openai_model"] = "gpt-4"
     # Initialize chat history
@@ -154,8 +196,8 @@ async def text_mode():
     # Display assistant response in chat message container
     # if prompt := st.chat_input("Say something"):
     prompt = st.chat_input("Say something")
-    # print('prompt now:', prompt)
-    # print('----------'*5)
     # if prompt:
     if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
@@ -167,7 +209,7 @@ async def text_mode():
             full_response = ""
             if radio_2 == '联网模式':
-                # print('联网模式入口，prompt:', prompt)
                 input_message = prompt
                 internet_search_result = search(input_message)
                 search_prompt = [
@@ -197,8 +239,8 @@ async def text_mode():
                 st.session_state.messages = []
             if radio_2 == '核心模式':
-                # print('GPT only starts!!!')
-                # print('messages:', st.session_state['messages'])
                 for response in openai.ChatCompletion.create(
                     model=st.session_state["openai_model"],
                     # messages=[
@@ -218,10 +260,95 @@ async def text_mode():
                 {"role": "assistant", "content": full_response})
 async def data_mode():
-    # print('数据分析模式启动！')
     # uploaded_file_path = './upload.csv'
-    uploaded_file_path = f'./{name}_upload.csv'
     # # st.write(f"passed file path in data_mode: {uploaded_file_path}")
     # tmp1 = pd.read_csv('./upload.csv')
     # st.write(tmp1[:5])
@@ -238,8 +365,8 @@ async def data_mode():
     # Display assistant response in chat message container
     # if prompt := st.chat_input("Say something"):
     prompt = st.chat_input("Say something")
-    # print('prompt now:', prompt)
-    # print('----------'*5)
     # if prompt:
     if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
@@ -269,7 +396,7 @@ async def data_mode():
                 user_request = environ_settings + "\n\n" + \
                     "你需要完成以下任务：\n\n" + prompt + "\n\n" \
                     f"注：文件位置在{uploaded_file_path}"
-                # print('user_request: \n', user_request)
                 # 加载上传的文件，主要路径在上面代码中。
                 files = [File.from_path(str(uploaded_file_path))]
@@ -281,7 +408,7 @@ async def data_mode():
                     )
                     # output to the user
-                    # print("AI: ", response.content)
                     full_response = response.content
                     ### full_response = "this is full response"
@@ -306,19 +433,21 @@ async def data_mode():
         # st.session_state.messages.append({"role": "assistant", "content": full_response})
-# authentication with a local yaml file.
-# import yaml
-# from yaml.loader import SafeLoader
-# with open('/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/config.yaml') as file:
-#     config = yaml.load(file, Loader=SafeLoader)
-# authenticator = stauth.Authenticate(
-#     config['credentials'],
-#     config['cookie']['name'],
-#     config['cookie']['key'],
-#     config['cookie']['expiry_days'],
-#     config['preauthorized']
-# )
 # authentication with a remove cloud-based database.
 # 导入云端用户数据库。
@@ -329,32 +458,28 @@ async def data_mode():
 # deta = Deta(DETA_KEY)
-# mybase is the name of the database in Deta. You can change it to any name you want.
-credentials = {"usernames":{}}
-# credentials = {"users": {}}
-# db = db()
-users = []
-email = []
-passwords = []
-names = []
-for row in db.fetch_all_users():
-    # users.append(row["key"])
-    # names.append(row["username"])
-    users.append(row["username"])
-    email.append(row["email"])
-    names.append(row["key"])
-    passwords.append(row["password"])
-hashed_passwords = stauth.Hasher(passwords).generate()
 ## 需要严格的按照yaml文件的格式来定义如下几个字段。
-for un, name, pw in zip(users, names, hashed_passwords):
-    # user_dict = {"name":name,"password":pw}
-    user_dict = {"name": un, "password": pw}
-    # credentials["usernames"].update({un:user_dict})
-    credentials["usernames"].update({un: user_dict})
 # ## sign-up模块，未完成。
 # database_table = []
@@ -366,12 +491,8 @@ for un, name, pw in zip(users, names, hashed_passwords):
 #     database_table.append([i,credentials['usernames'][i]['name'],credentials['usernames'][i]['password']])
 # print("database_table:",database_table)
-authenticator = stauth.Authenticate(
-    credentials=credentials, cookie_name="joeshi_gpt", key='abcedefg', cookie_expiry_days=30)
-user, authentication_status, username = authenticator.login('用户登录', 'main')
-# print("name", name, "username", username)
 # ## sign-up widget，未完成。
 # try:
@@ -383,6 +504,11 @@ user, authentication_status, username = authenticator.login('用户登录', 'mai
 #         st.success('注册成功！')
 # except Exception as e:
 #     st.error(e)
 if authentication_status:
     with st.sidebar:
@@ -419,7 +545,7 @@ if authentication_status:
             with st.text(body="说明"):
                 st.markdown("* “GPT-4”回答质量极佳，但速度缓慢、且不支持长文。建议适当使用。")
             with st.text(body="说明"):
-                st.markdown("* “联网模式”与搜索引擎一致，仅限一轮对话，不会保持之前的会话记录。")
             with st.text(body="说明"):
                 st.markdown(
                     "* “数据模式”暂时只支持1000个单元格以内的数据分析，单元格中的内容不支持中文数据（表头也尽量不使用中文）。一般���行时间在1-5分钟左右，期间需要保持网络畅通。")
@@ -458,28 +584,114 @@ if authentication_status:
     col1, col2 = st.columns(spec=[1, 2])
     radio_2 = col2.radio(label='模式选择', options=[
         '核心模式', '联网模式', '知识库模式', '数据模式'], horizontal=True, label_visibility='visible')
-    # radio_1 = col1.selectbox(label='ChatGPT版本', options=[
-    #     'GPT-3.5', 'GPT-4.0'], label_visibility='visible')
     radio_1 = col1.radio(label='ChatGPT版本', options=[
         'GPT-3.5', 'GPT-4.0'], horizontal=True, label_visibility='visible')
 elif authentication_status == False:
     st.error('⛔ 用户名或密码错误！')
 elif authentication_status == None:
-    st.warning('🔼 请先登录！')
 if __name__ == "__main__":
     import asyncio
     try:
         if radio_2 == "核心模式":
-            # print(f'radio 选择了 {radio_2}')
             # * 也可以用命令执行这个python文件。’streamlit run frontend/app.py‘
             asyncio.run(text_mode())
         if radio_2 == "联网模式":
-            # print(f'radio 选择了 {radio_2}')
-            # * 也可以用命令执行这个python文件。’streamlit run frontend/app.py‘
             asyncio.run(text_mode())
         if radio_2 == "数据模式":
             uploaded_file = st.file_uploader(
                 "选择一个文件", type=(["csv", "xlsx", "xls"]))

         # credentials["usernames"].update({un:user_dict})
         credentials["usernames"].update({un: user_dict})
 '''
+# TODO：1. Chinese display issue. 2. account system. 3. local enterprise database.
 import database as db
 from deta import Deta  # pip3 install deta
 import requests
 import openai
 import os
 import matplotlib.pyplot as plt
 import pandas as pd
 # import csv
 import tempfile
 import streamlit_authenticator as stauth
 import database as db  # python文件同目录下的.py程序，直接导入。
 import deta
+from langchain.chat_models import ChatOpenAI
+from llama_index import StorageContext, load_index_from_storage, GPTVectorStoreIndex, LLMPredictor, PromptHelper
+from llama_index import ServiceContext, QuestionAnswerPrompt
+import sys
+import time
+import PyPDF2 ## read the local_KB PDF file.
+# import localKB_construct
+import save_database_info
+from datetime import datetime
 os.environ["OPENAI_API_KEY"] = os.environ['user_token']
 openai.api_key = os.environ['user_token']
 # os.environ["VERBOSE"] = "True"  # 可以看到具体的错误？
+# #* 如果碰到接口问题，可以启用如下设置。
 # openai.proxy = {
 #     "http": "http://127.0.0.1:7890",
 #             "https": "http://127.0.0.1:7890"
     st.session_state.messages = []
     message_placeholder = st.empty()
+def clear_all():
+    """Reset the per-session chat state.
+
+    Sets the stored conversation and chat history to ``None``, empties the
+    message list, and adds an empty Streamlit placeholder element.
+
+    Returns:
+        None
+    """
+    state = st.session_state
+    state.conversation = None
+    state.chat_history = None
+    state.messages = []
+    # The placeholder handle is not used afterwards, so it is not kept.
+    st.empty()
+    return None
+# # with tab2:
+# def upload_file(uploaded_file):
+#     if uploaded_file is not None:
+#         filename = uploaded_file.name
+#         # st.write(filename)  # print out the whole file name to validate. not to show in the final version.
+#         try:
+#             if '.pdf' in filename:
+#                 # pdf_file = PyPDF2.PdfReader(uploaded_file)
+#                 PyPDF2.PdfReader(uploaded_file)
+#                 # st.write(pdf_file.pages[0].extract_text())
+#                 # with st.status('正在为您解析新知识库...', expanded=False, state='running') as status:
+#                 spinner = st.spinner('正在为您解析新知识库...请耐心等待')
+#                 # with st.spinner('正在为您解析新知识库...请耐心等待'):
+#                 with spinner:
+#                     import localKB_construct
+#                     # sleep(3)
+#                     # st.write(upload_file)
+#                     localKB_construct.process_file(uploaded_file)
+#                     st.markdown('新知识库解析成功，可以开始对话！')
+#                     spinner = st.empty()
+#                     # sleep(3)
+#                     # display = []
+#             else:
+#                 if '.csv' in filename:
+#                     csv_file = pd.read_csv(uploaded_file)
+#                     csv_file.to_csv('./upload.csv', encoding='utf-8', index=False)
+#                     st.write(csv_file[:3])  # 这里只是显示文件，后面需要定位文件所在的绝对路径。
+#                 else:
+#                     xls_file = pd.read_excel(uploaded_file)
+#                     xls_file.to_csv('./upload.csv', index=False)
+#                     st.write(xls_file[:3])
+#                 uploaded_file_name = "File_provided"
+#                 temp_dir = tempfile.TemporaryDirectory()
+#                 # ! working.
+#                 uploaded_file_path = pathlib.Path(temp_dir.name) / uploaded_file_name
+#                 # with open('./upload.csv', 'wb') as output_temporary_file:
+#                 with open(f'./{name}_upload.csv', 'wb') as output_temporary_file:
+#                     # print(f'./{name}_upload.csv')
+#                     # ! 必须用这种格式读入内容，然后才可以写入temporary文件夹中。
+#                     # output_temporary_file.write(uploaded_file.getvalue())
+#                     output_temporary_file.write(uploaded_file.getvalue())
+#                     # st.write(uploaded_file_path)  #* 可以查看文件是否真实存在，然后是否可以
+#                 # st.write('Now file saved successfully.')
+#         except Exception as e:
+#             st.write(e)
+#         # uploaded_file_name = "File_provided"
+#         # temp_dir = tempfile.TemporaryDirectory()
+#         # # ! working.
+#         # uploaded_file_path = pathlib.Path(temp_dir.name) / uploaded_file_name
+#         # # with open('./upload.csv', 'wb') as output_temporary_file:
+#         # with open(f'./{name}_upload.csv', 'wb') as output_temporary_file:
+#         #     # print(f'./{name}_upload.csv')
+#         #     # ! 必须用这种格式读入内容，然后才可以写入temporary文件夹中。
+#         #     # output_temporary_file.write(uploaded_file.getvalue())
+#         #     output_temporary_file.write(uploaded_file.getvalue())
+#         #     # st.write(uploaded_file_path)  # * 可以查看文件是否真实存在，然后是否可以
+#         # # st.write('Now file saved successfully.')
+#         return None
+bing_search_api_key = os.environ['bing_api_key']
+bing_search_endpoint = 'https://api.bing.microsoft.com/v7.0/search'
 def search(query):
 # openai.api_key = st.secrets["OPENAI_API_KEY"]
 async def text_mode():
     # Set a default model
     if "openai_model" not in st.session_state:
         st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
     if radio_1 == 'GPT-3.5':
         # print('----------'*5)
+        print('radio_1: GPT-3.5 starts!')
         st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
     else:
+        print('radio_1: GPT-4.0 starts!')
         st.session_state["openai_model"] = "gpt-4"
     # Initialize chat history
     # Display assistant response in chat message container
     # if prompt := st.chat_input("Say something"):
     prompt = st.chat_input("Say something")
+    print('prompt now:', prompt)
+    print('----------'*5)
     # if prompt:
     if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
             full_response = ""
             if radio_2 == '联网模式':
+                print('联网模式入口，prompt:', prompt)
                 input_message = prompt
                 internet_search_result = search(input_message)
                 search_prompt = [
                 st.session_state.messages = []
             if radio_2 == '核心模式':
+                print('GPT only starts!!!')
+                print('messages:', st.session_state['messages'])
                 for response in openai.ChatCompletion.create(
                     model=st.session_state["openai_model"],
                     # messages=[
                 {"role": "assistant", "content": full_response})
+## load the local_KB PDF file.
+# async def localKB_mode():
+def localKB_mode(username):
+    """Run one question/answer turn against the user's local knowledge base.
+
+    The session's conversation, chat history and message list are reset on
+    every call, so this mode keeps no multi-turn context. When the user
+    submits a prompt, the index persisted under ``./{username}/`` is loaded
+    and queried, and the reply is rendered in an assistant chat bubble.
+
+    Args:
+        username: Name of the per-user folder that holds the persisted
+            index (interpolated into ``./{username}/``).
+
+    Returns:
+        None. The assistant reply is rendered but not appended to
+        ``st.session_state.messages``.
+    """
+    ### clear all the prior conversation.
+    st.session_state.conversation = None
+    st.session_state.chat_history = None
+    st.session_state.messages = []
+    message_placeholder = st.empty()
+    print('now starts the local KB version of ChatGPT')
+    # Initialize chat history
+    # NOTE(review): "messages" was assigned an empty list just above, so this
+    # guard never fires and the replay loop below has nothing to render.
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+    # Display assistant response in chat message container
+    # if prompt := st.chat_input("Say something"):
+    # prompt = st.chat_input("Say something")
+    # print('prompt now:', prompt)
+    # print('----------'*5)
+    # if prompt:
+    if prompt := st.chat_input("Say something"):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        with st.status('检索中...', expanded=True, state='running') as status:
+            with st.chat_message("assistant"):
+                message_placeholder = st.empty()
+                full_response = ""
+            # if radio_2 == "知识库模式":
+                # ! The storage_context has to be reloaded here.
+                # NOTE(review): QA_PROMPT is built but not passed to the query
+                # engine created below (see the commented-out alternatives).
+                QA_PROMPT_TMPL = (
+                "We have provided context information below. \n"
+                "---------------------\n"
+                "{context_str}"
+                "\n---------------------\n"
+                "Given all this information, please answer the following questions,"
+                "You MUST use the SAME language as the question:\n"
+                "{query_str}\n")
+                QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)
+                # print('QA_PROMPT:', QA_PROMPT)
+                # llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.8, model_name="gpt-3.5-turbo", max_tokens=4024,streaming=True))
+                # # print('llm_predictor:', llm_predictor)
+                # prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
+                # print('prompt_helper:', prompt_helper)
+                # service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+                # print('service_context:', service_context)
+                # # # index = load_index_from_storage(storage_context)
+                # print("storage_context:", storage_context)
+                # index = load_index_from_storage(storage_context,service_context=service_context)
+                # Load the index that was persisted in the user's own folder.
+                storage_context = StorageContext.from_defaults(persist_dir=f"./{username}/")
+                print('storage_context:',storage_context)
+                index = load_index_from_storage(storage_context)
+                # query_engine = index.as_query_engine(streaming=True, similarity_top_k=3, text_qa_template=QA_PROMPT)
+                query_engine = index.as_query_engine(streaming=True)
+                # query_engine = index.as_query_engine(streaming=True, text_qa_template=QA_PROMPT)
+                # query_engine = index.as_query_engine(streaming=False, text_qa_template=QA_PROMPT)
+                # query_engine = index.as_query_engine()
+                # reply = query_engine.query(prompt)
+                llama_index_reply = query_engine.query(prompt)
+                # full_response += query_engine.query(prompt)
+                print('local KB reply:', llama_index_reply)
+                # query_engine.query(prompt).print_response_stream() #* streams the output in the terminal.
+                # for resp in llama_index_reply.response_gen:
+                #     print(resp)
+                #     full_response += resp
+                #     message_placeholder.markdown(full_response + "▌")
+                # The reply object is handed to markdown() as-is.
+                message_placeholder.markdown(llama_index_reply)
+                # st.session_state.messages.append(
+                #     {"role": "assistant", "content": full_response})
+                # st.session_state.messages = []
+                # full_response += reply
+                # full_response = reply
+                # st.session_state.messages.append(
+                #     {"role": "assistant", "content": full_response})
 async def data_mode():
+    print('数据分析模式启动！')
     # uploaded_file_path = './upload.csv'
+    # uploaded_file_path = f'./{joejoe}_upload.csv'
+    uploaded_file_path = f'./joejoe_upload.csv'
     # # st.write(f"passed file path in data_mode: {uploaded_file_path}")
     # tmp1 = pd.read_csv('./upload.csv')
     # st.write(tmp1[:5])
     # Display assistant response in chat message container
     # if prompt := st.chat_input("Say something"):
     prompt = st.chat_input("Say something")
+    print('prompt now:', prompt)
+    print('----------'*5)
     # if prompt:
     if prompt:
         st.session_state.messages.append({"role": "user", "content": prompt})
                 user_request = environ_settings + "\n\n" + \
                     "你需要完成以下任务：\n\n" + prompt + "\n\n" \
                     f"注：文件位置在{uploaded_file_path}"
+                print('user_request: \n', user_request)
                 # 加载上传的文件，主要路径在上面代码中。
                 files = [File.from_path(str(uploaded_file_path))]
                     )
                     # output to the user
+                    print("AI: ", response.content)
                     full_response = response.content
                     ### full_response = "this is full response"
         # st.session_state.messages.append({"role": "assistant", "content": full_response})
+### authentication with a local yaml file.
+import yaml
+from yaml.loader import SafeLoader
+# Location of the streamlit-authenticator YAML config. The default is the
+# original developer-machine path; set AUTH_CONFIG_PATH to point at the file
+# on any other host so the app does not depend on that machine's layout.
+auth_config_path = os.environ.get(
+    'AUTH_CONFIG_PATH',
+    '/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/config.yaml')
+# Explicit UTF-8 so non-ASCII names in the config decode the same on every platform.
+with open(auth_config_path, encoding='utf-8') as file:
+    config = yaml.load(file, Loader=SafeLoader)
+# Build the login widget from the credentials / cookie sections of the config.
+authenticator = stauth.Authenticate(
+    config['credentials'],
+    config['cookie']['name'],
+    config['cookie']['key'],
+    config['cookie']['expiry_days'],
+    config['preauthorized']
+)
+###'''authentication with a remote cloud-based database.'''
 # authentication with a remove cloud-based database.
 # 导入云端用户数据库。
 # deta = Deta(DETA_KEY)
+# # mybase is the name of the database in Deta. You can change it to any name you want.
+# credentials = {"usernames":{}}
+# users = []
+# email = []
+# passwords = []
+# names = []
+# for row in db.fetch_all_users():
+#     users.append(row["username"])
+#     email.append(row["email"])
+#     names.append(row["key"])
+#     passwords.append(row["password"])
+# hashed_passwords = stauth.Hasher(passwords).generate()
 ## 需要严格的按照yaml文件的格式来定义如下几个字段。
+# for un, name, pw in zip(users, names, hashed_passwords):
+#     # user_dict = {"name":name,"password":pw}
+#     user_dict = {"name": un, "password": pw}
+#     # credentials["usernames"].update({un:user_dict})
+#     credentials["usernames"].update({un: user_dict})
 # ## sign-up模块，未完成。
 # database_table = []
 #     database_table.append([i,credentials['usernames'][i]['name'],credentials['usernames'][i]['password']])
 # print("database_table:",database_table)
+# authenticator = stauth.Authenticate(
+#     credentials=credentials, cookie_name="joeshi_gpt", key='abcedefg', cookie_expiry_days=30)
 # ## sign-up widget，未完成。
 # try:
 #         st.success('注册成功！')
 # except Exception as e:
 #     st.error(e)
+''''''
+# user, authentication_status, username = authenticator.login('用户登录', 'main')
+user, authentication_status, username = authenticator.login('用户登录', 'sidebar')
+# print("name", name, "username", username)
 if authentication_status:
     with st.sidebar:
             with st.text(body="说明"):
                 st.markdown("* “GPT-4”回答质量极佳，但速度缓慢、且不支持长文。建议适当使用。")
             with st.text(body="说明"):
+                st.markdown("* “联网模式”和“知识库模式”均基于检索功能，仅限一轮对话，不会保持之前的会话记录。")
             with st.text(body="说明"):
                 st.markdown(
                     "* “数据模式”暂时只支持1000个单元格以内的数据分析，单元格中的内容不支持中文数据（表头也尽量不使用中文）。一般���行时间在1-5分钟左右，期间需要保持网络畅通。")
     col1, col2 = st.columns(spec=[1, 2])
     radio_2 = col2.radio(label='模式选择', options=[
         '核心模式', '联网模式', '知识库模式', '数据模式'], horizontal=True, label_visibility='visible')
     radio_1 = col1.radio(label='ChatGPT版本', options=[
         'GPT-3.5', 'GPT-4.0'], horizontal=True, label_visibility='visible')
 elif authentication_status == False:
     st.error('⛔ 用户名或密码错误！')
 elif authentication_status == None:
+    st.warning('⬅ 请先登录！')
+### 上传文件的模块
+def upload_file(uploaded_file):
+    """Process a file received from the Streamlit uploader.
+
+    A PDF is handed to ``localKB_construct.process_file`` together with the
+    module-level ``username``, and its name and upload time are recorded in
+    ``./{username}/database_name.csv``. Any other file is read as CSV or
+    Excel, saved as ``./{username}/upload.csv``, previewed (first three
+    rows), and stored again as ``./{username}_upload.csv``.
+
+    Errors raised while processing are shown on the page with ``st.write``
+    rather than propagated.
+
+    Args:
+        uploaded_file: Object returned by ``st.file_uploader``; ``None``
+            means nothing was uploaded and the call is a no-op.
+
+    Returns:
+        None
+    """
+    if uploaded_file is None:
+        return None
+    filename = uploaded_file.name
+    # Route on the real extension: a substring test would misroute a name
+    # such as "report.pdf.csv".
+    extension = os.path.splitext(filename)[1].lower()
+    try:
+        if extension == '.pdf':
+            # Result is discarded; presumably an early check that the upload
+            # can be opened as a PDF before the slower indexing step.
+            PyPDF2.PdfReader(uploaded_file)
+            with st.spinner('正在为您解析新知识库...请耐心等待'):
+                import localKB_construct
+                localKB_construct.process_file(uploaded_file, username)
+                save_database_info.save_database_info(f'./{username}/database_name.csv', filename, str(datetime.now().strftime("%Y-%m-%d %H:%M")))
+                st.markdown('新知识库解析成功，请务必刷新页面，然后开启对话 🔁')
+        else:
+            # The per-user folder may not exist yet; to_csv does not create it.
+            os.makedirs(f'./{username}', exist_ok=True)
+            if extension == '.csv':
+                table = pd.read_csv(uploaded_file)
+                table.to_csv(f'./{username}/upload.csv', encoding='utf-8', index=False)
+                # A CSV upload is stored unchanged.
+                payload = uploaded_file.getvalue()
+            else:
+                table = pd.read_excel(uploaded_file)
+                table.to_csv(f'./{username}/upload.csv', index=False)
+                # The copy below is named *.csv, so store CSV text instead of
+                # the raw workbook bytes.
+                payload = table.to_csv(index=False).encode('utf-8')
+            # Preview only; later steps read the saved file from disk.
+            st.write(table[:3])
+            with open(f'./{username}_upload.csv', 'wb') as output_file:
+                output_file.write(payload)
+    except Exception as e:
+        # Best-effort: surface the problem in the UI and keep the app running.
+        st.write(e)
+    return None
 if __name__ == "__main__":
     import asyncio
     try:
         if radio_2 == "核心模式":
+            print(f'radio 选择了 {radio_2}')
             # * 也可以用命令执行这个python文件。’streamlit run frontend/app.py‘
             asyncio.run(text_mode())
         if radio_2 == "联网模式":
+            print(f'radio 选择了 {radio_2}')
             asyncio.run(text_mode())
+        if radio_2 == "知识库模式":
+            print(f'radio 选择了 {radio_2}')
+            path = f'./{username}/vector_store.json'
+            if os.path.exists(path):
+                database_info = pd.read_csv(f'./{username}/database_name.csv')
+                current_database_name = database_info.iloc[-1][0]
+                current_database_date = database_info.iloc[-1][1]
+                database_claim = f"当前知识库为：{current_database_name}，创建于{current_database_date}。可以开始提问！"
+                st.markdown(database_claim)
+                # st.markdown("注意：系统中已经存在一个知识库，您现在可以开始提问！")
+            uploaded_file = st.file_uploader(
+                "选择上传一个新知识库", type=(["pdf"]))
+            # 默认状态下没有上传文件，None，会报错。需要判断。
+            if uploaded_file is not None:
+                # uploaded_file_path = upload_file(uploaded_file)
+                upload_file(uploaded_file)
+                # st.write('PDF file uploaded sucessfully!')
+                # clear_all()
+                # spinner = st.empty()
+            localKB_mode(username)
+            # asyncio.run(localKB_mode())
         if radio_2 == "数据模式":
             uploaded_file = st.file_uploader(
                 "选择一个文件", type=(["csv", "xlsx", "xls"]))

localKB_construct copy.py ADDED Viewed

	@@ -0,0 +1,101 @@

+'''
+1.更新了llama-index的库。对应的函数名和用法都有所改变。
+'''
+# import gradio as gr
+import openai
+import requests
+import csv
+from llama_index import PromptHelper
+# from llama_index import GPTSimpleVectorIndex ## renamed in the latest version.
+from llama_index import LLMPredictor
+from llama_index import ServiceContext
+from langchain.chat_models import ChatOpenAI
+from langchain import OpenAI
+from fastapi import FastAPI #* 实现流式数据
+from fastapi.responses import StreamingResponse #* 实现流式数据
+import sys
+import os
+import torch
+import math
+import pandas as pd
+import numpy as np
+import PyPDF2
+# from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTSimpleVectorIndex, LLMPredictor, PromptHelper #* working in the previous version.
+##* in the latest version: GPTSimpleVectorIndex was renamed to GPTVectorStoreIndex, try removing it from the end of your imports
+from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTVectorStoreIndex, LLMPredictor, PromptHelper
+from llama_index import StorageContext, load_index_from_storage
+from llama_index import ServiceContext
+from llama_index import download_loader
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+import sys
+import os
+from rich import print
+## environment settings.
+# SECURITY: an API key must never be committed to the repository. The key
+# that used to be hard-coded here should be treated as leaked and revoked.
+# The key is now taken from the environment: OPENAI_API_KEY, or user_token
+# (the variable app.py reads).
+_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("user_token")
+if _api_key:
+    os.environ["OPENAI_API_KEY"] = _api_key
+    openai.api_key = _api_key
+# file_path = "/Users/yunshi/Downloads/txt_dir/Sparks_of_AGI.pdf"
+# file_path = "/Users/yunshi/Downloads/txt_dir/2023年百人会电动论坛 纪要 20230401.pdf"
+## 建立index或者的过程。
+def construct_index(directory_path):
+    """Build a vector-store index from a single PDF file.
+
+    Args:
+        directory_path: Despite the name, this is passed to the PDF loader
+            as ``file=...``, i.e. it points at the PDF itself, not a folder.
+
+    Returns:
+        tuple: ``(index, service_context)`` where ``index`` is the
+        ``GPTVectorStoreIndex`` built from the loaded documents.
+    """
+    # Prompt sizing used by the helper below.
+    input_size_limit = 4096   # maximum input size
+    output_token_count = 1000  # number of output tokens
+    chunk_limit = 6000         # chunk size limit
+
+    # LLM used by the index (streaming chat model).
+    chat_llm = ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo-16k", max_tokens=512,streaming=True)
+    llm_predictor = LLMPredictor(llm=chat_llm)
+    # Newer llama-index takes chunk_overlap_ratio instead of an absolute overlap.
+    helper = PromptHelper(input_size_limit, output_token_count, chunk_overlap_ratio= 0.1, chunk_size_limit=chunk_limit)
+    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=helper)
+
+    # PDFs go through the downloadable CJKPDFReader loader, which takes the
+    # file itself; plain-text folders would use SimpleDirectoryReader instead.
+    pdf_loader = download_loader("CJKPDFReader")()
+    documents = pdf_loader.load_data(file=directory_path)
+
+    # GPTSimpleVectorIndex was renamed to GPTVectorStoreIndex upstream.
+    index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
+    return index, service_context
+def process_file(file_path="/Users/yunshi/Downloads/txt_dir/Sparks_of_AGI.pdf", persist_dir="./"):
+    """Index one PDF and persist the index to disk for later loading.
+
+    Both arguments default to the values that were previously hard-coded,
+    so calling ``process_file()`` with no arguments behaves as before.
+
+    Args:
+        file_path: Path of the PDF to index.
+        persist_dir: Directory the index storage is written to.
+
+    Returns:
+        None
+    """
+    print('process_file starts')
+    # Building the index only needs to happen once per document; afterwards
+    # the persisted files can be loaded instead.
+    index, _service_context = construct_index(file_path)
+    # Store locally so the index can be reloaded later.
+    index.storage_context.persist(persist_dir=persist_dir)
+    print(index)
+process_file()

save_database_info.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# -*- coding: utf-8 -*-
+import numpy as np
+import pandas as pd
+import re
+from re import sub
+import smtplib
+import matplotlib.pyplot as plt
+from itertools import product
+from tqdm import tqdm_notebook, tqdm, trange
+import time
+import seaborn as sns
+from matplotlib.pyplot import style
+from rich import print
+import warnings
+warnings.filterwarnings('ignore')
+sns.set()
+# style.use('seaborn')
+import csv
+def save_database_info(filepath, database_name, date):
+    """Append one ``[database_name, date]`` record to the CSV at ``filepath``.
+
+    Existing rows are kept. When the file does not exist yet (or is empty),
+    it is created, along with its parent folder, and starts with a header
+    row.
+
+    Args:
+        filepath: Path of the CSV file to update.
+        database_name: Name of the knowledge-base file being recorded.
+        date: Timestamp string stored next to the name.
+
+    Returns:
+        None
+    """
+    import os  # local import: this module does not import os at the top
+
+    rows = []
+    if os.path.exists(filepath):
+        with open(filepath, 'r', newline='', encoding='utf-8') as file:
+            rows = list(csv.reader(file))
+    else:
+        parent_dir = os.path.dirname(filepath)
+        if parent_dir:
+            os.makedirs(parent_dir, exist_ok=True)
+    if not rows:
+        # Start a new file with a header so that a reader which treats the
+        # first row as column names (pandas' default) keeps every record.
+        rows.append(['database_name', 'date'])
+    rows.append([database_name, date])
+    # The with-block closes the file; no explicit close() is needed.
+    with open(filepath, 'w', newline='', encoding='utf-8') as file:
+        csv.writer(file).writerows(rows)
+    return None