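'''
1. Multi-scenario application built on ChatGPT:
    1. Core mode
    2. Web-connected mode
    3. Knowledge-base mode
    4. Data-analysis mode
    5. Agent mode
2. RAG:
    1. The core files are:
        1. langchain_KB.py contains the function that builds the vector database and the function that produces the total prompt.
        2. rag_source.py contains the function that extracts the information sources (document name and page number) from the vector database.
'''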
# TODO:1. 更新huggingface上code01的版本,包括:知识库和数据分析模块。 2. 将知识库模块更新为:multi-query + source。 3. 将数据分析模块重写。 | |
import numpy as np | |
import pandas as pd | |
from dotenv import load_dotenv # pip3 install python-dotenv | |
import requests | |
from codeinterpreterapi import CodeInterpreterSession, File | |
import streamlit as st | |
import openai | |
import os | |
import matplotlib.pyplot as plt | |
import xlrd | |
import pandas as pd | |
# import csv | |
import tempfile | |
from tempfile import NamedTemporaryFile | |
import pathlib | |
from pathlib import Path | |
from matplotlib.font_manager import FontProperties | |
import seaborn as sns | |
from time import sleep | |
import streamlit_authenticator as stauth | |
# from langchain.chat_models import ChatOpenAI | |
# from langchain.llms import openai | |
import sys | |
import time | |
import PyPDF2 ## read the local_KB PDF file. | |
# import localKB_construct | |
# from streamlit_option_menu import option_menu | |
# import st_reset_conversation | |
from st_reset_conversation import reset_all, reset_message | |
import save_database_info | |
import pytz | |
from datetime import datetime | |
from dotenv import load_dotenv | |
from openai import OpenAI | |
import st_msautogen | |
import rag_source | |
# import add_fonts | |
import asyncio | |
import warnings | |
warnings.filterwarnings("ignore") | |
#make it look nice from the start | |
# st.set_page_config(layout='wide',initial_sidebar_state='collapsed',) | |
### 设置openai的API key | |
# Pull secrets from the local .env file, then publish the OpenAI key both on
# the openai module and in the environment variable the SDK reads.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"] = os.environ['user_token']
bing_search_api_key = os.environ['bing_api_key']

# Local proxy: per the original note, this is meant for local debugging when
# the data-analysis module hits API connectivity problems.
openai.proxy = dict.fromkeys(("http", "https"), "http://127.0.0.1:7890")

# Page header: title, English subtitle and the AI-generated-content disclaimer.
st.title("专业版大语言模型智能中心")
st.subheader("Artificial Intelligence Backend Center for Professionals")
st.caption(
    "_声明:本网站仅提供技术测试与评估服务。内容由人工智能生成,仅供参考。如果您本人使用或对外传播本服务生成的输出,您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_"
)
# st.divider() | |
# ## clear conversion. | |
# def reset_all(): | |
# # st.session_state.conversation = None | |
# st.session_state.chat_history = None | |
# st.session_state.messages = [] | |
# message_placeholder = st.empty() | |
# return None | |
# navigation menu using Hydralit. 并没有解决menu跳转的问题。 | |
# option_data = [ | |
# {'icon': "house", 'label':"核心模式"}, | |
# {'icon':"cloud-upload",'label':"信息检索模式"}, | |
# {'icon': "gear", 'label':"数据分析模式"}, | |
# {'icon': "list-task", 'label':"智能体模式"}, | |
# ] | |
# navi_menu = op = hc.option_bar(option_definition=option_data,title=None,key='PrimaryOption', horizontal_orientation=True) | |
# navi_menu = hc.nav_bar(menu_definition=option_data, key='navi_menu', use_animation=True, option_menu=False, sticky_mode='pinned', sticky_nav=False, hide_streamlit_markers=False) | |
### 使用streamlit_option_menu格式的类似横幅选项。但是会出现第一次无法运行,需要手动清零或者做一个动作,才可以。 | |
# navi_menu = option_menu( | |
# menu_title=None, | |
# options=['核心模式', '信息检索模式', '数据分析模式', '智能体模式'], | |
# # options=['GPT-3.5', 'GPT-4.0','清华GLM2-6B','百川Baichuan-13B', '阿里通义千问14B'], | |
# icons=['house', 'cloud-upload','gear','list-task'], | |
# menu_icon='cast', | |
# default_index=0, | |
# orientation='horizontal', | |
# # manual_select=0, | |
# # styles={ | |
# # "container": {"padding": "0!important", "background-color": "#fafafa"}, | |
# # "icon": {"color": "orange", "font-size": "25px"}, | |
# # "nav-link": {"font-size": "25px", "text-align": "left", "margin":"0px", "--hover-color": "#eee"}, | |
# # "nav-link-selected": {"background-color": "green"}, | |
# # } | |
# ) | |
### 常规streamlit选择 | |
# Horizontal radio group that selects the working mode; the first entry
# (core mode) is pre-selected.
navi_menu = st.radio(
    label='选择一个大语言模型工作模式',
    options=['核心模式', '联网模式', '知识库模式', '数据分析模式', '智能体模式'],
    index=0,
    horizontal=True,
)

# Full-width primary button; its click state is consumed by the reset
# handler further down the script.
reset_button_key = "reset_button"
reset_button = st.button(
    label="清除所有记录,并开启一轮新对话 ▶",
    key=reset_button_key,
    use_container_width=True,
    type="primary",
)
def clear_all():
    """Reset the conversation-related session state and emit an empty slot.

    Sets ``conversation`` and ``chat_history`` to ``None``, replaces
    ``messages`` with a fresh empty list, and calls ``st.empty()``.

    Returns:
        None
    """
    state = st.session_state
    state.conversation = None
    state.chat_history = None
    state.messages = []
    st.empty()  # the returned placeholder is intentionally not kept
    return None
## 清除所有对话记录, reset all conversation. | |
# The button reports True only on the rerun triggered by its click; in that
# case delegate to the imported reset_all() helper.
if reset_button:
    reset_all()
### 上传文件的模块 | |
def upload_file(uploaded_file):
    """Persist an uploaded file for the knowledge-base or data-analysis mode.

    PDF uploads are written to the working directory under their own name,
    indexed through ``langchain_KB.langchain_localKB_construct`` and recorded
    in ``./{username}/database_name.csv``. Any other upload is read as CSV
    (``.csv``) or Excel (everything else) and saved as
    ``./{username}/{username}_upload.csv``, the path ``data_mode`` reads.

    ``username`` is the module-level name assigned by the login step.

    Args:
        uploaded_file: Upload object exposing ``.name`` and ``.getvalue()``
            and readable by pandas, or ``None`` when nothing was uploaded.

    Returns:
        The base name of the uploaded file, or ``None`` when
        ``uploaded_file`` is ``None``. Processing errors are shown on the
        page and the name is still returned.
    """
    if uploaded_file is None:
        return None

    # Bind the name up front so every path (including the Excel branch and
    # the error path) has a value to return. Keep only the base name so a
    # crafted upload name cannot point outside the working directory.
    filename = pathlib.Path(uploaded_file.name).name
    suffix = pathlib.Path(filename).suffix.lower()

    try:
        if suffix == '.pdf':
            with st.spinner('正在为您解析新知识库...请耐心等待'):
                # Imported lazily, as in the original, so the app starts even
                # when the langchain stack is only needed for this branch.
                import langchain_KB
                import save_database_info

                # The file keeps its original name so that source citations
                # later show the right document name.
                with open(filename, 'wb') as output_temporary_file:
                    output_temporary_file.write(uploaded_file.getvalue())
                    # Make sure the bytes are on disk before the index
                    # builder looks at the file.
                    output_temporary_file.flush()
                    langchain_KB.langchain_localKB_construct(output_temporary_file, username)

                # Record the knowledge-base name and load time (Shanghai time).
                save_database_info.save_database_info(
                    f'./{username}/database_name.csv',
                    filename,
                    str(datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d %H:%M")),
                )
                st.markdown('新知识库解析成功,请务必刷新页面,然后开启对话 🔃')
        else:
            if suffix == '.csv':
                print('start the csv file processing...')
                table = pd.read_csv(uploaded_file)
            else:
                table = pd.read_excel(uploaded_file)

            # Both tabular formats land in the same per-user location, which
            # is where data_mode looks for the data set.
            pathlib.Path(f'./{username}').mkdir(parents=True, exist_ok=True)
            table.to_csv(f'./{username}/{username}_upload.csv', encoding='utf-8', index=False)
            st.write(table[:3])  # preview of the first three rows
            print('end the csv file processing...')
    except Exception as e:
        # Best effort: show the problem on the page instead of crashing.
        st.write(e)

    return filename
### 互联网搜索模块 | |
bing_search_api_key = os.environ['bing_api_key']
bing_search_endpoint = 'https://api.bing.microsoft.com/v7.0/search'


def search(query, mkt='zh-CN', timeout=10):
    """Run a Bing web search and return the list of web-page results.

    Args:
        query: Search string sent as the ``q`` parameter.
        mkt: Market code sent as the ``mkt`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the page indefinitely.

    Returns:
        The ``webPages.value`` list from the JSON response, or an empty list
        when the response carries no ``webPages`` section.

    Raises:
        Whatever ``requests.get`` / ``raise_for_status`` raise on network or
        HTTP errors; they propagate unchanged.
    """
    params = {'q': query, 'mkt': mkt}
    headers = {'Ocp-Apim-Subscription-Key': bing_search_api_key}

    response = requests.get(
        bing_search_endpoint,
        headers=headers,
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()

    payload = response.json()
    # A query without web hits has no "webPages" key; treat that as zero
    # results rather than an error.
    return payload.get("webPages", {}).get("value", [])
# async def text_mode(): | |
def text_mode():
    """Run one turn of the plain-chat page (core mode or web-connected mode).

    Picks the OpenAI model from the module-level ``radio_1`` selection,
    replays the stored history, reads the chat input and streams the reply.
    In web-connected mode (``navi_menu == '联网模式'``) the question is first
    enriched with the top three Bing results and the history is cleared after
    the answer; otherwise the reply is appended to the history.

    The sampling parameters (``max_tokens``, ``temperature``, ``top_p``,
    ``presence_penalty``, ``frequency_penalty``) are module-level names
    defined outside this function.
    """
    print('text mode starts!')

    # Default model, then override from the user's selection.
    if "openai_model" not in st.session_state:
        st.session_state["openai_model"] = "gpt-3.5-turbo-16k"

    if radio_1 == 'ChatGPT-3.5':
        print('radio_1: GPT-3.5 starts!')
        st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
    elif radio_1 == 'ChatGPT-4':
        print('radio_1: GPT-4.0 starts!')
        st.session_state["openai_model"] = "gpt-4-1106-preview"
    else:
        st.markdown("**当前大模型无效,请在左侧工具栏中选择一个有效的模型。您现在访问的站点仅提供ChatGPT中的GPT-3.5/4。**")
    print(st.session_state["openai_model"])

    # Initialise and replay the chat history on every rerun.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    prompt = st.chat_input("说点什么吧...")
    print('prompt now:', prompt)
    print('----------'*5)
    if not prompt:
        return

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()

        if navi_menu == '联网模式':
            print('联网模式下的prompt:', prompt)
            input_message = prompt
            internet_search_result = search(input_message)
            # Only the first three hits are sent to the model.
            search_prompt = [
                f"Source:\nTitle: {result['name']}\nURL: {result['url']}\nContent: {result['snippet']}"
                for result in internet_search_result[:3]
            ]
            prompt = "基于如下的互联网公开信息, 回答问题:\n\n" + \
                "\n\n".join(search_prompt) + "\n\n问题: " + input_message + \
                "你需要注意的是回答问题时必须用提问的语言(如英文或者中文)来提示:'答案基于互联网公开信息。'" + "\n\n答案: "
            st.session_state.messages.append(
                {"role": "user", "content": prompt})

            _stream_chat_reply(message_placeholder)
            # Web-connected turns are single-shot: drop the history afterwards.
            st.session_state.messages = []
        else:
            print('ChatGPT only starts!!!')
            # Multi-turn chat: the whole history is sent with every request.
            full_response = _stream_chat_reply(
                message_placeholder,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
            )
            st.session_state.messages.append(
                {"role": "assistant", "content": full_response})


def _stream_chat_reply(placeholder, **completion_kwargs):
    """Stream a chat completion for the stored history into *placeholder*.

    Args:
        placeholder: Streamlit element whose ``markdown`` is updated as text
            arrives.
        **completion_kwargs: Extra arguments forwarded to
            ``chat.completions.create`` (sampling parameters).

    Returns:
        The complete reply text.
    """
    history = [
        {"role": m["role"], "content": m["content"]}
        for m in st.session_state.messages
    ]
    stream = OpenAI().chat.completions.create(
        model=st.session_state["openai_model"],
        messages=history,
        stream=True,
        **completion_kwargs,
    )

    reply = ""
    for chunk in stream:
        # A chunk may carry no choices at all; skip it instead of indexing.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        # Compare against the None object, not the string 'None', so that a
        # chunk whose text really is "None" is kept.
        if piece is None:
            continue
        reply += piece
        placeholder.markdown(reply + "▌")
    placeholder.markdown(reply)
    return reply
## load the local_KB PDF file. | |
# # def local_KB(uploaded_file): | |
# print('now starts the local KB version of ChatGPT') | |
# max_input_size = 4096 | |
# # set number of output tokens | |
# # num_outputs = 3000 #* working | |
# num_outputs = 1000 | |
# # set maximum chunk overlap | |
# max_chunk_overlap = -1000 #* working | |
# # set chunk size limit | |
# # chunk_size_limit = 600 | |
# chunk_size_limit = 6000 #* working | |
# history = [] | |
# if input: | |
# # ! 这里需要重新装载一下storage_context。 | |
# QA_PROMPT_TMPL = ( | |
# "We have provided context information below. \n" | |
# "---------------------\n" | |
# "{context_str}" | |
# "\n---------------------\n" | |
# "Given all this information, please answer the following questions," | |
# "You MUST use the SAME language as the question:\n" | |
# "{query_str}\n") | |
# QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL) | |
# llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.8, model_name="gpt-3.5-turbo", max_tokens=4096,streaming=True)) | |
# prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit) | |
# service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper) | |
# # # index = load_index_from_storage(storage_context) | |
# storage_context = StorageContext.from_defaults(persist_dir="./") | |
# index = load_index_from_storage(storage_context,service_context=service_context) | |
# # query_engine = index.as_query_engine(streaming=True, similarity_top_k=3, text_qa_template=QA_PROMPT) | |
# # query_engine = index.as_query_engine(streaming=True) | |
# query_engine = index.as_query_engine(streaming=True, text_qa_template=QA_PROMPT) | |
# reply = query_engine.query(input) | |
# async def localKB_mode(username): | |
def localKB_mode(username):
    """Run one turn of the knowledge-base (RAG) page.

    Expands the question with ``multiQuery_prompt.generate_sim_query``,
    builds the retrieval prompt through ``langchain_KB.langchain_RAG``,
    streams the model's answer, then shows the sources formatted by
    ``rag_source.rag_source``. The message history is emptied at the end of
    every turn.

    Args:
        username: Passed to ``langchain_KB.langchain_RAG`` to select the
            user's knowledge base.
    """
    reset_message()  # only resets the message list and placeholder
    print('now starts the local KB version of ChatGPT')

    prompt = st.chat_input("说点什么吧")
    if not prompt:
        return

    # Same default as text_mode, so this page also works when it is the
    # first one to run in a session.
    if "openai_model" not in st.session_state:
        st.session_state["openai_model"] = "gpt-3.5-turbo-16k"

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.status('检索中...', expanded=True, state='running'):
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""

            # RAG via langchain; imported lazily as in the original.
            import langchain_KB
            import multiQuery_prompt

            # Multi-query is always on: the generated variants are submitted
            # to the retriever as one string.
            prompt = str(multiQuery_prompt.generate_sim_query(orignal_question=prompt))

            total_prompt, docs = langchain_KB.langchain_RAG(prompt, username)
            print('total_prompt now:', total_prompt)
            st.session_state.messages.append({"role": "user", "content": total_prompt})

            openai_client = OpenAI()
            stream = openai_client.chat.completions.create(
                model=st.session_state["openai_model"],
                messages=[
                    {"role": m["role"], "content": m["content"]}
                    for m in st.session_state.messages
                ],
                stream=True,
            )
            for chunk in stream:
                # A chunk may carry no choices at all; skip it.
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                # Test for the None object rather than the text 'None', so a
                # chunk that literally says "None" is not dropped.
                if piece is None:
                    continue
                full_response += piece
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)

            try:
                # Show where the retrieved passages came from.
                print('docs now:', docs)
                source = rag_source.rag_source(docs)
                st.divider()
                st.caption(source)
            except Exception as e:
                # Best effort: a failure to format sources must not hide the answer.
                print('Exception:', e)

            # Each knowledge-base question is answered on its own.
            st.session_state.messages = []
# async def data_mode(): | |
def data_mode():
    """Run one turn of the data-analysis page.

    Clears the conversation, reads the chat input and forwards it together
    with the user's uploaded data set
    (``./{username}/{username}_upload.csv``) to
    ``st_openai_assistant.openai_assistant``. Returned image files are
    rendered first, followed by the final text answer.

    ``username`` is the module-level name assigned by the login step.
    """
    clear_all()  # reset the conversation
    print('数据分析模式启动!')
    uploaded_file_path = f'./{username}/{username}_upload.csv'
    print('file path:', uploaded_file_path)

    # clear_all() has just replaced the history with an empty list, so there
    # is nothing to replay here.

    prompt = st.chat_input("Say something")
    print('prompt now:', prompt)
    print('----------'*5)
    if not prompt:
        return

    try:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.status('思考中...需要1至10分钟左右,请耐心等待 🏃', expanded=True, state='running'):
            with st.chat_message("assistant"):
                # OpenAI Assistants API wrapper; the system prompt is
                # configured inside st_openai_assistant.py.
                import st_openai_assistant

                _, _, _, image_files, final_answer = st_openai_assistant.openai_assistant(
                    prompt=prompt, filepath=uploaded_file_path, username=username)

                try:
                    from PIL import Image
                    print("返回到Agent001程序中的图表个数:", len(image_files))
                    for image_path in image_files:
                        st.image(Image.open(image_path), output_format='PNG')
                except Exception as e:
                    # Catch Exception only: a bare except would also swallow
                    # BaseException-derived control-flow signals.
                    print('Exception while rendering charts:', e)

                try:
                    st.markdown(final_answer)
                except Exception as e:
                    print('Exception while rendering the answer:', e)
    except Exception as e:
        # Keep the console log and also tell the user, who otherwise sees no
        # output at all when the analysis fails.
        print(e)
        st.error(f'数据分析失败: {e}')
### authentication with a local yaml file. | |
import yaml
from yaml.loader import SafeLoader

# Authentication backed by a local YAML file: parse it with the safe loader
# and feed its sections to streamlit-authenticator in positional order.
with open('./config.yaml') as file:
    config = yaml.load(file, Loader=SafeLoader)

_auth_args = (
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized'],
)
authenticator = stauth.Authenticate(*_auth_args)
# authentication with a remote cloud-based database.
# 导入云端用户数据库。 | |
# DETA_KEY = "c0zegv33efm_4MBTaoQAn76GzUfsZeKV64Uh9qMY3WZb" | |
# load_dotenv(".env") | |
# DETA_KEY = os.getenv("DETA_KEY") | |
# print(DETA_KEY) | |
# deta = Deta(DETA_KEY) | |
# mybase is the name of the database in Deta. You can change it to any name you want. | |
# credentials = {"usernames":{}} | |
# # credentials = {"users": {}} | |
# # db = db() | |
# users = [] | |
# email = [] | |
# passwords = [] | |
# names = [] | |
# for row in db.fetch_all_users(): | |
# users.append(row["username"]) | |
# email.append(row["email"]) | |
# names.append(row["key"]) | |
# passwords.append(row["password"]) | |
# hashed_passwords = stauth.Hasher(passwords).generate() | |
## 需要严格的按照yaml文件的格式来定义如下几个字段。 | |
# for un, name, pw in zip(users, names, hashed_passwords): | |
# # user_dict = {"name":name,"password":pw} | |
# user_dict = {"name": un, "password": pw} | |
# # credentials["usernames"].update({un:user_dict}) | |
# credentials["usernames"].update({un: user_dict}) | |
# ## sign-up模块,未完成。 | |
# database_table = [] | |
# # print(pd.DataFrame(credentials)) | |
# for i in credentials['usernames'].keys(): | |
# # print("i:",i) | |
# # print("name",credentials['usernames'][i]['name']) | |
# # print("password",credentials['usernames'][i]['password']) | |
# database_table.append([i,credentials['usernames'][i]['name'],credentials['usernames'][i]['password']]) | |
# print("database_table:",database_table) | |
# authenticator = stauth.Authenticate( | |
# credentials=credentials, cookie_name="joeshi_gpt", key='abcedefg', cookie_expiry_days=30) | |
# Show the login form in the main page area and unpack the three values the
# authenticator returns; `authentication_status` and `username` drive the UI below.
user, authentication_status, username = authenticator.login(
    '用户登录',
    'main',
)
# print("name", name, "username", username) | |
# ## sign-up widget,未完成。 | |
# try: | |
# if authenticator.register_user('新用户注册', preauthorization=False): | |
# # for list in database_table: | |
# # db.update_user(username=list[0], name=list[1], password=list[2]) | |
# db.update_user(username=list[-1][0], name=list[-1][1], password=list[-1][2]) | |
# # st.success('User registered successfully') | |
# st.success('注册成功!') | |
# except Exception as e: | |
# st.error(e) | |
# Post-login UI. When `authentication_status` is truthy the sidebar is built;
# False shows a wrong-credentials error and None asks the user to log in.
if authentication_status:
    with st.sidebar:
        # Pin the expanded sidebar to a fixed 450px width through injected CSS.
        # The <style> element is now explicitly closed.
        st.markdown(
            """
            <style>
            [data-testid="stSidebar"][aria-expanded="true"]{
                min-width: 450px;
                max-width: 450px;
            }
            </style>
            """,
            unsafe_allow_html=True,
        )

        ### Sidebar heading, greeting and logout button.
        st.header(f'**欢迎 **{username}** 来到人工智能的世界** ♠')
        st.write('_Welcome and Hope U Enjoy Staying Here_')
        authenticator.logout('登出', 'sidebar')

        # Base-model selector; the chosen label is kept in `radio_1`.
        radio_1 = st.selectbox(
            label='选择一个大语言模型基座 (注:根据站点不同,部分基座不可用)',
            options=["ChatGPT-4", "ChatGPT-3.5", "Google Gemini", "Claude 3", "清华ChatGLM3-6B", "百川Baichuan-13B", "阿里通义千问-14B", "阿里通义千问-72B", "Llama-2", "Mistral", "Vicuna"],
            index=0,
            placeholder="大语言模型列表",
        )

        ## Four tabs in the sidebar, built with st.tabs.
        tab_1, tab_2, tab_3, tab_4 = st.tabs(['使用须知', '模型参数', '提示词模板', '系统角色设定'])

        # Tab 1: usage notes, rendered one by one in the original order.
        with tab_1:
            _usage_notes = (
                "* 为了保护数据与隐私,所有对话均不会被保存,刷新页面立即删除。敬请放心。",
                "* “GPT-4”回答质量极佳,但速度缓慢,建议适当使用。",
                "* “信息检索模式”与所有的搜索引擎或者数据库检索方式一样,仅限一轮对话,将不会保持之前的会话记录。",
                "* “数据分析模式”暂时只支持1000个单元格以内的数据分析,单元格中的内容不支持中文数据(表头也尽量不使用中文)。一般运行时间在1至10分钟左右,期间需要保持网络畅通。",
                "* “数据分析模式”推荐上传csv格式的文件,部分Excel文件容易出现数据不兼容的情况。",
            )
            for _note in _usage_notes:
                with st.text(body="说明"):
                    st.markdown(_note)

        ## Tab 2: LLM sampling parameters; each slider value is stored in a
        ## module-level name.
        with tab_2:
            max_tokens = st.slider(label='Max_Token(生成结果时最大字数)', min_value=100, max_value=8096, value=4096, step=100)
            temperature = st.slider(label='Temperature (温度)', min_value=0.0, max_value=1.0, value=0.8, step=0.1)
            top_p = st.slider(label='Top_P (核采样)', min_value=0.0, max_value=1.0, value=0.6, step=0.1)
            frequency_penalty = st.slider(label='Frequency Penalty (重复度惩罚因子)', min_value=-2.0, max_value=2.0, value=1.0, step=0.1)
            presence_penalty = st.slider(label='Presence Penalty (控制主题的重复度)', min_value=-2.0, max_value=2.0, value=1.0, step=0.1)

        # Tab 3: copyable prompt templates, grouped into two collapsed expanders.
        with tab_3:
            with st.expander(label="**大语言模型基础提示词Prompt示例**", expanded=False):
                _basic_prompts = (
                    "继续用中文写一篇关于 [文章主题] 的文章,以下列句子开头:[文章开头]。",
                    "将以下文字概括为 100 个字,使其易于阅读和理解。避免使用复杂的句子结构或技术术语。",
                    "给我出一个迪奥2024春季发布会活动策划。",
                    "帮我按照正式会议结构写一个会邀:主题是xx手机游戏立项会议。",
                    "帮我写一个车内健康监测全场景落地的项目计划,用表格。",
                    "同时掷两枚质地均匀的骰子,则两枚骰子向上的点数之和为 7 的概率是多少。",
                    "写一篇产品经理的演讲稿,注意使用以下词汇: 赋能,抓手,中台,闭环,落地,漏斗,沉淀,给到,同步,对齐,对标,迭代,拉通,打通,升级,交付,聚焦,倒逼,复盘,梳理,方案,联动,透传,咬合,洞察,渗透,兜底,解耦,耦合,复用,拆解。",
                )
                for _prompt_text in _basic_prompts:
                    st.code(body=_prompt_text, language='plaintext')

            with st.expander(label="**数据分析模式的专用提示词Prompt示例**", expanded=False):
                _data_prompts = (
                    "分析此数据集并绘制一些'有趣的图表'。",
                    "对于这个文件中的数据,你需要要找出[X,Y]数据之间的寻找'相关性'。",
                    "对于这个文件中的[xxx]数据给我一个'整体的分析'。",
                    "对于[xxx]数据给我一个'直方图',提供图表,并给出分析结果。",
                    "对于[xxx]数据给我一个'小提琴图',并给出分析结果。",
                    "对于[X,Y,Z]数据在一个'分布散点图 (stripplot)',所有的数据在一张图上展现, 并给出分析结果。",
                    "对于[X,Y]数据,进行'T检验',你需要展示图表,并给出分析结果。",
                    "对于[X,Y]数据给我一个3个类别的'聚类分析',并给出分析结果。",
                )
                for _prompt_text in _data_prompts:
                    st.code(body=_prompt_text, language='python')

        # Tab 4: editable system-role prompt.
        # NOTE(review): the widget's return value is discarded here, so edits do
        # not appear to be read anywhere in this block — confirm intent.
        with tab_4:
            st.text_area(label='系统角色设定', value='你是一个人工智能,你需要回答我提出的问题,或者完成我交代的任务。你需要使用我提问的语言(如中文、英文)来回答。', height=200, label_visibility='hidden')
elif authentication_status is False:
    # Identity check against the singleton instead of `== False`.
    st.error('⛔ 用户名或密码错误!')
elif authentication_status is None:
    st.warning('⬆ 请先登录!')
if __name__ == "__main__": | |
import asyncio | |
try: | |
match navi_menu: | |
case "核心模式": | |
# if navi_menu == "核心模式": | |
print(f'navi_menu 选择了 {navi_menu}') | |
# reset_all() | |
# * 也可以用命令执行这个python文件。’streamlit run frontend/app.py‘ | |
# asyncio.run(text_mode()) | |
text_mode() | |
# elif navi_menu == "信息检索模式": | |
case "联网模式": | |
# print(f'navi_menu 选择了 {navi_menu}') | |
# reset_all() | |
##TODO 如下设置中的index=None, 可能可以解决了刷屏会调回第一项的问题?好像不会。 | |
# radio_2 = st.radio(label='信息检索源选择:', options=['互联网', '维基百科', '本地文档', '文献库', '企业知识库','知识图谱库'], horizontal=True, label_visibility='visible') | |
### 横向排列的checkbox选项。也可以实现多项选择的功能。 | |
# col_1, col_2, col_3, col_4, col_5 = st.columns(5) | |
# rag_1 = col_1.checkbox(label='互联网', label_visibility='visible') | |
# rag_2 = col_2.checkbox(label='上传文件', label_visibility='visible') | |
# rag_3 = col_3.checkbox(label='企业知识库', label_visibility='visible') | |
# rag_4 = col_4.checkbox(label='百科全书', label_visibility='visible') | |
# rag_5 = col_5.checkbox(label='其他数据源', label_visibility='visible') | |
if (navi_menu=='联网模式'): | |
# print(f'radio 选择了 {radio_2}') | |
# asyncio.run(text_mode()) | |
text_mode() | |
case "知识库模式": | |
print(f'navi_menu 选择了 {navi_menu}') | |
st.session_state.messages = [] | |
# ### llama_index框架的RAG代码,最近更新版本后不成功,会报错。 | |
# path = f'./{username}/vector_store.json' | |
# if os.path.exists(path): | |
# print(f'{path} local KB exists') | |
# database_info = pd.read_csv(f'./{username}/database_name.csv') | |
# current_database_name = database_info.iloc[-1][0] | |
# current_database_date = database_info.iloc[-1][1] | |
# database_claim = f"当前知识库为:{current_database_name},创建于{current_database_date}。可以开始提问!" | |
# st.markdown(database_claim) | |
### Langchain框架的RAG代码。 | |
path = f'./{username}/faiss_index/index.faiss' | |
if os.path.exists(path): | |
print(f'{path} local KB exists') | |
database_info = pd.read_csv(f'./{username}/database_name.csv', encoding='utf-8', header=None) ## 不加encoding的话,中文名字的PDF会报错。 | |
print(database_info) | |
current_database_name = database_info.iloc[-1][0] | |
current_database_date = database_info.iloc[-1][1] | |
database_claim = f"当前知识库为:{current_database_name},创建于{current_database_date}。可以开始提问!" | |
st.markdown(database_claim) | |
try: | |
uploaded_file = st.file_uploader( | |
"选择上传一个新知识库", type=(["pdf"])) | |
# 默认状态下没有上传文件,None,会报错。需要判断。 | |
if uploaded_file is not None: | |
# uploaded_file_path = upload_file(uploaded_file) | |
upload_file(uploaded_file) | |
except Exception as e: | |
print(e) | |
pass | |
try: | |
## 启动本地知识库模式。 | |
localKB_mode(username) | |
# asyncio.run(localKB_mode(username)) | |
except Exception as e: | |
print(e) | |
pass | |
# elif navi_menu == "数据分析模式": | |
case "数据分析模式": | |
# reset_message() | |
uploaded_file = st.file_uploader( | |
"选择一个文件", type=(["csv", "xlsx", "xls"])) | |
# 默认状态下没有上传文件,None,会报错。需要判断。 | |
if uploaded_file is not None: | |
# uploaded_file_path = upload_file(uploaded_file) ### original code here. | |
csv_filename = upload_file(uploaded_file) | |
# asyncio.run(data_mode()) | |
reset_all() | |
data_mode() | |
# elif navi_menu == "智能体模式": | |
case "智能体模式": | |
uploaded_file = st.file_uploader( | |
"选择一个文件", type=(["csv"])) | |
reset_all() | |
print('st uploaded_file:',uploaded_file) | |
# 默认状态下没有上传文件,None,会报错。需要判断。 | |
# if uploaded_file is not None: | |
if uploaded_file is not None: | |
uploaded_file_path = upload_file(uploaded_file) | |
# asyncio.run(data_mode()) | |
else: | |
uploaded_file_path = None | |
# st.markdown('**此功能还在内部测试阶段,尚未开放,敬请期待!**') | |
# reset_message() | |
print('st_msautogen starts!') | |
uploaded_file_path = '/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/joeshi_upload.csv' | |
# asyncio.run(st_msautogen.auto_gen(uploaded_file_path)) ## 好像不需要启动asyncio,也可以正常运行。在msautogen中已经启动了。 | |
st_msautogen.auto_gen(uploaded_file_path) ## 这里不需要使用asyncio.run(),否则会streamlit中会刷新页面? | |
except Exception as e: | |
print('Exception Raised:',e) | |
pass |