|
''' |
|
参考: https://github.com/shroominic/codeinterpreter-api |
|
|
|
1. 可以存在本地,然后再调出来。 working. |
|
1. 可以在临时文件夹中读取文件。 |
|
1. 可以直接在内存中读出图片。 |
|
1. 中文字体成功。 |
|
from matplotlib.font_manager import FontProperties |
|
myfont=FontProperties(fname='/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/rawdata/SimHei.ttf') |
|
sns.set_style('whitegrid',{'font.sans-serif':['simhei','Arial']}) |
|
1. 解决了account login的问题,主要格式: |
|
## 需要严格的按照yaml文件的格式来定义如下几个字段。 |
|
for un, name, pw in zip(users, names, hashed_passwords): |
|
# user_dict = {"name":name,"password":pw} |
|
user_dict = {"name": un, "password": pw} |
|
# credentials["usernames"].update({un:user_dict}) |
|
credentials["usernames"].update({un: user_dict}) |
|
|
|
''' |
|
|
|
|
|
import database as db |
|
from deta import Deta |
|
import requests |
|
from codeinterpreterapi import CodeInterpreterSession, File |
|
import streamlit as st |
|
|
|
import openai |
|
import os |
|
import matplotlib.pyplot as plt |
|
import pandas as pd |
|
|
|
import tempfile |
|
from tempfile import NamedTemporaryFile |
|
import pathlib |
|
from pathlib import Path |
|
from matplotlib.font_manager import FontProperties |
|
import seaborn as sns |
|
from time import sleep |
|
import streamlit_authenticator as stauth |
|
import database as db |
|
import deta |
|
from langchain.chat_models import ChatOpenAI |
|
from llama_index import StorageContext, load_index_from_storage, GPTVectorStoreIndex, LLMPredictor, PromptHelper |
|
from llama_index import ServiceContext, QuestionAnswerPrompt |
|
import sys |
|
import time |
|
import PyPDF2 |
|
|
|
import save_database_info |
|
from datetime import datetime |
|
import pytz |
|
|
|
# Hand the token stored in the `user_token` environment variable to the
# openai client and also expose it under OPENAI_API_KEY.
openai.api_key = os.environ["OPENAI_API_KEY"] = os.environ['user_token']

# Page heading.
st.title("专业版大语言模型商业智能中心")
st.subheader("Artificial Intelligence Backend Center for Professionals")

# Full-width button that wipes the chat state kept in the Streamlit session.
reset_button_key = "reset_button"
reset_button = st.button(
    label="扫清世间烦恼,清除所有记录,并开启一轮新对话 ▶",
    key=reset_button_key,
    use_container_width=True,
    type="secondary",
)
if reset_button:
    st.session_state["conversation"] = None
    st.session_state["chat_history"] = None
    st.session_state["messages"] = []
    message_placeholder = st.empty()
|
|
|
def clear_all():
    """Reset the chat-related session state and emit an empty placeholder.

    Returns:
        None
    """
    state = st.session_state
    state["conversation"] = None
    state["chat_history"] = None
    state["messages"] = []
    # The placeholder handle is not kept; only the st.empty() call is needed.
    st.empty()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Bing Web Search settings; the subscription key is read from the
# `bing_api_key` environment variable.
bing_search_api_key = os.environ['bing_api_key']
bing_search_endpoint = 'https://api.bing.microsoft.com/v7.0/search'


def search(query, timeout=30):
    """Query the Bing Web Search endpoint and return the web-page hits.

    Args:
        query: Search text, sent as the ``q`` parameter (market ``zh-CN``).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the app indefinitely.

    Returns:
        The list stored under ``webPages.value`` in the JSON reply, or an
        empty list when the reply has no web-page section.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            status (via ``raise_for_status``).
    """
    params = {'q': query, 'mkt': 'zh-CN'}
    headers = {'Ocp-Apim-Subscription-Key': bing_search_api_key}

    response = requests.get(
        bing_search_endpoint,
        headers=headers,
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()

    payload = response.json()
    # A reply without web results lacks the "webPages" key; treat it as no hits.
    return payload.get("webPages", {}).get("value", [])
|
|
|
|
|
|
|
def _stream_assistant_reply(placeholder):
    """Stream a chat completion for the stored history into *placeholder*.

    Sends every message in ``st.session_state.messages`` to the model named
    in ``st.session_state["openai_model"]``, shows the partial text followed
    by a cursor while chunks arrive, then appends the finished reply to the
    history.

    Args:
        placeholder: Streamlit element whose ``markdown`` method is used to
            render the growing reply.

    Returns:
        The complete reply text.
    """
    reply = ""
    stream = openai.ChatCompletion.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": m["role"], "content": m["content"]}
            for m in st.session_state.messages
        ],
        stream=True,
    )
    for chunk in stream:
        reply += chunk.choices[0].delta.get("content", "")
        placeholder.markdown(reply + "▌")
    placeholder.markdown(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})
    return reply


async def text_mode():
    """Run the plain chat ('核心模式') or web-augmented chat ('联网模式').

    Reads the module-level ``radio_1`` (model choice) and ``radio_2`` (mode
    choice). In web mode the question is expanded with the first three Bing
    hits returned by ``search`` and the history is cleared after the answer,
    so every web-mode question is a single-round exchange.
    """
    # The model is chosen afresh on every run from the radio selection.
    if radio_1 == 'GPT-3.5':
        print('radio_1: GPT-3.5 starts!')
        st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
    else:
        print('radio_1: GPT-4.0 starts!')
        st.session_state["openai_model"] = "gpt-4"

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the stored conversation.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    prompt = st.chat_input("Say something")
    print('prompt now:', prompt)
    print('----------'*5)

    if not prompt:
        return

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()

        if radio_2 == '联网模式':
            print('联网模式入口,prompt:', prompt)
            input_message = prompt
            internet_search_result = search(input_message)
            search_prompt = [
                f"Source:\nTitle: {result['name']}\nURL: {result['url']}\nContent: {result['snippet']}"
                for result in internet_search_result
            ]
            prompt = "基于如下的互联网公开信息, 回答问题:\n\n" + \
                "\n\n".join(search_prompt[:3]) + "\n\n问题: " + input_message + \
                "你需要注意的是回答问题时必须用提问的语言(如英文或者中文)来提示:'答案基于互联网公开信息。'" + "\n\n答案: "

            st.session_state.messages.append(
                {"role": "user", "content": prompt})
            _stream_assistant_reply(message_placeholder)
            # Web mode keeps no history between questions.
            st.session_state.messages = []

        elif radio_2 == '核心模式':
            print('GPT only starts!!!')
            print('messages:', st.session_state['messages'])
            _stream_assistant_reply(message_placeholder)
|
|
|
|
|
|
|
|
|
def localKB_mode(username):
    """Answer a question from the index persisted under ``./<username>/``.

    The chat state is cleared first via ``clear_all``; the submitted question
    is then run through a streaming llama_index query engine and the reply is
    rendered in the assistant chat bubble.

    Args:
        username: Name of the folder that holds the persisted index.
    """
    clear_all()
    print('now starts the local KB version of ChatGPT')

    if "messages" not in st.session_state:
        st.session_state.messages = []

    for past in st.session_state.messages:
        with st.chat_message(past["role"]):
            st.markdown(past["content"])

    prompt = st.chat_input("Say something")
    if not prompt:
        return

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.status('检索中...', expanded=True, state='running') as status, \
            st.chat_message("assistant"):
        message_placeholder = st.empty()

        # NOTE(review): this prompt template is built but never handed to the
        # query engine below - confirm whether it should be.
        QA_PROMPT_TMPL = (
            "We have provided context information below. \n"
            "---------------------\n"
            "{context_str}"
            "\n---------------------\n"
            "Given all this information, please answer the following questions,"
            "You MUST use the SAME language as the question:\n"
            "{query_str}\n")
        QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)

        # Load the persisted index and query it in streaming mode.
        storage_context = StorageContext.from_defaults(
            persist_dir=f"./{username}/")
        print('storage_context:',storage_context)
        kb_index = load_index_from_storage(storage_context)
        engine = kb_index.as_query_engine(streaming=True)

        llama_index_reply = engine.query(prompt)
        print('local KB reply:', llama_index_reply)
        message_placeholder.markdown(llama_index_reply)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def data_mode():
    """Run one data-analysis exchange through a CodeInterpreterSession.

    Uses the module-level ``username`` to locate ``./<username>_upload.csv``,
    sends the user's question together with that file to the session, then
    shows the returned images and text and stores the reply in the history.
    """
    print('数据分析模式启动!')
    clear_all()

    uploaded_file_path = f'./{username}_upload.csv'
    print('file path:', uploaded_file_path)

    if "messages" not in st.session_state:
        st.session_state.messages = []

    for past in st.session_state.messages:
        with st.chat_message(past["role"]):
            st.markdown(past["content"])

    prompt = st.chat_input("Say something")
    print('prompt now:', prompt)
    print('----------'*5)

    if not prompt:
        return

    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        async with CodeInterpreterSession() as session:
            # Default answering rules (language, chart style, answer layout).
            environ_settings = """【背景要求】如果我没有告诉你任何定制化的要求,那么请你按照以下的默认要求来回答:
-------------------------------------------------------------------------
1. 你需要用提问的语言来回答(如:中文提问你就用中文来回答,英文提问你就用英文来回答)。
2. 如果要求你输出图表,那么图的解析度dpi需要设定为600。图尽量使用seaborn库。seaborn库的参数设定:sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='dark'。
3. 图上所有的文字全部翻译成<英文English>来表示。
4. 你回答的文字内容必须尽可能的详细且通俗易懂。
5. 回答时尽可能地展示分析所对应的图表,并提供分析结果。 你需要按如下格式提供内容:

5.1 提供详细且专业的分析结果,提供足够的分析依据。
5.2 给出可能造成这一结果的可能原因有哪些?

以上内容全部用【1/2/3这样的序列号格式】来表达。
-------------------------------------------------------------------------
"""

            composed_request = (
                environ_settings + "\n\n"
                + "你需要完成以下任务:\n\n" + prompt + "\n\n"
                + f"注:文件位置在 {uploaded_file_path}"
            )
            # NOTE(review): the composed request above is not sent; only the
            # raw prompt is forwarded - confirm this is intended.
            user_request = str(prompt)
            print('user_request: \n', user_request)

            files = [File.from_path(str(uploaded_file_path))]

            with st.status('Thinking...', expanded=True, state='running') as status:
                response = await session.generate_response(
                    user_msg=user_request, files=files, detailed_error=True)

                full_response = response.content
                print("AI: ", full_response)

                # Show every file the session produced as a PNG image.
                for produced in response.files:
                    st.image(produced.get_image(), width=None,
                             output_format='PNG')

                st.write(full_response)
                status.update(label='complete', state='complete')

            st.session_state.messages.append(
                {"role": "assistant", "content": full_response})

            await session.astop()
|
|
|
|
|
|
|
|
|
import yaml
from yaml.loader import SafeLoader

# Read the login settings from config.yaml using PyYAML's safe loader.
with open('./config.yaml') as file:
    config = yaml.safe_load(file)

# Build the streamlit_authenticator object from the credentials, cookie
# settings and pre-authorized list stored in the config file.
_cookie_settings = config['cookie']
authenticator = stauth.Authenticate(
    config['credentials'],
    _cookie_settings['name'],
    _cookie_settings['key'],
    _cookie_settings['expiry_days'],
    config['preauthorized'],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''''' |
|
|
|
|
|
# CSS that pins the expanded sidebar to 600px; the <style> element is closed
# explicitly so the rule cannot swallow markup that follows it.
_SIDEBAR_CSS = (
    "\n"
    "<style>\n"
    '[data-testid="stSidebar"][aria-expanded="true"]{\n'
    "min-width: 600px;\n"
    "max-width: 600px;\n"
    "}\n"
    "</style>\n"
)

# Quick-start notes shown in the sidebar, one markdown bullet each.
_GUIDE_TIPS = (
    "* 为了保护数据与隐私,所有对话均不会被保存,刷新页面立即删除。敬请放心。",
    "* “GPT-4”回答质量极佳,但速度缓慢、且不支持长文。建议适当使用。",
    "* “联网模式”和“知识库模式”均基于检索功能,仅限一轮对话,不会保持之前的会话记录。",
    "* “数据模式”暂时只支持1000个单元格以内的数据分析,单元格中的内容不支持中文数据(表头也尽量不使用中文)。一般运行时间在1-5分钟左右,期间需要保持网络畅通。",
    "* “数据模式”推荐上传csv格式的文件,部分Excel文件容易出现数据不兼容的情况。",
)

# Example prompts for the core chat mode.
_CORE_PROMPT_EXAMPLES = (
    "继续用中文写一篇关于 [文章主题] 的文章,以下列句子开头:[文章开头]。",
    "将以下文字概括为 100 个字,使其易于阅读和理解。避免使用复杂的句子结构或技术术语。",
    "给我出一个迪奥2023春季发布会活动策划。",
    "帮我按照正式会议结构写一个会邀:主题是xx手机游戏立项会议。",
    "帮我写一个车内健康监测全场景落地的项目计划,用表格。",
    "同时掷两枚质地均匀的骰子,则两枚骰子向上的点数之和为 7 的概率是多少。",
    "写一篇产品经理的演讲稿,注意使用以下词汇: 赋能,抓手,中台,闭环,落地,漏斗,沉淀,给到,同步,对齐,对标,迭代,拉通,打通,升级,交付,聚焦,倒逼,复盘,梳理,方案,联动,透传,咬合,洞察,渗透,兜底,解耦,耦合,复用,拆解。",
)

# Example prompts for the data-analysis mode.
_DATA_PROMPT_EXAMPLES = (
    "分析此数据集并绘制一些'有趣的图表'。",
    "对于这个文件中的数据,你需要要找出[X,Y]数据之间的寻找'相关性'。",
    "对于这个文件中的[xxx]数据给我一个'整体的分析'。",
    "对于[xxx]数据给我一个'直方图',提供图表,并给出分析结果。",
    "对于[xxx]数据给我一个'小提琴图',并给出分析结果。",
    "对于[X,Y,Z]数据在一个'分布散点图 (stripplot)',所有的数据在一张图上展现, 并给出分析结果。",
    "对于[X,Y]数据,进行'T检验',你需要展示图表,并给出分析结果。",
    "对于[X,Y]数据给我一个3个类别的'聚类分析',并给出分析结果。",
)

# Render the login form in the sidebar and read back the login result.
user, authentication_status, username = authenticator.login('用户登录', 'sidebar')

if authentication_status:
    with st.sidebar:
        st.markdown(_SIDEBAR_CSS, unsafe_allow_html=True)
        st.header(f'**欢迎 **{username}** 来到人工智能的世界** ♠')
        st.write('_Welcome and Hope U Enjoy Staying Here!_')
        authenticator.logout('登出', 'sidebar')

        with st.container():
            st.markdown("#### 快速上手指南")
            for _tip in _GUIDE_TIPS:
                with st.text(body="说明"):
                    st.markdown(_tip)

            st.markdown("#### 参考资料")
            with st.expander(label="**核心模式的专用提示词Prompt示例**", expanded=False):
                for _example in _CORE_PROMPT_EXAMPLES:
                    st.code(body=_example, language='plaintext')

            with st.expander(label="**数据模式的专用提示词Prompt示例**", expanded=False):
                for _example in _DATA_PROMPT_EXAMPLES:
                    st.code(body=_example, language='python')

        # Mode and model selectors; their values are read by the mode
        # functions and the entry-point dispatch.
        col1, col2 = st.columns(spec=[1, 2])
        radio_2 = col2.radio(label='模式选择', options=[
            '核心模式', '联网模式', '知识库模式', '数据模式'], horizontal=True, label_visibility='visible')
        radio_1 = col1.radio(label='ChatGPT版本', options=[
            'GPT-3.5', 'GPT-4.0'], horizontal=True, label_visibility='visible')

elif authentication_status is False:
    st.error('⛔ 用户名或密码错误!')
elif authentication_status is None:
    st.warning('⬅ 请先登录!')
|
|
|
|
|
def upload_file(uploaded_file):
    """Store an uploaded file for the knowledge-base or data-analysis mode.

    PDF uploads are passed to ``localKB_construct.process_file`` and recorded
    through ``save_database_info``. Any other upload is read as CSV or Excel
    and saved as ``./<username>_upload.csv``, with the first three rows
    previewed. The module-level ``username`` selects the target paths.

    The converted CSV is the only file written: copying the raw upload bytes
    over it afterwards would leave binary Excel content in the ``.csv`` file.

    Args:
        uploaded_file: Object returned by ``st.file_uploader`` (or ``None``).

    Returns:
        None. Errors are written to the page instead of being raised.
    """
    if uploaded_file is None:
        return None

    filename = uploaded_file.name
    # Compare extensions case-insensitively so e.g. "REPORT.PDF" is recognized.
    lowered_name = filename.lower()

    try:
        if lowered_name.endswith('.pdf'):
            # Result is discarded; presumably this only checks that the
            # upload parses as a PDF - TODO confirm.
            PyPDF2.PdfReader(uploaded_file)

            with st.spinner('正在为您解析新知识库...请耐心等待'):
                import localKB_construct

                localKB_construct.process_file(uploaded_file, username)

                created_at = datetime.now(
                    pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d %H:%M")
                save_database_info.save_database_info(
                    f'./{username}/database_name.csv', filename, created_at)
                st.markdown('新知识库解析成功,请务必刷新页面,然后开启对话 🔃')
        else:
            if lowered_name.endswith('.csv'):
                table = pd.read_csv(uploaded_file)
            else:
                table = pd.read_excel(uploaded_file)

            table.to_csv(f'./{username}_upload.csv',
                         encoding='utf-8', index=False)
            st.write(table[:3])
    except Exception as e:
        # Best effort: show the problem on the page rather than crash the app.
        st.write(e)

    return None
|
|
|
|
|
if __name__ == "__main__":
    import asyncio
    import traceback

    # radio_1 / radio_2 are only defined after a successful login, so the
    # dispatch is skipped entirely for anonymous visitors.
    if authentication_status:
        try:
            if radio_2 in ("核心模式", "联网模式"):
                # Both text modes share one entry point; text_mode reads
                # radio_2 itself to decide whether to search the web.
                print(f'radio 选择了 {radio_2}')
                asyncio.run(text_mode())

            elif radio_2 == "知识库模式":
                print(f'radio 选择了 {radio_2}')

                # Announce the current knowledge base if one has been built.
                path = f'./{username}/vector_store.json'
                if os.path.exists(path):
                    database_info = pd.read_csv(
                        f'./{username}/database_name.csv')
                    # Last row, first two columns: name and creation time.
                    current_database_name = database_info.iloc[-1, 0]
                    current_database_date = database_info.iloc[-1, 1]
                    database_claim = f"当前知识库为:{current_database_name},创建于{current_database_date}。可以开始提问!"
                    st.markdown(database_claim)

                uploaded_file = st.file_uploader(
                    "选择上传一个新知识库", type=(["pdf"]))
                if uploaded_file is not None:
                    upload_file(uploaded_file)

                localKB_mode(username)

            elif radio_2 == "数据模式":
                uploaded_file = st.file_uploader(
                    "选择一个文件", type=(["csv", "xlsx", "xls"]))
                if uploaded_file is not None:
                    uploaded_file_path = upload_file(uploaded_file)
                    asyncio.run(data_mode())

        except Exception:
            # Keep the page alive on failures, but log them. Catching only
            # Exception lets BaseException-derived control-flow signals
            # propagate instead of being swallowed.
            traceback.print_exc()
|
|