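# Source: coder001 / app.py (uploaded by allinaigc, commit 085b4f0 "Upload 2 files", 23.2 kB).
# The lines above were file-viewer page chrome (picture / raw / history / blame links), kept here as a comment.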
'''
Reference: https://github.com/shroominic/codeinterpreter-api
1. Files can be saved locally and loaded back later. (working)
1. Files can be read from a temporary folder.
1. Images can be read directly from memory.
1. Chinese fonts render correctly, using:
    from matplotlib.font_manager import FontProperties
    myfont=FontProperties(fname='/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/rawdata/SimHei.ttf')
    sns.set_style('whitegrid',{'font.sans-serif':['simhei','Arial']})
1. Solved the account-login problem. The required format is:
    ## The fields below must follow the YAML file's format strictly.
    for un, name, pw in zip(users, names, hashed_passwords):
        # user_dict = {"name":name,"password":pw}
        user_dict = {"name": un, "password": pw}
        # credentials["usernames"].update({un:user_dict})
        credentials["usernames"].update({un: user_dict})
'''
# TODO: 1. Chinese display issue. 2. Account system.
from dotenv import load_dotenv # pip3 install python-dotenv
import database as db
from deta import Deta # pip3 install deta
import requests
from codeinterpreterapi import CodeInterpreterSession, File
import streamlit as st
from codeinterpreterapi import CodeInterpreterSession
import openai
import os
import matplotlib.pyplot as plt
import xlrd
import pandas as pd
# import csv
import tempfile
from tempfile import NamedTemporaryFile
import pathlib
from pathlib import Path
from matplotlib.font_manager import FontProperties
import seaborn as sns
from time import sleep
import streamlit_authenticator as stauth
import database as db # python文件同目录下的.py程序,直接导入。
import deta
# Runtime configuration, read from environment variables when the script starts.
# A missing 'user_token' or 'bing_api_key' variable raises KeyError right here.
# The same token is published under OPENAI_API_KEY and handed to the openai module.
os.environ["OPENAI_API_KEY"] = openai.api_key = os.environ['user_token']
# Subscription key and endpoint used by search() for Bing web search.
bing_search_api_key = os.environ['bing_api_key']
bing_search_endpoint = 'https://api.bing.microsoft.com/v7.0/search'
# os.environ["VERBOSE"] = "True" # 可以看到具体的错误?
# # #* 如果碰到接口问题,可以启用如下设置。
# openai.proxy = {
# "http": "http://127.0.0.1:7890",
# "https": "http://127.0.0.1:7890"
# }
# Layout settings: page title and subtitle.
st.title("专业版大语言模型商业智能中心")
st.subheader("Artificial Intelligence Backend Center for Professionals")
# Button that clears the conversation.
reset_button_key = "reset_button"
reset_button = st.button(label=("扫清世间烦恼,清除所有记录,并开启一轮新对话 ▶"),
                         key=reset_button_key, use_container_width=True, type="secondary")
if reset_button:
    # Drop all conversation state kept in the Streamlit session.
    st.session_state.conversation = None
    st.session_state.chat_history = None
    st.session_state.messages = []
    # NOTE(review): the original indentation was lost; this placeholder is
    # assumed to belong to the reset branch -- confirm against the real file.
    message_placeholder = st.empty()
# with tab2:
def upload_file(uploaded_file):
    """Preview an uploaded CSV/Excel file and store it for the data mode.

    The first three rows are shown on the page, a CSV copy is written to
    ``./upload.csv`` and the file handed to ``data_mode`` is written to
    ``./{name}_upload.csv``.

    Args:
        uploaded_file: object returned by ``st.file_uploader`` (needs
            ``.name`` and ``.getvalue()`` and must be readable by pandas),
            or ``None`` when nothing has been uploaded.

    Returns:
        Always ``None``; the result is communicated through the files on disk.
    """
    if uploaded_file is None:
        return None

    filename = uploaded_file.name
    st.write(filename)  # print out the whole file name to validate.

    # By default the per-user file receives the upload unchanged.
    payload = uploaded_file.getvalue()
    # Decide by the real extension, case-insensitively; a substring test would
    # misclassify names such as "DATA.CSV" or "report.csv.xlsx".
    is_csv = pathlib.Path(filename).suffix.lower() == '.csv'
    try:
        if is_csv:
            csv_file = pd.read_csv(uploaded_file)
            csv_file.to_csv('./upload.csv', encoding='utf-8', index=False)
            st.write(csv_file[:3])  # preview only
        else:
            xls_file = pd.read_excel(uploaded_file)
            xls_file.to_csv('./upload.csv', index=False)
            st.write(xls_file[:3])
            # The target file is named *.csv, so store the converted CSV text
            # instead of the raw Excel bytes.
            payload = xls_file.to_csv(index=False).encode('utf-8')
    except Exception as e:
        # Best effort: show the parsing problem and still save the raw upload.
        st.write(e)

    # NOTE(review): `name` is a module-level variable, not a parameter; this
    # path has to stay identical to the one data_mode() reads.
    with open(f'./{name}_upload.csv', 'wb') as output_temporary_file:
        output_temporary_file.write(payload)
    return None
def search(query):
    """Query the Bing web search endpoint and return the web-page results.

    Args:
        query: search text, sent as the ``q`` parameter.

    Returns:
        The list found under ``webPages -> value`` in the JSON response, or an
        empty list when the response carries no web-page results.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    # Market code for the request ('en-EN' was the earlier setting).
    mkt = 'zh-CN'
    params = {'q': query, 'mkt': mkt}
    headers = {'Ocp-Apim-Subscription-Key': bing_search_api_key}
    # A timeout keeps a stalled connection from blocking the page forever.
    response = requests.get(bing_search_endpoint,
                            headers=headers, params=params, timeout=10)
    response.raise_for_status()
    payload = response.json()
    # "webPages" may be missing from the payload; treat that as "no results"
    # instead of raising KeyError.
    return payload.get("webPages", {}).get("value", [])
# openai.api_key = st.secrets["OPENAI_API_KEY"]
async def text_mode():
    """Run one pass of the text chat page ("核心模式" or "联网模式").

    Reads the module-level radio selections ``radio_1`` (model version) and
    ``radio_2`` (chat mode), replays the stored chat history and, when the
    user has submitted a prompt, streams the model's answer into the page.

    NOTE(review): the nesting below was reconstructed because the original
    indentation was lost -- confirm against the real file.
    """
    # Set a default model
    if "openai_model" not in st.session_state:
        st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
    # The radio selection overrides the stored model on every run.
    if radio_1 == 'GPT-3.5':
        print('radio_1: GPT-3.5 starts!')
        st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
    else:
        print('radio_1: GPT-4.0 starts!')
        st.session_state["openai_model"] = "gpt-4"
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Read the user's input; falsy when nothing was submitted on this run.
    prompt = st.chat_input("Say something")
    print('prompt now:', prompt)
    print('----------'*5)
    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            if radio_2 == '联网模式':
                # Web mode: wrap the question in a prompt built from search results.
                print('联网模式入口,prompt:', prompt)
                input_message = prompt
                internet_search_result = search(input_message)
                search_prompt = [
                    f"Source:\nTitle: {result['name']}\nURL: {result['url']}\nContent: {result['snippet']}" for result in internet_search_result]
                prompt = "基于如下的互联网公开信息, 回答问题:\n\n" + \
                    "\n\n".join(search_prompt[:3]) + "\n\n问题: " + input_message + \
                    "你需要注意的是回答问题时必须用提问的语言(如英文或者中文)来提示:'答案基于互联网公开信息。'" + "\n\n答案: "  # Only the first three search results are used.
                # The augmented prompt is appended in addition to the raw user
                # message stored above, so both are sent to the model.
                st.session_state.messages.append(
                    {"role": "user", "content": prompt})
                for response in openai.ChatCompletion.create(
                    model=st.session_state["openai_model"],
                    messages=[
                        {"role": m["role"], "content": m["content"]}
                        for m in st.session_state.messages
                    ],
                    stream=True,
                ):
                    full_response += response.choices[0].delta.get(
                        "content", "")
                    # Trailing block character acts as a typing cursor while streaming.
                    message_placeholder.markdown(full_response + "▌")
                message_placeholder.markdown(full_response)
                st.session_state.messages.append(
                    {"role": "assistant", "content": full_response})
                # Web mode keeps no history: the stored messages are cleared after each answer.
                st.session_state.messages = []
            if radio_2 == '核心模式':
                print('GPT only starts!!!')
                print('messages:', st.session_state['messages'])
                for response in openai.ChatCompletion.create(
                    model=st.session_state["openai_model"],
                    # Only a fixed system message and the current prompt are sent;
                    # the stored history is not included in the request.
                    messages=[{'role': 'system', 'content': 'you are ChatGPT'}, {
                        'role': 'user', 'content': prompt}],
                    stream=True,
                ):
                    full_response += response.choices[0].delta.get(
                        "content", "")
                    message_placeholder.markdown(full_response + "▌")
                print('session completed!')
                message_placeholder.markdown(full_response)
                st.session_state.messages.append(
                    {"role": "assistant", "content": full_response})
async def data_mode():
    """Run one pass of the data-analysis chat page ("数据模式").

    Replays the stored chat history and, when the user has submitted a prompt,
    sends it together with the file at ``./{name}_upload.csv`` to a
    ``CodeInterpreterSession`` and renders the returned images and text.

    NOTE(review): the nesting below was reconstructed because the original
    indentation was lost -- confirm against the real file.
    """
    print('数据分析模式启动!')
    # Path written by upload_file(); `name` is a module-level variable.
    uploaded_file_path = f'./{name}_upload.csv'
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Read the user's input; falsy when nothing was submitted on this run.
    prompt = st.chat_input("Say something")
    print('prompt now:', prompt)
    print('----------'*5)
    if prompt:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            async with CodeInterpreterSession() as session:
                # Default instructions prepended to every request (answer
                # language, chart dpi and seaborn settings, answer structure).
                environ_settings = """【背景要求】如果我没有告诉你任何定制化的要求,那么请你按照以下的默认要求来回答:
-------------------------------------------------------------------------
1. 你需要用提问的语言来回答(如:中文提问你就用中文来回答,英文提问你就用英文来回答)。
2. 如果要求你输出图表,那么图的解析度dpi需要设定为600。图尽量使用seaborn库。seaborn库的参数设定:sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='dark'。
3. 图上所有的文字全部翻译成<英文English>来表示。
4. 回答时尽可能地展示分析所对应的图表,并提供分析结果。 你需要按如下格式提供内容:
1. 提供详细且专业的分析结果,提供足够的分析依据。
2. 给出可能造成这一结果的可能原因有哪些?
以上内容全部用序列号格式来表达。
-------------------------------------------------------------------------
"""  # seaborn's palette parameter sets the chart colours; options include deep, muted, pastel, bright, dark, colorblind, Spectral. See https://seaborn.pydata.org/generated/seaborn.color_palette.html
                # Final request: default instructions + the user's task + a note with the file location.
                user_request = environ_settings + "\n\n" + \
                    "你需要完成以下任务:\n\n" + prompt + "\n\n" \
                    f"注:文件位置在{uploaded_file_path}"
                print('user_request: \n', user_request)
                # Attach the uploaded file from the path defined above.
                files = [File.from_path(str(uploaded_file_path))]
                with st.status('Thinking...', expanded=True, state='running') as status:
                    # generate the response
                    response = await session.generate_response(
                        user_request, files=files
                    )
                    # output to the user
                    print("AI: ", response.content)
                    full_response = response.content
                    # Show every file returned with the response as a PNG image.
                    for i, file in enumerate(response.files):
                        st.image(file.get_image(), width=None,
                                 output_format='PNG')
                    st.write(full_response)
                    status.update(label='complete', state='complete')
                # TODO: decide whether every full response should be recorded.
                st.session_state.messages.append(
                    {"role": "assistant", "content": full_response})
                # Explicitly stop the session (marked as required by the original author).
                await session.astop()
# st.session_state.messages.append({"role": "assistant", "content": full_response})
# authentication with a local yaml file.
# import yaml
# from yaml.loader import SafeLoader
# with open('/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/config.yaml') as file:
# config = yaml.load(file, Loader=SafeLoader)
# authenticator = stauth.Authenticate(
# config['credentials'],
# config['cookie']['name'],
# config['cookie']['key'],
# config['cookie']['expiry_days'],
# config['preauthorized']
# )
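# authentication with a remote cloud-based database.
# Import the cloud-hosted user database.
# SECURITY: a literal Deta project key used to be written on the next line; it is
# redacted here and should be rotated, since it was committed to the repository.
# DETA_KEY = "<redacted>"
# load_dotenv(".env")
# DETA_KEY = os.getenv("DETA_KEY")
# print(DETA_KEY)
# deta = Deta(DETA_KEY)
# mybase is the name of the database in Deta. You can change it to any name you want.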
# Credential mapping handed to stauth.Authenticate further down.
credentials = {"usernames":{}}
# credentials = {"users": {}}
# db = db()
# Parallel lists filled from the user table, one entry per row.
users = []
email = []  # collected but not read again in this section
passwords = []
names = []
for row in db.fetch_all_users():
    # "username" feeds `users` and "key" feeds `names`
    # (the commented lines show the earlier, opposite mapping).
    # users.append(row["key"])
    # names.append(row["username"])
    users.append(row["username"])
    email.append(row["email"])
    names.append(row["key"])
    passwords.append(row["password"])
# The stored password values are hashed on every script run.
hashed_passwords = stauth.Hasher(passwords).generate()
# The fields below must follow the YAML file's format strictly.
# NOTE(review): `name` is not used inside the loop body, but like every
# module-level loop variable it stays bound after the loop; the
# f'./{name}_upload.csv' paths elsewhere in this file read that module-level name.
for un, name, pw in zip(users, names, hashed_passwords):
    # user_dict = {"name":name,"password":pw}
    user_dict = {"name": un, "password": pw}
    # credentials["usernames"].update({un:user_dict})
    credentials["usernames"].update({un: user_dict})
# ## sign-up模块,未完成。
# database_table = []
# # print(pd.DataFrame(credentials))
# for i in credentials['usernames'].keys():
# # print("i:",i)
# # print("name",credentials['usernames'][i]['name'])
# # print("password",credentials['usernames'][i]['password'])
# database_table.append([i,credentials['usernames'][i]['name'],credentials['usernames'][i]['password']])
# print("database_table:",database_table)
# Build the authenticator from the credentials assembled above and render the
# login form in the main page area.
# NOTE(review): the cookie key is a hard-coded literal committed with the source;
# consider loading it from configuration instead.
authenticator = stauth.Authenticate(
    credentials=credentials, cookie_name="joeshi_gpt", key='abcedefg', cookie_expiry_days=30)
# login() yields (display name, authentication status, username).
user, authentication_status, username = authenticator.login('用户登录', 'main')
# upload_file() and data_mode() build their file path from the module-level
# `name`. Without this rebinding, `name` is only whatever the credential loop
# left behind (the last database row), so every user would share one upload
# file and an empty user table would leave `name` undefined.
name = username
# print("name", name, "username", username)
# ## sign-up widget,未完成。
# try:
# if authenticator.register_user('新用户注册', preauthorization=False):
# # for list in database_table:
# # db.update_user(username=list[0], name=list[1], password=list[2])
# db.update_user(username=list[-1][0], name=list[-1][1], password=list[-1][2])
# # st.success('User registered successfully')
# st.success('注册成功!')
# except Exception as e:
# st.error(e)
# Page content shown after login: sidebar (welcome, logout, usage guide, prompt
# examples) and the two radio selectors that define `radio_1` and `radio_2`.
# NOTE(review): the original indentation was lost; the split between sidebar
# content and main-area content below is reconstructed -- confirm against the
# real file.
if authentication_status:
    with st.sidebar:
        # Fix the width of the expanded sidebar with injected CSS
        # (the <style> element is now closed explicitly).
        st.markdown(
            """
<style>
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 600px;
max-width: 600px;
}
</style>
""",
            unsafe_allow_html=True,
        )
        st.header(f'**欢迎 **{username}** 来到人工智能的世界** ♠')
        st.write('_Welcome and Hope U Enjoy Staying Here!_')
        authenticator.logout('登出', 'sidebar')
        # Quick-start guide.
        with st.container():
            st.markdown("#### 快速上手指南")
            with st.text(body="说明"):
                st.markdown("* 为了保护数据与隐私,所有对话均不会被保存,刷新页面立即删除。敬请放心。")
            with st.text(body="说明"):
                st.markdown("* “GPT-4”回答质量极佳,但速度缓慢、且不支持长文。建议适当使用。")
            with st.text(body="说明"):
                st.markdown("* “联网模式”与搜索引擎一致,仅限一轮对话,不会保持之前的会话记录。")
            with st.text(body="说明"):
                st.markdown(
                    "* “数据模式”暂时只支持1000个单元格以内的数据分析,单元格中的内容不支持中文数据(表头也尽量不使用中文)。一般运行时间在1-5分钟左右,期间需要保持网络畅通。")
            with st.text(body="说明"):
                st.markdown("* “数据模式”推荐上传csv格式的文件,部分Excel文件容易出现数据不兼容的情况。")
        # Reference material: example prompts for the core mode and the data mode.
        st.markdown("#### 参考资料")
        with st.expander(label="**核心模式的专用提示词Prompt示例**", expanded=False):
            st.code(
                body="继续用中文写一篇关于 [文章主题] 的文章,以下列句子开头:[文章开头]。", language='plaintext')
            st.code(body="将以下文字概括为 100 个字,使其易于阅读和理解。避免使用复杂的句子结构或技术术语。",
                    language='plaintext')
            st.code(body="给我出一个迪奥2023春季发布会活动策划。", language='plaintext')
            st.code(body="帮我按照正式会议结构写一个会邀:主题是xx手机游戏立项会议。", language='plaintext')
            st.code(body="帮我写一个车内健康监测全场景落地的项目计划,用表格。", language='plaintext')
            st.code(
                body="同时掷两枚质地均匀的骰子,则两枚骰子向上的点数之和为 7 的概率是多少。", language='plaintext')
            st.code(body="写一篇产品经理的演讲稿,注意使用以下词汇: 赋能,抓手,中台,闭环,落地,漏斗,沉淀,给到,同步,对齐,对标,迭代,拉通,打通,升级,交付,聚焦,倒逼,复盘,梳理,方案,联动,透传,咬合,洞察,渗透,兜底,解耦,耦合,复用,拆解。", language='plaintext')
        with st.expander(label="**数据模式的专用提示词Prompt示例**", expanded=False):
            st.code(body="分析此数据集并绘制一些'有趣的图表'。", language='python')
            st.code(
                body="对于这个文件中的数据,你需要要找出[X,Y]数据之间的寻找'相关性'。", language='python')
            st.code(body="对于这个文件中的[xxx]数据给我一个'整体的分析'。", language='python')
            st.code(body="对于[xxx]数据给我一个'直方图',提供图表,并给出分析结果。", language='python')
            st.code(body="对于[xxx]数据给我一个'小提琴图',并给出分析结果。", language='python')
            st.code(
                body="对于[X,Y,Z]数据在一个'分布散点图 (stripplot)',所有的数据在一张图上展现, 并给出分析结果。", language='python')
            st.code(body="对于[X,Y]数据,进行'T检验',你需要展示图表,并给出分析结果。",
                    language='python')
            st.code(body="对于[X,Y]数据给我一个3个类别的'聚类分析',并给出分析结果。",
                    language='python')
    # Selectors read later by text_mode() and the entry-point block.
    col1, col2 = st.columns(spec=[1, 2])
    radio_2 = col2.radio(label='模式选择', options=[
        '核心模式', '联网模式', '知识库模式', '数据模式'], horizontal=True, label_visibility='visible')
    radio_1 = col1.radio(label='ChatGPT版本', options=[
        'GPT-3.5', 'GPT-4.0'], horizontal=True, label_visibility='visible')
elif authentication_status is False:
    # Login was attempted and rejected.
    st.error('⛔ 用户名或密码错误!')
elif authentication_status is None:
    # No login attempt yet.
    st.warning('🔼 请先登录!')
# Entry point: dispatch to the handler for the selected mode.
# The script can also be started with `streamlit run frontend/app.py`.
# NOTE(review): the original indentation was lost; data_mode() is assumed to
# run only when a file has been uploaded -- confirm against the real file.
if __name__ == "__main__":
    import asyncio
    import traceback

    # `radio_2` is only defined after a successful login, so check the login
    # state explicitly instead of relying on a swallowed NameError.
    if authentication_status:
        try:
            if radio_2 in ("核心模式", "联网模式"):
                # Both text modes share one handler; text_mode() branches on radio_2.
                print(f'radio 选择了 {radio_2}')
                asyncio.run(text_mode())
            elif radio_2 == "数据模式":
                uploaded_file = st.file_uploader(
                    "选择一个文件", type=(["csv", "xlsx", "xls"]))
                # No file is uploaded by default (None), so guard before using it.
                if uploaded_file is not None:
                    upload_file(uploaded_file)
                    asyncio.run(data_mode())
        except Exception as exc:
            # The previous bare `except: pass` hid every failure and also
            # intercepted BaseException-derived signals. Log the traceback on
            # the server and show the user a short message instead.
            traceback.print_exc()
            st.error(f'运行出错: {exc}')