allinaigc committed on
Commit
7fe89de
·
1 Parent(s): f78ed68

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -121
app.py CHANGED
@@ -2,60 +2,68 @@
2
  参考: https://github.com/shroominic/codeinterpreter-api
3
 
4
  1. 可以存在本地,然后再调出来。 working.
 
5
  1. 可以直接在内存中读出图片。
 
 
 
 
 
6
  '''
7
- # TODO:如何在内存中读取文件。
8
 
9
- import matplotlib as mpl
10
  from codeinterpreterapi import CodeInterpreterSession, File
11
  import streamlit as st
12
  from codeinterpreterapi import CodeInterpreterSession
13
  import openai
14
  import os
15
  import matplotlib.pyplot as plt
 
16
  import pandas as pd
17
- from io import StringIO
18
- import csv
19
  import tempfile
20
  from tempfile import NamedTemporaryFile
21
  import pathlib
22
  from pathlib import Path
23
- import matplotlib
24
  from matplotlib.font_manager import FontProperties
25
  import seaborn as sns
26
 
 
27
  os.environ["OPENAI_API_KEY"] = os.environ['user_token']
28
  openai.api_key = os.environ['user_token']
29
  os.environ["VERBOSE"] = "True" # 可以看到具体的错误?
30
 
31
- # 设置中文字体。
32
- # myfont = FontProperties(fname='SimHei.ttf') # NOTE: 注意这里的云服务器路径格式。
33
- # mpl.rcParams['font.family'] = ['myfont']
34
- # sns.set(font='myfont')
35
- # plt.rcParams['font.sans-serif'] = ['myfont']
36
- # plt.rcParams['font.family'] = 'sans-serif'
37
- # plt.title("这个是数据分布图", fontsize=12, fontproperties=myfont)
38
- # plt.xlabel('数据A', fontproperties=myfont)
39
-
40
-
41
- # # #* 如果碰到接口问题,可以启用如下设置。
42
  # openai.proxy = {
43
- # "http": "http://127.0.0.1:7890",
44
  # "https": "http://127.0.0.1:7890"
45
- # }
46
 
 
 
 
 
 
 
 
 
47
 
48
- # st.title("ChatGPT-like clone")
49
- st.title("大语言模型商业数据分析中心")
50
- st.subheader("Business Data Analytics Based Upon LLM")
51
  uploaded_file = st.file_uploader(
52
- "Choose a file", type=(["csv", "txt", "xlsx", "xls"]))
53
- # uploaded_file = st.file_uploader("选择一个文件", type=(["csv","txt","xlsx","xls"]))
54
- # st.write(uploaded_file)
55
  if uploaded_file is not None:
56
- # csv_file = csv.reader(uploaded_file)
57
- csv_file = pd.read_csv(uploaded_file)
58
- st.write(csv_file[:5]) # 这里只是显示文件,后面需要定位文件所在的绝对路径。
 
 
 
 
 
 
 
 
59
 
60
  uploaded_file_name = "File_provided"
61
  temp_dir = tempfile.TemporaryDirectory()
@@ -65,42 +73,61 @@ if uploaded_file is not None:
65
  # output_temporary_file.write(uploaded_file.read())
66
  # ! 必须用这种格式读入内容,然后才可以写入temporary文件夹中。
67
  output_temporary_file.write(uploaded_file.getvalue())
68
- st.write(uploaded_file_path) #* 可以查看文件是否真实存在,然后是否可以
69
-
70
- # how to read data inside streamlit.
71
- # # files = pd.read_csv(uploaded_file)
72
- # bytes_data = uploaded_file.getvalue()
73
- # # st.write(bytes_data)
74
-
75
- # # To convert to a string based IO:
76
- # stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
77
- # # st.write(stringio)
78
-
79
- # # To read file as string:
80
- # string_data = stringio.read()
81
- # # st.write(string_data)
82
-
83
- # # Can be used wherever a "file-like" object is accepted:
84
- # # dataframe = pd.read_csv(uploaded_file)
85
- # files = pd.read_csv(uploaded_file, encoding='utf-8')
 
 
 
 
86
 
87
  # openai.api_key = st.secrets["OPENAI_API_KEY"]
88
 
89
-
90
  async def main():
 
 
 
 
 
 
 
91
  if "openai_model" not in st.session_state:
92
- # st.session_state["openai_model"] = "gpt-3.5-turbo"
93
- # NOTE: data analysis module must use GPT-4.
 
 
 
 
 
94
  st.session_state["openai_model"] = "gpt-4"
95
 
 
96
  if "messages" not in st.session_state:
97
  st.session_state.messages = []
98
 
 
99
  for message in st.session_state.messages:
100
  with st.chat_message(message["role"]):
101
  st.markdown(message["content"])
102
 
103
- if prompt := st.chat_input("What is up?"):
 
104
  st.session_state.messages.append({"role": "user", "content": prompt})
105
  with st.chat_message("user"):
106
  st.markdown(prompt)
@@ -109,78 +136,114 @@ async def main():
109
  message_placeholder = st.empty()
110
  full_response = ""
111
 
112
- # 原始示例 https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
113
- # for response in openai.ChatCompletion.create(
114
- # model=st.session_state["openai_model"],
115
- # messages=[
116
- # {"role": m["role"], "content": m["content"]}
117
- # for m in st.session_state.messages
118
- # ],
119
- # stream=True,
120
- # ):
121
- # full_response += response.choices[0].delta.get("content", "")
122
- # message_placeholder.markdown(full_response + "")
123
-
124
- async with CodeInterpreterSession() as session:
125
- # user_request = "对于文件中的'SepalLengthCm’数据给我一个'直方图',提供图表,并给出分析结果"
126
- #! 可以用设定dpi=300来输出高质量的图表。(注:图的解析度dpi设定为300)
127
- # environ_settings = "【<默认要求> 如果我没有告诉你任何定制化的要求,那么请按照以下的默认要求来回答:1. 你需要用提问的语言来回答(即:如果我用中文提问,你就用中文来回答;我如果用英文提问吗,你就用英文来回答)。2. 如果要求你输出图表,那么图的解析度dpi需要设定为300。图尽量使用seaborn库。seaborn库的参数设定:sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='deep')。】" ## seaborn中的palette参数可以设定图表的颜色,选项包括:deep, muted, pastel, bright, dark, colorblind,Spectral。更多参数可以参考:https://seaborn.pydata.org/generated/seaborn.color_palette.html
128
- environ_settings = """【背景要求】如果我没有告诉你任何定制化的要求,那么请你按照以下的默认要求来回答:
129
- -------------------------------------------------------------------------
130
- 1. 你需要用提问的语言来回答(如:中文提问你就用中文来回答,英文提问你就用英文来回答)。
131
- 2. 如果要求你输出图表,那么图的解析度dpi需要设定为600。图尽量使用seaborn库。seaborn库的参数设定:sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='bright')。
132
- -------------------------------------------------------------------------
133
- """ # seaborn中的palette参数可以设定图表的颜色,选项包括:deep, muted, pastel, bright, dark, colorblind,Spectral。更多参数可以参考:https://seaborn.pydata.org/generated/seaborn.color_palette.html。
134
-
135
- # sns.set(font='myfont')
136
-
137
- user_request = environ_settings + "\n\n" + \
138
- "你需要完成以下任务:\n\n" + prompt + f"注:文件位置在{uploaded_file_path}"
139
- # print('user_request: \n', user_request)
140
-
141
- # 加载上传的文件,主要路径在上面代码中。
142
- files = [File.from_path(str(uploaded_file_path))]
143
-
144
- with st.status('processing...', expanded=True, state='running') as status:
145
-
146
- # generate the response
147
-
148
- response = await session.generate_response(
149
- user_request, files=files
150
- )
151
-
152
- # output to the user
153
- print("AI: ", response.content)
154
- full_response = response.content
155
- ### full_response = "this is full response"
156
-
157
- # for file in response.files:
158
- for i, file in enumerate(response.files):
159
- # await file.asave(f"/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/output{i}.png") ##working.
160
- # st.image(file.get_image() #! working.
161
- # file.show_image()
162
- myfont = FontProperties(fname='SimHei.ttf') # NOTE: 注意这里的云服务器路径格式。
163
- sns.axes_style("whitegrid", {'font.sans-serif':['myfont','Arial']})
164
- plt.rcParams['font.sans-serif'] = ['myfont']
165
- plt.title(label="这个是中文数据分布图", fontproperties=myfont)
166
-
167
-
168
- # * 注意这里的设定,可以提高图片的精细程度。
169
- file.show_image() #TODO: 看一下是否可以直接出图,图的质量和文字是否okay?
170
- st.image(file.get_image(), width=None,
171
- output_format='PNG')
172
-
173
- # message_placeholder.markdown(full_response + "▌") ## orignal code.
174
- # message_placeholder.markdown(full_response) ## orignal code.
175
- st.write(full_response)
176
- status.update(label='complete', state='complete')
177
-
178
- # TODO:看看是否缩进准确? 放在这里好像可以。
179
- await session.astop() # ! 确认需要关闭。
180
-
181
- st.session_state.messages.append(
182
- {"role": "assistant", "content": full_response})
183
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  if __name__ == "__main__":
186
  import asyncio
 
2
  参考: https://github.com/shroominic/codeinterpreter-api
3
 
4
  1. 可以存在本地,然后再调出来。 working.
5
+ 1. 可以在临时文件夹中读取文件。
6
  1. 可以直接在内存中读出图片。
7
+ 1. 中文字体成功。
8
+ from matplotlib.font_manager import FontProperties
9
+ myfont=FontProperties(fname='/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/rawdata/SimHei.ttf')
10
+ sns.set_style('whitegrid',{'font.sans-serif':['simhei','Arial']})
11
+
12
  '''
13
+ # TODO
14
 
 
15
  from codeinterpreterapi import CodeInterpreterSession, File
16
  import streamlit as st
17
  from codeinterpreterapi import CodeInterpreterSession
18
  import openai
19
  import os
20
  import matplotlib.pyplot as plt
21
+ import xlrd
22
  import pandas as pd
23
+ # from io import StringIO
24
+ # import csv
25
  import tempfile
26
  from tempfile import NamedTemporaryFile
27
  import pathlib
28
  from pathlib import Path
 
29
  from matplotlib.font_manager import FontProperties
30
  import seaborn as sns
31
 
32
+
33
  os.environ["OPENAI_API_KEY"] = os.environ['user_token']
34
  openai.api_key = os.environ['user_token']
35
  os.environ["VERBOSE"] = "True" # 可以看到具体的错误?
36
 
37
+ # #* 如果碰到接口问题,可以启用如下设置。
 
 
 
 
 
 
 
 
 
 
38
  # openai.proxy = {
39
+ # "http": "http://127.0.0.1:7890",
40
  # "https": "http://127.0.0.1:7890"
41
+ # }
42
 
43
+ # layout settings.
44
+ st.title("个人大语言模型商业智能中心")
45
+ st.subheader("Artificial Intelligence Backend Center for Individuals")
46
+ # col1, col2 = st.columns(spec=[1, 2])
47
+ # radio_1 = col1.radio(label='ChatGPT版本', options=[
48
+ # 'GPT-3.5', 'GPT-4.0'], horizontal=True, label_visibility='visible')
49
+ # radio_2 = col2.radio(label='模式选择', options=[
50
+ # '核心模式', '联网模式', '数据模式'], horizontal=True, label_visibility='visible')
51
 
 
 
 
52
  uploaded_file = st.file_uploader(
53
+ "选择一个文件", type=(["csv", "xlsx", "xls"]))
54
+
 
55
  if uploaded_file is not None:
56
+ filename=uploaded_file.name
57
+ # st.write(filename) ## print out the whole file name to validate.
58
+ try:
59
+ if '.csv' in filename:
60
+ csv_file = pd.read_csv(uploaded_file)
61
+ st.write(csv_file[:3]) # 这里只是显示文件,后面需要定位文件所在的绝对路径。
62
+ else:
63
+ xls_file = pd.read_excel(uploaded_file)
64
+ st.write(xls_file[:3])
65
+ except Exception as e:
66
+ st.write(e)
67
 
68
  uploaded_file_name = "File_provided"
69
  temp_dir = tempfile.TemporaryDirectory()
 
73
  # output_temporary_file.write(uploaded_file.read())
74
  # ! 必须用这种格式读入内容,然后才可以写入temporary文件夹中。
75
  output_temporary_file.write(uploaded_file.getvalue())
76
+ st.write(uploaded_file_path) # * 可以查看文件是否真实存在,然后是否可以
77
+
78
+ import requests
79
+ bing_search_api_key = os.environ['bing_api_key']
80
+ bing_search_endpoint = 'https://api.bing.microsoft.com/v7.0/search'
81
def search(query):
    """Query the Bing Web Search endpoint and return its web-page results.

    Args:
        query: Search text, sent as the ``q`` request parameter.

    Returns:
        The list stored under ``webPages -> value`` in the JSON response.
        An empty list is returned when the response carries no ``webPages``
        section, so callers can join/slice the result without a KeyError.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx status (via ``raise_for_status``).
    """
    # Market code sent with every request (the original kept 'en-EN' as a
    # commented-out alternative).
    mkt = 'zh-CN'
    params = {'q': query, 'mkt': mkt}
    headers = {'Ocp-Apim-Subscription-Key': bing_search_api_key}

    # Without a timeout, requests waits forever on a stalled connection,
    # which would freeze the whole Streamlit script run.
    response = requests.get(
        bing_search_endpoint, headers=headers, params=params, timeout=10
    )
    response.raise_for_status()

    # Named `payload` rather than `json` to avoid shadowing the stdlib module.
    payload = response.json()
    return payload.get("webPages", {}).get("value", [])
98
 
99
  # openai.api_key = st.secrets["OPENAI_API_KEY"]
100
 
101
+ # async def main():
102
  async def main():
103
+ col1, col2 = st.columns(spec=[1, 2])
104
+ radio_1 = col1.radio(label='ChatGPT版本', options=[
105
+ 'GPT-3.5', 'GPT-4.0'], horizontal=True, label_visibility='visible')
106
+ radio_2 = col2.radio(label='模式选择', options=[
107
+ '核心模式', '联网模式', '数据模式'], horizontal=True, label_visibility='visible')
108
+
109
+ ## Set a default model
110
  if "openai_model" not in st.session_state:
111
+ st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
112
+
113
+ if radio_1 == 'GPT-3.5':
114
+ print('radio_1: GPT-3.5 starts!')
115
+ st.session_state["openai_model"] = "gpt-3.5-turbo-16k"
116
+ else:
117
+ print('radio_1: GPT-4.0 starts!')
118
  st.session_state["openai_model"] = "gpt-4"
119
 
120
+ # Initialize chat history
121
  if "messages" not in st.session_state:
122
  st.session_state.messages = []
123
 
124
+ # Display chat messages from history on app rerun
125
  for message in st.session_state.messages:
126
  with st.chat_message(message["role"]):
127
  st.markdown(message["content"])
128
 
129
+ # Display assistant response in chat message container
130
+ if prompt := st.chat_input("Ask something?"):
131
  st.session_state.messages.append({"role": "user", "content": prompt})
132
  with st.chat_message("user"):
133
  st.markdown(prompt)
 
136
  message_placeholder = st.empty()
137
  full_response = ""
138
 
139
# The mode radio offers '核心模式', '联网模式' and '数据模式'. Compare against
# the exact option label; the previous literal '数据分析模式' matched none of
# them, so the data-analysis branch was unreachable.
if radio_2 == '数据模式':
    print('数据分析模式启动!')
141
+
142
+ # clear cache to avoid any potential history problems.
143
+ st.cache_resource.clear()
144
+
145
+ with st.chat_message("assistant"):
146
+ # message_placeholder = st.empty()
147
+ # full_response = ""
148
+ async with CodeInterpreterSession() as session:
149
+ # user_request = "对于文件中的'SepalLengthCm’数据给我一个'直方图',提供图表,并给出分析结果"
150
+ #! 可以用设定dpi=300来输出高质量的图表。(注:图的解析度dpi设定为300)
151
+ environ_settings = """【背景要求】如果我没有告诉你任何定制化的要求,那么请你按照以下的默认要求来回答:
152
+ -------------------------------------------------------------------------
153
+ 1. 你需要用提问的语言来回答(如:中文提问你就用中文来回答,英文提问你就用英文来回答)。
154
+ 2. 如果要求你输出图表,那么图的解析度dpi需要设定为300。图尽量使用seaborn库。seaborn库的参数设定:sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='dark'。
155
+ 3. 如果需要显示中文,那么设置如下:
156
+ 3.1 首先,你需要安装中文字体:
157
+ myfont=FontProperties(fname='/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/rawdata/SimHei.ttf')
158
+ 3.2 然后,你需要设定在matplotlib(plt)和seabornsns)中设定:
159
+ sns.set_style({'font.sans-serif':['Arial','SimHei']})
160
+ plt.rcParams['font.sans-serif'] = ['SimHei']
161
+ plt.rcParams['font.family']='sans-serif'
162
+ plt.title(fontsize = 18)
163
+ -------------------------------------------------------------------------
164
+ """ # seaborn中的palette参数可以设定图表的颜色,选项包括:deep, muted, pastel, bright, dark, colorblind,Spectral。更多参数可以参考:https://seaborn.pydata.org/generated/seaborn.color_palette.html。
165
+
166
+ user_request = environ_settings + "\n\n" + \
167
+ "你需要完成以下任务:\n\n" + prompt + \
168
+ f"注:文件位置在{uploaded_file_path}"
169
+ print('user_request: \n', user_request)
170
+
171
+ # 加载上传的文件,主要路径在上面代码中。
172
+ files = [File.from_path(str(uploaded_file_path))]
173
+
174
+ with st.status('processing...', expanded=True, state='running') as status:
175
+ # generate the response
176
+ response = await session.generate_response(
177
+ user_request, files=files
178
+ )
179
+
180
+ # output to the user
181
+ print("AI: ", response.content)
182
+ full_response = response.content
183
+ ### full_response = "this is full response"
184
+
185
+ # for file in response.files:
186
+ for i, file in enumerate(response.files):
187
+ # await file.asave(f"/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/output{i}.png") ##working.
188
+ # st.image(file.get_image()) #! working.
189
+ # * 注意这里的设定,可以提高图片的精细程度。
190
+ st.image(file.get_image(), width=None,
191
+ output_format='PNG')
192
+
193
+ # message_placeholder.markdown(full_response + "▌") ## orignal code.
194
+ # message_placeholder.markdown(full_response) ## orignal code.
195
+ st.write(full_response)
196
+ status.update(label='complete', state='complete')
197
+ # st.session_state.messages.append(
198
+ # {"role": "assistant", "content": full_response})
199
+
200
+ await session.astop() #! 确认需要关闭。
201
+ # st.session_state.messages.append({"role": "assistant", "content": full_response})
202
+
203
+ elif radio_2 == '联网模式':
204
+ # print('联网模式入口,prompt:', prompt)
205
+ input_message = prompt
206
+ internet_search_result = search(input_message)
207
+ search_prompt = [f"Source:\nTitle: {result['name']}\nURL: {result['url']}\nContent: {result['snippet']}" for result in internet_search_result]
208
+ prompt = "基于如下的互联网公开信息, 回答问题:\n\n" + "\n\n".join(search_prompt[:3]) + "\n\n问题: " + input_message + "你需要注意的是回答问题时必须用提问的语言(如英文或者中文)来提示:'答案基于互联网公开信息。'" + "\n\n答案: " ## 限制了只有3个搜索结果。
209
+ # prompt = "Use these sources to answer the question:\n\n" + "\n\n".join(search_prompt[0:3]) + "\n\nQuestion: " + input_message + "(注意:回答问题时请提示'以下答案基于互联网公开信息。')\n\n" + "\n\nAnswer: "
210
+
211
+ st.session_state.messages.append({"role": "user", "content": prompt})
212
+
213
+ for response in openai.ChatCompletion.create(
214
+ model=st.session_state["openai_model"],
215
+ messages=[
216
+ {"role": m["role"], "content": m["content"]}
217
+ for m in st.session_state.messages
218
+ ],
219
+ stream=True,
220
+ ):
221
+ full_response += response.choices[0].delta.get("content", "")
222
+ message_placeholder.markdown(full_response + "▌")
223
+ message_placeholder.markdown(full_response)
224
+ st.session_state.messages.append(
225
+ {"role": "assistant", "content": full_response})
226
+
227
+ elif radio_2 == '核心模式':
228
+ print('GPT only starts!!!')
229
+ print('st.session_state now:', st.session_state)
230
+ # st.session_state.messages.append({"role": "system", "content": 'You are a helpful AI assistant: ChatGPT.'})
231
+
232
+ for response in openai.ChatCompletion.create(
233
+ model=st.session_state["openai_model"],
234
+ messages=[
235
+ {"role": m["role"], "content": m["content"]}
236
+ for m in st.session_state.messages
237
+ ],
238
+ stream=True,
239
+ ):
240
+ # if len(response)>0:
241
+ full_response += response.choices[0].delta.get("content", "")
242
+ message_placeholder.markdown(full_response + "▌")
243
+ message_placeholder.markdown(full_response)
244
+ st.session_state.messages.append(
245
+ {"role": "assistant", "content": full_response})
246
+
247
 
248
  if __name__ == "__main__":
249
  import asyncio