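# Gradio app for teachers: upload a CSV/Excel file, have OpenAI (gpt-4-1106-preview)
# summarise the table and suggest three questions, then chat about the data.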
import gradio as gr
import pandas as pd
import os
from openai import OpenAI
import json
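
# The OpenAI API key is read from the OPEN_AI_KEY environment variable
# (e.g. a Hugging Face Space secret); API calls will fail if it is not set.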
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
client = OpenAI(api_key=OPEN_AI_KEY)

def process_file(file):
    # Read the uploaded file
    if file.name.endswith('.csv'):
        df = pd.read_csv(file)
    else:
        df = pd.read_excel(file)
    df_string = df.to_string()

    # Generate suggested questions and a summary from the uploaded data
    questions = generate_questions(df_string)
    df_summarise = generate_df_summarise(df_string)

    # Return the button texts, the summary, and the DataFrame string
    return questions[0] if len(questions) > 0 else "", \
           questions[1] if len(questions) > 1 else "", \
           questions[2] if len(questions) > 2 else "", \
           df_summarise, \
           df_string
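
# Note: process_file's return order matches the `outputs` list wired to
# `file_upload.change` below (btn_1, btn_2, btn_3, df_summarise, df_string_output).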

def generate_df_summarise(df_string):
    # Use OpenAI to summarise the uploaded data
    # (prompts are kept in Traditional Chinese; the app targets zh-TW output)
    sys_content = "你是一個資料分析師,服務對象為老師,請精讀資料,使用 zh-TW"
    user_content = f"請根據 {df_string},大概描述這張表的欄位敘述,以及內容的資料樣態與解析"
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content}
    ]

    print("=====messages=====")
    print(messages)
    print("=====messages=====")

    request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": messages,
        "max_tokens": 2000,
    }

    response = client.chat.completions.create(**request_payload)
    df_summarise = response.choices[0].message.content.strip()

    print("=====df_summarise=====")
    print(df_summarise)
    print("=====df_summarise=====")

    return df_summarise
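
# Note: df.to_string() embeds the entire table in each prompt, so very large
# uploads may exceed the model's context window.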

def generate_questions(df_string):
    # Use OpenAI to generate questions a teacher might ask about the uploaded data
    sys_content = "你是一個資料分析師,user為老師,請精讀資料,並用既有資料為本質猜測用戶可能會問的問題,使用 zh-TW"
    user_content = f"請根據 {df_string} 生成三個問題,並用 JSON 格式返回 questions:[q1, q2, q3]"
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content}
    ]
    response_format = { "type": "json_object" }

    print("=====messages=====")
    print(messages)
    print("=====messages=====")

    request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": messages,
        "max_tokens": 2000,
        "response_format": response_format
    }
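
    # With response_format={"type": "json_object"}, the prompt itself must
    # mention JSON (the user prompt above does); the model is expected to
    # return an object shaped like {"questions": [q1, q2, q3]}.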
    response = client.chat.completions.create(**request_payload)
    questions = json.loads(response.choices[0].message.content)["questions"]

    print("=====json_response=====")
    print(questions)
    print("=====json_response=====")

    return questions

def send_question(question, df_string_output, chat_history):
    # Called when a suggested-question button is clicked
    # (the buttons below currently call respond directly)
    return respond(question, df_string_output, chat_history)

def respond(user_message, df_string_output, chat_history):
    print("=== variable: user_message ===")
    print(user_message)
    print("=== variable: chat_history ===")
    print(chat_history)

    sys_content = f"你是一個資料分析師,請用 {df_string_output} 為資料進行對話,使用 zh-TW"
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_message}
    ]

    print("=====messages=====")
    print(messages)
    print("=====messages=====")

    request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": messages,
        "max_tokens": 2000  # fairly large; adjust as needed
    }

    response = client.chat.completions.create(**request_payload)
    print(response)
    response_text = response.choices[0].message.content.strip()
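
    # gr.Chatbot renders the history as a list of (user_message, assistant_message)
    # pairs, so each exchange is stored as a 2-tuple.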
    # Update the chat history
    new_chat_history = (user_message, response_text)
    if chat_history is None:
        chat_history = [new_chat_history]
    else:
        chat_history.append(new_chat_history)

    # Return an empty string to clear the input box, plus the chat history
    return "", chat_history

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(label="Upload your file")
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="Message")
            send_button = gr.Button("Send")
        with gr.Column():
            with gr.Group():
                df_string_output = gr.Textbox(label="raw data")
            with gr.Group():
                gr.Markdown("## 這是一張什麼表?")  # "What kind of table is this?"
                df_summarise = gr.Textbox()
            with gr.Group():
                gr.Markdown("## 常用問題")  # "Frequently asked questions"
                btn_1 = gr.Button()
                btn_2 = gr.Button()
                btn_3 = gr.Button()

    send_button.click(
        respond,
        inputs=[msg, df_string_output, chatbot],
        outputs=[msg, chatbot]
    )

    # Wire up the suggested-question buttons
    btn_1.click(respond, inputs=[btn_1, df_string_output, chatbot], outputs=[msg, chatbot])
    btn_2.click(respond, inputs=[btn_2, df_string_output, chatbot], outputs=[msg, chatbot])
    btn_3.click(respond, inputs=[btn_3, df_string_output, chatbot], outputs=[msg, chatbot])
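    # Note: using a Button component as an input passes its current label text,
    # so the question shown on the button becomes the user message.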

    # file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
    file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])

demo.launch()