|
''' |
|
使用OpenAI的官方Assistant,实现code_interpreter功能: |
|
1. 使用OpenAI官方的API教程: |
|
https://cookbook.openai.com/examples/assistants_api_overview_python (重要) |
|
https://platform.openai.com/docs/assistants/overview |
|
2. YT视频: https://www.youtube.com/watch?v=5rcjGjgJNQc |
|
3. 基础代码参考:openai_interpreter.py |
|
4. 函数化text_response和image_response和prompt。 |
|
5. 中文字体显示还是有问题,现在妥协的方式是转换成英文来显示。不影响分析过程和结果。 |
|
|
|
##TEMP |
|
|
|
import matplotlib |
|
import seaborn as sns |
|
SimHei = FontProperties(fname='SimHei.ttf') |
|
matplotlib.rcParams['font.sans-serif'] = ['SimHei'] |
|
sns.set(rc={'axes.facecolor':'#FFF9ED','figure.facecolor':'#FFF9ED'}, palette='dark', font='SimHei') |
|
fontproperties = SimHei |
|
|
|
每次输出图表时,你都需要在matplotlib和seaborn中安装中文字体'SimHei',安装的路径在:'/Users/yunshi/Downloads/360Data/Data Center/Working-On Task/演讲与培训/2023ChatGPT/Coding/code_interpreter/SimHei.ttf'。然后,你需要在matplotlib和seaborn中设置字体为'SimHei'。
|
|
|
''' |
|
|
|
|
|
|
|
import csv |
|
import os |
|
import pathlib |
|
import re |
|
import tempfile |
|
import time |
|
import warnings |
|
from itertools import product |
|
from pathlib import Path |
|
from re import sub |
|
from tempfile import NamedTemporaryFile |
|
from time import sleep |
|
|
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import openai |
|
import pandas as pd |
|
import requests |
|
import seaborn as sns |
|
from dotenv import load_dotenv |
|
from matplotlib.pyplot import style |
|
from rich import print |
|
from tqdm import tqdm, tqdm_notebook, trange |
|
import chatgpt |
|
|
|
import streamlit as st |
|
import add_fonts |
|
|
|
# Silence every warning and apply seaborn's default theme.
warnings.filterwarnings('ignore')
sns.set()

# load_dotenv() runs before the environment lookup below, which reads
# `user_token`; a missing variable raises KeyError at import time.
load_dotenv()

# The same token is published as OPENAI_API_KEY and set on the openai module.
openai.api_key = os.environ["OPENAI_API_KEY"] = os.environ['user_token']

from openai import Client

# Module-level client built without arguments
# (presumably it picks the key up from OPENAI_API_KEY -- confirm against the SDK).
client = Client()
|
|
|
|
|
import json |
|
def show_json(name=None, obj=None):
    """Print a labelled, parsed view of ``obj`` followed by a divider line.

    Args:
        name: Label printed in front of the payload.
        obj: Object exposing ``model_dump_json()``; the JSON text it returns
            is parsed before printing.
    """
    parsed = json.loads(obj.model_dump_json())
    divider = "--" * 40
    print(name, ":", parsed)
    print(divider)
|
|
|
def save_json(obj):
    """Write ``obj`` to ``message.json`` in the working directory as indented JSON.

    ``obj.model_dump_json()`` already returns JSON text, so it is parsed first
    and then re-serialised. Dumping the raw string instead would store a single
    escaped string literal and the indentation would have no effect.

    Args:
        obj: Object exposing ``model_dump_json()``.
    """
    payload = json.loads(obj.model_dump_json())
    # Explicit UTF-8 so non-ASCII text is written readably on any platform.
    with open('message.json', 'w', encoding='utf-8') as fh:
        json.dump(payload, fh, indent=4, ensure_ascii=False)
|
|
|
|
|
# System instructions sent to the assistant. The text is part of the prompt
# (runtime data), so it is kept verbatim.
_ASSISTANT_INSTRUCTIONS = """
你是一个强大的AI助手。当被问到一个问题时,你需要根据提供给你的文件中的信息来回答这个问题。如果我没有告诉你任何定制化的要求,那么请你按照以下的默认要求来回答:
-------------------------------------------------------------------------
1. 你需要用我提问的语言来回答。
2. 如果要求你输出图表,那么图的解析度dpi需要设定为600。图尽量使用seaborn库。
3. 图表上如果有非英文的文字,那么你需要将字体翻译为英文,然后显示。
4. 你回答的文字内容必须尽可能的详细且通俗易懂。
5. 回答时尽可能地展示分析所对应的图表,并提供分析结果。 你需要按如下格式提供内容:
5.1 提供详细且专业的分析结果,提供足够的分析依据。
5.2 给出可能造成这一结果的可能原因有哪些?
以上内容全部用1, 2, 3这样的序列号格式来表达。
"""


def _wait_on_run(client, run, thread):
    """Poll the run every 0.5 s until it is no longer queued or in progress."""
    while run.status in ("queued", "in_progress"):
        time.sleep(0.5)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
            timeout=100,
        )
    return run


def _find_image_parts(messages):
    """Return (message_index, part_index) for every content part carrying an image file."""
    return [
        (i, j)
        for i, message in enumerate(messages.data)
        for j, part in enumerate(message.content)
        if getattr(part, "image_file", None)
    ]


def _download_images(client, messages, positions):
    """Fetch the file content for each image position; failures are reported and skipped."""
    image_files = []
    for x, y in positions:
        print('x,y=', x, y)
        try:
            file_id = messages.data[x].content[y].image_file.file_id
            image_files.append(client.files.content(file_id=file_id))
        except Exception as e:
            # Best effort: one failed download must not discard the others.
            print(f"An error occurred: {e}")
    return image_files


def _collect_reply_texts(messages):
    """Return the text of every non-user message part, oldest message first."""
    texts = []
    # NOTE(review): relies on messages.list() returning newest-first (the
    # documented default order), so the list is walked backwards.
    for x in range(len(messages.data) - 1, -1, -1):
        message = messages.data[x]
        if getattr(message, "role", None) == "user":
            # The user's own prompt is not part of the answer being summarised.
            continue
        for y, part in enumerate(message.content):
            text = getattr(part, "text", None)
            if text:
                print('x, y=', x, y)
                texts.append(text.value)
    return texts


def openai_assistant(prompt=None, filepath=None, username=None):
    """Run a code_interpreter assistant over an uploaded file and gather its output.

    The file is uploaded, a new assistant and thread are created, the run is
    polled until it stops, and the resulting messages are printed and saved
    via ``show_json`` / ``save_json``. Images referenced by the messages are
    downloaded, the reply texts are concatenated in chronological order, and
    that text is passed to ``chatgpt.chatgpt`` for a summary.

    Args:
        prompt: User question sent as the first thread message.
        filepath: Path of the file to upload; opened in binary mode.
        username: Accepted for interface compatibility; not used.

    Returns:
        A tuple ``(messages, text_response, image_response, image_files,
        final_answer)``:
        - ``messages`` is the object returned by ``threads.messages.list``.
        - ``text_response`` is the concatenated reply text.
        - ``image_response`` is always an empty list.
        - ``image_files`` holds one ``files.content`` result per image that
          downloaded successfully.
        - ``final_answer`` is whatever ``chatgpt.chatgpt`` returns.

    Raises:
        OSError: If ``filepath`` cannot be opened.
        Exception: Errors from the OpenAI client other than image downloads
            propagate unchanged.
    """
    client = Client()

    # Close the upload handle as soon as the request has been sent.
    with open(filepath, 'rb') as upload:
        uploaded = client.files.create(
            file=upload,
            purpose='assistants',
        )

    assistant = client.beta.assistants.create(
        name="AI Expert",
        instructions=_ASSISTANT_INSTRUCTIONS,
        tools=[{"type": "code_interpreter"}],
        model="gpt-3.5-turbo-1106",
        file_ids=[uploaded.id],
    )

    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
                "file_ids": [uploaded.id],
            }
        ],
    )

    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )
    run = _wait_on_run(client, run, thread)
    if run.status != "completed":
        # Do not raise: callers still receive whatever messages exist.
        print(f"Run ended with status '{run.status}'; the results below may be incomplete.")

    messages = client.beta.threads.messages.list(thread_id=thread.id)

    show_json(name='messages:', obj=messages)
    print('--'*40)
    save_json(obj=messages)

    # Never populated; kept so the returned tuple keeps its shape.
    image_response = []

    imagefile_position = _find_image_parts(messages)
    print('--'*30)
    print("总共有几张图片?:", len(imagefile_position))
    print('--'*30)

    print('start the image and text repsonse process!')
    image_files = _download_images(client, messages, imagefile_position)

    final_msg = _collect_reply_texts(messages)
    text_response = "".join(final_msg)
    print('final_msg:', final_msg)
    print('总共有几个text response:', len(final_msg))

    user_prompt = f"""首先,我会向你提供一段【文字内容】,这段文字中可能包括了一系列的多轮对话的内容。接着,我需要你根据这段文字中的内容整理成一段文字结论。你的回答风格需要很专业,包括:尽可能的包含统计数据、数字和专业的结论,不能有口语化的表达。【文字内容】如下{text_response}。"""
    final_answer = chatgpt.chatgpt(user_prompt=user_prompt)

    return messages, text_response, image_response, image_files, final_answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|