""" 1. working! 用自己的方式实现了一个Qwen的数据分析代码。 1. working!在执行streamlit run命令前,先执行python add_fonts.py。这样可以确保中文字体被加载到当前的python执行环境中。 1. working!测试可以在局域网中访问。 1. 多个阶段实现这一功能: 1. 根据用户的prompt,用Qwen生成代码。 1. 根据生成的代码,用Python的exec等模块在本地执行代码。 1. 将所有的内容放入hist_message中,然后用Qwen生成结论。 1. 变量名尽量用’‘引号括起来,避免出现错误。 1. plt.rcParams['font.sans-serif'] = ['Microsoft YaHei UI' ## 在qwen数据分析模块中,这里最好用plt。 1. 需要把datafram的信息(如列名,数据类型)加入到prompt中,这样可以帮助用户更好的选择数据进行分析。 1. """ ##TODO: 1. re-submit button. # -*- coding: utf-8 -*- import requests import random # from keras.utils.np_utils import to_categorical # from keras.optimizers import SGD # from keras.layers import Dense, Activation, LSTM, Dropout, SimpleRNN, SimpleRNNCell # from keras.models import Sequential # import keras from sklearn.preprocessing import StandardScaler import numpy as np import pandas as pd import matplotlib.pyplot as plt from dateutil.relativedelta import relativedelta from scipy.optimize import minimize import statsmodels.formula.api as smf import statsmodels.tsa.api as smt import statsmodels.api as sm import scipy.stats as scs from itertools import product from tqdm import tqdm_notebook, tqdm, trange import time import pretty_errors import seaborn as sns import sklearn from matplotlib.pyplot import style from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error from sklearn.linear_model import LinearRegression from sklearn.model_selection import cross_val_score from sklearn.model_selection import TimeSeriesSplit import matplotlib.pylab as plt import warnings import re from re import sub import smtplib import io import os import contextlib import streamlit as st import streamlit_authenticator as stauth import random from http import HTTPStatus import dashscope from io import StringIO from PIL import Image import add_fonts import tempfile from tempfile import NamedTemporaryFile warnings.filterwarnings('ignore') ### 参考: 
# --- DashScope credentials ---------------------------------------------------
# The API key is read from the environment rather than being committed to the
# source tree. Export it before launching the app, e.g.
#     export DASHSCOPE_API_KEY="<your key>"
# SECURITY: a literal key used to be hard-coded on this line. Because it was
# committed, it should be treated as leaked and rotated.
# If the variable is unset this assigns None.
# NOTE(review): presumably the dashscope SDK then reports the missing key on
# the first request -- confirm against the installed SDK version.
dashscope.api_key = os.environ.get("DASHSCOPE_API_KEY")

### make it look nice from the start
st.set_page_config(
    layout='wide',
    initial_sidebar_state='auto',
    page_icon="🤖",
    page_title="本地化大模型智能数据分析演示",
)

## layout settings.
st.title("本地化大模型智能数据分析演示")
st.subheader("Artificial Intelligence Data Analysis Center for Professionals")
st.markdown("_声明:内容由人工智能生成,仅供参考。如果您本人使用或对外传播本服务生成的输出,您应当主动核查输出内容的真实性、准确性,避免传播虚假信息。_")
# Module-level placeholder element created right below the disclaimer.
data_show = st.empty()

### authentication with a local yaml file.
import yaml
from yaml.loader import SafeLoader

# SafeLoader restricts parsing to plain YAML types (no arbitrary object
# construction), so loading the credentials file cannot execute code.
with open('./config.yaml') as file:
    config = yaml.load(file, Loader=SafeLoader)

# Build the authenticator from the credentials / cookie / preauthorized
# sections of config.yaml.
authenticator = stauth.Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized']
)

# authentication with a remote cloud-based database.
# Import the cloud user database.
# SECURITY: a literal Deta key used to be embedded in the commented-out line
# below; it has been redacted and should be rotated as well.
# DETA_KEY = "<redacted>"
# load_dotenv(".env")
# DETA_KEY = os.getenv("DETA_KEY")
# print(DETA_KEY)
# deta = Deta(DETA_KEY)
# mybase is the name of the database in Deta. You can change it to any name you want.
# credentials = {"usernames":{}} # # credentials = {"users": {}} # # db = db() # users = [] # email = [] # passwords = [] # names = [] # for row in db.fetch_all_users(): # users.append(row["username"]) # email.append(row["email"]) # names.append(row["key"]) # passwords.append(row["password"]) # hashed_passwords = stauth.Hasher(passwords).generate() ## 需要严格的按照yaml文件的格式来定义如下几个字段。 # for un, name, pw in zip(users, names, hashed_passwords): # # user_dict = {"name":name,"password":pw} # user_dict = {"name": un, "password": pw} # # credentials["usernames"].update({un:user_dict}) # credentials["usernames"].update({un: user_dict}) # ## sign-up模块,未完成。 # database_table = [] # # print(pd.DataFrame(credentials)) # for i in credentials['usernames'].keys(): # # print("i:",i) # # print("name",credentials['usernames'][i]['name']) # # print("password",credentials['usernames'][i]['password']) # database_table.append([i,credentials['usernames'][i]['name'],credentials['usernames'][i]['password']]) # print("database_table:",database_table) # authenticator = stauth.Authenticate( # credentials=credentials, cookie_name="joeshi_gpt", key='abcedefg', cookie_expiry_days=30) user, authentication_status, username = authenticator.login('用户登录', 'main') # print("name", name, "username", username) # ## sign-up widget,未完成。 # try: # if authenticator.register_user('新用户注册', preauthorization=False): # # for list in database_table: # # db.update_user(username=list[0], name=list[1], password=list[2]) # db.update_user(username=list[-1][0], name=list[-1][1], password=list[-1][2]) # # st.success('User registered successfully') # st.success('注册成功!') # except Exception as e: # st.error(e) # ## clear conversion. 
def reset_all(): # st.session_state.conversation = None st.session_state.chat_history = None st.session_state.messages = [] # st.session_state.messages message_placeholder = st.empty() data_show = st.empty() return None ##TODO:在带有聊天历史的情况下,重新整理这个模块。 def regenerate(user_input): ## 因为可能没有历史,第一次的时候,所以需要处理异常。 try: reset_all() main(user_input) except Exception as e: print('Error:', e) pass return None ### 对长传数据进行描述性统计,获得列表名 def dataframe_describe(df): df = pd.DataFrame(df) return pd.DataFrame({'column_name': df.columns, 'data_type': df.dtypes.values}) ## get the current time from datetime import datetime from pytz import timezone def get_current_time(): beijing_tz = timezone('Asia/Shanghai') beijing_time = datetime.now(beijing_tz) current_time = beijing_time.strftime('%H:%M:%S') return current_time if authentication_status: with st.sidebar: st.markdown( """