Spaces:

allinaigc
/

text2sql

Sleeping

App Files Files Community

allinaigc commited on May 3, 2024

Commit

5255efb

verified ·

1 Parent(s): e8abf91

Upload 6 files

Browse files

Files changed (6) hide show

2D.png +0 -0
app.py +441 -0
chatsql004.py +157 -0
myexcelDB.db +0 -0
qwen_response.py +45 -0
requirements.txt +17 -0

2D.png ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,441 @@

+'''
+'''
+##TODO: 1. 转换成Qwen的API。 2.
+import time
+import os
+import pandas as pd
+import streamlit as st
+from code_editor import code_editor
+# from utils.setup import setup_connexion, setup_session_state
+# from utils.vanna_calls import (
+#     generate_questions_cached,
+#     generate_sql_cached,
+#     run_sql_cached,
+#     generate_plotly_code_cached,
+#     generate_plot_cached,
+#     generate_followup_cached,
+# )
+# import chatsql003 ### 本地ChatGLT 版本。
+import chatsql004 ###Qwen API 版本。
+import sql_command
+# from streamlit_pandas_profiling import st_profile_report
+import dashscope
+from dotenv import load_dotenv
+load_dotenv()
+### 设置openai的API key
+dashscope.api_key = os.environ['dashscope_api_key']
+st.set_page_config(layout="wide", page_icon="🧩", page_title="本地化国产大模型数据库查询演示")
+# setup_connexion()
+def clear_all():
+    st.session_state.conversation = None
+    st.session_state.chat_history = None
+    st.session_state.messages = []
+    message_placeholder = st.empty()
+    st.session_state["my_question"] = None
+    return None
+## 原始的控制面板
+# st.sidebar.title("大模型控制面板")
+# st.sidebar.checkbox("Show SQL", value=True, key="show_sql")
+# st.sidebar.checkbox("Show Table", value=True, key="show_table")
+# st.sidebar.checkbox("Show Plotly Code", value=True, key="show_plotly_code")
+# st.sidebar.checkbox("Show Chart", value=True, key="show_chart")
+# st.sidebar.checkbox("Show Follow-up Questions", value=True, key="show_followup")
+# st.sidebar.button("清除记录，重启一轮新对话", on_click=setup_session_state, use_container_width=True, type='primary')
+st.title("本地化国产大模型数据库查询演示")
+# st.title("大语言模型SQL数据库查询中心")
+# st.info("声明：内容由人工智能生成，仅供参考。如果您本人使用或对外传播本服务生成的输出，您应当主动核查输出内容的真实性、准确性，避免传播虚假信息。") ## 颜色比较明显。
+st.markdown("_声明：内容由人工智能生成，仅供参考。如果您本人使用或对外传播本服务生成的输出，您应当主动核查输出内容的真实性、准确性，避免传播虚假信息。_")
+### Streamlit Sidebar 左侧工具栏
+# st.sidebar.write(st.session_state)
+with st.sidebar:
+    st.markdown(
+        """
+        <style>
+        [data-testid="stSidebar"][aria-expanded="true"]{
+            min-width: 500px;
+            max-width: 500px;
+        }
+        """,
+        unsafe_allow_html=True,
+    )
+    ### siderbar的题目。
+    ### siderbar的题目。
+    # st.header(f'**大语言模型专家系统工作设定区**')
+    st.header(f'**系统控制面板** ')
+    # st.header(f'**欢迎 **{username}** 使用本系统** ') ## 用户登录显示。
+    st.write(f'_Large Language Model Expert System Working Environment_')
+    st.sidebar.button("清除记录，重启一轮新对话", on_click=clear_all, use_container_width=True, type='primary')
+    # st.sidebar.button("清除记录，重启一轮新对话", on_click=setup_session_state, use_container_width=True, type='primary')
+    ### 展示当前数据库
+    # st.markdown("#### 当前数据库中的数据:")
+    with st.expander("#### 当前数据库中的数据", expanded=True):
+        my_db = pd.read_sql_table('table01', 'sqlite:///myexcelDB.db')
+        st.dataframe(my_db, width=400)
+        ## 在sidebar上的三个分页显示，用st.tabs实现。
+    tab_1, tab_2, tab_4 = st.tabs(['使用须知', '模型参数', '角色设定'])
+    # tab_1, tab_2, tab_3, tab_4 = st.tabs(['使用须知', '模型参数', '提示词模板', '系统角色设定'])
+    # with st.expander(label='**使用须知**', expanded=False):
+    with tab_1:
+        # st.markdown("#### 快速上手指南")
+        # with st.text(body="说明"):
+        #     st.markdown("* 重启一轮新对话时，只需要刷新页面（按Ctrl/Command + R）即可。")
+        with st.text(body="说明"):
+            st.markdown("""* 简介
+本系统使用大模型技术，将自然语言描述转换为SQL查询语句。用户可以输入自然语言问题，系统会自动生成相应的SQL语句，并返回查询结果。
+* 使用步骤
+在文本框中输入您的自然语言问题。
+系统会自动生成相应的SQL语句，并显示在下方。
+点击“满意，请执行语句”选项，可查看查询结果。点击”不满意，需要改正语句“可以手动修改SQL语句。
+* 注意事项
+本系统仍在开发中，可能会存在一些错误或不准确的地方。
+自然语言描述越清晰，生成的SQL语句越准确。
+系统支持的SQL语法有限，请尽量使用简单易懂的语法。
+""")
+        # with st.text(body="说明"):
+        #     st.markdown("""在构建大语言模型本地知识库问答系统时，需要注意以下几点：
+        #                 LLM的选择：LLM的选择应根据系统的应用场景和需求进行。对于需要处理通用问题的系统，可以选择通用LLM；对于需要处理特定领域问题的系统，可以选择针对该领域进行微调的LLM。
+        #                 本地知识库的构建：本地知识库应包含系统所需的所有知识。知识的组织方式应便于LLM的访问和处理。
+        #                 系统的评估：系统应进行充分的评估，以确保其能够准确地回答用户问题。
+        #                 大语言模型本地知识库问答系统具有以下优势：
+        #                 准确性：LLM和本地知识库的结合可以提高系统的准确性。
+        #                 全面性：LLM和本地知识库的结合可以使系统能够回答更广泛的问题。
+        #                 效率：LLM可以快速生成候选答案，而本地知识库可以快速评估候选答案的准确性。
+        #                 """)
+        # with st.text(body="说明"):
+        #     st.markdown(
+        #         "* “数据分析模式”暂时只支持1000个单元格以内的数据分析，单元格中的内容不支持中文数据（表头也尽量不使用中文）。一般运行时间在1至10分钟左右，期间需要保持网络畅通。")
+        # with st.text(body="说明"):
+        #     st.markdown("* “数据分析模式”推荐上传csv格式的文件，部分Excel文件容易出现数据不兼容的情况。")
+    ## 大模型参数
+    # with st.expander(label='**大语言模型参数**', expanded=True):
+    with tab_2:
+        max_tokens = st.slider(label='Max_Token（生成结果时最大字数）', min_value=100, max_value=4096, value=2048, step=100)
+        temperature = st.slider(label='Temperature (温度)', min_value=0.0, max_value=1.0, value=0.8, step=0.1)
+        top_p = st.slider(label='Top_P (核采样)', min_value=0.0, max_value=1.0, value=0.6, step=0.1)
+        frequency_penalty = st.slider(label='Frequency Penalty (重复度惩罚因子)', min_value=-2.0, max_value=2.0, value=1.0, step=0.1)
+        presence_penalty = st.slider(label='Presence Penalty (控制主题的重复度)', min_value=-2.0, max_value=2.0, value=1.0, step=0.1)
+    ## reset password widget
+    # try:
+    #     if authenticator.reset_password(st.session_state["username"], 'Reset password'):
+    #         st.success('Password modified successfully')
+    # except Exception as e:
+    #     st.error(e)
+    # with st.header(body="欢迎"):
+    #     st.markdown("# 欢迎使用大语言模型商业智能中心")
+    # with st.expander(label=("**重要的使用注意事项**"), expanded=True):
+    # with st.container():
+    ##NOTE： 在SQL场景去不需要展示这些提示词。
+    # with tab_3:
+    #     # st.markdown("#### Prompt提示词参考资料")
+    #     # with st.expander(label="**大语言模型基础提示词Prompt示例**", expanded=False):
+    #     st.code(
+    #         body="继续用中文写一篇关于 [文章主题] 的文章，以下列句子开头：[文章开头］。", language='plaintext')
+    #     st.code(body="将以下文字概括为 100 个字，使其易于阅读和理解。避免使用复杂的句子结构或技术术语。",
+    #             language='plaintext')
+    #     st.code(body="给我出一个迪奥2023春季发布会活动策划。", language='plaintext')
+    #     st.code(body="帮我按照正式会议结构写一个会邀：主题是xx手机游戏立项会议。", language='plaintext')
+    #     st.code(body="帮我写一个车内健康监测全场景落地的项目计划，用表格。", language='plaintext')
+    #     st.code(
+    #         body="同时掷两枚质地均匀的骰子，则两枚骰子向上的点数之和为 7 的概率是多少。", language='plaintext')
+    #     st.code(body="写一篇产品经理的演讲稿，注意使用以下词汇: 赋能,抓手,中台,闭环,落地,漏斗,沉淀,给到,同步,对齐,对标,迭代,拉通,打通,升级,交付,聚焦,倒逼,复盘,梳理,方案,联动,透传,咬合,洞察,渗透,兜底,解耦,耦合,复用,拆解。", language='plaintext')
+        # with st.expander(label="**数据分析模式的专用提示词Prompt示例**", expanded=False):
+        #     # with st.subheader(body="提示词Prompt"):
+        #     st.code(body="分析此数据集并绘制一些'有趣的图表'。", language='python')
+        #     st.code(
+        #         body="对于这个文件中的数据，你需要要找出[X,Y]数据之间的寻找'相关性'。", language='python')
+        #     st.code(body="对于这个文件中的[xxx]数据给我一个'整体的分析'。", language='python')
+        #     st.code(body="对于[xxx]数据给我一个'直方图'，提供图表，并给出分析结果。", language='python')
+        #     st.code(body="对于[xxx]数据给我一个'小提琴图'，并给出分析结果。", language='python')
+        #     st.code(
+        #         body="对于[X,Y,Z]数据在一个'分布散点图 (stripplot)'���所有的数据在一张图上展现, 并给出分析结果。", language='python')
+        #     st.code(body="对于[X，Y]数据，进行'T检验'，你需要展示图表，并给出分析结果。",
+        #             language='python')
+        #     st.code(body="对于[X，Y]数据给我一个3个类别的'聚类分析'，并给出分析结果。",
+        #             language='python')
+    with tab_4:
+        st.text_area(label='系统角色设定', value='你是一个人工智能，你需要回答我提出的问题，或者完成我交代的任务。你需要使用我提问的语言（如中文、英文）来回答。', height=200, label_visibility='hidden')
+myavatar = "./2D.png"
+def set_question(question):
+    st.session_state["my_question"] = question
+assistant_message_suggested = st.chat_message(
+    "assistant", avatar=myavatar
+)
+##! 注意在填写列名时，最佳的选择是使用双引号将名字框出来。否则LLM会不认识。
+if assistant_message_suggested.button("点击这里可以查看参考问题示例 ℹ️"):
+    questions = [
+        '你查一下长度超过10的数据有哪些？',
+        '你告诉我长度超过30且宽度超过10的数据有哪些？',
+        '宽度超过20且价格超过25的数据有哪些？',
+        '”长度“超过10且”宽度“超过15且”类别“是”甲“的数据有哪些？',
+        '长度超过10以上的价格的总和是多少？',
+        '长度排名前三的产品ID和价格和长度？',
+    ]
+    st.session_state["my_question"] = None
+    # questions = generate_questions_cached() ## orignal.
+    for i, question in enumerate(questions):
+        time.sleep(0.05)
+        button = st.button(
+            question,
+            on_click=set_question,
+            args=(question,),
+        )
+my_question = st.session_state.get("my_question", default=None)
+if my_question is None:
+    my_question = st.chat_input(
+        "请在这里提交您的查询问题",
+    )
+if my_question:
+    st.session_state["my_question"] = my_question
+    user_message = st.chat_message("user")
+    user_message.write(f"{my_question}")
+    # sql = generate_sql_cached(question=my_question) ## original. 核心函数。用ChatGPT输出SQL语句。
+    # sql = chatsql003.main(prompt=my_question) ## 通过本地LLM输出SQL语句。
+    sql = chatsql004.main(prompt=my_question) ## 通过本地LLM输出SQL语句。
+    if sql:
+        if st.session_state.get("show_sql", True):
+            assistant_message_sql = st.chat_message(
+                "assistant", avatar=myavatar
+            )
+            assistant_message_sql.code(sql, language="sql", line_numbers=True)
+        user_message_sql_check = st.chat_message("user")
+        user_message_sql_check.write(f"您是否满意上面的SQL查询语句答案?")
+        with user_message_sql_check:
+            happy_sql = st.radio(
+                # "Happy",
+                label="您的反馈:",
+                options=["未决定", "满意，请执行语句", "不满意，需要改正语句"], ## 去掉第一个happy选项。
+                # options=["", "yes", "no"], ## original。
+                key="radio_sql",
+                index=0,
+                horizontal=True,
+                label_visibility='visible',
+            )
+        # ## 非original。后期添加内容。Working.
+        if happy_sql == "未决定":
+            df = None
+            st.session_state["df"] = None
+        if happy_sql == "不满意，需要改正语句":
+            st.warning(
+                "请您手动修正SQL语句，按Shift + Enter提交"
+            )
+            sql_response = code_editor(sql, lang="sql")
+            fixed_sql_query = sql_response["text"]
+            if fixed_sql_query != "":
+                # df = run_sql_cached(sql=fixed_sql_query) ## original. 核心函数。
+                df = sql_command.llm_query(sql_command=fixed_sql_query)
+            else:
+                df = None
+        elif happy_sql == "满意，请执行语句":
+            # df = run_sql_cached(sql=sql) ## original. 核心函数。
+            df = sql_command.llm_query(sql_command=sql) ## working.
+        else:
+            df = None
+        if df is not None:
+            st.session_state["df"] = df
+        if st.session_state.get("df") is not None:
+            ## 显示查询的结果汇总信息
+            # with st.container():
+            query_info_describe = st.chat_message("assistant", avatar=myavatar)
+            if df is not None:
+                with query_info_describe:
+                    metric_col1, metric_col2, metric_col3, metric_col4 = st.columns(4)
+                    metric_col1.metric(label='查询结果中的行数', value=f"{df.shape[0]} 行", delta=None)
+                    metric_col2.metric(label='查询结果中的列数', value=f"{df.shape[1]} 列", delta=None)
+                    metric_col3.metric(label='查询结果中的单元格���', value=f"{df.size} 个", delta=None)
+                    metric_col4.metric(label='查询结果中的缺失值', value=f"{df.isnull().sum().sum()} 个", delta=None)
+            if st.session_state.get("show_table", True):
+                df = st.session_state.get("df")
+                assistant_message_table = st.chat_message(
+                    "assistant",
+                    avatar=myavatar
+                )
+                ### 目前是最佳显示方式
+                if len(df) > 10:
+                    assistant_message_table.markdown("**查询结果较多，当前只显示前10行数据**") ##TODO: 看看是否有其他更好的显示方式。
+                    assistant_message_table.dataframe(df.head(10), hide_index=True)
+                    # assistant_message_table.dataframe(df.head(100), use_container_width=True)
+                else:
+                    assistant_message_table.dataframe(df, hide_index=True)
+                    # assistant_message_table.dataframe(df,use_container_width=True)
+                ## 以下是其他datafram的显示方式，均不理想。
+                # st.dataframe(df, use_container_width=True, height=100)
+                ## streamlit AwesomeTable, Dark模式下显示有问题。可以在sidebar上显示内容。
+                # from awesome_table import AwesomeTable
+                # AwesomeTable(df,show_order=True, show_search=True, show_search_order_in_sidebar=True)
+                # # AwesomeTable(pd.json_normalize(df))
+            # ## 展示图形
+            # # code = generate_plotly_code_cached(question=my_question, sql=sql, df=df) ## orignal. 用LLM来构建统计图表：https://github.com/vanna-ai/vanna/blob/main/src/vanna/mistral/mistral.py
+            # if st.session_state.get("show_plotly_code", False):
+            #     assistant_message_plotly_code = st.chat_message(
+            #         "assistant",
+            #         avatar=myavatar,
+            #     )
+            #     assistant_message_plotly_code.code(
+            #         code, language="python", line_numbers=True
+            #     )
+            # user_message_plotly_check = st.chat_message("user")
+            # user_message_plotly_check.write(
+            #     f"Are you happy with the generated Plotly code?"
+            # )
+            # with user_message_plotly_check:
+            #     happy_plotly = st.radio(
+            #         "Happy",
+            #         options=["", "yes", "no"],
+            #         key="radio_plotly",
+            #         index=0,
+            #     )
+            # if happy_plotly == "no":
+            #     st.warning(
+            #         "Please fix the generated Python code. Once you're done hit Shift + Enter to submit"
+            #     )
+            #     python_code_response = code_editor(code, lang="python")
+            #     code = python_code_response["text"]
+            # elif happy_plotly == "":
+            #     code = None
+            # if code is not None and code != "":
+            #     if st.session_state.get("show_chart", True):
+            #         assistant_message_chart = st.chat_message(
+            #             "assistant",
+            #             avatar=myavatar,
+            #         )
+            #         fig = generate_plot_cached(code=code, df=df)
+            #         if fig is not None:
+            #             assistant_message_chart.plotly_chart(fig)
+            #         else:
+            #             assistant_message_chart.error("I couldn't generate a chart")
+                ## Data Visualization: Ydata_profiling (pandas_profiling)
+                # from ydata_profiling import ProfileReport
+                # pr = ProfileReport(df)
+                # # with st.expander(label='**数据汇总信息**', expanded=False):
+                # with st.container():
+                #     st_profile_report(pr)
+                #     # st.stop()
+                ## 数据可视化内容，高级版。
+                ## pywalker: https://github.com/Kanaries/pygwalker/tree/main
+                user_message_plot_check = st.chat_message("user")
+                user_message_plot_check.write(f"您是否希望基于上述查询结果进行数据可视化?")
+                with user_message_plot_check:
+                    happy_plot = st.radio(
+                        # "Happy",
+                        label="您的反馈:",
+                        options=["未决定", "是，需要进行数据可视化", "否，不需要进行数据可视化"], ## 去掉第一个happy选项。
+                        # options=["", "yes", "no"], ## original。
+                        key="radio_plot",
+                        index=0,
+                        horizontal=True,
+                        label_visibility='visible',
+                    )
+                if happy_plot == "是，需要进行数��可视化":
+                    import pygwalker as pyg
+                    import streamlit.components.v1 as components
+                    import streamlit as st
+                    from pygwalker.api.streamlit import init_streamlit_comm, get_streamlit_html
+                    # Initialize pygwalker communication
+                    init_streamlit_comm()
+                    # When using `use_kernel_calc=True`, you should cache your pygwalker html, if you don't want your memory to explode
+                    @st.cache_resource
+                    def get_pyg_html(df: pd.DataFrame) -> str:
+                        # When you need to publish your application, you need set `debug=False`,prevent other users to write your config file.
+                        # If you want to use feature of saving chart config, set `debug=True`
+                        html = get_streamlit_html(df, spec="./gw0.json", use_kernel_calc=True, debug=False)
+                        return html
+                    # walker = pyg.walk(df)
+                    components.html(get_pyg_html(df), width=1200, height=800, scrolling=True)
+                ### streamlit echarts: https://github.com/andfanilo/streamlit-echarts
+                # from streamlit_echarts import st_echarts
+                # from streamlit_echarts import st_pyecharts
+                # st_pyecharts(df)
+                ### Follow-up questions session.
+                # if st.session_state.get("show_followup", True):
+                #     assistant_message_followup = st.chat_message(
+                #         "assistant",
+                #         avatar=myavatar,
+                #     )
+                #     followup_questions = generate_followup_cached(
+                #         question=my_question, df=df
+                #     )
+                #     st.session_state["df"] = None
+                #     if len(followup_questions) > 0:
+                #         assistant_message_followup.text(
+                #             "Here are some possible follow-up questions"
+                #         )
+                #         # Print the first 5 follow-up questions
+                #         for question in followup_questions[:5]:
+                #             time.sleep(0.05)
+                #             assistant_message_followup.write(question)
+    else:
+        assistant_message_error = st.chat_message(
+            "assistant", avatar=myavatar
+        )
+        assistant_message_error.error("我无法回答您的问题，请重新提问，谢谢！")

chatsql004.py ADDED Viewed

	@@ -0,0 +1,157 @@

+"""
+1. 这里用公网Qwen API来代替本地的ChatGLM模型。为了在Huggingface上演示。
+1. 使用确定了列名作为SQL语句的变量名，可以有效解决模型生成的SQL语句中变量名准确的问题。
+"""
+##TODO:
+import requests
+import os
+from rich import print
+import os
+import sys
+import time
+import pandas as pd
+import numpy as np
+import sys
+import time
+from typing import Any
+import requests
+import csv
+import os
+from rich import print
+import pandas
+import io
+from io import StringIO
+import re
+from langchain.llms.utils import enforce_stop_tokens
+import json
+from transformers import AutoModel, AutoTokenizer
+import mdtex2html
+import qwen_response
+''' Start: Environment settings. '''
+os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/Users/yunshi/Downloads/chatGLM/My_LocalKB_Project/'
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+import torch
+mps_device = torch.device("mps") ## 在mac机器上需要加上这句。必须要有这句，否则会报错。
+### 在langchain中定义chatGLM作为LLM。
+from typing import Any, List, Mapping, Optional
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+from transformers import AutoTokenizer, AutoModel
+# llm_filepath = str("/Users/yunshi/Downloads/chatGLM/ChatGLM3-6B/6B") ## 第三代chatGLM 6B W/ code-interpreter
+# ## API模式启动ChatGLM
+# ## 配置ChatGLM的类与后端api server对应。
+# class ChatGLM(LLM):
+#     max_token: int = 2048
+#     temperature: float = 0.1
+#     top_p = 0.9
+#     history = []
+#     def __init__(self):
+#         super().__init__()
+#     @property
+#     def _llm_type(self) -> str:
+#         return "ChatGLM"
+#     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+#         # headers中添加上content-type这个参数，指定为json格式
+#         headers = {'Content-Type': 'application/json'}
+#         data=json.dumps({
+#             'prompt':prompt,
+#             'temperature':self.temperature,
+#             'history':self.history,
+#             'max_length':self.max_token
+#         })
+#         print("ChatGLM prompt:",prompt)
+#         # 调用api
+#         # response = requests.post("http://0.0.0.0:8000",headers=headers,data=data) ##working。
+#         response = requests.post("http://127.0.0.1:8000",headers=headers,data=data) ##working。
+#         print("ChatGLM resp:", response)
+#         if response.status_code!=200:
+#             return "查询结果错误"
+#         resp = response.json()
+#         if stop is not None:
+#             response = enforce_stop_tokens(response, stop)
+#         self.history = self.history+[[None, resp['response']]] ##original
+#         return resp['response'] ##original.
+# llm = ChatGLM() ## 启动一个实例。orignal working。
+# import asyncio
+# llm =  ChatGLM() ## 启动一个实例。
+''' End: Environment settings. '''
+### 我会用中文或者英文双引号（即：“ ”，" "）来告知你变量的名称。 长度"，"宽度"，"价格"，"产品ID"，"比率"，"类别"，"*"
+### 用ChatGLM构建一个只返回SQL语句的模型。
+def main(prompt):
+    full_reponse = []
+    sys_prompt = """
+    1. 你是一个将文字转换成SQL语句的人工智能。
+    2. 你需要注意：你只需要用纯文本回复代码的内容，即你不允许回复代码以外的任何信息。
+    3. SQL变量默认是中文，而且只能从如下的名称列表中选择，你不可以使用这些名字以外的变量名："长度"，"宽度"，"价格"，"产品ID"，"比率"，"类别"，"*"
+    4. 你不能写IF, THEN的SQL语句，需要使用CASE。
+    5. 我需要你转换的文字如下："""
+    total_prompt = sys_prompt + "在数据表格table01中，" + prompt
+    print('total prompt now:',total_prompt)
+    # for response, history in chatglm.model.stream_chat(chatglm.tokenizer, query=str(prompt)): ## 这里保留了所有的chat history在input_prompt中。
+    # for response, history in chatglm.model.stream_chat(chatglm.tokenizer, query=str(prompt)): ## 这里保留了所有的chat history在input_prompt中。
+    # for response, history in chatglm.model.stream_chat(chatglm.tokenizer, query=str(total_prompt)): ## 这里保留了所有的chat history在input_prompt中。
+            # # for response, history in chatglm.model.stream_chat(chatglm.tokenizer, query=str(input_prompt[-1][0])): ## 从用langchain的自定义方式来做。
+            # # for response, history in chatglm.model.stream_chat(chatglm.tokenizer, query=str(input_prompt[-1][0]), history=input_prompt, max_length=max_tokens, top_p=top_p, temperature=temperature): ## 从用langchain的自定义方式来做。
+            # if response != "<br>":
+            #     # print('response of model:', response)
+            #     # input_prompt[-1][1] = response ## working.
+            #     # input_prompt[-1][1] = response
+            #     # yield input_prompt
+        # full_reponse.append(response)
+    # ## 得到一个非stream格式的答复。非API模式。
+    # response, history = chatglm.model.chat(chatglm.tokenizer, query=str(total_prompt), temperature=0.1) ## 这里保留了所有的chat history在input_prompt中。
+    ###TODO：API模式，需要先启动API服务器。
+    # llm =  ChatGLM() ##!! 重要说明：每次都需要实例化一次！！！否则会报错content error。实际上是应该在每次函数调用的时候都要实例化一次！
+    # response = llm(total_prompt) ## 这里是本地的ChatGLM来作为大模型输出基座。
+    response = qwen_response.call_with_messages(total_prompt)
+    print('response of model:', response)
+    ## 用regex来提取纯SQL语句。需要构建多个正则式pattern
+    pattern_1 = r"(?:`sql\n|\n`)"
+    pattern_2 = r"(?:```|``)"
+    pattern_3 = r"(?s)(.*?SQL语句示例.*?：).*?\n"
+    pattern_4 = r"(?:`{3}|`{2}|`)"
+    # pattern_5 = r"[\u4e00-\u9FFF]" ## 匹配中文。
+    # pattern_6 = r"^[\u4e00-\u9fa5]{5,}" ## 首行中包含5个中文汉字的。
+    pattern_7 = r"^.{0,2}([\u4e00-\u9fa5]{5,}).*" ## 首行中包含5个中文汉字的。
+    pattern_8 = r'^"|"$' ## 去除一句话开始或者末尾的英文双引号
+    pattern_list = [pattern_1, pattern_2, pattern_3, pattern_4, pattern_7, pattern_8]
+    ## 遍历所有的pattern，逐个去除。
+    full_reponse = response
+    for p in pattern_list:
+        full_reponse = re.sub(p, "", full_reponse)
+    # final_response = re.sub(pattern_1, "", response) ## 逐步匹配。
+    # final_response = re.sub(pattern_1, "", response) ## 逐步匹配。
+    # final_response = re.sub(pattern_2, "", final_response)  ## 逐步匹配。
+    return full_reponse
+# prompt = "你给我一段复杂的SQL语句示例。"
+# prompt = "你给我一段SQL语句，用来完成如下工作：查询年龄大于30岁，男性，收入超过2万元的员工。"
+# res = main(prompt=prompt)
+# print(res)

myexcelDB.db ADDED Viewed

Binary file (16.4 kB). View file

qwen_response.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import random
+from http import HTTPStatus
+import dashscope
+### 参考：
+## export DASHSCOPE_API_KEY="sk-948adb3e65414e55961a9ad9d22d186b"
+dashscope.api_key = "sk-948adb3e65414e55961a9ad9d22d186b"
+# qwen_sys_prompt = """
+# 1. 你是一个将文字转换成SQL语句的人工智能。
+# 2. 此外你需要注意：你只需要用纯文本回复代码的内容，即你不允许回复代码以外的任何信息。
+# 3. 你不能将将字段名翻译成英文，而是必须使用如下的名词：长度，宽度，价格，产品ID，比率，类别，*
+# 4. 你不能写IF, THEN的SQL语句，需要使用CASE。
+# """
+qwen_sys_prompt = """你是一个将文字转换成SQL语句的人工智能。"""
+def call_with_messages(prompt):
+    messages = [{'role': 'system', 'content': qwen_sys_prompt},
+                {'role': 'user', 'content': prompt}]
+                # {'role': 'user', 'content': '如何做西红柿炒鸡蛋？'}]
+    response = dashscope.Generation.call(
+        "qwen-turbo",
+        messages=messages,
+        # set the random seed, optional, default to 1234 if not set
+        seed=random.randint(1, 10000),
+        # set the result to be "message" format.
+        result_format='message',
+    )
+    if response.status_code == HTTPStatus.OK:
+        print(response)
+    else:
+        print('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
+            response.request_id, response.status_code,
+            response.code, response.message
+        ))
+    return response['output']['choices'][0]['message']['content'] ### 这里是content的内容，不是message的全部内容。
+# if __name__ == '__main__':
+#     # call_with_messages() ### original code here.
+#     res = call_with_messages() ## working.
+#     # print(res)

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+dashscope==1.17.0
+fastapi==0.111.0
+langchain==0.1.17
+mdtex2html==1.2.0
+nest_asyncio==1.5.8
+numpy==1.26.4
+pandas==2.2.2
+pygwalker==0.4.8.1
+pyngrok==7.0.5
+python-dotenv==1.0.1
+Requests==2.31.0
+rich==13.7.1
+streamlit==1.33.0
+streamlit_code_editor==0.1.10
+torch==2.2.0
+transformers==4.37.1
+uvicorn==0.29.0