|
import ast
|
|
import gradio as gr
|
|
from functions import doc_db_example_question_generator, doc_db_chatbot_with_fc
|
|
from data_sources import connect_doc_db
|
|
from utils import message_dict
|
|
|
|
def hide_info():
    """Return a Gradio update that makes the target component invisible.

    Wired to the Submit button so the introductory description is hidden
    once the user starts a connection.
    """
    hidden_state = gr.update(visible=False)
    return hidden_state
|
|
|
|
with gr.Blocks() as demo:
    # Introductory panel; hidden by hide_info when Submit is clicked.
    description = gr.HTML("""
        <!-- Header -->
        <div class="max-w-4xl mx-auto mb-12 text-center">
            <div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
                <p>This tool allows users to communicate with and query real time data from a Document DB (MongoDB for now, others can be added if requested) using natural
                language and the above features.</p>
                <p style="font-weight:bold;">Notice: the way this system is designed, no login information is retained and credentials are passed as session variables until the user leaves or
                refreshes the page in which they disappear. They are never saved to any files. I also make use of the PyMongoArrow aggregate_pandas_all function to apply pipelines,
                which can't delete, drop, or add database lines to avoid unhappy accidents or glitches.
                That being said, it's probably not a good idea to connect a production database to a strange AI tool with an unfamiliar author.
                This should be for demonstration purposes.</p>
                <p>Contact me if this is something you would like built in your organization, on your infrastructure, and with the requisite privacy and control a production
                database analytics tool requires.</p>
            </div>
        </div>
        """, elem_classes="description_component")

    status_message = gr.HTML(
        value='<p style="color:green;text-align:center;font-size:18px;">Please be patient while connecting as we need to generate '
              'and read a schema before connection can be successful. This process can take a few minutes.</p>',
        padding=False,
    )

    # NOTE(review): the default values below (including a password) are
    # committed in source; confirm they are meant to be public demo credentials.
    connection_string = gr.Textbox(label="Connection String", value="dataanalyst0.l1klmww.mongodb.net/")
    with gr.Row():
        connection_user = gr.Textbox(label="Connection User", value="virtual-data-analyst")
        connection_password = gr.Textbox(label="Connection Password", value="zcpbmoGJ3mC8o", type="password")
    # NOTE(review): original indentation was lost; this field is assumed to sit
    # below the credentials row rather than inside it - confirm.
    doc_db_name = gr.Textbox(label="Database Name", value="sample_mflix")

    submit = gr.Button(value="Submit")
    submit.click(fn=hide_info, outputs=description)

    @gr.render(inputs=[connection_string, connection_user, connection_password, doc_db_name], triggers=[submit.click])
    def sql_chat(request: gr.Request, connection_string=connection_string.value, connection_user=connection_user.value, connection_password=connection_password.value, doc_db_name=doc_db_name.value):
        """Connect to the database and render the chat UI for this session.

        Runs each time Submit is clicked. Builds a ``mongodb+srv://`` URI from
        the form fields, calls ``process_doc_db`` and shows its status message.
        On success it renders the collections box and a chat interface seeded
        with example questions.

        Args:
            request: Gradio request; its ``session_hash`` keys per-session state.
            connection_string: Host part of the URI (text after the ``@``).
            connection_user: Database user name, as typed (not URL-encoded).
            connection_password: Database password, as typed (not URL-encoded).
            doc_db_name: Name of the database to query.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote_plus

        if request.session_hash not in message_dict:
            message_dict[request.session_hash] = {}
        # Clear the doc_db entry on every connection attempt.
        # NOTE(review): assumed to sit outside the `if` above so the key also
        # exists for sessions created elsewhere - confirm against the original.
        message_dict[request.session_hash]['doc_db'] = None

        # Credentials must be percent-escaped, otherwise reserved characters
        # such as '@', ':' or '/' in the user name or password break the URI.
        connection_login_value = (
            "mongodb+srv://"
            + quote_plus(connection_user)
            + ":"
            + quote_plus(connection_password)
            + "@"
            + connection_string
        )

        # The URI always starts with the scheme literal, so it is never empty
        # and no truthiness guard is needed here.
        print("MONGO APP")
        process_message = process_doc_db(connection_login_value, doc_db_name, request.session_hash)
        gr.HTML(value=process_message[1], padding=False)

        if process_message[0] == "success":
            if "dataanalyst0.l1klmww.mongodb.net" in connection_login_value:
                # Curated questions for the bundled sample cluster.
                example_questions = [
                    ["Describe the dataset"],
                    ["What are the top 5 most common movie genres?"],
                    ["How do user comment counts on a movie correlate with the movie award wins?"],
                    ["Can you generate a pie chart showing the top 10 states with the most movie theaters?"],
                    ["What are the top 10 most represented directors in the database?"],
                    ["What are the different movie categories and how many movies are in each category?"],
                ]
            else:
                try:
                    # The generator returns text that is parsed as a Python
                    # literal; literal_eval does not execute code.
                    generated_examples = ast.literal_eval(
                        doc_db_example_question_generator(request.session_hash, process_message[2], doc_db_name, process_message[3])
                    )
                    example_questions = [["Describe the dataset"]]
                    example_questions.extend([example] for example in generated_examples)
                except Exception as e:
                    # Best effort: any generation or parsing failure falls
                    # back to generic questions instead of breaking the UI.
                    print("DOC DB QUESTION GENERATION ERROR")
                    print(e)
                    example_questions = [
                        ["Describe the dataset"],
                        ["List the columns in the dataset"],
                        ["What could this data be used for?"],
                    ]

            # These components are passed to the chatbot callback as
            # additional inputs; all but the collections box are hidden.
            session_hash = gr.Textbox(visible=False, value=request.session_hash)
            db_connection_string = gr.Textbox(visible=False, value=connection_login_value)
            db_name = gr.Textbox(visible=False, value=doc_db_name)
            db_collections = gr.Textbox(value=process_message[2], interactive=False, label="DB Collections")
            db_schema = gr.Textbox(visible=False, value=process_message[3])

            bot = gr.Chatbot(type='messages', label="CSV Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
            chat = gr.ChatInterface(
                fn=doc_db_chatbot_with_fc,
                type='messages',
                chatbot=bot,
                title="Chat with your Database",
                examples=example_questions,
                concurrency_limit=None,
                additional_inputs=[session_hash, db_connection_string, db_name, db_collections, db_schema],
            )
|
|
|
|
def process_doc_db(connection_string, nosql_db_name, session_hash):
    """Connect to the document database and return the connection result.

    Args:
        connection_string: Full database URI built by the caller.
        nosql_db_name: Name of the database to open.
        session_hash: Session identifier forwarded to ``connect_doc_db``.

    Returns:
        The value returned by ``connect_doc_db``. The caller in this file
        reads index 0 as the status (``"success"`` on success) and index 1 as
        an HTML status message; on success it also reads indexes 2 and 3.
        When ``connection_string`` is empty, an ``("error", html)`` pair is
        returned instead of ``None`` so the caller can index the result.
    """
    if not connection_string:
        return (
            "error",
            '<p style="color:red;text-align:center;font-size:18px;">'
            "ERROR: a connection string is required.</p>",
        )
    return connect_doc_db(connection_string, nosql_db_name, session_hash)
|
|
|
|
if __name__ == "__main__":
    # Launch the app only when this file is run directly, not on import.
    demo.launch()