Spaces:
Sleeping
Sleeping
from flask import Flask, render_template, request, redirect, url_for, send_from_directory, flash | |
from flask_socketio import SocketIO | |
import threading | |
import os | |
from dotenv import load_dotenv | |
import sqlite3 | |
from werkzeug.utils import secure_filename | |
import traceback | |
# LangChain and agent imports | |
from langchain_community.chat_models.huggingface import ChatHuggingFace # if needed later | |
from langchain.agents import Tool | |
from langchain.agents.format_scratchpad import format_log_to_str | |
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser | |
from langchain_core.callbacks import CallbackManager, BaseCallbackHandler | |
from langchain_community.agent_toolkits.load_tools import load_tools | |
from langchain_core.tools import tool | |
from langchain_community.agent_toolkits import SQLDatabaseToolkit | |
from langchain.chains import LLMMathChain | |
from langchain import hub | |
from langchain_community.tools import DuckDuckGoSearchRun | |
# Agent requirements and type hints | |
from typing import Annotated, Literal, TypedDict, Any | |
from langchain_core.messages import AIMessage, ToolMessage | |
from pydantic import BaseModel, Field | |
from typing_extensions import TypedDict | |
from langgraph.graph import END, StateGraph, START | |
from langgraph.graph.message import AnyMessage, add_messages | |
from langchain_core.runnables import RunnableLambda, RunnableWithFallbacks | |
from langgraph.prebuilt import ToolNode | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_community.utilities import SQLDatabase | |
# Load environment variables (python-dotenv reads a local .env file if present).
load_dotenv()

# Global configuration variables
# Uploaded database files are stored under ./uploads relative to the process
# working directory; the folder is created eagerly at import time.
UPLOAD_FOLDER = os.path.join(os.getcwd(), "uploads")
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# API keys from the environment / .env file.
# os.environ only accepts str values, so assigning os.getenv(...) for a key
# that is not set would raise TypeError at import time. Re-export the keys
# that exist and warn about the ones that do not, so the module still imports.
for _api_key_name in ("GROQ_API_KEY", "MISTRAL_API_KEY"):
    _api_key_value = os.getenv(_api_key_name)
    if _api_key_value is None:
        print(f"[WARN]: Environment variable {_api_key_name} is not set.")
    else:
        os.environ[_api_key_name] = _api_key_value

# Global state: dynamic agent and DB file path
agent_app = None      # compiled agent workflow; built lazily on the first query
abs_file_path = None  # absolute path of the currently active uploaded database
db_path = None        # path the most recent upload was saved to
# ============================================================================= | |
# create_agent_app: Given a database path, initialize the agent workflow. | |
# ============================================================================= | |
def create_agent_app(db_path: str):
    """Build and compile the LangGraph text-to-SQL workflow for one SQLite file.

    The compiled graph runs these steps in order:
    START -> first_tool_call -> list_tables_tool -> model_get_schema
    -> get_schema_tool -> query_gen. After ``query_gen`` the router
    ``should_continue`` either ends the run (the model issued a tool call),
    loops back to ``query_gen`` (the last message starts with "Error:"), or
    goes through ``correct_query`` -> ``execute_query`` and back to
    ``query_gen``.

    Args:
        db_path: Path to the SQLite database file; it is made absolute before
            being turned into a ``sqlite:///`` URI.

    Returns:
        The compiled workflow (result of ``StateGraph.compile()``).

    Raises:
        Exception: If the SQLDatabase connection cannot be created (the
            original error is chained as ``__cause__``).
        RuntimeError: If the SQL toolkit does not expose the
            ``sql_db_list_tables`` and ``sql_db_schema`` tools.
    """
    # Use ChatGroq as our LLM here; swap to ChatMistralAI if preferred.
    from langchain_groq import ChatGroq
    llm = ChatGroq(model="llama3-70b-8192")

    # -------------------------------------------------------------------------
    # Define a tool for executing SQL queries, with an explicit description.
    # -------------------------------------------------------------------------
    def db_query_tool(query: str) -> str:
        """
        Executes a SQL query on the connected SQLite database.
        """
        # db_instance is bound later in the enclosing function; it is only
        # looked up when the tool is actually called.
        try:
            result = db_instance.run_no_throw(query)
            return result if result else "Error: Query failed. Please rewrite your query and try again."
        except Exception as e:
            return f"Error: {str(e)}"

    # -------------------------------------------------------------------------
    # Pydantic model for final answer.
    # -------------------------------------------------------------------------
    class SubmitFinalAnswer(BaseModel):
        final_answer: str = Field(..., description="The final answer to the user")

    # -------------------------------------------------------------------------
    # Define state type for our workflow.
    # -------------------------------------------------------------------------
    class State(TypedDict):
        messages: Annotated[list[AnyMessage], add_messages]

    # -------------------------------------------------------------------------
    # Set up prompt templates for query checking and generation.
    # -------------------------------------------------------------------------
    query_check_system = (
        "You are a SQL expert with a strong attention to detail.\n"
        "Double check the SQLite query for common mistakes, including:\n"
        "- Using NOT IN with NULL values\n"
        "- Using UNION when UNION ALL should have been used\n"
        "- Using BETWEEN for exclusive ranges\n"
        "- Data type mismatch in predicates\n"
        "- Properly quoting identifiers\n"
        "- Using the correct number of arguments for functions\n"
        "- Casting to the correct data type\n"
        "- Using the proper columns for joins\n\n"
        "If there are any of the above mistakes, rewrite the query. If there are no mistakes, just reproduce the original query.\n"
        "You will call the appropriate tool to execute the query after running this check."
    )
    query_check_prompt = ChatPromptTemplate.from_messages([
        ("system", query_check_system),
        ("placeholder", "{messages}")
    ])
    query_check = query_check_prompt | llm.bind_tools([db_query_tool])

    query_gen_system = (
        "You are a SQL expert with a strong attention to detail.\n\n"
        "Given an input question, output a syntactically correct SQLite query to run, then look at the results of the query and return the answer.\n\n"
        "DO NOT call any tool besides SubmitFinalAnswer to submit the final answer.\n\n"
        "When generating the query:\n"
        "Output the SQL query that answers the input question without a tool call.\n"
        "Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 5 results.\n"
        "You can order the results by a relevant column to return the most interesting examples in the database.\n"
        "Never query for all the columns from a specific table, only ask for the relevant columns given the question.\n\n"
        "If you get an error while executing a query, rewrite the query and try again.\n"
        "If you get an empty result set, you should try to rewrite the query to get a non-empty result set.\n"
        "NEVER make stuff up if you don't have enough information to answer the query... just say you don't have enough information.\n\n"
        "If you have enough information to answer the input question, simply invoke the appropriate tool to submit the final answer to the user.\n"
        "DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database. Do not return any SQL query except answer."
    )
    query_gen_prompt = ChatPromptTemplate.from_messages([
        ("system", query_gen_system),
        ("placeholder", "{messages}")
    ])
    query_gen = query_gen_prompt | llm.bind_tools([SubmitFinalAnswer])

    # -------------------------------------------------------------------------
    # Create SQLDatabase connection.
    # -------------------------------------------------------------------------
    db_uri = f"sqlite:///{os.path.abspath(db_path)}"
    print("db_uri", db_uri)
    try:
        db_instance = SQLDatabase.from_uri(db_uri)
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise Exception(f"Failed to create SQLDatabase connection: {e}") from e
    print("db_instance----->", db_instance)

    # -------------------------------------------------------------------------
    # Create SQL toolkit.
    # -------------------------------------------------------------------------
    toolkit_instance = SQLDatabaseToolkit(db=db_instance, llm=llm)
    tools_instance = toolkit_instance.get_tools()

    # -------------------------------------------------------------------------
    # Define workflow nodes and fallback functions.
    # -------------------------------------------------------------------------
    def first_tool_call(state: State):
        # Seed the conversation with a synthetic tool call so the next node
        # (list_tables_tool) always runs "sql_db_list_tables" first.
        return {"messages": [AIMessage(content="", tool_calls=[{"name": "sql_db_list_tables", "args": {}, "id": "tool_abcd123"}])]}

    def handle_tool_error(state: State):
        # Fallback for a failed tool node: answer every pending tool call of
        # the last message with a generic error ToolMessage.
        tool_calls = state["messages"][-1].tool_calls
        return {"messages": [
            ToolMessage(content="Error occurred. Please revise.", tool_call_id=tc["id"]) for tc in tool_calls
        ]}

    def create_tool_node_with_fallback(tools_list):
        return ToolNode(tools_list).with_fallbacks([RunnableLambda(handle_tool_error)], exception_key="error")

    def query_gen_node(state: State):
        try:
            message = query_gen.invoke(state)
        except Exception as e:
            raise Exception(f"Exception in query_gen_node: {e}") from e
        # Any tool call other than SubmitFinalAnswer is answered with an
        # error ToolMessage appended after the model message.
        tool_messages = []
        if message.tool_calls:
            for tc in message.tool_calls:
                if tc["name"] != "SubmitFinalAnswer":
                    tool_messages.append(ToolMessage(
                        content=f"Error: Wrong tool called: {tc['name']}",
                        tool_call_id=tc["id"]
                    ))
        return {"messages": [message] + tool_messages}

    def should_continue(state: State) -> Literal[END, "correct_query", "query_gen"]:
        messages = state["messages"]
        last_message = messages[-1]
        # A tool call on the last message ends the run.
        if getattr(last_message, "tool_calls", None):
            return END
        if last_message.content.startswith("Error:"):
            return "query_gen"
        return "correct_query"

    def model_check_query(state: State):
        # Only the most recent message (the candidate query) is sent to the checker.
        return {"messages": [query_check.invoke({"messages": [state["messages"][-1]]})]}

    # -------------------------------------------------------------------------
    # Get tools for listing tables and fetching schema.
    # -------------------------------------------------------------------------
    list_tables_tool = next((t for t in tools_instance if t.name == "sql_db_list_tables"), None)
    schema_tool = next((t for t in tools_instance if t.name == "sql_db_schema"), None)
    if list_tables_tool is None or schema_tool is None:
        # Fail here with a clear message instead of passing None into
        # bind_tools / ToolNode below.
        raise RuntimeError(
            "SQLDatabaseToolkit did not provide the required "
            "'sql_db_list_tables' and 'sql_db_schema' tools."
        )
    model_get_schema = llm.bind_tools([schema_tool])

    workflow = StateGraph(State)
    workflow.add_node("first_tool_call", first_tool_call)
    workflow.add_node("list_tables_tool", create_tool_node_with_fallback([list_tables_tool]))
    workflow.add_node("get_schema_tool", create_tool_node_with_fallback([schema_tool]))
    workflow.add_node("model_get_schema", lambda state: {"messages": [model_get_schema.invoke(state["messages"])]})
    workflow.add_node("query_gen", query_gen_node)
    workflow.add_node("correct_query", model_check_query)
    workflow.add_node("execute_query", create_tool_node_with_fallback([db_query_tool]))

    workflow.add_edge(START, "first_tool_call")
    workflow.add_edge("first_tool_call", "list_tables_tool")
    workflow.add_edge("list_tables_tool", "model_get_schema")
    workflow.add_edge("model_get_schema", "get_schema_tool")
    workflow.add_edge("get_schema_tool", "query_gen")
    workflow.add_conditional_edges("query_gen", should_continue)
    workflow.add_edge("correct_query", "execute_query")
    workflow.add_edge("execute_query", "query_gen")
    return workflow.compile()
# ============================================================================= | |
# create_app: The application factory. | |
# ============================================================================= | |
def create_app():
    """Application factory: build the Flask app and its Socket.IO server.

    Registers the index, upload, generate and file-download views and wires
    them to the module-level agent state (``agent_app``, ``abs_file_path``,
    ``db_path``).

    Returns:
        tuple: ``(flask_app, socketio)``.
    """
    flask_app = Flask(__name__, static_url_path='/uploads', static_folder='uploads')
    # NOTE(review): "*" accepts Socket.IO connections from any origin — confirm
    # this is intended for the deployment.
    socketio = SocketIO(flask_app, cors_allowed_origins="*")
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    flask_app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
    # NOTE(review): the fallback secret is a fixed, public string; set
    # FLASK_SECRET_KEY in any real deployment.
    flask_app.config['SECRET_KEY'] = os.getenv("FLASK_SECRET_KEY", "mysecretkey")

    # NOTE(review): no URL rules were visible for the views below, so none of
    # them was reachable and url_for("index") could not resolve. The paths and
    # methods used here are assumptions — confirm against the templates and
    # the front-end code.

    @flask_app.route("/files/<path:filename>")
    def uploaded_file(filename):
        """Send a file from the upload folder; on failure flash and go to index."""
        try:
            return send_from_directory(flask_app.config['UPLOAD_FOLDER'], filename)
        except Exception as e:
            flash(f"Could not send file: {str(e)}", "error")
            return redirect(url_for("index"))

    def run_agent(prompt, socketio):
        """Run the agent for ``prompt`` and report over Socket.IO.

        Runs in a background thread started by ``generate``. There is no
        request context here, so ``flash`` cannot be used; progress and the
        result are delivered only through "log" and "final" events.
        """
        global agent_app
        if not abs_file_path:
            socketio.emit("log", {"message": "[ERROR]: No DB file uploaded."})
            socketio.emit("final", {"message": "No database available. Please upload one and try again."})
            return
        try:
            if agent_app is None:
                socketio.emit("log", {"message": "[INFO]: Initializing agent for the first time..."})
                try:
                    agent_app = create_agent_app(abs_file_path)
                    socketio.emit("log", {"message": "[INFO]: Agent initialized."})
                except Exception as e:
                    error_message = f"Agent initialization failed: {str(e)}"
                    socketio.emit("log", {"message": f"[ERROR]: {error_message}"})
                    socketio.emit("final", {"message": "Agent initialization failed."})
                    return
            query = {"messages": [("user", prompt)]}
            result = agent_app.invoke(query)
            try:
                # The final answer is carried in the arguments of the first
                # tool call on the last message.
                messages = result["messages"] if result else None
                tool_calls = getattr(messages[-1], "tool_calls", None) if messages else None
                if tool_calls:
                    result = tool_calls[0]["args"]["final_answer"]
                else:
                    result = "Query execution did not return a valid final answer."
            except KeyError:
                result = "The agent's response did not contain the expected 'final_answer' key."
            except Exception as e:
                result = f"An error occurred while processing the agent's response: {str(e)}"
            print("final_answer------>", result)
            socketio.emit("final", {"message": result})
        except Exception as e:
            error_message = f"Generation failed: {str(e)}"
            socketio.emit("log", {"message": f"[ERROR]: {error_message}"})
            socketio.emit("final", {"message": "Generation failed."})
            traceback.print_exc()

    @flask_app.route("/")
    def index():
        """Render the main page."""
        return render_template("index.html")

    @flask_app.route("/generate", methods=["POST"])
    def generate():
        """Start the agent for the JSON ``prompt`` in a background thread.

        Returns ``("OK", 200)`` once the thread has started, or
        ``("ERROR", 500)`` if anything fails before that.
        """
        try:
            socketio.emit("log", {"message": "[STEP]: Entering query generation..."})
            data = request.json
            prompt = data.get("prompt", "")
            socketio.emit("log", {"message": f"[INFO]: Received prompt: {prompt}"})
            thread = threading.Thread(target=run_agent, args=(prompt, socketio))
            socketio.emit("log", {"message": f"[INFO]: Starting thread: {thread}"})
            thread.start()
            # We are inside a request here, so flash works directly.
            flash("Query submitted successfully.", "info")
            return "OK", 200
        except Exception as e:
            error_message = f"[ERROR]: {str(e)}"
            socketio.emit("log", {"message": error_message})
            flash(error_message, "error")
            return "ERROR", 500

    def is_sqlite_db(file_path):
        """Return True if the file starts with the 16-byte SQLite 3 header."""
        try:
            with open(file_path, 'rb') as f:
                return f.read(16) == b'SQLite format 3\x00'
        except OSError:
            return False

    @flask_app.route("/upload", methods=["GET", "POST"])
    def upload():
        """Show the upload form (GET) or accept a ``.db`` file (POST).

        A valid upload replaces ``uploaded.db`` in the upload folder, updates
        the module-level paths and resets ``agent_app`` so the agent is
        rebuilt on the next query.
        """
        global abs_file_path, agent_app, db_path
        try:
            if request.method == "POST":
                file = request.files.get("file")
                if not file:
                    flash("No file uploaded.", "error")
                    return "No file uploaded", 400
                filename = secure_filename(file.filename)
                if filename.endswith('.db'):
                    target_path = os.path.join(flask_app.config['UPLOAD_FOLDER'], "uploaded.db")
                    # Save to a staging file first: validating in place would
                    # overwrite (and, on failure, delete) the database that
                    # abs_file_path and the cached agent still refer to.
                    staging_path = target_path + ".incoming"
                    print("Saving file to:", target_path)
                    file.save(staging_path)
                    if not is_sqlite_db(staging_path):
                        os.remove(staging_path)
                        flash("Uploaded file is not a valid SQLite database.", "error")
                        socketio.emit("log", {"message": "[ERROR]: Invalid database file uploaded."})
                        return render_template("upload.html")
                    os.replace(staging_path, target_path)
                    db_path = target_path
                    abs_file_path = os.path.abspath(db_path)
                    print(f"[INFO]: File '{filename}' uploaded. Agent will be initialized on first query.")
                    socketio.emit("log", {"message": f"[INFO]: Database file '{filename}' uploaded."})
                    flash(f"Database file '{filename}' uploaded successfully.", "info")
                    agent_app = None  # Reset the agent to be lazily reinitialized on next query.
                    return redirect(url_for("index"))
                else:
                    flash("Invalid file format. Please upload a .db file.", "error")
                    return render_template("upload.html")
            return render_template("upload.html")
        except Exception as e:
            error_message = f"[ERROR]: {str(e)}"
            print(error_message)
            flash(error_message, "error")
            socketio.emit("log", {"message": error_message})
            return render_template("upload.html")

    return flask_app, socketio
# ============================================================================= | |
# Create the app for Gunicorn compatibility. | |
# ============================================================================= | |
# Module-level instances so a WSGI server can import `app` directly.
app, socketio_instance = create_app()

if __name__ == "__main__":
    # Debug mode enables the interactive debugger, which must not be exposed
    # on a reachable host; it is therefore opt-in via FLASK_DEBUG rather than
    # always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    socketio_instance.run(app, debug=debug_enabled)