Spaces:

JulsdL
/

SmartQuery

Paused

App Files Files Community

JulsdL committed on May 23, 2024

Commit

cea0ce1

1 Parent(s): f0e847a

Add initial implementation of SQL agent with few-shot learning and ChainLit integration for dynamic SQL query generation and execution based on user input.

Browse files

Files changed (6) hide show

.tables +0 -0
SELECT +0 -0
app.py +30 -0
prompt_templates.py +60 -0
requirements.txt +6 -0
sql_agent.py +63 -0

.tables ADDED Viewed

File without changes

SELECT ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import chainlit as cl
+from langchain.schema.runnable.config import RunnableConfig
+from sql_agent import SQLAgent
+# Test the agent
+# agent.invoke({"input": "How many artists are there?"})
+# ChainLit Integration
+@cl.on_chat_start
+async def on_chat_start():
+    cl.user_session.set("agent", SQLAgent)
+@cl.on_message
+async def on_message(message: cl.Message):
+    agent = cl.user_session.get("agent")  # Get the agent from the session
+    cb = cl.AsyncLangchainCallbackHandler(stream_final_answer=True)
+    config = RunnableConfig(callbacks=[cb])
+    result = await agent.ainvoke(message.content, config=config)
+    msg = cl.Message(content="")
+    async for chunk in result:
+        await msg.stream_token(chunk)
+    await msg.send()
+# Run the app
+if __name__ == "__main__":
+    cl.run()

prompt_templates.py ADDED Viewed

	@@ -0,0 +1,60 @@

+# Few-shot examples
+# Each entry pairs a natural-language question ("input") with the SQL statement
+# that answers it ("query"). sql_agent.py imports this list and builds an
+# example selector over it, keyed on the "input" text.
+# NOTE(review): several examples use SELECT *, while system_prefix tells the
+# model never to query for all columns - confirm this mismatch is intended.
+few_shot_examples = [
+    {"input": "List all artists.", "query": "SELECT * FROM Artist;"},
+    {
+        "input": "Find all albums for the artist 'AC/DC'.",
+        "query": "SELECT * FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'AC/DC');",
+    },
+    {
+        "input": "List all tracks in the 'Rock' genre.",
+        "query": "SELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');",
+    },
+    {
+        "input": "Find the total duration of all tracks.",
+        "query": "SELECT SUM(Milliseconds) FROM Track;",
+    },
+    {
+        "input": "List all customers from Canada.",
+        "query": "SELECT * FROM Customer WHERE Country = 'Canada';",
+    },
+    {
+        "input": "How many tracks are there in the album with ID 5?",
+        "query": "SELECT COUNT(*) FROM Track WHERE AlbumId = 5;",
+    },
+    {
+        "input": "Find the total number of invoices.",
+        "query": "SELECT COUNT(*) FROM Invoice;",
+    },
+    {
+        "input": "List all tracks that are longer than 5 minutes.",
+        "query": "SELECT * FROM Track WHERE Milliseconds > 300000;",
+    },
+    {
+        "input": "Who are the top 5 customers by total purchase?",
+        "query": "SELECT CustomerId, SUM(Total) AS TotalPurchase FROM Invoice GROUP BY CustomerId ORDER BY TotalPurchase DESC LIMIT 5;",
+    },
+    # NOTE(review): the standard Chinook schema's Album table appears to have no
+    # ReleaseDate column - verify this query against the actual database.
+    {
+        "input": "Which albums are from the year 2000?",
+        "query": "SELECT * FROM Album WHERE strftime('%Y', ReleaseDate) = '2000';",
+    },
+    {
+        "input": "How many employees are there",
+        "query": 'SELECT COUNT(*) FROM "Employee"',
+    },
+]
+# System Prompt template prefix
+# {dialect} and {top_k} are template placeholders, not literal text; sql_agent.py
+# declares both as input variables of the few-shot prompt that uses this string
+# as its prefix. The last line leads into the examples rendered after it.
+# NOTE(review): the text calls DROP a DML statement, though DROP is usually
+# classed as DDL - confirm whether the wording should be broadened.
+system_prefix = """You are an agent designed to interact with a SQL database.
+Given an input question, create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
+Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
+You can order the results by a relevant column to return the most interesting examples in the database.
+Never query for all the columns from a specific table, only ask for the relevant columns given the question.
+You have access to tools for interacting with the database.
+Only use the given tools. Only use the information returned by the tools to construct your final answer.
+You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.
+DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.
+If the question does not seem related to the database, just return "I don't know" as the answer.
+Here are some examples of user inputs and their corresponding SQL queries:"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+langchain
+langchain-community
+langchain-openai
+python-dotenv
+faiss-cpu
+chainlit

sql_agent.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from dotenv import load_dotenv
+from langchain_community.agent_toolkits import create_sql_agent
+from langchain_community.vectorstores import FAISS
+from langchain_core.example_selectors import SemanticSimilarityExampleSelector
+from langchain_core.prompts import ChatPromptTemplate, FewShotPromptTemplate, MessagesPlaceholder, PromptTemplate, SystemMessagePromptTemplate
+from langchain_openai import OpenAIEmbeddings
+from langchain_openai import ChatOpenAI
+from langchain_community.utilities import SQLDatabase
+from prompt_templates import few_shot_examples, system_prefix
+# Load the .env file
+load_dotenv()
+# Initialize the SQL database
+db = SQLDatabase.from_uri("sqlite:///Chinook.db")
+# Check the database connection
+print(db.dialect)
+print(db.get_usable_table_names())
+db.run("SELECT * FROM Artist LIMIT 10;")
+# Initialize the LLM
+llm = ChatOpenAI(model="gpt-4o", temperature=0)
+# Example selector will dynamically select examples based on the input question
+example_selector = SemanticSimilarityExampleSelector.from_examples(
+    few_shot_examples,
+    OpenAIEmbeddings(),
+    FAISS,
+    k=5,
+    input_keys=["input"],
+)
+# Few-shot prompt template
+few_shot_prompt = FewShotPromptTemplate(
+    example_selector=example_selector,
+    example_prompt=PromptTemplate.from_template(
+        "User input: {input}\nSQL query: {query}"
+    ),
+    input_variables=["input", "dialect", "top_k"],
+    prefix=system_prefix,
+    suffix="",
+)
+# Full prompt template
+full_prompt = ChatPromptTemplate.from_messages(
+    [
+        SystemMessagePromptTemplate(prompt=few_shot_prompt),
+        ("human", "{input}"),
+        MessagesPlaceholder("agent_scratchpad"),
+    ]
+)
+# Create the SQL agent
+SQLAgent = create_sql_agent(
+    llm=llm,
+    db=db,
+    prompt=full_prompt,
+    verbose=True,
+    agent_type="openai-tools",
+)