LeonceNsh committed
Commit 8cb3a33 · verified · 1 Parent(s): 0fd7668

Update app.py

Files changed (1):
  1. app.py +84 -94
app.py CHANGED
@@ -1,109 +1,99 @@
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import gradio as gr
-
-
-def generate_prompt(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
-    """
-    Generates the prompt by reading the prompt template and table metadata,
-    then formatting them with the user's question.
-    """
-    try:
-        with open(prompt_file, "r") as f:
-            prompt = f.read()
-    except FileNotFoundError:
-        return "Error: prompt.md file not found."
-
-    try:
-        with open(metadata_file, "r") as f:
-            table_metadata_string = f.read()
-    except FileNotFoundError:
-        return "Error: metadata.sql file not found."
-
-    prompt = prompt.format(
-        user_question=question, table_metadata_string=table_metadata_string
-    )
-    return prompt
-
-
-def get_tokenizer_model(model_name):
-    """
-    Loads the tokenizer and model from the specified model repository.
-    """
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        trust_remote_code=True,  # Set to True if the model uses custom code
-        torch_dtype=torch.float16,
-        device_map="auto",  # Automatically maps the model to available devices
-        use_cache=True,
-    )
-    return tokenizer, model
-
-
-# Load the tokenizer and model once when the script starts
-model_name = "defog/sqlcoder-7b-2"  # Replace with your model name
-print("Loading model and tokenizer...")
-tokenizer, model = get_tokenizer_model(model_name)
-print("Model and tokenizer loaded successfully.")
-
-# Initialize the text generation pipeline
-text_gen_pipeline = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=300,
-    do_sample=False,  # Disable sampling for deterministic output
-    return_full_text=False,
-    num_beams=5,  # Use beam search for better quality
-)
-
-
-def run_inference_gradio(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
-    """
-    Generates an SQL query based on the user's natural language question.
-    """
-    if not question.strip():
-        return "Please enter a valid question."
-
-    prompt = generate_prompt(question, prompt_file, metadata_file)
-
-    if prompt.startswith("Error:"):
-        return prompt  # Return the error message if files are missing
-
-    eos_token_id = tokenizer.eos_token_id
-    try:
-        generated = text_gen_pipeline(
-            prompt,
-            num_return_sequences=1,
-            eos_token_id=eos_token_id,
-            pad_token_id=eos_token_id,
-        )
-    except Exception as e:
-        return f"Error during model inference: {str(e)}"
-
-    generated_text = generated[0]["generated_text"]
-
-    # Extract the SQL query from the generated text
-    sql_query = generated_text.split(";")[0].split("```")[0].strip() + ";"
-    return sql_query
-
-
-# Define the Gradio interface
-iface = gr.Interface(
-    fn=run_inference_gradio,
-    inputs=gr.Textbox(
-        lines=4,
-        placeholder="Enter your natural language question here...",
-        label="Question"
-    ),
-    outputs=gr.Textbox(label="Generated SQL Query"),
-    title="Text-to-SQL Generator",
-    description=(
-        "Enter a natural language question related to your database, and this tool "
-        "will generate the corresponding SQL query. Ensure that 'prompt.md' and "
-        "'metadata.sql' are correctly set up in the application directory."
-    ),
-    examples=[
-        ["Do we get more sales from customers in New York compared to customers in San Francisco? Give me the total sales for each city, and the difference between the two."]
-    ],
-    allow_flagging="never"
-)
-
-if __name__ == "__main__":
-    iface.launch()
+import gradio as gr
+import torch
+import pyperclip
+import openai
+import os
+import pandas as pd
+from sqlalchemy import create_engine, inspect
+from llama_index.legacy import (
+    VectorStoreIndex,
+    SQLDatabase,
+    ServiceContext,
+)
+from llama_index.legacy.indices.struct_store import NLSQLTableQueryEngine
+from llama_index.legacy.llms import OpenAI
+import sqlite3
+
+# Set up the OpenAI API key (placeholder: supply a real key before launching)
+os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"
+
+
+# Function to load the database and the LLM
+def load_db_llm():
+    engine = create_engine("sqlite:///gov-contracts.db")
+    sql_database = SQLDatabase(engine)
+    llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-1106")
+    service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
+    return sql_database, service_context, engine
+
+
+# Load the LLM and database context once at startup
+sql_database, service_context, engine = load_db_llm()
+query_engine = NLSQLTableQueryEngine(
+    sql_database=sql_database, synthesize_response=True, service_context=service_context
+)
+
+# Inspect the schema to list the available tables
+inspector = inspect(engine)
+table_names = inspector.get_table_names()
+
+
+# Load a table's contents into a DataFrame (defined here but not yet wired into the UI)
+def get_table_data(table_name):
+    conn = sqlite3.connect('gov-contracts.db')
+    query = f"SELECT * FROM {table_name}"  # table_name comes from the schema inspector, not raw user input
+    df = pd.read_sql_query(query, conn)
+    conn.close()
+    return df
+
+
+# Chat-based interaction for Gradio
+def generate_response(user_input, selected_table=None, example_prompt=None):
+    # selected_table is accepted from the UI but unused; the query engine infers tables itself
+    if example_prompt:
+        user_input = example_prompt
+
+    response = query_engine.query(f"User Question: {user_input}")
+    sql_query = f"```sql\n{response.metadata['sql_query']}\n```\n**Response:**\n{response.response}\n"
+    try:
+        pyperclip.copy(sql_query)  # Optional: copy to clipboard; unavailable on headless servers
+    except pyperclip.PyperclipException:
+        pass
+    return sql_query
+
+
+# Define the Gradio app layout and components
+with gr.Blocks() as gradio_app:
+    gr.Markdown("## Natural Language to SQL Query Application")
+    gr.Markdown("### Ask a question about the data in the database to receive a precise SQL query.")
+
+    # Sidebar: database schema and example prompts
+    with gr.Row():
+        with gr.Column():
+            table_dropdown = gr.Dropdown(choices=table_names, label="Select a Table")
+            example_prompt_box = gr.Radio(
+                choices=[
+                    "Return the department_ind_agency and the sum of award in descending order",
+                    "Return the sum of award in descending order grouped by type limited to the top 10",
+                    "Return the sum of award by year where the sub_tier is the FEDERAL ACQUISITION SERVICE"
+                ],
+                label="Select an Example Prompt"
+            )
+            query_btn = gr.Button("Generate Query")
+
+        with gr.Column():
+            user_query = gr.Textbox(
+                label="Enter your natural language query about the database",
+                placeholder="Ask your question here..."
+            )
+            chat_output = gr.Textbox(
+                label="Generated SQL Query",
+                placeholder="SQL query will appear here..."
+            )
+
+    # Function to call on click
+    def query_callback(user_input, table_name, example_prompt):
+        return generate_response(user_input, selected_table=table_name, example_prompt=example_prompt)
+
+    # Button click event
+    query_btn.click(query_callback, inputs=[user_query, table_dropdown, example_prompt_box], outputs=chat_output)
+
+    gr.Markdown("#### Created by Leonce Nshuti")
+    gr.Markdown("""
+    - [LinkedIn](https://www.linkedin.com/in/leoncenshuti/)
+    - [GitHub](https://github.com/LNshuti)
+    """)
+
+if __name__ == "__main__":
+    gradio_app.launch()
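
One configuration note on the new version: `YOUR_OPENAI_API_KEY` is a placeholder, and assigning a real key into `os.environ` inside `app.py` would expose it in the repository. A minimal sketch of the usual alternative, assuming the key is supplied as an environment variable (e.g., a Space secret named `OPENAI_API_KEY`, which both the openai client and llama_index fall back to):

```python
import os

# Read the key from the environment (e.g., a Hugging Face Space secret)
# instead of hardcoding it in app.py; fail fast with a clear message if absent.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; add it as a secret before launching.")
```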