LeonceNsh committed on
Commit
06f01b3
·
verified ·
1 Parent(s): 456d6ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -93
app.py CHANGED
@@ -1,104 +1,93 @@
1
- import gradio as gr
2
- import torch
3
- import pyperclip
4
- import openai
5
- import os
6
- import pandas as pd
7
- from sqlalchemy import create_engine, inspect
8
- from llama_index.legacy import (
9
- VectorStoreIndex,
10
- SQLDatabase,
11
- ServiceContext,
12
- )
13
- from llama_index.legacy.indices.struct_store import NLSQLTableQueryEngine
14
- from llama_index.legacy.llms import OpenAI
15
- import sqlite3
16
-
17
- # Set up OpenAI API Key
18
- # Load the OpenAI API key from the environment variable
19
- openai.api_key = os.getenv("OPENAI_API_KEY")
20
 
21
- # Check if the API key is set
22
- if not openai.api_key:
23
- raise ValueError("OPENAI_API_KEY environment variable is not set")
24
-
25
- # Function to load database and LLM
26
- def load_db_llm():
27
- engine = create_engine("sqlite:///gov-contracts.db")
28
- sql_database = SQLDatabase(engine)
29
- llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo-1106")
30
- service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
31
- return sql_database, service_context, engine
32
 
33
- # Load LLM and database context
34
- sql_database, service_context, engine = load_db_llm()
35
- query_engine = NLSQLTableQueryEngine(
36
- sql_database=sql_database, synthesize_response=True, service_context=service_context
37
  )
38
 
39
- # Initialize table schema and connection for query
40
- inspector = inspect(engine)
41
- table_names = inspector.get_table_names()
 
 
 
 
 
 
 
 
 
 
42
 
43
- # Load table data function
44
- def get_table_data(table_name):
45
- conn = sqlite3.connect('gov-contracts.db')
46
- query = f"SELECT * FROM {table_name}"
47
- df = pd.read_sql_query(query, conn)
48
- conn.close()
49
- return df
 
 
 
 
 
50
 
51
- # Chat-based interaction for Gradio
52
- def generate_response(user_input, selected_table=None, example_prompt=None):
53
- if example_prompt:
54
- user_input = example_prompt
55
-
56
- response = query_engine.query(f"User Question: {user_input}")
57
- sql_query = f"```sql\n{response.metadata['sql_query']}\n```\n**Response:**\n{response.response}\n"
58
- pyperclip.copy(sql_query) # Optional: Copy to clipboard
59
- return sql_query
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # Define Gradio app layout and components
62
- with gr.Blocks() as gradio_app:
63
- gr.Markdown("## Natural Language to SQL Query Application")
64
- gr.Markdown("### Ask a question about the data in the database to receive a precise SQL query.")
65
-
66
- # Sidebar: Database schema and example prompts
 
67
  with gr.Row():
68
  with gr.Column():
69
- table_dropdown = gr.Dropdown(choices=table_names, label="Select a Table")
70
- example_prompt_box = gr.Radio(
71
- choices=[
72
- "Return the department_ind_agency and the sum of award in descending order",
73
- "Return the sum of award in descending order grouped by type limited to the top 10",
74
- "Return the sum of award by year where the sub_tier is the FEDERAL ACQUISITION SERVICE"
75
- ],
76
- label="Select an Example Prompt"
77
- )
78
- query_btn = gr.Button("Generate Query")
79
-
80
- with gr.Column():
81
- user_query = gr.Textbox(
82
- label="Enter your natural language query about the database",
83
- placeholder="Ask your question here..."
84
- )
85
- chat_output = gr.Textbox(
86
- label="Generated SQL Query",
87
- placeholder="SQL query will appear here..."
88
- )
89
-
90
- # Function to call on click
91
- def query_callback(user_input, table_name, example_prompt):
92
- return generate_response(user_input, selected_table=table_name, example_prompt=example_prompt)
93
-
94
- # Button click event
95
- query_btn.click(query_callback, inputs=[user_query, table_dropdown, example_prompt_box], outputs=chat_output)
96
 
97
- gr.Markdown("#### Created by Leonce Nshuti")
98
- gr.Markdown("""
99
- - [LinkedIn](https://www.linkedin.com/in/leoncenshuti/)
100
- - [GitHub](https://github.com/LNshuti)
101
- """)
102
 
103
- if __name__ == "__main__":
104
- gradio_app.launch()
 
1
# Dependencies: gradio, requests, duckdb, huggingface_hub.
# Install them before starting the app, e.g. from a shell:
#     pip install gradio requests duckdb huggingface_hub
# NOTE: the IPython/Colab form `!pip install ...` is not valid Python and
# raises a SyntaxError when this file is executed as a plain script, so the
# install step is documented here instead of being a statement.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ import json
5
+ import os
6
+ import urllib.parse
7
+ import gradio as gr
8
+ import requests
9
+ from huggingface_hub import InferenceClient
 
 
 
 
 
10
 
11
# InferenceClient setup. The Hugging Face access token is read from the
# HF_TOKEN environment variable, which must be set before the app starts.
_hf_token = os.environ.get("HF_TOKEN")
if not _hf_token:
    # Fail fast with an actionable message rather than a bare
    # KeyError('HF_TOKEN'); an empty value is treated the same as a missing one.
    raise KeyError("HF_TOKEN environment variable is not set")

# Shared chat client used by query_dataset().
client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=_hf_token,
)
16
 
17
+ # Function to generate iframe for dataset viewer
18
+ def get_iframe(hub_repo_id, sql_query=None):
19
+ if not hub_repo_id:
20
+ raise ValueError("Hub repo id is required")
21
+ if sql_query:
22
+ sql_query = urllib.parse.quote(sql_query)
23
+ url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer?sql_console=true&sql={sql_query}"
24
+ else:
25
+ url = f"https://huggingface.co/datasets/{hub_repo_id}/embed/viewer"
26
+ iframe = f"""
27
+ <iframe src="{url}" frameborder="0" width="100%" height="800px"></iframe>
28
+ """
29
+ return iframe
30
 
31
+ # Function to fetch dataset column information
32
def get_column_info(hub_repo_id):
    """Fetch the feature (column) description of a Hub dataset as JSON text.

    Calls the ``/info`` endpoint of ``datasets-server.huggingface.co`` and
    serialises the ``features`` of the first entry found under
    ``dataset_info`` in the response.

    Args:
        hub_repo_id: Dataset repository id on the Hugging Face Hub.

    Returns:
        The features as a JSON string indented by two spaces, or a string
        starting with ``"Error getting column info:"`` when the request or
        the parsing of its response fails.
    """
    try:
        # Pass the id via ``params`` so it is URL-encoded, and bound the wait
        # so a stalled server cannot hang the UI callback indefinitely.
        response = requests.get(
            "https://datasets-server.huggingface.co/info",
            params={"dataset": hub_repo_id},
            timeout=30,
        )
        response.raise_for_status()
        dataset_info = response.json().get("dataset_info")
        if not dataset_info:
            raise ValueError("response contains no dataset_info")
        # Only the first entry of dataset_info is described.
        first_entry = next(iter(dataset_info.values()))
        features = json.dumps(first_entry.get("features"), indent=2)
    except (requests.RequestException, ValueError, AttributeError, TypeError) as e:
        # Errors are returned as text (not raised) so the caller can display them.
        return f"Error getting column info: {e}"
    return features
43
 
44
+ # Function to generate SQL query based on natural language input
45
def _extract_sql(text):
    """Return the SQL in a chat-model reply, without Markdown code fences."""
    import re  # local import: only this helper needs it

    text = (text or "").strip()
    # Matches the first ``` fence, an optional language-tag line, and captures
    # up to the closing fence (or end of text if the reply was cut short).
    fenced = re.search(
        r"```(?:[\w+-]*[ \t]*\r?\n)?(.*?)(?:```|\Z)", text, re.DOTALL
    )
    if fenced:
        return fenced.group(1).strip()
    return text


def query_dataset(hub_repo_id, features, query):
    """Turn a natural-language question into SQL and a viewer iframe.

    Sends the dataset features and the user's question to the module-level
    chat ``client`` and uses the reply as the SQL query.

    Args:
        hub_repo_id: Dataset repository id, forwarded to ``get_iframe``.
        features: Text describing the dataset features, inserted verbatim
            into the prompt.
        query: The user's natural-language question.

    Returns:
        A ``(sql, iframe_html)`` tuple: the SQL text taken from the model
        reply, and the HTML produced by ``get_iframe`` for that SQL.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a SQL query expert assistant that returns a DuckDB SQL query based on the user's natural language query and dataset features.",
        },
        {
            "role": "user",
            "content": f"table train\n# Features\n{features}\n# Query\n{query}\n",
        },
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=1000,
        stream=False,
    )
    # Chat models frequently wrap SQL in ```sql fences; passing those through
    # would put the fence characters into the viewer's SQL console.
    sql = _extract_sql(response.choices[0].message.content)
    return sql, get_iframe(hub_repo_id, sql)
68
 
69
+ # Gradio app UI
70
# NOTE(review): the source text had lost its indentation, so the nesting of
# the Row/Column containers below is reconstructed; confirm against the
# intended layout.
with gr.Blocks() as demo:
    # NOTE(review): the heading emoji were mis-encoded in the source. The llama
    # and hugging-face emoji decode unambiguously; the outer emoji was
    # truncated and is assumed to be the croissant; confirm.
    gr.Markdown(
        "\n"
        "# 🥐 🦙 🤗 Text To SQL Hub Datasets 🤗 🦙 🥐\n"
        "Use this tool to search and query datasets on Huggingface Hub.\n"
        "Built with DuckDB, Huggingface's Inference API, and LLaMA 3.1 70B.\n"
    )
    with gr.Row():
        with gr.Column():
            search_in = gr.Textbox(label="Search Huggingface Hub", placeholder="Search for datasets")
            query = gr.Textbox(label="Natural Language Query", placeholder="Enter a query to generate SQL")
            sql_out = gr.Code(label="SQL Query", language="sql")
    with gr.Row():
        btn = gr.Button("Show Dataset")
        btn2 = gr.Button("Query Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
        features = gr.Code(label="Features", language="json")

    # Event handling
    # "Show Dataset" embeds the viewer for the entered repo id ...
    btn.click(fn=get_iframe, inputs=[search_in], outputs=[search_out])
    # ... and also loads the dataset's features. Without this listener the
    # Features box is never filled, so query_dataset would prompt the model
    # with an empty schema.
    btn.click(fn=get_column_info, inputs=[search_in], outputs=[features])
    # "Query Dataset" generates SQL and reloads the viewer with it.
    btn2.click(fn=query_dataset, inputs=[search_in, features, query], outputs=[sql_out, search_out])

# Launch the app only when run as a script, so importing this module
# (e.g. for tooling or reload mode) does not start a server.
if __name__ == "__main__":
    demo.launch()