Mubbashir Ahmed committed
Commit dccb5da · 1 Parent(s): 26fe788

added prompting

Files changed (1)
  1. app.py +63 -10
app.py CHANGED
@@ -4,6 +4,7 @@ import time
import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
+ import json

# ------------------------
# Auth
@@ -15,6 +16,22 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
# ------------------------
spider_dataset = load_dataset("spider", split="train")

+ # Load table schemas from Spider
+ with open("spider/tables.json", "r") as f:
+     tables_json = json.load(f)
+
+ # Build db_id → schema_string mapping
+ def extract_schema(db_id):
+     for db in tables_json:
+         if db["db_id"] == db_id:
+             tables = []
+             for table_name, columns in zip(db["table_names_original"], db["column_names_original"]):
+                 col_list = [col[1] for col in db["column_names_original"] if col[0] == db["table_names_original"].index(table_name)]
+                 table_def = f"{table_name}({', '.join(col for col in col_list if col != '*')})"
+                 tables.append(table_def)
+             return "\n".join(tables)
+     return "Schema not found."
+
# ------------------------
# Inference Clients
# ------------------------
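Note on the extract_schema added above: it builds one table(col1, col2, ...) line per table, but the zip over table_names_original and column_names_original is only used for its table names, and table_names_original.index(table_name) is recomputed for every column. A rough equivalent that groups columns by their table index directly is sketched below; this is not part of the commit and assumes Spider's tables.json layout, where each entry of column_names_original is a [table_index, column_name] pair and the synthetic "*" column carries index -1.

import json

def extract_schema_by_index(tables_json_path, db_id):
    # Sketch only: group each column under its owning table index,
    # skipping the synthetic "*" entry (table index -1).
    with open(tables_json_path, "r") as f:
        tables_json = json.load(f)
    for db in tables_json:
        if db["db_id"] != db_id:
            continue
        columns_by_table = {i: [] for i in range(len(db["table_names_original"]))}
        for table_idx, col_name in db["column_names_original"]:
            if table_idx >= 0:
                columns_by_table[table_idx].append(col_name)
        return "\n".join(
            f"{name}({', '.join(cols)})"
            for name, cols in zip(db["table_names_original"], columns_by_table.values())
        )
    return "Schema not found."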
@@ -33,9 +50,42 @@ model_list = {
}

# ------------------------
- # Inference + Evaluation Logic
+ # Few-shot examples
+ # ------------------------
+ few_shot_examples = """Q: Show all department names.
+ A: SELECT name FROM department;
+
+ Q: Count number of students.
+ A: SELECT COUNT(*) FROM student;"""
+
+ # ------------------------
+ # Prompt Constructor
+ # ------------------------
+ def build_sql_prompt(user_question, db_id):
+     schema = extract_schema(db_id)
+     prompt = f"""You are an expert SQL assistant. Convert the given question into a valid SQL query using the database schema provided below.
+
+ Instructions:
+ - Respond with only the SQL query.
+ - Do not include markdown, explanations, or additional formatting.
+ - Use correct table and column names from the schema.
+ - Follow SQL best practices and Spider dataset formatting.
+
+ Schema (db_id: {db_id}):
+ {schema}
+
+ Examples:
+ {few_shot_examples}
+
+ Now answer this:
+ Q: {user_question}
+ A:"""
+     return prompt
+
+ # ------------------------
+ # Evaluate Models with Engineered Prompt
# ------------------------
- def evaluate_all_models(user_input, expected_sql, chat_history):
+ def evaluate_all_models(user_input, expected_sql, db_id, chat_history):
    evaluations = []
    full_chat_transcript = ""

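A quick way to eyeball the engineered prompt is to call the new constructor on a random Spider sample, for example from a Python session alongside app.py (a sketch; it assumes build_sql_prompt and the schema helpers defined above are available in the session):

import random
from datasets import load_dataset

spider_dataset = load_dataset("spider", split="train")
sample = random.choice(spider_dataset)

# build_sql_prompt is the constructor added in this commit; the printed prompt
# should contain the schema block, the two few-shot examples, and the final
# "Q: <question> / A:" stub that the model is asked to complete.
prompt = build_sql_prompt(sample["question"], sample["db_id"])
print(prompt)
print("gold SQL:", sample["query"])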
@@ -43,22 +93,23 @@ def evaluate_all_models(user_input, expected_sql, chat_history):
        client = model_config["client"]
        model_id = model_config["model_id"]

-         messages = chat_history + [{"role": "user", "content": user_input}]
+         prompt = build_sql_prompt(user_input, db_id)
+         messages = [{"role": "user", "content": prompt}]
+
        try:
            start_time = time.time()
-
            result = client.chat.completions.create(
                model=model_id,
                messages=messages
            )
-             model_sql = result.choices[0].message.content
+             model_sql = result.choices[0].message.content.strip()
            latency = int((time.time() - start_time) * 1000)

        except Exception as e:
            model_sql = f"⚠️ Error: {str(e)}"
            latency = -1

-         # Evaluation criteria (simulated)
+         # Evaluation criteria
        sql_gen_accuracy = "✅" if expected_sql.strip().lower() in model_sql.strip().lower() else "❌"
        exec_response_accuracy = "✅" if sql_gen_accuracy == "✅" else "❌"
        intent_clarity = "✅" if len(user_input.strip().split()) < 5 and "SELECT" in model_sql.upper() else "❌"
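The accuracy check above is a plain substring match on the raw completion, so a model that wraps its answer in markdown code fences or appends a trailing semicolon is scored as wrong even when the SQL is identical. A small normalizer along the following lines could make the comparison less brittle; this is a sketch, not part of the commit, and normalize_sql / loose_match are hypothetical helper names.

import re

def normalize_sql(text):
    # Strip markdown code fences and an optional leading "sql" tag, drop a
    # trailing semicolon, then collapse whitespace and lowercase the result.
    text = text.strip()
    text = re.sub(r"^```(?:sql)?", "", text, flags=re.IGNORECASE)
    text = re.sub(r"```$", "", text)
    text = text.strip().rstrip(";")
    return re.sub(r"\s+", " ", text).strip().lower()

def loose_match(expected_sql, model_sql):
    # e.g. loose_match("SELECT name FROM department",
    #                  "```sql\nSELECT name\nFROM department;\n```") -> True
    return normalize_sql(expected_sql) in normalize_sql(model_sql)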
@@ -84,16 +135,17 @@ def evaluate_all_models(user_input, expected_sql, chat_history):
# ------------------------
def get_random_spider_prompt():
    sample = random.choice(spider_dataset)
-     return sample["question"], sample["query"], sample["query"]
+     return sample["question"], sample["query"], sample["query"], sample["db_id"]

# ------------------------
# Gradio UI
# ------------------------
with gr.Blocks() as demo:
-     gr.Markdown("## 🧠 Spider Dataset Model Evaluation")
+     gr.Markdown("## 🧠 Advanced SQL Generation Evaluation (Spider Dataset + Prompt Engineering)")

    prompt_input = gr.Textbox(label="Your Prompt", lines=3, placeholder="Ask your BI question...")
    expected_sql_display = gr.Textbox(label="Expected SQL", lines=2, interactive=False)
+     dbid_display = gr.Textbox(label="DB ID", lines=1, interactive=False)

    load_spider_btn = gr.Button("🔀 Load Random Spider Prompt")
    run_button = gr.Button("Send & Evaluate All Models")
@@ -103,16 +155,17 @@ with gr.Blocks() as demo:

    chat_memory = gr.State([])
    expected_sql = gr.State("")
+     db_id = gr.State("")

    load_spider_btn.click(
        fn=get_random_spider_prompt,
        inputs=[],
-         outputs=[prompt_input, expected_sql, expected_sql_display]
+         outputs=[prompt_input, expected_sql, expected_sql_display, db_id, dbid_display]
    )

    run_button.click(
        fn=evaluate_all_models,
-         inputs=[prompt_input, expected_sql, chat_memory],
+         inputs=[prompt_input, expected_sql, db_id, chat_memory],
        outputs=[chat_display, chat_memory, evaluation_display]
    )
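One thing worth flagging in the wiring above: load_spider_btn.click now lists five outputs (prompt_input, expected_sql, expected_sql_display, db_id, dbid_display), but get_random_spider_prompt returns only four values, so Gradio will complain about a missing output value when the button is clicked. A minimal fix, sketched here rather than part of the committed code, is to return the db_id twice, once for the gr.State and once for the read-only textbox (random and spider_dataset as defined in app.py):

def get_random_spider_prompt():
    sample = random.choice(spider_dataset)
    return (
        sample["question"],  # -> prompt_input
        sample["query"],     # -> expected_sql (gr.State)
        sample["query"],     # -> expected_sql_display
        sample["db_id"],     # -> db_id (gr.State)
        sample["db_id"],     # -> dbid_display
    )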
 
 