Mubbashir Ahmed committed
Commit 6725b24 · 1 Parent(s): 1aa45ea
Files changed (1)
  1. app.py +63 -73
app.py CHANGED
@@ -1,137 +1,127 @@
  import os
  import random
  import gradio as gr
  from huggingface_hub import InferenceClient
  from datasets import load_dataset
- # from transformers import AutoTokenizer, AutoModelForCausalLM
- # import torch
-
- HF_TOKEN = os.environ.get("HF_TOKEN")

  # ------------------------
- # Load Spider Dataset (Hugging Face Datasets)
  # ------------------------
- spider_dataset = load_dataset("spider", split="train")

  # ------------------------
- # API Clients
  # ------------------------
- llama_client = InferenceClient(
-     provider="fireworks-ai",
-     api_key=HF_TOKEN,
- )
-
- qwen_client = InferenceClient(
-     provider="featherless-ai",
-     api_key=HF_TOKEN,
- )

  # ------------------------
- # Mixtral Local Setup (DISABLED)
  # ------------------------
- # mixtral_model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
- # mixtral_tokenizer = AutoTokenizer.from_pretrained(mixtral_model_id)
- # mixtral_model = AutoModelForCausalLM.from_pretrained(
- #     mixtral_model_id, torch_dtype=torch.float16
- # ).to("cuda")

  # ------------------------
- # Unified Inference Function with Chat History
  # ------------------------
- def run_model_with_history(model_name, user_input, chat_history):
      messages = chat_history + [{"role": "user", "content": user_input}]

      try:
          if model_name == "LLaMA 4":
              result = llama_client.chat.completions.create(
                  model="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
                  messages=messages
              )
-             reply = result.choices[0].message.content

          elif model_name == "Qwen3 14B":
              result = qwen_client.chat.completions.create(
                  model="Qwen/Qwen3-14B",
                  messages=messages
              )
-             reply = result.choices[0].message.content
-
-         # Mixtral section disabled due to space constraints
-         # elif model_name == "Mixtral 8x7B":
-         #     full_prompt = ""
-         #     for msg in messages:
-         #         prefix = "User: " if msg["role"] == "user" else "Assistant: "
-         #         full_prompt += f"{prefix}{msg['content']}\n"
-         #     inputs = mixtral_tokenizer(full_prompt, return_tensors="pt").to("cuda")
-         #     outputs = mixtral_model.generate(
-         #         **inputs,
-         #         max_new_tokens=512,
-         #         do_sample=True,
-         #         temperature=0.7,
-         #         top_k=50,
-         #         top_p=0.95
-         #     )
-         #     reply = mixtral_tokenizer.decode(outputs[0], skip_special_tokens=True)

          else:
-             reply = "❌ Invalid model selection."

      except Exception as e:
-         reply = f"⚠️ Error: {str(e)}"

-     # Update chat history
      chat_history.append({"role": "user", "content": user_input})
-     chat_history.append({"role": "assistant", "content": reply})

-     # Format display
      chat_transcript = "\n".join([
          f"👤 User: {msg['content']}" if msg["role"] == "user" else f"🤖 Assistant: {msg['content']}"
          for msg in chat_history
      ])

-     return chat_transcript, chat_history

  # ------------------------
- # Get Random Spider Question
  # ------------------------
- def get_random_spider_question():
      sample = random.choice(spider_dataset)
-     return sample["question"]

  # ------------------------
  # Gradio UI
  # ------------------------
  with gr.Blocks() as demo:
-     gr.Markdown("## 🧠 Generative AI Model Evaluation with Context")

-     with gr.Row():
-         model_choice = gr.Dropdown(
-             choices=["LLaMA 4", "Qwen3 14B"],
-             label="Select Model",
-             value="LLaMA 4"
-         )
-         load_spider_btn = gr.Button("🔀 Load Random Spider Prompt")

-     chat_display = gr.Textbox(label="Chat History", lines=20, interactive=False)
      prompt_input = gr.Textbox(label="Your Prompt", lines=3, placeholder="Ask your BI question...")

-     run_button = gr.Button("Send")

-     # Hidden chat history state
      chat_memory = gr.State([])

-     run_button.click(
-         fn=run_model_with_history,
-         inputs=[model_choice, prompt_input, chat_memory],
-         outputs=[chat_display, chat_memory]
-     )
-
      load_spider_btn.click(
-         fn=get_random_spider_question,
          inputs=[],
-         outputs=prompt_input
      )
-
- # Launch app
  demo.launch()
 
  import os
  import random
+ import time
  import gradio as gr
  from huggingface_hub import InferenceClient
  from datasets import load_dataset

  # ------------------------
+ # Auth
  # ------------------------
+ HF_TOKEN = os.environ.get("HF_TOKEN")

  # ------------------------
+ # Load Spider Dataset
  # ------------------------
+ spider_dataset = load_dataset("spider", split="train")

  # ------------------------
+ # Inference Clients
  # ------------------------
+ llama_client = InferenceClient(provider="fireworks-ai", api_key=HF_TOKEN)
+ qwen_client = InferenceClient(provider="featherless-ai", api_key=HF_TOKEN)

  # ------------------------
+ # Inference + Evaluation Logic
  # ------------------------
+ def evaluate_model(model_name, user_input, expected_sql, chat_history):
      messages = chat_history + [{"role": "user", "content": user_input}]

      try:
+         start_time = time.time()
+
          if model_name == "LLaMA 4":
              result = llama_client.chat.completions.create(
                  model="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
                  messages=messages
              )
+             model_sql = result.choices[0].message.content

          elif model_name == "Qwen3 14B":
              result = qwen_client.chat.completions.create(
                  model="Qwen/Qwen3-14B",
                  messages=messages
              )
+             model_sql = result.choices[0].message.content

          else:
+             model_sql = "❌ Invalid model selected."
+
+         end_time = time.time()
+         latency = int((end_time - start_time) * 1000)  # ms

      except Exception as e:
+         model_sql = f"⚠️ Error: {str(e)}"
+         latency = -1
+
+     # Evaluation criteria (simulated; can be replaced with real validation)
+     sql_gen_accuracy = "✅" if expected_sql.strip().lower() in model_sql.strip().lower() else "❌"
+     exec_response_accuracy = "✅" if sql_gen_accuracy == "✅" else "❌"
+     intent_clarity = "✅" if len(user_input.strip().split()) < 5 and "SELECT" in model_sql.upper() else "❌"
+     semantic_clarity = "✅" if any(word in model_sql.lower() for word in ["from", "join", "group by"]) else "❌"
+     latency_status = "✅" if 0 <= latency <= 1000 else "❌"  # guard against latency = -1 on error
+
+     evaluation_summary = (
+         f"📊 **Evaluation Summary**\n"
+         f"- SQL Generation Match: {sql_gen_accuracy}\n"
+         f"- Execution Accuracy: {exec_response_accuracy}\n"
+         f"- Intent Clarification: {intent_clarity}\n"
+         f"- Semantic Mapping: {semantic_clarity}\n"
+         f"- Response Latency: {latency} ms ({latency_status})\n"
+     )

      chat_history.append({"role": "user", "content": user_input})
+     chat_history.append({"role": "assistant", "content": model_sql})

      chat_transcript = "\n".join([
          f"👤 User: {msg['content']}" if msg["role"] == "user" else f"🤖 Assistant: {msg['content']}"
          for msg in chat_history
      ])

+     return chat_transcript, chat_history, evaluation_summary

  # ------------------------
+ # Load Random Spider Prompt
  # ------------------------
+ def get_random_spider_prompt():
      sample = random.choice(spider_dataset)
+     return sample["question"], sample["query"], sample["query"]  # fills prompt box, gold-SQL state, and display box

  # ------------------------
  # Gradio UI
  # ------------------------
  with gr.Blocks() as demo:
+     gr.Markdown("## 🧠 Spider Dataset Model Evaluation")

+     model_choice = gr.Dropdown(
+         choices=["LLaMA 4", "Qwen3 14B"],
+         label="Select Model",
+         value="LLaMA 4"
+     )

      prompt_input = gr.Textbox(label="Your Prompt", lines=3, placeholder="Ask your BI question...")
+     expected_sql_display = gr.Textbox(label="Expected SQL", lines=2, interactive=False)

+     load_spider_btn = gr.Button("🔀 Load Random Spider Prompt")
+     run_button = gr.Button("Send & Evaluate")
+
+     chat_display = gr.Textbox(label="Chat History", lines=20, interactive=False)
+     evaluation_display = gr.Markdown()

      chat_memory = gr.State([])
+     expected_sql = gr.State("")

      load_spider_btn.click(
+         fn=get_random_spider_prompt,
          inputs=[],
+         outputs=[prompt_input, expected_sql, expected_sql_display]
+     )
+
+     run_button.click(
+         fn=evaluate_model,
+         inputs=[model_choice, prompt_input, expected_sql, chat_memory],
+         outputs=[chat_display, chat_memory, evaluation_display]
      )

+ # Launch
  demo.launch()
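
One caveat on the substring check in evaluate_model: chat models (Qwen3 in particular) usually wrap their SQL in explanation text or markdown fences, so `expected_sql in model_sql` will rarely fire even on a correct answer. A minimal pre-processing sketch, not part of this commit (the extract_sql name and both regexes are assumptions), that isolates the statement before any comparison:

import re

def extract_sql(reply: str) -> str:
    """Pull a bare SQL statement out of a chat-style model reply (hypothetical helper)."""
    # Prefer a fenced ```sql block if the model produced one
    fenced = re.search(r"```(?:sql)?\s*(.+?)```", reply, re.DOTALL | re.IGNORECASE)
    if fenced:
        return fenced.group(1).strip()
    # Otherwise take the first SELECT ... run, up to a blank line or end of text
    stmt = re.search(r"(SELECT\b.+?)(?:\n\s*\n|\Z)", reply, re.DOTALL | re.IGNORECASE)
    return stmt.group(1).strip() if stmt else reply.strip()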
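
As for the "real validation" the new evaluation-criteria comment alludes to, Spider's standard metric is execution accuracy: run predicted and gold queries against the example's SQLite database and compare result sets. Each sample carries a db_id, and the usual Spider distribution stores databases as database/<db_id>/<db_id>.sqlite; this Space does not bundle them, so the sketch below is hedged on that local layout:

import sqlite3

def execution_match(pred_sql: str, gold_sql: str, db_path: str) -> bool:
    """True when both queries return the same result set on one SQLite DB (sketch, not in the commit)."""
    conn = sqlite3.connect(db_path)
    try:
        pred_rows = conn.execute(pred_sql).fetchall()
        gold_rows = conn.execute(gold_sql).fetchall()
    except sqlite3.Error:
        return False  # predicted SQL failed to parse or execute
    finally:
        conn.close()
    if "order by" in gold_sql.lower():
        return pred_rows == gold_rows  # row order is part of the answer here
    return sorted(map(repr, pred_rows)) == sorted(map(repr, gold_rows))

Wired in, sql_gen_accuracy would become `"✅" if execution_match(extract_sql(model_sql), expected_sql, db_path) else "❌"`, with db_path threaded through a third gr.State the same way expected_sql already is.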