Spaces:

ulab-ai
/

RoutePilot

Running

App Files Files Community

cmulgy committed on Jun 27

Commit

3468847

1 Parent(s): be0b6e7

Fix dropdown functionality for Hugging Face Spaces - improve dataset loading with multiple fallback approaches and better error handling

Browse files

Files changed (2) hide show

.gitignore +5 -1
demo.py +107 -15

.gitignore CHANGED Viewed

@@ -59,4 +59,8 @@ logs/
 *.temp
 # Hugging Face Spaces specific
-.cache/

 *.temp
 # Hugging Face Spaces specific
+.cache/
+# HuggingFace datasets cache
+cache/
+.huggingface/

demo.py CHANGED Viewed

@@ -250,20 +250,51 @@ def get_template_subset_name(model_size: str, template_size: str) -> str:
 def load_template_dataset(model_size: str, template_size: str) -> pd.DataFrame:
     """
-    Load thought templates from HuggingFace dataset.
     """
     subset_name = get_template_subset_name(model_size, template_size)
-    try:
-        print(f"Loading templates from HuggingFace dataset: ulab-ai/FusionBench, subset: {subset_name}")
-        dataset = load_dataset("ulab-ai/FusionBench", subset_name)
-        template_df = pd.DataFrame(dataset['data'])
-        print(f"Loaded {len(template_df)} templates from {subset_name}")
-        return template_df
-    except Exception as e:
-        raise ValueError(f"Failed to load template dataset {subset_name}: {e}")
 def enhance_query_with_templates(
     model_size: str,
@@ -286,6 +317,11 @@ def enhance_query_with_templates(
     # Load template data from HuggingFace dataset
     template_df = load_template_dataset(model_size, template_size)
     # Generate embedding for the query if not provided
     if query_embedding is None:
         try:
@@ -296,8 +332,9 @@ def enhance_query_with_templates(
             return query, []
     # Filter templates by task description if provided
-    if task_description is None:
         matching_templates = template_df
     else:
         matching_templates = template_df[template_df['task_description'] == task_description]
@@ -307,15 +344,17 @@ def enhance_query_with_templates(
             if not partial_matches.empty:
                 matching_templates = partial_matches
-                print(f"Found partial matches for task: {task_description[:50]}...")
             else:
-                print(f"No matching templates found for task: {task_description[:50]}...")
                 matching_templates = template_df
     if matching_templates.empty:
         print("No matching templates found. Returning original query.")
         return query, []
     # Calculate similarities with template embeddings
     similarities = []
@@ -324,7 +363,11 @@ def enhance_query_with_templates(
         # Try to parse existing template embedding
         if 'query_embedding' in t_row and not pd.isna(t_row['query_embedding']):
-            template_embedding = parse_embedding(t_row['query_embedding'])
         # If no valid embedding found, generate one for the template query
         if template_embedding is None and 'query' in t_row:
@@ -992,6 +1035,55 @@ def process_thought_template_query(query, template_style, task_description, top_
         return error_msg, "", ""
 # Create Gradio interface
 def create_interface():
     with gr.Blocks(

 def load_template_dataset(model_size: str, template_size: str) -> pd.DataFrame:
     """
     Load thought templates from the HuggingFace dataset ulab-ai/FusionBench.

     The subset name is resolved with get_template_subset_name(). Loading is
     attempted up to three times, in order: default options, an explicit
     local cache directory ("./cache"), and streaming mode. The first attempt
     that yields a 'data' split wins.

     Args:
         model_size: Model size key forwarded to get_template_subset_name().
         template_size: Template size key forwarded to get_template_subset_name().

     Returns:
         A DataFrame built from the rows of the 'data' split. If every attempt
         fails, an empty DataFrame with the columns 'query', 'thought_template',
         'task_description' and 'query_embedding' is returned instead of
         raising, so callers must check `.empty`.
     """
     subset_name = get_template_subset_name(model_size, template_size)
     # Columns of the placeholder frame handed back when nothing could be loaded
     fallback_columns = ['query', 'thought_template', 'task_description', 'query_embedding']
     # Extra keyword arguments for each attempt, tried in this order
     load_attempts = [
         {},                        # Approach 1: direct load with default options
         {"cache_dir": "./cache"},  # Approach 2: explicit cache directory
         {"streaming": True},       # Approach 3: streaming (no full download)
     ]
     last_error = None
     for i, extra_kwargs in enumerate(load_attempts, 1):
         try:
             print(f"Attempting to load templates (approach {i}): ulab-ai/FusionBench, subset: {subset_name}")
             # NOTE(review): trust_remote_code=True allows the dataset repository to
             # execute its own loading script - confirm this is still required
             dataset = load_dataset("ulab-ai/FusionBench", subset_name, trust_remote_code=True, **extra_kwargs)
             # Materialise the split row by row; iterating is expected to work for
             # both the regular and the streaming result, so no type sniffing is needed
             template_df = pd.DataFrame(list(dataset['data']))
             print(f"✅ Successfully loaded {len(template_df)} templates from {subset_name}")
             return template_df
         except Exception as e:
             # Best effort: remember the failure and fall through to the next approach
             print(f"❌ Approach {i} failed: {str(e)}")
             last_error = e
     # Every approach failed: report the last error and degrade gracefully
     error_msg = f"Failed to load template dataset {subset_name} after trying {len(load_attempts)} approaches. Last error: {str(last_error)}"
     print(error_msg)
     print("⚠️ Returning empty template DataFrame - functionality will be limited")
     return pd.DataFrame(columns=fallback_columns)
 def enhance_query_with_templates(
     model_size: str,
     # Load template data from HuggingFace dataset
     template_df = load_template_dataset(model_size, template_size)
+    # Check if dataset is empty (failed to load)
+    if template_df.empty:
+        print("⚠️ Template dataset is empty - returning original query")
+        return query, []
     # Generate embedding for the query if not provided
     if query_embedding is None:
         try:
             return query, []
     # Filter templates by task description if provided
+    if task_description is None or not task_description.strip():
         matching_templates = template_df
+        print(f"Using all {len(matching_templates)} templates (no task filter)")
     else:
         matching_templates = template_df[template_df['task_description'] == task_description]
             if not partial_matches.empty:
                 matching_templates = partial_matches
+                print(f"Found partial matches for task: {task_description[:50]}... ({len(matching_templates)} templates)")
             else:
+                print(f"No matching templates found for task: {task_description[:50]}... - using all templates")
                 matching_templates = template_df
     if matching_templates.empty:
         print("No matching templates found. Returning original query.")
         return query, []
+    print(f"Processing {len(matching_templates)} templates for similarity calculation...")
     # Calculate similarities with template embeddings
     similarities = []
         # Try to parse existing template embedding
         if 'query_embedding' in t_row and not pd.isna(t_row['query_embedding']):
+            try:
+                template_embedding = parse_embedding(t_row['query_embedding'])
+            except Exception as e:
+                print(f"Failed to parse template embedding: {str(e)}")
+                template_embedding = None
         # If no valid embedding found, generate one for the template query
         if template_embedding is None and 'query' in t_row:
         return error_msg, "", ""
+# Test function to verify dropdown functionality
def test_dropdown_functionality():
    """
    Print the template-style and benchmark-task dropdown options.

    Returns:
        True, once every option has been printed.
    """
    print("Testing dropdown functionality...")
    # Template styles are keyed "<model_size>_<template_size>"
    style_mapping = {
        f"{model}_{size}": (model, size)
        for model in ("8b", "70b")
        for size in ("full", "small")
    }
    for style, sizes in style_mapping.items():
        model_size, template_size = sizes
        print(f"✅ Template style '{style}' maps to model_size='{model_size}', template_size='{template_size}'")
    # Benchmark task options: "All Tasks" pairs with an empty string,
    # every other entry pairs the task name with itself
    task_names = (
        "ARC-Challenge",
        "BoolQ",
        "CommonsenseQA",
        "GPQA",
        "GSM8K",
        "HellaSwag",
        "HumanEval",
        "MATH",
        "MBPP",
        "MMLU",
        "Natural Questions",
        "OpenBookQA",
        "SQuAD",
        "TriviaQA",
    )
    benchmark_tasks = [("All Tasks", "")]
    benchmark_tasks.extend((name, name) for name in task_names)
    print(f"✅ {len(benchmark_tasks)} benchmark task options available")
    return True
# Run the dropdown self-check at module load, whether executed or imported
try:
    test_dropdown_functionality()
except Exception as e:
    # Direct execution lets the failure propagate; an importer only sees a warning
    if __name__ == "__main__":
        raise
    print(f"Warning: Dropdown functionality test failed: {e}")
 # Create Gradio interface
 def create_interface():
     with gr.Blocks(