add config file for models | fix github link in intro
backend/config/__init__.py
ADDED

@@ -0,0 +1,3 @@
+"""
+Configuration module for the application
+"""
backend/config/models_config.py
ADDED

@@ -0,0 +1,38 @@
+"""
+Central configuration for models and providers
+
+This file centralizes all configuration related to the models and providers used in the application.
+"""
+
+# Preferred providers, used in get_available_model_provider.py
+PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
+
+# Default models to use for evaluation
+DEFAULT_EVALUATION_MODELS = [
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+]
+
+# Required model for create_bench_config_file.py (the single default model)
+DEFAULT_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+
+# Models by role for benchmark configuration
+# All roles use the default model except chunking
+MODEL_ROLES = {
+    "ingestion": [DEFAULT_MODEL],
+    "summarization": [DEFAULT_MODEL],
+    "chunking": ["intfloat/multilingual-e5-large-instruct"],
+    "single_shot_question_generation": [DEFAULT_MODEL],
+    "multi_hop_question_generation": [DEFAULT_MODEL],
+}
+
+# Default evaluation timeout (in seconds)
+DEFAULT_EVALUATION_TIMEOUT = 60.0
+
+# Default benchmark timeout (in seconds)
+DEFAULT_BENCHMARK_TIMEOUT = 300.0
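Note: with this module in place, every backend component reads model settings from one location. A minimal usage sketch (assuming backend/ is on the import path, as the imports in the diffs below imply):

from config.models_config import (
    DEFAULT_MODEL,
    DEFAULT_EVALUATION_MODELS,
    MODEL_ROLES,
    DEFAULT_BENCHMARK_TIMEOUT,
)

# Every role except chunking resolves to the single default model
assert MODEL_ROLES["ingestion"] == [DEFAULT_MODEL]

# Iterate the evaluation pool, e.g. to schedule one run per model
for model_name in DEFAULT_EVALUATION_MODELS:
    print(f"would evaluate {model_name} (benchmark timeout: {DEFAULT_BENCHMARK_TIMEOUT}s)")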
backend/routes/benchmark.py
CHANGED

@@ -177,7 +177,9 @@ class UnifiedBenchmarkTask:
         try:
             # Step 1: Configuration
             self._add_log("[INFO] Starting configuration process")
-            self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
+            # Import and use DEFAULT_BENCHMARK_TIMEOUT
+            from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
+            self.config_task = CreateBenchConfigTask(session_uid=self.session_uid, timeout=DEFAULT_BENCHMARK_TIMEOUT)

             # Execute the configuration task
             try:
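Note: the timeout is now injected at construction time, so the route no longer depends on a constant buried in the task module. Condensed, with a hypothetical session id for illustration:

from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
from tasks.create_bench_config_file import CreateBenchConfigTask

# The route passes the centralized timeout explicitly
config_task = CreateBenchConfigTask(session_uid="demo-session", timeout=DEFAULT_BENCHMARK_TIMEOUT)
assert config_task.timeout == 300.0  # DEFAULT_BENCHMARK_TIMEOUT from models_config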
backend/tasks/create_bench_config_file.py
CHANGED

@@ -14,6 +14,11 @@ from loguru import logger
 from huggingface_hub import HfApi

 from tasks.get_available_model_provider import get_available_model_provider
+from config.models_config import (
+    DEFAULT_MODEL,
+    MODEL_ROLES,
+    DEFAULT_BENCHMARK_TIMEOUT,
+)


 class CreateBenchConfigTask:
@@ -21,18 +26,20 @@ class CreateBenchConfigTask:
     Task to create and save a configuration file for YourbenchSimpleDemo
     """

-    def __init__(self, session_uid: Optional[str] = None):
+    def __init__(self, session_uid: Optional[str] = None, timeout: float = None):
         """
         Initialize the task with a session ID

         Args:
             session_uid: Optional session ID, will be generated if None
+            timeout: Timeout in seconds for benchmark operations (if None, uses default)
         """
         self.session_uid = session_uid or str(uuid.uuid4())
         self.logs: List[str] = []
         self.is_completed = False
         self.is_running_flag = threading.Event()
         self.thread = None
+        self.timeout = timeout if timeout is not None else DEFAULT_BENCHMARK_TIMEOUT
         self._add_log("[INFO] Initializing configuration creation task")

     def _add_log(self, message: str) -> None:
@@ -116,40 +123,22 @@
         if not hf_token:
             raise RuntimeError("HF_TOKEN environment variable is not defined")

-        # Get
-
-
-
-        required_models = [
-            # "Qwen/Qwen2.5-72B-Instruct"
-            # "meta-llama/Llama-3.1-8B-Instruct"
-            # "Qwen/Qwen2.5-32B-Instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-        ]
-
-        # Track found models
-        found_models = set()
-
-        for model_name in required_models:
-            provider = self.get_model_provider(model_name)
-            if provider:
-                model_list.append({
-                    "model_name": model_name,
-                    "provider": provider,
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                })
-                found_models.add(model_name)
-
-        # # Check if both required models are available
-        if len(found_models) < len(required_models):
-            missing_models = set(required_models) - found_models
-            missing_models_str = ", ".join(missing_models)
-            error_msg = f"Required models not available: {missing_models_str}. Cannot proceed with benchmark."
+        # Get provider for the default model
+        provider = self.get_model_provider(DEFAULT_MODEL)
+        if not provider:
+            error_msg = f"Required model not available: {DEFAULT_MODEL}. Cannot proceed with benchmark."
             self._add_log(f"[ERROR] {error_msg}")
             raise RuntimeError(error_msg)
+
+        # Create model configuration
+        model_list = [{
+            "model_name": DEFAULT_MODEL,
+            "provider": provider,
+            "api_key": "$HF_TOKEN",
+            "max_concurrent_requests": 32,
+        }]

-        #
+        # Add minimum delay of 2 seconds for provider_check stage
         self._add_log("[INFO] Finalizing provider check...")
         time.sleep(2)

@@ -158,36 +147,35 @@

         return {
             "hf_configuration": {
-                "token": "$HF_TOKEN",
+                "token": "$HF_TOKEN",
                 "hf_organization": "$HF_ORGANIZATION",
                 "private": True,
                 "hf_dataset_name": hf_dataset_name,
                 "concat_if_exist": False,
+                "timeout": self.timeout,  # Add timeout to configuration
             },
             "model_list": model_list,

-            "model_roles": {
-                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-            },
+            "model_roles": MODEL_ROLES,
             "pipeline": {
                 "ingestion": {
                     "source_documents_dir": f"uploaded_files/{self.session_uid}/uploaded_files/",
                     "output_dir": f"uploaded_files/{self.session_uid}/ingested",
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to ingestion
                 },
                 "upload_ingest_to_hub": {
                     "source_documents_dir": f"uploaded_files/{self.session_uid}/ingested",
-                    "run": True,
+                    "run": True,
+                    "timeout": self.timeout,  # Add timeout to upload
                 },
                 "summarization": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to summarization
                 },
                 "chunking": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to chunking
                     "chunking_configuration": {
                         "l_min_tokens": 64,
                         "l_max_tokens": 128,
@@ -199,6 +187,7 @@
                 },
                 "single_shot_question_generation": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to question generation
                     "additional_instructions": "Generate rich and creative questions to test a curious adult",
                     "chunk_sampling": {
                         "mode": "count",
@@ -208,9 +197,11 @@
                 },
                 "multi_hop_question_generation": {
                     "run": False,
+                    "timeout": self.timeout,  # Add timeout to multi-hop question generation
                 },
                 "lighteval": {
                     "run": False,
+                    "timeout": self.timeout,  # Add timeout to lighteval
                 },
             },
         }
@@ -310,17 +301,22 @@
             self.mark_task_completed()
             raise RuntimeError(error_msg)

-    def run(self, file_path: str, token: Optional[str] = None) -> str:
+    def run(self, file_path: str, token: Optional[str] = None, timeout: Optional[float] = None) -> str:
         """
         Run the task to create and save the configuration file asynchronously

         Args:
             file_path: Path to the uploaded file
             token: Hugging Face token (not used, using HF_TOKEN from environment)
+            timeout: Timeout in seconds for benchmark operations (if None, uses default)

         Returns:
             Path to the configuration file
         """
+        # Update timeout if provided
+        if timeout is not None:
+            self.timeout = timeout
+
         # Mark the task as running
         self.is_running_flag.set()

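Note: callers can still override the timeout per run, since run() updates self.timeout before the work starts. A hypothetical invocation (the file path is illustrative only):

from tasks.create_bench_config_file import CreateBenchConfigTask

task = CreateBenchConfigTask(session_uid="demo-session")  # timeout falls back to DEFAULT_BENCHMARK_TIMEOUT (300.0)
config_path = task.run(
    file_path="uploaded_files/demo-session/uploaded_files/example.pdf",  # hypothetical upload
    timeout=120.0,  # per-run override, as added in this commit
)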
backend/tasks/evaluation_task.py
CHANGED

@@ -16,19 +16,7 @@ from tasks.get_available_model_provider import get_available_model_provider
 from huggingface_hub import HfApi
 import asyncio
 from datasets import load_dataset
-
-DEFAULT_EVALUATION_TIMEOUT = 60.0  # 1 minute by default
-
-# Models to evaluate - only accessible models
-DEFAULT_EVALUATION_MODELS = [
-    "Qwen/QwQ-32B",
-    "Qwen/Qwen2.5-72B-Instruct",
-    "Qwen/Qwen2.5-32B-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-    "mistralai/Mistral-Small-24B-Instruct-2501",
-]
+from config.models_config import DEFAULT_EVALUATION_MODELS, DEFAULT_EVALUATION_TIMEOUT

 class EvaluationTask:
     """
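Note: the body of EvaluationTask is unchanged here; only the constants moved. Purely as a hypothetical illustration of how a constant like DEFAULT_EVALUATION_TIMEOUT is typically consumed (the file already imports asyncio; bounded() is not part of the actual code):

import asyncio
from config.models_config import DEFAULT_EVALUATION_TIMEOUT

async def bounded(coro):
    # Cap a single model evaluation; asyncio.TimeoutError signals a slow provider
    return await asyncio.wait_for(coro, timeout=DEFAULT_EVALUATION_TIMEOUT)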
backend/tasks/get_available_model_provider.py
CHANGED

@@ -3,13 +3,11 @@ import logging
 import json
 from huggingface_hub import model_info, InferenceClient
 from dotenv import load_dotenv
+from config.models_config import PREFERRED_PROVIDERS

 # Load environment variables once at the module level
 load_dotenv()

-# Define preferred providers
-PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
-
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
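Note: the provider-selection logic itself is untouched; only the constant's home changed. As a sketch of how a preference list like this is commonly applied (order_by_preference and available_providers are hypothetical, standing in for the module's actual handling of model_info results):

from config.models_config import PREFERRED_PROVIDERS

def order_by_preference(available_providers: list[str]) -> list[str]:
    # Preferred providers first, in PREFERRED_PROVIDERS order, then the rest
    preferred = [p for p in PREFERRED_PROVIDERS if p in available_providers]
    others = [p for p in available_providers if p not in PREFERRED_PROVIDERS]
    return preferred + others

print(order_by_preference(["hf-inference", "sambanova"]))  # ['sambanova', 'hf-inference']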