add config file for models | fix github link in intro
backend/config/__init__.py
ADDED

@@ -0,0 +1,3 @@
+"""
+Configuration module for the application
+"""
backend/config/models_config.py
ADDED

@@ -0,0 +1,38 @@
+"""
+Central configuration for models and providers
+
+This file centralizes all configuration related to the models and providers used in the application.
+"""
+
+# Preferred providers, used in get_available_model_provider.py
+PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
+
+# Default models to use for evaluation
+DEFAULT_EVALUATION_MODELS = [
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+]
+
+# Required model for create_bench_config_file.py (the single default model)
+DEFAULT_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+
+# Models by role for benchmark configuration
+# All roles use the default model except chunking
+MODEL_ROLES = {
+    "ingestion": [DEFAULT_MODEL],
+    "summarization": [DEFAULT_MODEL],
+    "chunking": ["intfloat/multilingual-e5-large-instruct"],
+    "single_shot_question_generation": [DEFAULT_MODEL],
+    "multi_hop_question_generation": [DEFAULT_MODEL],
+}
+
+# Default evaluation timeout (in seconds)
+DEFAULT_EVALUATION_TIMEOUT = 60.0
+
+# Default benchmark timeout (in seconds)
+DEFAULT_BENCHMARK_TIMEOUT = 300.0
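Note: with this module in place, every backend component reads model settings from one location. A minimal usage sketch (assuming backend/ is on the import path, as the imports in the diffs below imply):

from config.models_config import (
    DEFAULT_MODEL,
    DEFAULT_EVALUATION_MODELS,
    MODEL_ROLES,
    DEFAULT_BENCHMARK_TIMEOUT,
)

# Every role except chunking resolves to the single default model
assert MODEL_ROLES["ingestion"] == [DEFAULT_MODEL]

# Iterate the evaluation pool, e.g. to schedule one run per model
for model_name in DEFAULT_EVALUATION_MODELS:
    print(f"would evaluate {model_name} (benchmark timeout: {DEFAULT_BENCHMARK_TIMEOUT}s)")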
backend/routes/benchmark.py
CHANGED

@@ -177,7 +177,9 @@ class UnifiedBenchmarkTask:
         try:
             # Step 1: Configuration
             self._add_log("[INFO] Starting configuration process")
-            self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
+            # Import and use DEFAULT_BENCHMARK_TIMEOUT
+            from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
+            self.config_task = CreateBenchConfigTask(session_uid=self.session_uid, timeout=DEFAULT_BENCHMARK_TIMEOUT)

             # Execute the configuration task
             try:
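Note: the timeout is now injected at construction time, so the route no longer depends on a constant buried in the task module. Condensed, with a hypothetical session id for illustration:

from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
from tasks.create_bench_config_file import CreateBenchConfigTask

# The route passes the centralized timeout explicitly
config_task = CreateBenchConfigTask(session_uid="demo-session", timeout=DEFAULT_BENCHMARK_TIMEOUT)
assert config_task.timeout == 300.0  # DEFAULT_BENCHMARK_TIMEOUT from models_config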
backend/tasks/create_bench_config_file.py
CHANGED

@@ -14,6 +14,11 @@ from loguru import logger
 from huggingface_hub import HfApi

 from tasks.get_available_model_provider import get_available_model_provider
+from config.models_config import (
+    DEFAULT_MODEL,
+    MODEL_ROLES,
+    DEFAULT_BENCHMARK_TIMEOUT,
+)


 class CreateBenchConfigTask:
@@ -21,18 +26,20 @@ class CreateBenchConfigTask:
     Task to create and save a configuration file for YourbenchSimpleDemo
     """

-    def __init__(self, session_uid: Optional[str] = None):
+    def __init__(self, session_uid: Optional[str] = None, timeout: float = None):
         """
         Initialize the task with a session ID

         Args:
             session_uid: Optional session ID, will be generated if None
+            timeout: Timeout in seconds for benchmark operations (if None, uses default)
         """
         self.session_uid = session_uid or str(uuid.uuid4())
         self.logs: List[str] = []
         self.is_completed = False
         self.is_running_flag = threading.Event()
         self.thread = None
+        self.timeout = timeout if timeout is not None else DEFAULT_BENCHMARK_TIMEOUT
         self._add_log("[INFO] Initializing configuration creation task")

     def _add_log(self, message: str) -> None:
@@ -116,40 +123,22 @@
         if not hf_token:
             raise RuntimeError("HF_TOKEN environment variable is not defined")

-        # Get
-
-
-
-        required_models = [
-            # "Qwen/Qwen2.5-72B-Instruct"
-            # "meta-llama/Llama-3.1-8B-Instruct"
-            # "Qwen/Qwen2.5-32B-Instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-        ]
-
-        # Track found models
-        found_models = set()
-
-        for model_name in required_models:
-            provider = self.get_model_provider(model_name)
-            if provider:
-                model_list.append({
-                    "model_name": model_name,
-                    "provider": provider,
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                })
-                found_models.add(model_name)
-
-        # # Check if both required models are available
-        if len(found_models) < len(required_models):
-            missing_models = set(required_models) - found_models
-            missing_models_str = ", ".join(missing_models)
-            error_msg = f"Required models not available: {missing_models_str}. Cannot proceed with benchmark."
+        # Get provider for the default model
+        provider = self.get_model_provider(DEFAULT_MODEL)
+        if not provider:
+            error_msg = f"Required model not available: {DEFAULT_MODEL}. Cannot proceed with benchmark."
             self._add_log(f"[ERROR] {error_msg}")
             raise RuntimeError(error_msg)
+
+        # Create model configuration
+        model_list = [{
+            "model_name": DEFAULT_MODEL,
+            "provider": provider,
+            "api_key": "$HF_TOKEN",
+            "max_concurrent_requests": 32,
+        }]

-        #
+        # Add minimum delay of 2 seconds for provider_check stage
         self._add_log("[INFO] Finalizing provider check...")
         time.sleep(2)

@@ -158,36 +147,35 @@

         return {
             "hf_configuration": {
-                "token": "$HF_TOKEN",
+                "token": "$HF_TOKEN",
                 "hf_organization": "$HF_ORGANIZATION",
                 "private": True,
                 "hf_dataset_name": hf_dataset_name,
                 "concat_if_exist": False,
+                "timeout": self.timeout,  # Add timeout to configuration
             },
             "model_list": model_list,

-            "model_roles": {
-                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-            },
+            "model_roles": MODEL_ROLES,
             "pipeline": {
                 "ingestion": {
                     "source_documents_dir": f"uploaded_files/{self.session_uid}/uploaded_files/",
                     "output_dir": f"uploaded_files/{self.session_uid}/ingested",
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to ingestion
                 },
                 "upload_ingest_to_hub": {
                     "source_documents_dir": f"uploaded_files/{self.session_uid}/ingested",
-                    "run": True,
+                    "run": True,
+                    "timeout": self.timeout,  # Add timeout to upload
                 },
                 "summarization": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to summarization
                 },
                 "chunking": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to chunking
                     "chunking_configuration": {
                         "l_min_tokens": 64,
                         "l_max_tokens": 128,
@@ -199,6 +187,7 @@
                 },
                 "single_shot_question_generation": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to question generation
                     "additional_instructions": "Generate rich and creative questions to test a curious adult",
                     "chunk_sampling": {
                         "mode": "count",
@@ -208,9 +197,11 @@
                 },
                 "multi_hop_question_generation": {
                     "run": False,
+                    "timeout": self.timeout,  # Add timeout to multi-hop question generation
                 },
                 "lighteval": {
                     "run": False,
+                    "timeout": self.timeout,  # Add timeout to lighteval
                 },
             },
         }
@@ -310,17 +301,22 @@
             self.mark_task_completed()
             raise RuntimeError(error_msg)

-    def run(self, file_path: str, token: Optional[str] = None) -> str:
+    def run(self, file_path: str, token: Optional[str] = None, timeout: Optional[float] = None) -> str:
         """
         Run the task to create and save the configuration file asynchronously

         Args:
             file_path: Path to the uploaded file
             token: Hugging Face token (not used, using HF_TOKEN from environment)
+            timeout: Timeout in seconds for benchmark operations (if None, uses default)

         Returns:
             Path to the configuration file
         """
+        # Update timeout if provided
+        if timeout is not None:
+            self.timeout = timeout
+
         # Mark the task as running
         self.is_running_flag.set()

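Note: callers can still override the timeout per run, since run() updates self.timeout before the work starts. A hypothetical invocation (the file path is illustrative only):

from tasks.create_bench_config_file import CreateBenchConfigTask

task = CreateBenchConfigTask(session_uid="demo-session")  # timeout falls back to DEFAULT_BENCHMARK_TIMEOUT (300.0)
config_path = task.run(
    file_path="uploaded_files/demo-session/uploaded_files/example.pdf",  # hypothetical upload
    timeout=120.0,  # per-run override, as added in this commit
)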
backend/tasks/evaluation_task.py
CHANGED

@@ -16,19 +16,7 @@ from tasks.get_available_model_provider import get_available_model_provider
 from huggingface_hub import HfApi
 import asyncio
 from datasets import load_dataset
-
-DEFAULT_EVALUATION_TIMEOUT = 60.0  # 1 minute by default
-
-# Models to evaluate - only accessible models
-DEFAULT_EVALUATION_MODELS = [
-    "Qwen/QwQ-32B",
-    "Qwen/Qwen2.5-72B-Instruct",
-    "Qwen/Qwen2.5-32B-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-    "mistralai/Mistral-Small-24B-Instruct-2501",
-]
+from config.models_config import DEFAULT_EVALUATION_MODELS, DEFAULT_EVALUATION_TIMEOUT

 class EvaluationTask:
     """
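Note: the body of EvaluationTask is unchanged here; only the constants moved. Purely as a hypothetical illustration of how a constant like DEFAULT_EVALUATION_TIMEOUT is typically consumed (the file already imports asyncio; bounded() is not part of the actual code):

import asyncio
from config.models_config import DEFAULT_EVALUATION_TIMEOUT

async def bounded(coro):
    # Cap a single model evaluation; asyncio.TimeoutError signals a slow provider
    return await asyncio.wait_for(coro, timeout=DEFAULT_EVALUATION_TIMEOUT)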
backend/tasks/get_available_model_provider.py
CHANGED

@@ -3,13 +3,11 @@ import logging
 import json
 from huggingface_hub import model_info, InferenceClient
 from dotenv import load_dotenv
+from config.models_config import PREFERRED_PROVIDERS

 # Load environment variables once at the module level
 load_dotenv()

-# Define preferred providers
-PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
-
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
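Note: the provider-selection logic itself is untouched; only the constant's home changed. As a sketch of how a preference list like this is commonly applied (order_by_preference and available_providers are hypothetical, standing in for the module's actual handling of model_info results):

from config.models_config import PREFERRED_PROVIDERS

def order_by_preference(available_providers: list[str]) -> list[str]:
    # Preferred providers first, in PREFERRED_PROVIDERS order, then the rest
    preferred = [p for p in PREFERRED_PROVIDERS if p in available_providers]
    others = [p for p in available_providers if p not in PREFERRED_PROVIDERS]
    return preferred + others

print(order_by_preference(["hf-inference", "sambanova"]))  # ['sambanova', 'hf-inference']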