Commit 19098d4
Parent(s): d4d544b

Implement code changes to enhance functionality and improve performance

app.py CHANGED
@@ -15,13 +15,18 @@ from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from concurrent.futures import TimeoutError as FuturesTimeoutError
 from collections import defaultdict
+import tempfile  # Added for robust temporary directory management

 try:
     import google.generativeai as genai
-    from google.generativeai.types import GenerationConfig
+    from google.generativeai.types import GenerationConfig, HarmCategory, HarmBlockThreshold, FinishReason, HarmProbability
 except ImportError:
     genai = None
     GenerationConfig = None
+    HarmCategory = None  # Added for safety settings/finish reason details
+    HarmBlockThreshold = None  # Added for safety settings
+    FinishReason = None  # Added for checking candidate finish reason
+    HarmProbability = None  # Added for checking safety ratings probability
     print("WARNING: google-generativeai library not found. Install with: pip install google-generativeai")

 try:
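Note on the new imports: the commit guards HarmCategory and HarmBlockThreshold behind the same try/except as genai. A minimal standalone sketch of how such guarded types are typically used once the import succeeds; the specific categories and the BLOCK_MEDIUM_AND_ABOVE threshold are illustrative choices, not taken from this commit:

    # Sketch: guarded use of the newly imported safety types.
    try:
        import google.generativeai as genai
        from google.generativeai.types import HarmCategory, HarmBlockThreshold
    except ImportError:
        genai = HarmCategory = HarmBlockThreshold = None

    if genai and HarmCategory and HarmBlockThreshold:
        safety_settings = {
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        }
    else:
        safety_settings = None  # Degraded path when the library is unavailable.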
@@ -65,7 +70,7 @@ except ImportError:
     print("WARNING: librosa library not found. Audio processing may be impaired. Install with: pip install librosa")

 try:
-    import openpyxl
+    import openpyxl
 except ImportError:
     openpyxl = None
     print("WARNING: openpyxl library not found. .xlsx file processing might fail. Install with: pip install openpyxl")
@@ -103,28 +108,28 @@ GOOGLE_GEMINI_API_KEY = os.getenv("GOOGLE_GEMINI_API_KEY")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

 AGENT_DEFAULT_TIMEOUT = 15
-MAX_CONTEXT_LENGTH_LLM = 30000
+MAX_CONTEXT_LENGTH_LLM = 30000

-MAX_FILE_SIZE = 5 * 1024 * 1024
+MAX_FILE_SIZE = 5 * 1024 * 1024
 CSV_SAMPLE_ROWS = 10
-MAX_FILE_CONTEXT_LENGTH = 10000
+MAX_FILE_CONTEXT_LENGTH = 10000

 # Global instances for video analysis pipelines
 video_object_detector_pipeline: Optional[Any] = None
 video_vqa_pipeline: Optional[Any] = None  # Changed from species_classifier to VQA

-VIDEO_ANALYSIS_DEVICE: int = -1
+VIDEO_ANALYSIS_DEVICE: int = -1
 VIDEO_ANALYSIS_OBJECT_MODEL = "facebook/detr-resnet-50"
 VIDEO_ANALYSIS_VQA_MODEL = "Salesforce/blip-vqa-capfilt-large"  # Using a VQA model

-VIDEO_MAX_FRAMES_TO_PROCESS = 120
+VIDEO_MAX_FRAMES_TO_PROCESS = 120
 VIDEO_CONFIDENCE_THRESHOLD_BIRD = 0.6
 VIDEO_VQA_MIN_ANSWER_LENGTH = 3  # Minimum length for a VQA answer to be considered a species
-VIDEO_VQA_CONFIDENCE_THRESHOLD = 0.3
+VIDEO_VQA_CONFIDENCE_THRESHOLD = 0.3

 asr_pipeline_instance: Optional[Any] = None
-ASR_MODEL_NAME = "openai/whisper-tiny"
+ASR_MODEL_NAME = "openai/whisper-tiny"
-ASR_PROCESSING_TIMEOUT_SECONDS = 1024
+ASR_PROCESSING_TIMEOUT_SECONDS = 1024


 DEFAULT_RAG_CONFIG = {
@@ -136,7 +141,7 @@ DEFAULT_RAG_CONFIG = {
         'tavily_api_key': TAVILY_API_KEY,
         'default_max_results': 3, 'retry_attempts': 2, 'retry_delay': 2,
         'google_timeout': 8, 'tavily_depth': "basic",
-        'max_query_length_tavily': 380
+        'max_query_length_tavily': 380
     },
     'processing': {
         'trusted_sources': {'wikipedia.org': 0.8, 'reuters.com': 0.75, 'apnews.com': 0.75},
@@ -165,7 +170,7 @@ def _get_video_object_detector():
     # Simplified device selection, consistent with FileProcessor's ASR
     device_id = 0 if torch.cuda.is_available() else -1
     if VIDEO_ANALYSIS_DEVICE == -1: VIDEO_ANALYSIS_DEVICE = device_id  # Set global if not user-overridden
-
+
     target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id

     video_object_detector_pipeline = hf_transformers_pipeline(
@@ -185,7 +190,7 @@ def _get_video_vqa_pipeline():  # Renamed and changed to load VQA
     try:
         device_id = 0 if torch.cuda.is_available() else -1
         if VIDEO_ANALYSIS_DEVICE == -1: VIDEO_ANALYSIS_DEVICE = device_id
-
+
         target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id

         video_vqa_pipeline = hf_transformers_pipeline(
@@ -205,7 +210,7 @@ class FileProcessor:
         global asr_pipeline_instance
         if asr_pipeline_instance is None and hf_transformers_pipeline and torch:
             try:
-                device = -1
+                device = -1
                 asr_pipeline_instance = hf_transformers_pipeline(
                     "automatic-speech-recognition",
                     model=ASR_MODEL_NAME,
@@ -287,7 +292,7 @@ class FileProcessor:
                 f"Columns: {', '.join(df.columns)}\nFirst {min(CSV_SAMPLE_ROWS, len(df))} sample rows:\n{df.head(CSV_SAMPLE_ROWS).to_markdown(index=False)}"
             )
             return FileProcessor._truncate_text(summary, filename, "CSV")
-        except Exception as e:
+        except Exception as e:
             if "tabulate" in str(e).lower() and df is not None:
                 gaia_logger.error(f"CSV to_markdown error for '{filename}' (missing tabulate): {e}", exc_info=False)
                 try:
@@ -330,7 +335,7 @@ class FileProcessor:
                     break
                 except UnicodeDecodeError: continue
             if text is None: text = content.decode('utf-8', errors='ignore')
-
+
             summary = f"Text Document: '{filename}':\n{text}"
             return FileProcessor._truncate_text(summary, filename, "Text")
         except Exception as e:
@@ -341,13 +346,13 @@ class FileProcessor:
         gaia_logger.info(f"Processing Excel file: {filename}")
         if not openpyxl: return f"Error: Excel processing skipped for '{filename}', openpyxl library not available."
         xls = None
-        df_list_for_fallback = []
+        df_list_for_fallback = []
         try:
             xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
             summary_parts = [f"Excel Document Summary: '{filename}'"]
             for sheet_name in xls.sheet_names:
                 df = xls.parse(sheet_name)
-                df_list_for_fallback.append((sheet_name, df))
+                df_list_for_fallback.append((sheet_name, df))
                 sheet_summary = (
                     f"\n---\nSheet: '{sheet_name}' ({len(df)} rows, {len(df.columns)} columns):\n"
                     f"Columns: {', '.join(df.columns)}\nFirst {min(CSV_SAMPLE_ROWS, len(df))} sample rows:\n{df.head(CSV_SAMPLE_ROWS).to_markdown(index=False)}"
@@ -358,19 +363,20 @@ class FileProcessor:
                     break
             full_summary = "".join(summary_parts)
             return FileProcessor._truncate_text(full_summary, filename, "Excel")
-        except Exception as e:
+        except Exception as e:
             if "tabulate" in str(e).lower():
                 gaia_logger.error(f"Excel to_markdown error for '{filename}' (missing tabulate): {e}", exc_info=False)
                 try:
                     summary_parts_fallback = [f"Excel Document Summary: '{filename}'"]
-                    if not df_list_for_fallback and xls:
+                    if not df_list_for_fallback and xls:
                         for sheet_name in xls.sheet_names:
                             df_list_for_fallback.append((sheet_name, xls.parse(sheet_name)))
-                    elif not xls and not df_list_for_fallback:
+                    elif not xls and not df_list_for_fallback:  # Ensure df_list_for_fallback is populated if xls parsing failed early
                         temp_xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
                         for sheet_name in temp_xls.sheet_names:
                             df_list_for_fallback.append((sheet_name, temp_xls.parse(sheet_name)))
+
                     for sheet_name_fb, df_fb in df_list_for_fallback:
                         sheet_summary_fallback = (
                             f"\n---\nSheet: '{sheet_name_fb}' ({len(df_fb)} rows, {len(df_fb.columns)} columns):\n"
@@ -400,7 +406,7 @@ class FileProcessor:
                 page_text = page.extract_text()
                 if page_text:
                     text_content += page_text + "\n"
-                if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2:
+                if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2:  # Check slightly over to allow truncation logic to handle it
                     break
             if not text_content:
                 return f"PDF Document: '{filename}'. No text could be extracted or PDF is empty."
@@ -412,7 +418,7 @@ class FileProcessor:
     @staticmethod
     def _perform_asr_transcription(asr_pipeline_ref, audio_data_np, filename_for_log):
         gaia_logger.info(f"ASR: Starting transcription for {filename_for_log} in thread.")
-
+
         return asr_pipeline_ref(audio_data_np, chunk_length_s=30, return_timestamps=False, generate_kwargs={"language": "en"})

@@ -424,11 +430,11 @@ class FileProcessor:
             return f"Error: Audio processing skipped for '{filename}', ASR pipeline not available."
         if not librosa:
             return f"Error: Audio processing skipped for '{filename}', librosa library not available."
-
+
         try:
             with io.BytesIO(content) as audio_buffer:
                 y, sr = librosa.load(audio_buffer, sr=16000, mono=True)
-
+
             duration_seconds = len(y) / sr
             gaia_logger.info(f"Audio file: {filename}, Duration: {duration_seconds:.2f} seconds. Timeout set to: {ASR_PROCESSING_TIMEOUT_SECONDS}s")
             start_time = time.time()
@@ -442,7 +448,7 @@ class FileProcessor:
             except FuturesTimeoutError:
                 gaia_logger.warning(f"ASR transcription for '{filename}' timed out after {ASR_PROCESSING_TIMEOUT_SECONDS} seconds.")
                 return f"Error: Audio transcription for '{filename}' timed out after {ASR_PROCESSING_TIMEOUT_SECONDS}s."
-            except Exception as e_thread:
+            except Exception as e_thread:
                 gaia_logger.error(f"ASR transcription thread for '{filename}' failed: {e_thread}", exc_info=True)
                 if "3000 mel input features" in str(e_thread) or "return_timestamps" in str(e_thread):
                     return f"Error processing Audio file '{filename}': Transcription failed due to long-form audio issue (mel features/timestamps). Original error: {str(e_thread)}"
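Note: the timeout handling above works by running the blocking transcription in a worker thread and bounding the wait on its future. A minimal self-contained sketch of the same pattern; the names run_with_timeout and fn are illustrative:

    from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError

    _pool = ThreadPoolExecutor(max_workers=1)

    def run_with_timeout(fn, timeout_s, *args):
        # Submit the blocking call to a worker thread and bound the wait.
        future = _pool.submit(fn, *args)
        try:
            return future.result(timeout=timeout_s)
        except FuturesTimeoutError:
            # The worker keeps running; we only stop waiting for its result.
            return None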
@@ -453,7 +459,7 @@ class FileProcessor:

         if not transcribed_text.strip():
             return f"Audio Document: '{filename}'. Transcription result was empty or ASR failed."
-
+
         summary = f"Audio Document (Transcription): '{filename}':\n{transcribed_text}"
         return FileProcessor._truncate_text(summary, filename, "Audio Transcription")

@@ -472,7 +478,7 @@ class FileProcessor:
         except Exception:
             return f"File with Unknown Content Type: '{filename}'. Content is likely binary and cannot be displayed as text."

-class CacheManager:
+class CacheManager:
     def __init__(self, ttl: int = 300, max_size: int = 100, name: str = "Cache"):
         self.ttl = ttl; self.max_size = max_size
         self._cache: Dict[Any, Any] = {}; self._timestamps: Dict[Any, float] = {}
@@ -483,31 +489,31 @@ class CacheManager:
         try:
             self._access_order.remove(key); self._access_order.append(key)
             return copy.deepcopy(self._cache[key])
-        except (ValueError, TypeError) as e:
+        except (ValueError, TypeError) as e:  # pragma: no cover
             self.delete(key); return None
-        elif key in self._cache:
+        elif key in self._cache:  # Expired
             self.delete(key)
             return None
     def set(self, key: Any, value: Any):
-        if key in self._cache: self.delete(key)
+        if key in self._cache: self.delete(key)  # Remove to update timestamp and order
         while len(self._cache) >= self.max_size and self._access_order:
             old_key = self._access_order.pop(0)
-            if old_key in self._cache:
+            if old_key in self._cache:  # Should always be true
                 del self._cache[old_key]; del self._timestamps[old_key]
         try: self._cache[key] = copy.deepcopy(value)
-        except TypeError: self._cache[key] = value
+        except TypeError: self._cache[key] = value  # For non-deep-copyable items
         self._timestamps[key] = time.time(); self._access_order.append(key)
     def delete(self, key: Any):
         if key in self._cache:
             try:
                 del self._cache[key]; del self._timestamps[key]
                 if key in self._access_order: self._access_order.remove(key)
-            except (ValueError, KeyError): pass
+            except (ValueError, KeyError): pass  # pragma: no cover
     def clear(self): self._cache.clear();self._timestamps.clear();self._access_order.clear();gaia_logger.info(f"[{self.name}] Cleared.")
     def __len__(self): return len(self._cache)
     def __contains__(self, key): return key in self._cache and (time.time()-self._timestamps.get(key,0)<self.ttl)

-class SearchProvider(ABC):
+class SearchProvider(ABC):
     def __init__(self, config_dict: Dict):
         self.provider_config = config_dict.get('search', {})
         self._enabled = False
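Note: the cache above combines a TTL check in __contains__ with LRU eviction via _access_order. A short usage sketch under those semantics, assuming the CacheManager class from this hunk is in scope (keys and values are illustrative):

    cache = CacheManager(ttl=60, max_size=2, name="DemoCache")
    cache.set("a", {"hits": [1, 2]})
    cache.set("b", {"hits": [3]})
    cache.set("c", {"hits": []})   # max_size reached: evicts "a", the least recently used key
    assert "a" not in cache
    entry = cache.get("b")          # returns a deep copy; mutating entry won't touch the cache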
@@ -532,7 +538,7 @@ class SearchProvider(ABC):
         return self._perform_search(query, max_results)
     def available(self) -> bool: return self._enabled

-class GoogleProvider(SearchProvider):
+class GoogleProvider(SearchProvider):
     @property
     def provider_name(self) -> str: return "Google"
     def __init__(self, config_dict: Dict):
@@ -554,7 +560,7 @@ class GoogleProvider(SearchProvider):
         except requests.exceptions.RequestException as e: gaia_logger.warning(f"[{self.provider_name}] RequestEx: '{query[:70]}': {e}"); return None
         except Exception as e: gaia_logger.error(f"[{self.provider_name}] Error: '{query[:70]}': {e}", exc_info=True); return None

-class TavilyProvider(SearchProvider):
+class TavilyProvider(SearchProvider):
     @property
     def provider_name(self) -> str: return "Tavily"
     def __init__(self, config_dict: Dict):
@@ -579,7 +585,7 @@ class TavilyProvider(SearchProvider):
             return [{'href': h.get('url'), 'title': h.get('title',''), 'body': h.get('content','')} for h in hits]
         except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None

-class DuckDuckGoProvider(SearchProvider):
+class DuckDuckGoProvider(SearchProvider):
     @property
     def provider_name(self) -> str: return "DuckDuckGo"
     def __init__(self, config_dict: Dict):
@@ -596,9 +602,9 @@ class DuckDuckGoProvider(SearchProvider):
             return [{'href': r.get('href'), 'title': r.get('title',''), 'body': r.get('body','')} for r in hits]
         except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None

-class CompositeSearchClient:
+class CompositeSearchClient:
     def __init__(self, config_dict: Dict):
-        self.config = config_dict
+        self.config = config_dict
         self._search_config = config_dict.get('search', {})
         self.providers = self._init_providers(config_dict)
         self.cache = CacheManager(
@@ -634,24 +640,24 @@ class CompositeSearchClient:
                     results = prov.search(q, actual_r)
                     if results is not None: self.cache.set(cache_key, results); return results
                     if attempt < self._retry_att: time.sleep(self._retry_del)
-                except Exception as e:
+                except Exception as e:  # pragma: no cover
                     if attempt < self._retry_att: time.sleep(self._retry_del)
         self.cache.set(cache_key, [])
         return []

-class GaiaQueryBuilder:
+class GaiaQueryBuilder:
     def __init__(self, base_query: str, config_dict: Dict):
         self.base_query = base_query.strip()
         self.config = config_dict
     def get_queries(self) -> Dict[str, List[Tuple[str, str]]]:
         return {'primary': [(self.base_query, 'GENERAL')]} if self.base_query else {'primary': []}

-class ResultProcessor:
+class ResultProcessor:
     def __init__(self, config_dict: Dict):
         self.proc_config = config_dict.get('processing', {})
         self.trusted_sources = self.proc_config.get('trusted_sources', {})
         self.seen_urls: Set[str] = set()
-        self.date_pattern = DEFAULT_RAG_CONFIG['processing'].get('date_pattern', r'\b\d{4}\b')
     def process_batch(self, results: List[Dict], query_tag: str, initial_cat: str='GENERAL') -> List[Dict]:
         processed: List[Dict] = []
         if not results: return processed
@@ -675,7 +681,7 @@ class ResultProcessor:
         result['temporal_relevance'] = temporal_r
         result['combined_score'] = (source_q * 0.6 + temporal_r * 0.4)

-class ContentEnricher:
+class ContentEnricher:
     def __init__(self, config_dict: Dict):
         self.enrich_config = config_dict.get('enrichment', {})
         self._enabled = self.enrich_config.get('enabled', False) and bool(BeautifulSoup)
@@ -717,7 +723,7 @@ class ContentEnricher:
         except Exception as e: result['enrichment_failed'] = type(e).__name__
         return result

-class GeneralRAGPipeline:
+class GeneralRAGPipeline:
     def __init__(self, config_dict: Optional[Dict] = None):
         self.config = config_dict if config_dict is not None else DEFAULT_RAG_CONFIG
         self.search_client = CompositeSearchClient(self.config)
@@ -735,13 +741,13 @@ class GeneralRAGPipeline:
         max_r_pq = cfg_search.get('default_max_results', 3)
         cache_key = (q, max_r_pq, total_lim, enrich_en, enrich_cnt)
         if not force_refresh and (cached := self.pipeline_cache.get(cache_key)) is not None: return cached
-        if force_refresh: self.search_client.cache.clear();
-        if self.enricher: self.enricher.cache.clear()
         all_res, res_proc = [], ResultProcessor(self.config)
         staged_qs = GaiaQueryBuilder(q, self.config).get_queries()
         for stage, qs_in_stage in staged_qs.items():
             for query_s, cat in qs_in_stage:
-                if len(all_res) >= total_lim * 2: break
                 s_res = self.search_client.search(query_s, max_results=max_r_pq, force_refresh=force_refresh)
                 all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
         all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
@@ -758,36 +764,46 @@ class GaiaLevel1Agent:
         self.api_url = api_url
         self.llm_model: Optional[Any] = None
         self.rag_pipeline = GeneralRAGPipeline(DEFAULT_RAG_CONFIG)
-
         if genai and GOOGLE_GEMINI_API_KEY:
             try:
                 genai.configure(api_key=GOOGLE_GEMINI_API_KEY)
                 model_name = 'gemini-2.5-flash-preview-05-20'
                 self.llm_model = genai.GenerativeModel(model_name)
                 gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
             except Exception as e:
                 gaia_logger.error(f"Error initializing Gemini LLM: {e}", exc_info=True)
         else:
             gaia_logger.warning("Gemini LLM dependencies or API key missing.")
-
         if not self.llm_model:
             gaia_logger.warning("LLM (Gemini) unavailable. Limited capabilities.")
-
         _get_video_object_detector()
-        _get_video_vqa_pipeline()

         gaia_logger.info(f"GaiaLevel1Agent (RAG, FileProcessor, VideoAnalysis) initialized. API: {self.api_url}")

     @lru_cache(maxsize=32)
     def _fetch_and_process_file_content(self, task_id: str) -> Optional[str]:
-
         file_url = f"{self.api_url}/files/{task_id}"
-        for attempt in range(2):
             try:
                 response = requests.get(file_url, timeout=AGENT_DEFAULT_TIMEOUT)
                 response.raise_for_status()
-
-                filename = FileProcessor._get_filename_from_url(response.url)
                 content_disposition = response.headers.get('Content-Disposition')
                 if content_disposition:
                     header_filename = FileProcessor._get_filename_from_url(content_disposition)
@@ -800,7 +816,7 @@ class GaiaLevel1Agent:
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
-                    return None
                 gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
             except requests.exceptions.Timeout:
                 gaia_logger.warning(f"Timeout fetching file {task_id}")
@@ -813,37 +829,37 @@ class GaiaLevel1Agent:
     def _clean_vqa_species_answer(self, answer_text: str) -> str:
         """Cleans and normalizes VQA answer to extract a potential species name."""
         if not answer_text: return ""
-
         cleaned = answer_text.lower().strip()
-
         # Remove common prefixes
         prefixes_to_remove = [
-            "a type of ", "a variety of ", "it's a ", "it is a ", "an ", "a ", "the ",
             "this is a ", "this bird is a ", "it appears to be a ", "looks like a ",
             "it's an ", "it is an ", "this is an ", "this bird is an ", "it appears to be an ", "looks like an "
         ]
         for prefix in prefixes_to_remove:
             if cleaned.startswith(prefix):
                 cleaned = cleaned[len(prefix):]
-
         # Remove common suffixes
         suffixes_to_remove = [" bird", " species"]
         for suffix in suffixes_to_remove:
             if cleaned.endswith(suffix):
                 cleaned = cleaned[:-len(suffix)]
-
         # Remove parenthetical content or descriptive clauses if simple
         cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip()  # e.g. "robin (american)" -> "robin"
         cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip()  # e.g. "sparrow, which is small" -> "sparrow"

         # Basic character filtering (allow letters, numbers for things like "Type 2", spaces, hyphens)
         cleaned = re.sub(r"[^a-z0-9\s\-]", "", cleaned).strip()
-
         # Normalize whitespace
         cleaned = " ".join(cleaned.split())
-
         # Filter out very generic or uncertain answers post-cleaning
-        uncertain_terms = ["unknown", "not sure", "unclear", "difficult to say", "generic", "common bird"]
         if any(term in cleaned for term in uncertain_terms) or len(cleaned) < VIDEO_VQA_MIN_ANSWER_LENGTH:
             return ""  # Return empty if too generic or short
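Quick worked examples of what this normalization does to typical VQA outputs; agent stands for an initialized GaiaLevel1Agent, and the inputs are illustrative:

    agent._clean_vqa_species_answer("It's a Robin (American)")            # -> "robin"
    agent._clean_vqa_species_answer("a type of sparrow, which is small")  # -> "sparrow"
    agent._clean_vqa_species_answer("not sure")                           # -> "" (filtered as uncertain)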
@@ -858,45 +874,48 @@ class GaiaLevel1Agent:
             return "Video analysis skipped: Pillow library not available."

         detector = _get_video_object_detector()
-        vqa_model = _get_video_vqa_pipeline()

         if not detector or not vqa_model:
             return "Video analysis skipped: ML pipelines (detector or VQA) not available."

-        video_file_path = None
-

         try:
             ydl_opts = {
-                'format': '
                 'outtmpl': os.path.join(temp_dir, '%(id)s.%(ext)s'),
-                'quiet': True,
-                'max_filesize': 75 * 1024 * 1024,
                 'overwrites': True, 'noprogress': True, 'noplaylist': True, 'socket_timeout': 20,
-                '
             }
             gaia_logger.info(f"Attempting to download video: {video_url}")
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                info_dict = ydl.extract_info(video_url, download=True)
-                video_file_path = ydl.prepare_filename(info_dict)
-
-                #
-                try: os.rmdir(temp_dir)
-                except Exception: pass
-                return f"Video download resulted in a non-video file: {os.path.basename(video_file_path)}"

             if not video_file_path or not os.path.exists(video_file_path):
@@ -908,43 +927,46 @@ class GaiaLevel1Agent:
             cap = cv2.VideoCapture(video_file_path)
             if not cap.isOpened():
                 gaia_logger.error(f"Cannot open video file: {video_file_path}")
-                return "Cannot open video file."

             max_simultaneous_species = 0
             species_details_for_max_frame = ""
-
             total_frames_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             fps = cap.get(cv2.CAP_PROP_FPS)
-            if not fps or fps
             frame_interval = max(1, int(fps))  # Process ~1 frame per second
-
             frames_analyzed_count = 0
             current_frame_num = 0
-
-            gaia_logger.info(f"Video Info: ~{total_frames_video // fps:.0f}s, {fps:.2f} FPS. Analyzing ~1 frame/sec up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames.")

             while cap.isOpened() and frames_analyzed_count < VIDEO_MAX_FRAMES_TO_PROCESS:
                 cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_num)  # Jump to frame
                 ret, frame_data = cap.read()
                 if not ret: break

-                timestamp_sec = current_frame_num / fps
                 gaia_logger.info(f"Processing frame {current_frame_num} (analyzed {frames_analyzed_count+1}/{VIDEO_MAX_FRAMES_TO_PROCESS}) at ~{timestamp_sec:.1f}s")
-
                 try:
                     pil_image = Image.fromarray(cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB))
                 except Exception as e_conv:
                     gaia_logger.warning(f"Frame {current_frame_num} conversion to PIL failed: {e_conv}")
                     current_frame_num += frame_interval
                     continue
-
                 detected_objects = detector(pil_image)
                 bird_crops_this_frame = []
                 for obj in detected_objects:
-
                     box = obj['box']
                     xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax']
                     if not (0 <= xmin < xmax <= pil_image.width and 0 <= ymin < ymax <= pil_image.height):
                         gaia_logger.debug(f"Invalid box for bird: {box}, img size: {pil_image.size}")
                         continue
|
|
953 |
except Exception as e_crop:
|
954 |
gaia_logger.warning(f"Cropping bird failed for box {box}: {e_crop}")
|
955 |
|
|
|
956 |
if not bird_crops_this_frame:
|
957 |
current_frame_num += frame_interval
|
958 |
frames_analyzed_count += 1
|
@@ -963,45 +986,45 @@ class GaiaLevel1Agent:
|
|
963 |
vqa_question = "What is the specific species of this bird?"
|
964 |
|
965 |
for idx, bird_crop_img in enumerate(bird_crops_this_frame):
|
966 |
-
if bird_crop_img.width < 20 or bird_crop_img.height < 20: continue
|
967 |
try:
|
968 |
-
vqa_answer_list = vqa_model(bird_crop_img, question=vqa_question, top_k=1)
|
969 |
-
|
970 |
raw_vqa_answer_text = ""
|
971 |
-
vqa_confidence = VIDEO_VQA_CONFIDENCE_THRESHOLD # Default
|
972 |
|
973 |
if isinstance(vqa_answer_list, list) and vqa_answer_list:
|
974 |
raw_vqa_answer_text = vqa_answer_list[0].get('answer', "")
|
975 |
vqa_confidence = vqa_answer_list[0].get('score', vqa_confidence)
|
976 |
-
elif isinstance(vqa_answer_list, dict):
|
977 |
raw_vqa_answer_text = vqa_answer_list.get('answer', "")
|
978 |
vqa_confidence = vqa_answer_list.get('score', vqa_confidence)
|
979 |
|
980 |
cleaned_species_name = self._clean_vqa_species_answer(raw_vqa_answer_text)
|
981 |
-
|
982 |
if cleaned_species_name and vqa_confidence >= VIDEO_VQA_CONFIDENCE_THRESHOLD :
|
983 |
frame_species_identified.add(cleaned_species_name)
|
984 |
current_frame_species_details.append(f"{cleaned_species_name} (VQA conf: {vqa_confidence:.2f})")
|
985 |
-
elif cleaned_species_name:
|
986 |
gaia_logger.debug(f"VQA species '{cleaned_species_name}' (raw: '{raw_vqa_answer_text}') for bird {idx} below confidence {VIDEO_VQA_CONFIDENCE_THRESHOLD} (score: {vqa_confidence:.2f})")
|
987 |
else:
|
988 |
gaia_logger.debug(f"VQA for bird {idx} resulted in unusable/generic species: '{raw_vqa_answer_text}'")
|
989 |
|
990 |
except Exception as e_vqa:
|
991 |
gaia_logger.warning(f"VQA inference error for bird crop {idx} (frame {current_frame_num}): {e_vqa}")
|
992 |
-
|
993 |
if len(frame_species_identified) > max_simultaneous_species:
|
994 |
max_simultaneous_species = len(frame_species_identified)
|
995 |
species_details_for_max_frame = f"At ~{timestamp_sec:.1f}s, inferred species: {', '.join(current_frame_species_details) if current_frame_species_details else 'None specific'}"
|
996 |
-
|
997 |
if frame_species_identified:
|
998 |
gaia_logger.info(f"Frame {current_frame_num} (~{timestamp_sec:.1f}s): Found {len(frame_species_identified)} distinct species types: {', '.join(list(frame_species_identified))}")
|
999 |
|
1000 |
current_frame_num += frame_interval
|
1001 |
frames_analyzed_count += 1
|
1002 |
-
|
1003 |
-
cap.release()
|
1004 |
-
|
1005 |
context_str = (f"Video analysis result: The highest number of distinct bird species types inferred simultaneously "
|
1006 |
f"in the analyzed portion of the video (up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames) was {max_simultaneous_species}. "
|
1007 |
f"{('Details from a frame with this count: ' + species_details_for_max_frame) if species_details_for_max_frame else 'No specific species details captured for the max count frame or no birds found.'}")
|
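Note: the vqa_model(bird_crop_img, question=..., top_k=1) call above is the Hugging Face visual-question-answering pipeline. A minimal standalone sketch of the same call shape; the image path and the example output are illustrative:

    from transformers import pipeline
    from PIL import Image

    vqa = pipeline("visual-question-answering", model="Salesforce/blip-vqa-capfilt-large")
    crop = Image.open("bird_crop.jpg")  # illustrative input image
    answers = vqa(image=crop, question="What is the specific species of this bird?", top_k=1)
    # e.g. [{"answer": "hummingbird", "score": 0.42}]  (actual scores vary)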
@@ -1010,72 +1033,70 @@ class GaiaLevel1Agent:

         except yt_dlp.utils.DownloadError as e:
             gaia_logger.error(f"yt-dlp download error for {video_url}: {str(e)}")
-            clean_msg =
-            elif "video
         except Exception as e:
             gaia_logger.error(f"Error during video analysis for {video_url}: {e}", exc_info=True)
-            return f"An unexpected error occurred during video analysis: {type(e).__name__} - {str(e)}"
         finally:
-            if
-            if not os.listdir(temp_dir):
-                os.rmdir(temp_dir)
-                gaia_logger.info(f"Removed empty temp video directory: {temp_dir}")
-            else:
-                # If not empty, it might contain other yt-dlp artifacts (like .part files, audio)
-                # For robustness in a contest, maybe leave it and rely on system temp cleaning,
-                # or implement more aggressive cleanup of all files within this specific temp_dir.
-                # For now, just log.
-                gaia_logger.warning(f"Temp video directory {temp_dir} not empty after processing. Manual cleanup might be needed for: {os.listdir(temp_dir)}")
-            except OSError as e_rmdir:  # Catch OSError for rmdir failures (e.g. dir not empty)
-                gaia_logger.warning(f"Could not remove temp video directory {temp_dir} (possibly not empty or access issue): {e_rmdir}")
-            except Exception as e_final_clean:
-                gaia_logger.error(f"Unexpected error during final cleanup of {temp_dir}: {e_final_clean}")


     def _parse_llm_output(self, llm_text: str) -> Dict[str, str]:
-        # ... (this method remains unchanged) ...
         reasoning_trace = ""
         model_answer = ""
         final_answer_sentinel = "FINAL ANSWER:"
-
         parts = llm_text.split(final_answer_sentinel, 1)
-
         if len(parts) == 2:
             reasoning_trace = parts[0].strip()
             model_answer = parts[1].strip()
         else:
-            reasoning_trace = llm_text
             lines = llm_text.strip().split('\n')
             gaia_logger.warning(f"LLM output did not contain '{final_answer_sentinel}'. Using fallback parsing. Full LLM text: '{llm_text[:200]}...'")

         return {"model_answer": model_answer, "reasoning_trace": reasoning_trace}

     def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> Dict[str, str]:
-        # ... (this method's prompt might need slight adjustment if video context phrasing changes, but core logic is fine) ...
         default_model_answer = "Information not available in provided context"
         default_reasoning = "LLM processing failed or context insufficient."

-        if not self.llm_model:
-            gaia_logger.warning("LLM model (Gemini) not available for answer formulation.")
-            reasoning = "LLM model (Gemini) not available for answer formulation."
             answer_val = default_model_answer
             if web_context and file_context:
                 reasoning += " Context from file and web was found but not processed by LLM."
-            elif web_context:
                 reasoning += f" External context found: {web_context.splitlines()[0] if web_context.splitlines() else 'No specific snippet found.'}"
             elif file_context:
                 reasoning += f" File context found: {file_context[:100]}..."
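Illustration of the sentinel parsing in _parse_llm_output above (agent stands for an initialized GaiaLevel1Agent; the text is a made-up completion):

    text = "The document lists two species.\nFINAL ANSWER: two"
    parsed = agent._parse_llm_output(text)
    # parsed == {"model_answer": "two",
    #            "reasoning_trace": "The document lists two species."}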
@@ -1083,8 +1104,9 @@ class GaiaLevel1Agent:
                 reasoning += " No context found."
             return {"model_answer": answer_val, "reasoning_trace": reasoning}

         prompt_parts = [
-            "You are a general AI assistant. Your primary goal is to answer the user's question accurately and concisely based *only* on the provided context (from a document, web search results, or video analysis).",
             "If the context comes from 'Video analysis result', understand that 'species types inferred' means the video was analyzed by an AI to identify birds and infer their species using visual question answering. The count refers to the maximum number of *distinct types* of birds identified in this way in any single analyzed video frame.",
             "First, think step-by-step and briefly explain your reasoning based on the context. This part is for clarity and should come before your final answer.",
             "After your reasoning, you MUST conclude your response with the exact phrase 'FINAL ANSWER:', followed by your answer on the same line or the next.",
@@ -1093,7 +1115,7 @@ class GaiaLevel1Agent:
             " - If the answer is a string: use as few words as possible. Do not use articles (a, an, the) unless grammatically essential. Do not use abbreviations (e.g., write 'United States' not 'USA', 'Los Angeles' not 'LA') unless the question implies an abbreviation or it's a very common, universally understood one relevant to the context. Write digits in plain text (e.g., 'two' not '2') if they are part of a descriptive phrase, but use numerical digits if the question implies a code, identifier, version number, or a direct numerical value is more natural (e.g., 'Windows 10', 'part number 5').",
             " - If the answer is a list of items: provide them as a comma-separated list (e.g., item1, item2, item3). Apply the number or string rules above to each element in the list.",
             " - If the context is insufficient to answer the question: your reasoning should clearly state this, and your FINAL ANSWER should be 'Information not available in provided context'. Do not invent answers.",
-            "Prioritize information from 'Enriched Content' from web search results if available and relevant over shorter 'Snippets'. Information from 'Video Analysis Context' is highly specific to video-related questions.",
             "\nUser Question: ", question
         ]
@@ -1102,81 +1124,113 @@ class GaiaLevel1Agent:
         if file_context:
             file_header = "\n\nContext from Provided Document:\n---"
             file_footer = "\n---"
-            # Calculate available length more carefully
             len_web_ctx = len(web_context) if web_context else 0
-            max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer) - 500  # Buffer
-
-            if max_len_for_file > 100 :
                 truncated_file_context = file_context[:max_len_for_file]
                 if len(file_context) > len(truncated_file_context):
                     truncated_file_context += " ... (file context truncated)"
                 prompt_parts.extend([file_header, truncated_file_context, file_footer])
                 current_prompt_text_len += len(file_header) + len(truncated_file_context) + len(file_footer)
                 context_added = True
-            else: gaia_logger.warning(f"Not enough space for file context in LLM prompt.

-        if web_context:
-            # Determine header based on content
             header_text = "\n\nContext from External Sources (Web/Video):\n---"
-            if "Video analysis result:" in web_context and "Source [" not in web_context:
                 header_text = "\n\nContext from Video Analysis:\n---"
-            elif "Source [" in web_context and "Video analysis result:" not in web_context:
                 header_text = "\n\nContext from Web Search Results:\n---"
-
             web_footer = "\n---"
-
-            # We need to calculate available length for web_context based on what's already added.
-            available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(header_text) - len(web_footer) - 300  # Buffer for LLM answer itself

-            if available_len_for_web > 100:
                 truncated_web_context = web_context
                 if len(web_context) > available_len_for_web:
                     truncated_web_context = web_context[:available_len_for_web] + "\n... (external context truncated)"
                     gaia_logger.info(f"Truncated external (web/video) context from {len(web_context)} to {len(truncated_web_context)} chars for LLM.")
                 prompt_parts.extend([header_text, truncated_web_context, web_footer])
-                context_added = True
-            else: gaia_logger.warning(f"Not enough space for web/video context in LLM prompt.

         if not context_added: prompt_parts.append("\n\nNo document, web, or video context could be provided due to length constraints or availability.")
-        prompt_parts.append("\n\nReasoning and Final Answer:")
         final_prompt = "\n".join(prompt_parts)
-
         gaia_logger.info(f"LLM Prompt (first 300 chars): {final_prompt[:300]}...")
         gaia_logger.info(f"LLM Total prompt length: {len(final_prompt)} chars.")

-        if not GenerationConfig:  # Should be caught by class init
-            return {"model_answer": "LLM configuration error", "reasoning_trace": "GenerationConfig class not available."}

         try:
-            gen_config = GenerationConfig(temperature=0.1, top_p=0.95, max_output_tokens=1024)

             gaia_logger.info(f"LLM Raw Full Answer (first 200 chars): {llm_answer_text[:200]}...")
             return self._parse_llm_output(llm_answer_text)
-
         except Exception as e:
             gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
             error_type_name = type(e).__name__
             error_message = str(e)
             reasoning = f"Error calling Gemini API: {error_type_name} - {error_message}"
             answer_val = "LLM API error"
-
-            # Check for common API error types from google.generativeai.types.generation_types.BlockedPromptException or similar
-            # This requires inspecting the actual exception object 'e' or its attributes if it's a specific API exception type
             if "API key" in error_message.lower() and ("invalid" in error_message.lower() or "not valid" in error_message.lower()):
                 answer_val = "LLM Auth Error"
                 reasoning = "LLM API key is invalid or not authorized."
@@ -1186,27 +1240,30 @@ class GaiaLevel1Agent:
             elif "InternalServerError" in error_type_name or "500" in error_message :
                 answer_val = "LLM server error"
                 reasoning = "Error: LLM experienced an internal server error."
-            # Add

         return {"model_answer": answer_val, "reasoning_trace": reasoning}

     def __call__(self, question: str, task_id: Optional[str] = None) -> Dict[str, str]:
         gaia_logger.info(f"Agent processing: '{question[:70]}...', TaskID: {task_id}")
         q_lower = question.lower().strip()
-
         video_context_str: Optional[str] = None
-        #
-        video_url_match = re.search(r"(https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)[\w\-=&%]+)", question)

-        action_keywords = ["count", "how many", "number of", "simultaneously", "at the same time", "on camera"]

         if video_url_match and \
-           any(vk in
           any(sk in q_lower for sk in species_keywords) and \
           any(ak in q_lower for ak in action_keywords):
            video_url = video_url_match.group(0)
@@ -1220,80 +1277,79 @@ class GaiaLevel1Agent:
             return {"model_answer": "general AI assistant", "reasoning_trace": "User asked for my identity."}

         file_ctx_str: Optional[str] = None
-
-        # Question might imply a file without using the task_id directly if it refers to "the provided text" etc.
-        # For GAIA, task_id is usually present if a file is relevant.
-        if task_id and (any(fi in q_lower for fi in file_indicators) or "this task involves a file" in q_lower or "the provided" in q_lower):
             file_ctx_str = self._fetch_and_process_file_content(task_id)
             if file_ctx_str: gaia_logger.info(f"Processed file context ({len(file_ctx_str)} chars) for task {task_id}")
             else: gaia_logger.warning(f"No file content or failed to process for task {task_id}")
-
-        web_rag_ctx_str: Optional[str] = None
         needs_web_rag = True
-
-        if
-        #
-        #
-        doc_can_answer_kws = ["summarize", "according to the document", "in the provided text", "based on the file content"]
         if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
             needs_web_rag = False
-
             needs_web_rag = False

-        # Explicit negative constraint for web search
         if "don't search" in q_lower or "do not search" in q_lower or "without searching" in q_lower or "without using the internet" in q_lower:
             needs_web_rag = False
             gaia_logger.info("Web RAG explicitly disabled by user query.")
-
         if needs_web_rag:
-            search_q = question.replace("?", "").strip()
-            #
-            #
-            rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
             if rag_res:
                 snippets = []
                 for i, res_item in enumerate(rag_res):
                     title = res_item.get('title','N/A')
                     body = res_item.get('body','')
                     href = res_item.get('href','#')
-
-                    body_preview = (body[:1500] + "...") if len(body) > 1500 else body
-                    snippets.append(f"Source [{i+1} - {
                 web_rag_ctx_str = "\n\n".join(snippets)
                 if web_rag_ctx_str: gaia_logger.info(f"RAG pipeline yielded web results ({len(web_rag_ctx_str)} chars).")
                 else: gaia_logger.warning("RAG pipeline yielded no web results for the query.")
-
-        # Consolidate external context for the LLM
         final_llm_external_context_parts = []
         if video_context_str:
-            final_llm_external_context_parts.append(f"
         if web_rag_ctx_str:
-
         agent_response_dict = self._formulate_answer_with_llm(question, file_ctx_str, final_llm_external_context)
         gaia_logger.info(f"LLM-based model_answer (first 70 chars): {agent_response_dict.get('model_answer', '')[:70]}...")
         return agent_response_dict
-

     space_id = os.getenv("SPACE_ID")
     if profile: username = f"{profile.username}"
     else: return "Please Login to Hugging Face.", None
@@ -1306,11 +1362,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         questions_data = response.json()
         if not questions_data or not isinstance(questions_data, list): return "Questions list empty/invalid.", None
     except Exception as e: return f"Error fetching questions: {e}", None
-
     results_log_for_gradio, answers_for_api_submission = [], []
-
     for i, item in enumerate(questions_data):
         task_id, q_text = item.get("task_id"), item.get("question")
         model_answer_val = "AGENT ERROR"
@@ -1322,7 +1382,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val})
             answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
             continue
-
         gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
         try:
             agent_response_dict = agent(question=q_text, task_id=task_id)
@@ -1332,24 +1392,24 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
            gaia_logger.error(f"Error during agent call for task {task_id}: {e}", exc_info=True)
            model_answer_val = "AGENT EXECUTION ERROR"
            reasoning_trace_val = f"Agent call failed: {type(e).__name__} - {str(e)}"
-
        answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
        results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace (first 500 chars)": reasoning_trace_val[:500] + ("..." if len(reasoning_trace_val) > 500 else "")})
-
        if i < len(questions_data) - 1: time.sleep(sleep_llm)
-
    if not answers_for_api_submission: return "Agent produced no answers for API submission.", pd.DataFrame(results_log_for_gradio or [{"Info": "No questions processed"}])
-
    submission_payload_for_api = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_for_api_submission
    }
    gaia_logger.info(f"Submitting {len(answers_for_api_submission)} answers for '{username}' to API...")
    gaia_logger.debug(f"API Submission Payload Sample: {json.dumps(submission_payload_for_api)[:500]}")

    try:
-        response = requests.post(submit_url, json=submission_payload_for_api, timeout=60);
        response.raise_for_status()
        result_data = response.json()
        status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
@@ -1361,37 +1421,36 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
        return f"Submission Failed: {err_detail}", pd.DataFrame(results_log_for_gradio)
    except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)

-with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
-    gr.Markdown("#
    gr.Markdown(
        """
        **Instructions:**
-        1.
-        2. Click 'Run Evaluation & Submit All Answers'.
        ---
-
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])

-if __name__ == "__main__":
-    print("\n" + "-"*30 + " GAIA Level 1 Agent - RAG, FileProc, Video Analysis " + "-"*30)
    required_env = {
-        "GOOGLE_GEMINI_API_KEY": GOOGLE_GEMINI_API_KEY,
-        "GOOGLE_API_KEY": GOOGLE_CUSTOM_SEARCH_API_KEY,
-        "GOOGLE_CSE_ID": GOOGLE_CUSTOM_SEARCH_CSE_ID,
        "TAVILY_API_KEY": TAVILY_API_KEY,
    }
    missing_keys = [k for k, v in required_env.items() if not v]
    for k, v in required_env.items(): print(f"✅ {k} found." if v else f"⚠️ WARNING: {k} not set.")
-
-    # Check for all critical libraries
    libraries_to_check = [
-        ("transformers", hf_transformers_pipeline), ("torch", torch),
        ("librosa", librosa), ("openpyxl", openpyxl), ("pdfplumber", pdfplumber),
        ("yt_dlp", yt_dlp), ("cv2 (opencv-python)", cv2), ("BeautifulSoup", BeautifulSoup),
        ("duckduckgo_search", DDGS), ("googleapiclient", build_google_search_service),

@@ -1402,6 +1461,11 @@ if __name__ == "__main__":

    if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")
    else: print("\n--- All major API Key Environment Variables found. ---")
-
    print("-"*(60 + len(" GAIA Level 1 Agent - RAG, FileProc, Video Analysis ")) + "\n")
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)
|
|
15 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
16 |
from concurrent.futures import TimeoutError as FuturesTimeoutError
|
17 |
from collections import defaultdict
|
18 |
+
import tempfile # Added for robust temporary directory management
|
19 |
|
20 |
try:
|
21 |
import google.generativeai as genai
|
22 |
+
from google.generativeai.types import GenerationConfig, HarmCategory, HarmBlockThreshold, FinishReason, HarmProbability
|
23 |
except ImportError:
|
24 |
genai = None
|
25 |
GenerationConfig = None
|
26 |
+
HarmCategory = None # Added for safety settings/finish reason details
|
27 |
+
HarmBlockThreshold = None # Added for safety settings
|
28 |
+
FinishReason = None # Added for checking candidate finish reason
|
29 |
+
HarmProbability = None # Added for checking safety ratings probability
|
30 |
print("WARNING: google-generativeai library not found. Install with: pip install google-generativeai")
|
31 |
|
32 |
try:
|
|
|
70 |
print("WARNING: librosa library not found. Audio processing may be impaired. Install with: pip install librosa")
|
71 |
|
72 |
try:
|
73 |
+
import openpyxl
|
74 |
except ImportError:
|
75 |
openpyxl = None
|
76 |
print("WARNING: openpyxl library not found. .xlsx file processing might fail. Install with: pip install openpyxl")
|
|
|
108 |
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
|
109 |
|
110 |
AGENT_DEFAULT_TIMEOUT = 15
|
111 |
+
MAX_CONTEXT_LENGTH_LLM = 30000
|
112 |
|
113 |
+
MAX_FILE_SIZE = 5 * 1024 * 1024
|
114 |
CSV_SAMPLE_ROWS = 10
|
115 |
+
MAX_FILE_CONTEXT_LENGTH = 10000
|
116 |
|
117 |
# Global instances for video analysis pipelines
|
118 |
video_object_detector_pipeline: Optional[Any] = None
|
119 |
video_vqa_pipeline: Optional[Any] = None # Changed from species_classifier to VQA
|
120 |
|
121 |
+
VIDEO_ANALYSIS_DEVICE: int = -1
|
122 |
VIDEO_ANALYSIS_OBJECT_MODEL = "facebook/detr-resnet-50"
|
123 |
VIDEO_ANALYSIS_VQA_MODEL = "Salesforce/blip-vqa-capfilt-large" # Using a VQA model
|
124 |
|
125 |
+
VIDEO_MAX_FRAMES_TO_PROCESS = 120
|
126 |
VIDEO_CONFIDENCE_THRESHOLD_BIRD = 0.6
|
127 |
VIDEO_VQA_MIN_ANSWER_LENGTH = 3 # Minimum length for a VQA answer to be considered a species
|
128 |
+
VIDEO_VQA_CONFIDENCE_THRESHOLD = 0.3
|
129 |
|
130 |
asr_pipeline_instance: Optional[Any] = None
|
131 |
+
ASR_MODEL_NAME = "openai/whisper-tiny"
|
132 |
+
ASR_PROCESSING_TIMEOUT_SECONDS = 1024
|
133 |
|
134 |
|
135 |
DEFAULT_RAG_CONFIG = {
|
|
|
141 |
'tavily_api_key': TAVILY_API_KEY,
|
142 |
'default_max_results': 3, 'retry_attempts': 2, 'retry_delay': 2,
|
143 |
'google_timeout': 8, 'tavily_depth': "basic",
|
144 |
+
'max_query_length_tavily': 380
|
145 |
},
|
146 |
'processing': {
|
147 |
'trusted_sources': {'wikipedia.org': 0.8, 'reuters.com': 0.75, 'apnews.com': 0.75},
|
|
|
170 |
# Simplified device selection, consistent with FileProcessor's ASR
|
171 |
device_id = 0 if torch.cuda.is_available() else -1
|
172 |
if VIDEO_ANALYSIS_DEVICE == -1 : VIDEO_ANALYSIS_DEVICE = device_id # Set global if not user-overridden
|
173 |
+
|
174 |
target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id
|
175 |
|
176 |
video_object_detector_pipeline = hf_transformers_pipeline(
|
|
|
190 |
try:
|
191 |
device_id = 0 if torch.cuda.is_available() else -1
|
192 |
if VIDEO_ANALYSIS_DEVICE == -1: VIDEO_ANALYSIS_DEVICE = device_id
|
193 |
+
|
194 |
target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id
|
195 |
|
196 |
video_vqa_pipeline = hf_transformers_pipeline(
|
|
|
210 |
global asr_pipeline_instance
|
211 |
if asr_pipeline_instance is None and hf_transformers_pipeline and torch:
|
212 |
try:
|
213 |
+
device = -1
|
214 |
asr_pipeline_instance = hf_transformers_pipeline(
|
215 |
"automatic-speech-recognition",
|
216 |
model=ASR_MODEL_NAME,
|
|
|
292 |
f"Columns: {', '.join(df.columns)}\nFirst {min(CSV_SAMPLE_ROWS, len(df))} sample rows:\n{df.head(CSV_SAMPLE_ROWS).to_markdown(index=False)}"
|
293 |
)
|
294 |
return FileProcessor._truncate_text(summary, filename, "CSV")
|
295 |
+
except Exception as e:
|
296 |
if "tabulate" in str(e).lower() and df is not None:
|
297 |
gaia_logger.error(f"CSV to_markdown error for '{filename}' (missing tabulate): {e}", exc_info=False)
|
298 |
try:
|
|
|
335 |
break
|
336 |
except UnicodeDecodeError: continue
|
337 |
if text is None: text = content.decode('utf-8', errors='ignore')
|
338 |
+
|
339 |
summary = f"Text Document: '{filename}':\n{text}"
|
340 |
return FileProcessor._truncate_text(summary, filename, "Text")
|
341 |
except Exception as e:
|
|
|
346 |
gaia_logger.info(f"Processing Excel file: {filename}")
|
347 |
if not openpyxl: return f"Error: Excel processing skipped for '{filename}', openpyxl library not available."
|
348 |
xls = None
|
349 |
+
df_list_for_fallback = []
|
350 |
try:
|
351 |
xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
|
352 |
summary_parts = [f"Excel Document Summary: '{filename}'"]
|
353 |
for sheet_name in xls.sheet_names:
|
354 |
df = xls.parse(sheet_name)
|
355 |
+
df_list_for_fallback.append((sheet_name, df))
|
356 |
sheet_summary = (
|
357 |
f"\n---\nSheet: '{sheet_name}' ({len(df)} rows, {len(df.columns)} columns):\n"
|
358 |
f"Columns: {', '.join(df.columns)}\nFirst {min(CSV_SAMPLE_ROWS, len(df))} sample rows:\n{df.head(CSV_SAMPLE_ROWS).to_markdown(index=False)}"
|
|
|
363 |
break
|
364 |
full_summary = "".join(summary_parts)
|
365 |
return FileProcessor._truncate_text(full_summary, filename, "Excel")
|
366 |
+
except Exception as e:
|
367 |
if "tabulate" in str(e).lower():
|
368 |
gaia_logger.error(f"Excel to_markdown error for '{filename}' (missing tabulate): {e}", exc_info=False)
|
369 |
try:
|
370 |
summary_parts_fallback = [f"Excel Document Summary: '{filename}'"]
|
371 |
+
if not df_list_for_fallback and xls:
|
372 |
for sheet_name in xls.sheet_names:
|
373 |
df_list_for_fallback.append((sheet_name, xls.parse(sheet_name)))
|
374 |
+
elif not xls and not df_list_for_fallback: # Ensure df_list_for_fallback is populated if xls parsing failed early
|
375 |
temp_xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
|
376 |
for sheet_name in temp_xls.sheet_names:
|
377 |
df_list_for_fallback.append((sheet_name, temp_xls.parse(sheet_name)))
|
378 |
|
379 |
+
|
380 |
for sheet_name_fb, df_fb in df_list_for_fallback:
|
381 |
sheet_summary_fallback = (
|
382 |
f"\n---\nSheet: '{sheet_name_fb}' ({len(df_fb)} rows, {len(df_fb.columns)} columns):\n"
|
|
|
406 |
page_text = page.extract_text()
|
407 |
if page_text:
|
408 |
text_content += page_text + "\n"
|
409 |
+
if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2: # Check slightly over to allow truncation logic to handle it
|
410 |
break
|
411 |
if not text_content:
|
412 |
return f"PDF Document: '{filename}'. No text could be extracted or PDF is empty."
|
|
|
418 |
@staticmethod
|
419 |
def _perform_asr_transcription(asr_pipeline_ref, audio_data_np, filename_for_log):
|
420 |
gaia_logger.info(f"ASR: Starting transcription for {filename_for_log} in thread.")
|
421 |
+
|
422 |
return asr_pipeline_ref(audio_data_np, chunk_length_s=30, return_timestamps=False, generate_kwargs={"language": "en"})
|
423 |
|
424 |
|
|
|
430 |
return f"Error: Audio processing skipped for '{filename}', ASR pipeline not available."
|
431 |
if not librosa:
|
432 |
return f"Error: Audio processing skipped for '{filename}', librosa library not available."
|
433 |
+
|
434 |
try:
|
435 |
with io.BytesIO(content) as audio_buffer:
|
436 |
y, sr = librosa.load(audio_buffer, sr=16000, mono=True)
|
437 |
+
|
438 |
duration_seconds = len(y) / sr
|
439 |
gaia_logger.info(f"Audio file: {filename}, Duration: {duration_seconds:.2f} seconds. Timeout set to: {ASR_PROCESSING_TIMEOUT_SECONDS}s")
|
440 |
start_time = time.time()
|
|
|
448 |
except FuturesTimeoutError:
|
449 |
gaia_logger.warning(f"ASR transcription for '{filename}' timed out after {ASR_PROCESSING_TIMEOUT_SECONDS} seconds.")
|
450 |
return f"Error: Audio transcription for '{filename}' timed out after {ASR_PROCESSING_TIMEOUT_SECONDS}s."
|
451 |
+
except Exception as e_thread:
|
452 |
gaia_logger.error(f"ASR transcription thread for '{filename}' failed: {e_thread}", exc_info=True)
|
453 |
if "3000 mel input features" in str(e_thread) or "return_timestamps" in str(e_thread):
|
454 |
return f"Error processing Audio file '{filename}': Transcription failed due to long-form audio issue (mel features/timestamps). Original error: {str(e_thread)}"
|
|
|
459 |
|
460 |
if not transcribed_text.strip():
|
461 |
return f"Audio Document: '{filename}'. Transcription result was empty or ASR failed."
|
462 |
+
|
463 |
summary = f"Audio Document (Transcription): '{filename}':\n{transcribed_text}"
|
464 |
return FileProcessor._truncate_text(summary, filename, "Audio Transcription")
|
465 |
|
|
|
478 |
except Exception:
|
479 |
return f"File with Unknown Content Type: '{filename}'. Content is likely binary and cannot be displayed as text."
|
480 |
|
481 |
+
class CacheManager:
|
482 |
def __init__(self, ttl: int = 300, max_size: int = 100, name: str = "Cache"):
|
483 |
self.ttl = ttl; self.max_size = max_size
|
484 |
self._cache: Dict[Any, Any] = {}; self._timestamps: Dict[Any, float] = {}
|
|
|
489 |
try:
|
490 |
self._access_order.remove(key); self._access_order.append(key)
|
491 |
return copy.deepcopy(self._cache[key])
|
492 |
+
except (ValueError, TypeError) as e: # pragma: no cover
|
493 |
self.delete(key); return None
|
494 |
+
elif key in self._cache: # Expired
|
495 |
self.delete(key)
|
496 |
return None
|
497 |
def set(self, key: Any, value: Any):
|
498 |
+
if key in self._cache: self.delete(key) # Remove to update timestamp and order
|
499 |
while len(self._cache) >= self.max_size and self._access_order:
|
500 |
old_key = self._access_order.pop(0)
|
501 |
+
if old_key in self._cache: # Should always be true
|
502 |
del self._cache[old_key]; del self._timestamps[old_key]
|
503 |
try: self._cache[key] = copy.deepcopy(value)
|
504 |
+
except TypeError: self._cache[key] = value # For non-deep-copyable items
|
505 |
self._timestamps[key] = time.time(); self._access_order.append(key)
|
506 |
def delete(self, key: Any):
|
507 |
if key in self._cache:
|
508 |
try:
|
509 |
del self._cache[key]; del self._timestamps[key]
|
510 |
if key in self._access_order: self._access_order.remove(key)
|
511 |
+
except (ValueError, KeyError): pass # pragma: no cover
|
512 |
def clear(self): self._cache.clear(); self._timestamps.clear(); self._access_order.clear(); gaia_logger.info(f"[{self.name}] Cleared.")
|
513 |
def __len__(self): return len(self._cache)
|
514 |
def __contains__(self, key): return key in self._cache and (time.time()-self._timestamps.get(key,0)<self.ttl)
|
515 |
|
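# Illustrative CacheManager usage (a minimal sketch; keys, TTL and size are
# hypothetical):
#   cache = CacheManager(ttl=60, max_size=2, name="DemoCache")
#   cache.set("a", 1); cache.set("b", 2)
#   cache.set("c", 3)            # evicts "a", the least recently used key
#   assert cache.get("a") is None
#   assert "b" in cache          # True while the 60s TTL has not elapsed
|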
516 |
+
class SearchProvider(ABC):
|
517 |
def __init__(self, config_dict: Dict):
|
518 |
self.provider_config = config_dict.get('search', {})
|
519 |
self._enabled = False
|
|
|
538 |
return self._perform_search(query, max_results)
|
539 |
def available(self) -> bool: return self._enabled
|
540 |
|
541 |
+
class GoogleProvider(SearchProvider):
|
542 |
@property
|
543 |
def provider_name(self) -> str: return "Google"
|
544 |
def __init__(self, config_dict: Dict):
|
|
|
560 |
except requests.exceptions.RequestException as e: gaia_logger.warning(f"[{self.provider_name}] RequestEx: '{query[:70]}': {e}"); return None
|
561 |
except Exception as e: gaia_logger.error(f"[{self.provider_name}] Error: '{query[:70]}': {e}", exc_info=True); return None
|
562 |
|
563 |
+
class TavilyProvider(SearchProvider):
|
564 |
@property
|
565 |
def provider_name(self) -> str: return "Tavily"
|
566 |
def __init__(self, config_dict: Dict):
|
|
|
585 |
return [{'href': h.get('url'), 'title': h.get('title',''), 'body': h.get('content','')} for h in hits]
|
586 |
except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None
|
587 |
|
588 |
+
class DuckDuckGoProvider(SearchProvider):
|
589 |
@property
|
590 |
def provider_name(self) -> str: return "DuckDuckGo"
|
591 |
def __init__(self, config_dict: Dict):
|
|
|
602 |
return [{'href': r.get('href'), 'title': r.get('title',''), 'body': r.get('body','')} for r in hits]
|
603 |
except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None
|
604 |
|
605 |
+
class CompositeSearchClient:
|
606 |
def __init__(self, config_dict: Dict):
|
607 |
+
self.config = config_dict
|
608 |
self._search_config = config_dict.get('search', {})
|
609 |
self.providers = self._init_providers(config_dict)
|
610 |
self.cache = CacheManager(
|
|
|
640 |
results = prov.search(q, actual_r)
|
641 |
if results is not None: self.cache.set(cache_key, results); return results
|
642 |
if attempt < self._retry_att: time.sleep(self._retry_del)
|
643 |
+
except Exception as e: # pragma: no cover
|
644 |
if attempt < self._retry_att: time.sleep(self._retry_del)
|
645 |
self.cache.set(cache_key, [])
|
646 |
return []
|
647 |
|
648 |
+
class GaiaQueryBuilder:
|
649 |
def __init__(self, base_query: str, config_dict: Dict):
|
650 |
self.base_query = base_query.strip()
|
651 |
self.config = config_dict
|
652 |
def get_queries(self) -> Dict[str, List[Tuple[str, str]]]:
|
653 |
return {'primary': [(self.base_query, 'GENERAL')]} if self.base_query else {'primary': []}
|
654 |
|
655 |
+
class ResultProcessor:
|
656 |
def __init__(self, config_dict: Dict):
|
657 |
self.proc_config = config_dict.get('processing', {})
|
658 |
self.trusted_sources = self.proc_config.get('trusted_sources', {})
|
659 |
self.seen_urls: Set[str] = set()
|
660 |
+
self.date_pattern = DEFAULT_RAG_CONFIG['processing'].get('date_pattern', r'\b\d{4}\b')
|
661 |
def process_batch(self, results: List[Dict], query_tag: str, initial_cat: str='GENERAL') -> List[Dict]:
|
662 |
processed: List[Dict] = []
|
663 |
if not results: return processed
|
|
|
681 |
result['temporal_relevance'] = temporal_r
|
682 |
result['combined_score'] = (source_q * 0.6 + temporal_r * 0.4)
|
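# Worked example (illustrative): a wikipedia.org hit (trusted-source quality
# 0.8) with temporal relevance 0.5 scores 0.8 * 0.6 + 0.5 * 0.4 = 0.68.
|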
683 |
|
684 |
+
class ContentEnricher:
|
685 |
def __init__(self, config_dict: Dict):
|
686 |
self.enrich_config = config_dict.get('enrichment', {})
|
687 |
self._enabled = self.enrich_config.get('enabled', False) and bool(BeautifulSoup)
|
|
|
723 |
except Exception as e: result['enrichment_failed'] = type(e).__name__
|
724 |
return result
|
725 |
|
726 |
+
class GeneralRAGPipeline:
|
727 |
def __init__(self, config_dict: Optional[Dict] = None):
|
728 |
self.config = config_dict if config_dict is not None else DEFAULT_RAG_CONFIG
|
729 |
self.search_client = CompositeSearchClient(self.config)
|
|
|
741 |
max_r_pq = cfg_search.get('default_max_results', 3)
|
742 |
cache_key = (q, max_r_pq, total_lim, enrich_en, enrich_cnt)
|
743 |
if not force_refresh and (cached := self.pipeline_cache.get(cache_key)) is not None: return cached
|
744 |
+
if force_refresh: self.search_client.cache.clear()  # Clears underlying search client cache
|
745 |
+
if self.enricher and force_refresh: self.enricher.cache.clear() # Clear enricher cache if force_refresh
|
746 |
all_res, res_proc = [], ResultProcessor(self.config)
|
747 |
staged_qs = GaiaQueryBuilder(q, self.config).get_queries()
|
748 |
for stage, qs_in_stage in staged_qs.items():
|
749 |
for query_s, cat in qs_in_stage:
|
750 |
+
if len(all_res) >= total_lim * 2: break # Fetch more initially to allow for better selection
|
751 |
s_res = self.search_client.search(query_s, max_results=max_r_pq, force_refresh=force_refresh)
|
752 |
all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
|
753 |
all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
|
|
|
764 |
self.api_url = api_url
|
765 |
self.llm_model: Optional[Any] = None
|
766 |
self.rag_pipeline = GeneralRAGPipeline(DEFAULT_RAG_CONFIG)
|
767 |
+
|
768 |
if genai and GOOGLE_GEMINI_API_KEY:
|
769 |
try:
|
770 |
genai.configure(api_key=GOOGLE_GEMINI_API_KEY)
|
771 |
model_name = 'gemini-2.5-flash-preview-05-20'
|
772 |
+
|
773 |
+
|
774 |
self.llm_model = genai.GenerativeModel(model_name)
|
775 |
gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
|
776 |
except Exception as e:
|
777 |
gaia_logger.error(f"Error initializing Gemini LLM: {e}", exc_info=True)
|
778 |
+
# Attempt fallback if specific model fails (e.g. not available in region, or name typo)
|
779 |
+
try:
|
780 |
+
gaia_logger.info("Attempting fallback to 'gemini-1.0-pro' for LLM.")
|
781 |
+
self.llm_model = genai.GenerativeModel('gemini-1.0-pro') # A common, generally available model
|
782 |
+
gaia_logger.info("Gemini LLM ('gemini-1.0-pro') initialized as fallback.")
|
783 |
+
except Exception as e_fallback:
|
784 |
+
gaia_logger.error(f"Fallback LLM initialization also failed: {e_fallback}", exc_info=True)
|
785 |
+
|
786 |
else:
|
787 |
gaia_logger.warning("Gemini LLM dependencies or API key missing.")
|
788 |
+
|
789 |
if not self.llm_model:
|
790 |
gaia_logger.warning("LLM (Gemini) unavailable. Limited capabilities.")
|
791 |
+
|
792 |
_get_video_object_detector()
|
793 |
+
_get_video_vqa_pipeline()
|
794 |
|
795 |
gaia_logger.info(f"GaiaLevel1Agent (RAG, FileProcessor, VideoAnalysis) initialized. API: {self.api_url}")
|
796 |
|
797 |
@lru_cache(maxsize=32)
|
798 |
def _fetch_and_process_file_content(self, task_id: str) -> Optional[str]:
|
799 |
+
|
800 |
file_url = f"{self.api_url}/files/{task_id}"
|
801 |
+
for attempt in range(2): # Retry once
|
802 |
try:
|
803 |
response = requests.get(file_url, timeout=AGENT_DEFAULT_TIMEOUT)
|
804 |
response.raise_for_status()
|
805 |
+
|
806 |
+
filename = FileProcessor._get_filename_from_url(response.url) # Fallback from URL
|
807 |
content_disposition = response.headers.get('Content-Disposition')
|
808 |
if content_disposition:
|
809 |
header_filename = FileProcessor._get_filename_from_url(content_disposition)
|
|
|
816 |
except requests.exceptions.HTTPError as e:
|
817 |
if e.response.status_code == 404:
|
818 |
gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
|
819 |
+
return None # No point retrying 404
|
820 |
gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
|
821 |
except requests.exceptions.Timeout:
|
822 |
gaia_logger.warning(f"Timeout fetching file {task_id}")
|
|
|
829 |
def _clean_vqa_species_answer(self, answer_text: str) -> str:
|
830 |
"""Cleans and normalizes VQA answer to extract a potential species name."""
|
831 |
if not answer_text: return ""
|
832 |
+
|
833 |
cleaned = answer_text.lower().strip()
|
834 |
+
|
835 |
# Remove common prefixes
|
836 |
prefixes_to_remove = [
|
837 |
+
"a type of ", "a variety of ", "it's a ", "it is a ", "an ", "a ", "the ",
|
838 |
"this is a ", "this bird is a ", "it appears to be a ", "looks like a ",
|
839 |
"it's an ", "it is an ", "this is an ", "this bird is an ", "it appears to be an ", "looks like an "
|
840 |
]
|
841 |
for prefix in prefixes_to_remove:
|
842 |
if cleaned.startswith(prefix):
|
843 |
cleaned = cleaned[len(prefix):]
|
844 |
+
|
845 |
# Remove common suffixes
|
846 |
suffixes_to_remove = [" bird", " species"]
|
847 |
for suffix in suffixes_to_remove:
|
848 |
if cleaned.endswith(suffix):
|
849 |
cleaned = cleaned[:-len(suffix)]
|
850 |
+
|
851 |
# Remove parenthetical content or descriptive clauses if simple
|
852 |
cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip() # e.g. "robin (american)" -> "robin"
|
853 |
cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip() # e.g. "sparrow, which is small" -> "sparrow"
|
854 |
|
855 |
# Basic character filtering (allow letters, numbers for things like "Type 2", spaces, hyphens)
|
856 |
cleaned = re.sub(r"[^a-z0-9\s\-]", "", cleaned).strip()
|
857 |
+
|
858 |
# Normalize whitespace
|
859 |
cleaned = " ".join(cleaned.split())
|
860 |
+
|
861 |
# Filter out very generic or uncertain answers post-cleaning
|
862 |
+
uncertain_terms = ["unknown", "not sure", "unclear", "difficult to say", "generic", "common bird", "no bird", "not a bird"]
|
863 |
if any(term in cleaned for term in uncertain_terms) or len(cleaned) < VIDEO_VQA_MIN_ANSWER_LENGTH:
|
864 |
return "" # Return empty if too generic or short
|
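# Example transformations (inputs illustrative):
#   "It is a European Robin (Erithacus rubecula)" -> "european robin"
#   "a type of sparrow, which is small"           -> "sparrow"
#   "not sure"                                    -> "" (filtered as uncertain)
|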
865 |
|
|
|
874 |
return "Video analysis skipped: Pillow library not available."
|
875 |
|
876 |
detector = _get_video_object_detector()
|
877 |
+
vqa_model = _get_video_vqa_pipeline()
|
878 |
|
879 |
if not detector or not vqa_model:
|
880 |
return "Video analysis skipped: ML pipelines (detector or VQA) not available."
|
881 |
|
882 |
+
video_file_path: Optional[str] = None
|
883 |
+
temp_dir_obj: Optional[tempfile.TemporaryDirectory] = None
|
884 |
+
cap: Optional[cv2.VideoCapture] = None
|
885 |
+
|
886 |
|
887 |
try:
|
888 |
+
temp_dir_obj = tempfile.TemporaryDirectory(prefix="gaia_video_")
|
889 |
+
temp_dir = temp_dir_obj.name
|
890 |
+
gaia_logger.info(f"Created temporary directory for video: {temp_dir}")
|
891 |
+
|
892 |
ydl_opts = {
|
893 |
+
'format': 'bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/bestvideo[height<=480][ext=webm]+bestaudio[ext=webm]/best[height<=480][ext=mp4]/best[height<=480][ext=webm]/best[height<=480]',
|
894 |
'outtmpl': os.path.join(temp_dir, '%(id)s.%(ext)s'),
|
895 |
+
'quiet': True,
|
896 |
+
'max_filesize': 75 * 1024 * 1024,
|
897 |
'overwrites': True, 'noprogress': True, 'noplaylist': True, 'socket_timeout': 20,
|
898 |
+
'merge_output_format': 'mp4', # Encourage mp4 output if merging
|
899 |
+
# Removed 'postprocessors': [{'key': 'FFmpegExtractAudio', ...}]
|
900 |
}
|
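# The format selector above prefers <=480p mp4/webm streams (merged into an
# mp4 when video and audio arrive separately) to keep downloads small, and
# max_filesize caps the download at 75 MB.
|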
901 |
gaia_logger.info(f"Attempting to download video: {video_url}")
|
902 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
903 |
+
info_dict = ydl.extract_info(video_url, download=True)
|
904 |
+
video_file_path = ydl.prepare_filename(info_dict) # Get the final path
|
905 |
+
|
906 |
+
# Check whether the downloaded file is in a video format recognised by OpenCV
|
907 |
+
# Common video extensions that OpenCV usually handles well.
|
908 |
+
# This check is made more robust by also trying to open it.
|
909 |
+
if not video_file_path or not any(video_file_path.lower().endswith(ext) for ext in ['.mp4', '.webm', '.avi', '.mkv', '.mov', '.flv']):
|
910 |
+
gaia_logger.warning(f"Downloaded file '{video_file_path}' might not be a standard video format or download failed to produce one. Will attempt to open.")
|
911 |
+
# Try to find a plausible video file if the main one looks suspicious
|
912 |
+
possible_video_files = [f for f in os.listdir(temp_dir) if f.startswith(info_dict.get('id','')) and any(f.lower().endswith(ext) for ext in ['.mp4', '.webm'])]
|
913 |
+
if possible_video_files:
|
914 |
+
video_file_path = os.path.join(temp_dir, possible_video_files[0])
|
915 |
+
gaia_logger.info(f"Using alternative video file from temp_dir: {video_file_path}")
|
916 |
+
# else: # The cap.isOpened() check below will handle if it's truly unusable
|
917 |
+
# gaia_logger.error(f"No suitable video file found in temp_dir for {info_dict.get('id','')}")
|
918 |
+
# return "Video download resulted in a non-video or unusable file."
|
|
919 |
|
920 |
|
921 |
if not video_file_path or not os.path.exists(video_file_path):
|
|
|
927 |
cap = cv2.VideoCapture(video_file_path)
|
928 |
if not cap.isOpened():
|
929 |
gaia_logger.error(f"Cannot open video file: {video_file_path}")
|
930 |
+
return f"Cannot open video file: {os.path.basename(video_file_path if video_file_path else 'N/A')}"
|
931 |
+
|
932 |
|
933 |
max_simultaneous_species = 0
|
934 |
species_details_for_max_frame = ""
|
935 |
+
|
936 |
total_frames_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
937 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
938 |
+
if not fps or fps <= 0: fps = 25 # Default fps if detection fails or is zero
|
939 |
+
|
940 |
frame_interval = max(1, int(fps)) # Process ~1 frame per second
|
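# e.g. fps = 29.97 -> frame_interval = 29, so frames 0, 29, 58, ... are
# sampled, i.e. roughly one frame per second of video.
|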
941 |
+
|
942 |
frames_analyzed_count = 0
|
943 |
current_frame_num = 0
|
944 |
+
|
945 |
+
gaia_logger.info(f"Video Info: ~{total_frames_video // fps if fps > 0 else total_frames_video:.0f}s, {fps:.2f} FPS. Analyzing ~1 frame/sec up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames.")
|
946 |
|
947 |
while cap.isOpened() and frames_analyzed_count < VIDEO_MAX_FRAMES_TO_PROCESS:
|
948 |
cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_num) # Jump to frame
|
949 |
ret, frame_data = cap.read()
|
950 |
if not ret: break
|
951 |
|
952 |
+
timestamp_sec = current_frame_num / fps if fps > 0 else frames_analyzed_count # Fallback timestamp if fps is bad
|
953 |
gaia_logger.info(f"Processing frame {current_frame_num} (analyzed {frames_analyzed_count+1}/{VIDEO_MAX_FRAMES_TO_PROCESS}) at ~{timestamp_sec:.1f}s")
|
954 |
+
|
955 |
try:
|
956 |
pil_image = Image.fromarray(cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB))
|
957 |
except Exception as e_conv:
|
958 |
gaia_logger.warning(f"Frame {current_frame_num} conversion to PIL failed: {e_conv}")
|
959 |
current_frame_num += frame_interval
|
960 |
continue
|
961 |
+
|
962 |
detected_objects = detector(pil_image)
|
963 |
bird_crops_this_frame = []
|
964 |
for obj in detected_objects:
|
965 |
+
# Check label case-insensitively
|
966 |
+
if obj['label'].lower() == 'bird' and obj['score'] > VIDEO_CONFIDENCE_THRESHOLD_BIRD:
|
967 |
box = obj['box']
|
968 |
xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax']
|
969 |
+
# Ensure box coordinates are valid
|
970 |
if not (0 <= xmin < xmax <= pil_image.width and 0 <= ymin < ymax <= pil_image.height):
|
971 |
gaia_logger.debug(f"Invalid box for bird: {box}, img size: {pil_image.size}")
|
972 |
continue
|
|
|
975 |
except Exception as e_crop:
|
976 |
gaia_logger.warning(f"Cropping bird failed for box {box}: {e_crop}")
|
977 |
|
978 |
+
|
979 |
if not bird_crops_this_frame:
|
980 |
current_frame_num += frame_interval
|
981 |
frames_analyzed_count += 1
|
|
|
986 |
vqa_question = "What is the specific species of this bird?"
|
987 |
|
988 |
for idx, bird_crop_img in enumerate(bird_crops_this_frame):
|
989 |
+
if bird_crop_img.width < 20 or bird_crop_img.height < 20: continue
|
990 |
try:
|
991 |
+
vqa_answer_list = vqa_model(bird_crop_img, question=vqa_question, top_k=1)
|
992 |
+
|
993 |
raw_vqa_answer_text = ""
|
994 |
+
vqa_confidence = VIDEO_VQA_CONFIDENCE_THRESHOLD # Default
|
995 |
|
996 |
if isinstance(vqa_answer_list, list) and vqa_answer_list:
|
997 |
raw_vqa_answer_text = vqa_answer_list[0].get('answer', "")
|
998 |
vqa_confidence = vqa_answer_list[0].get('score', vqa_confidence)
|
999 |
+
elif isinstance(vqa_answer_list, dict):
|
1000 |
raw_vqa_answer_text = vqa_answer_list.get('answer', "")
|
1001 |
vqa_confidence = vqa_answer_list.get('score', vqa_confidence)
|
1002 |
|
1003 |
cleaned_species_name = self._clean_vqa_species_answer(raw_vqa_answer_text)
|
1004 |
+
|
1005 |
if cleaned_species_name and vqa_confidence >= VIDEO_VQA_CONFIDENCE_THRESHOLD :
|
1006 |
frame_species_identified.add(cleaned_species_name)
|
1007 |
current_frame_species_details.append(f"{cleaned_species_name} (VQA conf: {vqa_confidence:.2f})")
|
1008 |
+
elif cleaned_species_name:
|
1009 |
gaia_logger.debug(f"VQA species '{cleaned_species_name}' (raw: '{raw_vqa_answer_text}') for bird {idx} below confidence {VIDEO_VQA_CONFIDENCE_THRESHOLD} (score: {vqa_confidence:.2f})")
|
1010 |
else:
|
1011 |
gaia_logger.debug(f"VQA for bird {idx} resulted in unusable/generic species: '{raw_vqa_answer_text}'")
|
1012 |
|
1013 |
except Exception as e_vqa:
|
1014 |
gaia_logger.warning(f"VQA inference error for bird crop {idx} (frame {current_frame_num}): {e_vqa}")
|
1015 |
+
|
1016 |
if len(frame_species_identified) > max_simultaneous_species:
|
1017 |
max_simultaneous_species = len(frame_species_identified)
|
1018 |
species_details_for_max_frame = f"At ~{timestamp_sec:.1f}s, inferred species: {', '.join(current_frame_species_details) if current_frame_species_details else 'None specific'}"
|
1019 |
+
|
1020 |
if frame_species_identified:
|
1021 |
gaia_logger.info(f"Frame {current_frame_num} (~{timestamp_sec:.1f}s): Found {len(frame_species_identified)} distinct species types: {', '.join(list(frame_species_identified))}")
|
1022 |
|
1023 |
current_frame_num += frame_interval
|
1024 |
frames_analyzed_count += 1
|
1025 |
+
|
1026 |
+
# cap.release() is handled in the finally block below
|
1027 |
+
|
1028 |
context_str = (f"Video analysis result: The highest number of distinct bird species types inferred simultaneously "
|
1029 |
f"in the analyzed portion of the video (up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames) was {max_simultaneous_species}. "
|
1030 |
f"{('Details from a frame with this count: ' + species_details_for_max_frame) if species_details_for_max_frame else 'No specific species details captured for the max count frame or no birds found.'}")
|
|
|
1033 |
|
1034 |
except yt_dlp.utils.DownloadError as e:
|
1035 |
gaia_logger.error(f"yt-dlp download error for {video_url}: {str(e)}")
|
1036 |
+
msg_str = str(e)
|
1037 |
+
clean_msg = msg_str # Default to full message
|
1038 |
+
if "Unsupported URL" in msg_str: clean_msg = "Unsupported video URL."
|
1039 |
+
elif "video unavailable" in msg_str.lower(): clean_msg = "Video is unavailable."
|
1040 |
+
elif "private video" in msg_str.lower(): clean_msg = "Video is private."
|
1041 |
+
elif "age restricted" in msg_str.lower(): clean_msg = "Video is age-restricted and requires login."
|
1042 |
+
elif "Sign in to confirm" in msg_str or "cookies" in msg_str.lower() or "authentication" in msg_str.lower():
|
1043 |
+
clean_msg = "Video download failed due to YouTube restrictions (e.g., sign-in, cookies, or authentication required)."
|
1044 |
+
elif "HTTP Error 403" in msg_str or "Forbidden" in msg_str : clean_msg = "Access to video denied (Forbidden/403)."
|
1045 |
+
elif "HTTP Error 404" in msg_str or "Not Found" in msg_str : clean_msg = "Video not found (404)."
|
1046 |
+
# Keep the message relatively concise for the LLM
|
1047 |
+
return f"Video download failed: {clean_msg[:250] + '...' if len(clean_msg) > 250 else clean_msg}" # Limit length of detailed message
|
1048 |
+
|
1049 |
except Exception as e:
|
1050 |
gaia_logger.error(f"Error during video analysis for {video_url}: {e}", exc_info=True)
|
1051 |
+
return f"An unexpected error occurred during video analysis: {type(e).__name__} - {str(e)[:100]}"
|
1052 |
finally:
|
1053 |
+
if cap and cap.isOpened():
|
1054 |
+
cap.release()
|
1055 |
+
gaia_logger.info("Video capture released.")
|
1056 |
+
if temp_dir_obj:
|
1057 |
+
temp_dir_path_for_log = temp_dir_obj.name # Store before cleanup for logging
|
1058 |
+
try:
|
1059 |
+
temp_dir_obj.cleanup()
|
1060 |
+
gaia_logger.info(f"Successfully cleaned up temp video directory: {temp_dir_path_for_log}")
|
1061 |
+
except Exception as e_cleanup:
|
1062 |
+
gaia_logger.error(f"Error cleaning up temp video directory {temp_dir_path_for_log}: {e_cleanup}", exc_info=True)
|
|
|
1063 |
|
1064 |
|
1065 |
def _parse_llm_output(self, llm_text: str) -> Dict[str, str]:
|
|
|
1066 |
reasoning_trace = ""
|
1067 |
model_answer = ""
|
1068 |
final_answer_sentinel = "FINAL ANSWER:"
|
1069 |
+
|
1070 |
parts = llm_text.split(final_answer_sentinel, 1)
|
1071 |
+
|
1072 |
if len(parts) == 2:
|
1073 |
reasoning_trace = parts[0].strip()
|
1074 |
model_answer = parts[1].strip()
|
1075 |
else:
|
1076 |
+
reasoning_trace = llm_text # If sentinel not found, assume whole output is reasoning
|
1077 |
lines = llm_text.strip().split('\n')
|
1078 |
+
# Try to take the last non-empty line as answer, or a default if all reasoning
|
1079 |
+
model_answer = "Could not parse answer" # Default if no clear answer found
|
1080 |
+
for line in reversed(lines):
|
1081 |
+
if line.strip():
|
1082 |
+
model_answer = line.strip()
|
1083 |
+
break
|
1084 |
gaia_logger.warning(f"LLM output did not contain '{final_answer_sentinel}'. Using fallback parsing. Full LLM text: '{llm_text[:200]}...'")
|
1085 |
|
1086 |
return {"model_answer": model_answer, "reasoning_trace": reasoning_trace}
|
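# Example (illustrative): given the LLM output
#   "The document states X.\nFINAL ANSWER: Paris"
# this returns {"model_answer": "Paris",
#               "reasoning_trace": "The document states X."}
|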
1087 |
|
1088 |
def _formulate_answer_with_llm(self, question: str, file_context: Optional[str], web_context: Optional[str]) -> Dict[str, str]:
|
|
|
1089 |
default_model_answer = "Information not available in provided context"
|
1090 |
default_reasoning = "LLM processing failed or context insufficient."
|
1091 |
|
1092 |
+
if not self.llm_model or not genai or not GenerationConfig or not FinishReason or not HarmCategory or not HarmBlockThreshold: # Added more checks
|
1093 |
+
gaia_logger.warning("LLM model (Gemini) or necessary enums/configs not available for answer formulation.")
|
1094 |
+
reasoning = "LLM model (Gemini) or its configuration components not available for answer formulation."
|
1095 |
answer_val = default_model_answer
|
1096 |
+
# Provide some context indication even if LLM is down
|
1097 |
if web_context and file_context:
|
1098 |
reasoning += " Context from file and web was found but not processed by LLM."
|
1099 |
+
elif web_context:
|
1100 |
reasoning += f" External context found: {web_context.splitlines()[0] if web_context.splitlines() else 'No specific snippet found.'}"
|
1101 |
elif file_context:
|
1102 |
reasoning += f" File context found: {file_context[:100]}..."
|
|
|
1104 |
reasoning += " No context found."
|
1105 |
return {"model_answer": answer_val, "reasoning_trace": reasoning}
|
1106 |
|
1107 |
+
|
1108 |
prompt_parts = [
|
1109 |
+
"You are a general AI assistant. Your primary goal is to answer the user's question accurately and concisely based *only* on the provided context (from a document, web search results, or video analysis).",
|
1110 |
"If the context comes from 'Video analysis result', understand that 'species types inferred' means the video was analyzed by an AI to identify birds and infer their species using visual question answering. The count refers to the maximum number of *distinct types* of birds identified in this way in any single analyzed video frame.",
|
1111 |
"First, think step-by-step and briefly explain your reasoning based on the context. This part is for clarity and should come before your final answer.",
|
1112 |
"After your reasoning, you MUST conclude your response with the exact phrase 'FINAL ANSWER:', followed by your answer on the same line or the next.",
|
|
|
1115 |
" - If the answer is a string: use as few words as possible. Do not use articles (a, an, the) unless grammatically essential. Do not use abbreviations (e.g., write 'United States' not 'USA', 'Los Angeles' not 'LA') unless the question implies an abbreviation or it's a very common, universally understood one relevant to the context. Write digits in plain text (e.g., 'two' not '2') if they are part of a descriptive phrase, but use numerical digits if the question implies a code, identifier, version number, or a direct numerical value is more natural (e.g., 'Windows 10', 'part number 5').",
|
1116 |
" - If the answer is a list of items: provide them as a comma-separated list (e.g., item1, item2, item3). Apply the number or string rules above to each element in the list.",
|
1117 |
" - If the context is insufficient to answer the question: your reasoning should clearly state this, and your FINAL ANSWER should be 'Information not available in provided context'. Do not invent answers.",
|
1118 |
+
"Prioritize information from 'Enriched Content' from web search results if available and relevant over shorter 'Snippets'. Information from 'Video Analysis Context' is highly specific to video-related questions.",
|
1119 |
"\nUser Question: ", question
|
1120 |
]
|
1121 |
|
|
|
1124 |
if file_context:
|
1125 |
file_header = "\n\nContext from Provided Document:\n---"
|
1126 |
file_footer = "\n---"
|
|
|
1127 |
len_web_ctx = len(web_context) if web_context else 0
|
1128 |
+
max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer) - 500 # Buffer
|
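# Worked example (figures illustrative): with MAX_CONTEXT_LENGTH_LLM = 30000,
# ~2000 chars of prompt so far and a 10000-char web context, roughly
# 30000 - 2000 - 10000 - header/footer - 500 buffer, i.e. about 17.5k chars,
# remain for the file context.
|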
1129 |
+
|
1130 |
+
if max_len_for_file > 100 :
|
1131 |
truncated_file_context = file_context[:max_len_for_file]
|
1132 |
if len(file_context) > len(truncated_file_context):
|
1133 |
truncated_file_context += " ... (file context truncated)"
|
1134 |
prompt_parts.extend([file_header, truncated_file_context, file_footer])
|
1135 |
current_prompt_text_len += len(file_header) + len(truncated_file_context) + len(file_footer)
|
1136 |
context_added = True
|
1137 |
+
else: gaia_logger.warning(f"Not enough space for file context in LLM prompt. Available after other parts: {MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer)}")
|
1138 |
|
1139 |
|
1140 |
+
if web_context:
|
|
|
1141 |
header_text = "\n\nContext from External Sources (Web/Video):\n---"
|
1142 |
+
if "Video analysis result:" in web_context and "Source [" not in web_context: # Only video
|
1143 |
header_text = "\n\nContext from Video Analysis:\n---"
|
1144 |
+
elif "Source [" in web_context and "Video analysis result:" not in web_context: # Only web
|
1145 |
header_text = "\n\nContext from Web Search Results:\n---"
|
1146 |
+
# If both, the generic "External Sources" is fine.
|
1147 |
+
|
1148 |
web_footer = "\n---"
|
1149 |
+
available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(header_text) - len(web_footer) - 300
|
1150 |
|
1151 |
+
if available_len_for_web > 100:
|
1152 |
truncated_web_context = web_context
|
1153 |
if len(web_context) > available_len_for_web:
|
1154 |
truncated_web_context = web_context[:available_len_for_web] + "\n... (external context truncated)"
|
1155 |
gaia_logger.info(f"Truncated external (web/video) context from {len(web_context)} to {len(truncated_web_context)} chars for LLM.")
|
1156 |
prompt_parts.extend([header_text, truncated_web_context, web_footer])
|
1157 |
+
context_added = True
|
1158 |
+
else: gaia_logger.warning(f"Not enough space for web/video context in LLM prompt. Available: {MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(header_text) - len(web_footer)}")
|
1159 |
|
1160 |
if not context_added: prompt_parts.append("\n\nNo document, web, or video context could be provided due to length constraints or availability.")
|
1161 |
+
prompt_parts.append("\n\nReasoning and Final Answer:")
|
1162 |
final_prompt = "\n".join(prompt_parts)
|
1163 |
+
|
1164 |
gaia_logger.info(f"LLM Prompt (first 300 chars): {final_prompt[:300]}...")
|
1165 |
gaia_logger.info(f"LLM Total prompt length: {len(final_prompt)} chars.")
|
1166 |
|
1167 |
|
1168 |
try:
|
1169 |
+
gen_config = GenerationConfig(temperature=0.1, top_p=0.95, max_output_tokens=1024)
|
1170 |
+
safety_settings = [
|
1171 |
+
{"category": HarmCategory.HARM_CATEGORY_HARASSMENT, "threshold": HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
|
1172 |
+
{"category": HarmCategory.HARM_CATEGORY_HATE_SPEECH, "threshold": HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
|
1173 |
+
{"category": HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, "threshold": HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
|
1174 |
+
{"category": HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, "threshold": HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE},
|
1175 |
+
]
|
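# BLOCK_MEDIUM_AND_ABOVE blocks content rated MEDIUM or HIGH probability of
# harm in each category; NEGLIGIBLE and LOW probability content passes.
|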
1176 |
+
response = self.llm_model.generate_content(final_prompt, generation_config=gen_config, safety_settings=safety_settings)
|
1177 |
+
|
1178 |
+
if hasattr(response, 'prompt_feedback') and response.prompt_feedback.block_reason:
|
1179 |
+
reason_text = response.prompt_feedback.block_reason.name
|
1180 |
+
block_details = "; ".join([f"{sr.category.name}: {sr.probability.name}" for sr in response.prompt_feedback.safety_ratings if hasattr(sr, 'blocked') and sr.blocked])
|
1181 |
+
gaia_logger.warning(f"Gemini prompt blocked. Reason: {reason_text}. Details: {block_details}")
|
1182 |
+
return {"model_answer": "LLM Error: Prompt blocked", "reasoning_trace": f"My input was blocked by the LLM provider (Reason: {reason_text}). Details: {block_details}"}
|
1183 |
+
|
1184 |
+
if not response.candidates:
|
1185 |
+
gaia_logger.warning("Gemini response has no candidates.")
|
1186 |
+
return {"model_answer": "LLM Error: No response", "reasoning_trace": "LLM did not provide any response candidates."}
|
1187 |
+
|
1188 |
+
candidate = response.candidates[0]
|
1189 |
+
# Check candidate's finish_reason
|
1190 |
+
if candidate.finish_reason != FinishReason.STOP:
|
1191 |
+
reason_name = candidate.finish_reason.name if hasattr(candidate.finish_reason, 'name') else str(candidate.finish_reason)
|
1192 |
+
safety_ratings_str = ""
|
1193 |
+
if candidate.safety_ratings: # Check if safety_ratings exist
|
1194 |
+
relevant_ratings = [
|
1195 |
+
f"{sr.category.name.split('_')[-1] if hasattr(sr.category, 'name') else 'CAT?'}: {(sr.probability.name if hasattr(sr.probability, 'name') else 'PROB?')}"
|
1196 |
+
for sr in candidate.safety_ratings if (hasattr(sr,'blocked') and sr.blocked) or (hasattr(sr,'probability') and HarmProbability and sr.probability.value >= HarmProbability.MEDIUM.value)
|
1197 |
+
]
|
1198 |
+
if relevant_ratings: safety_ratings_str = "; ".join(relevant_ratings)
|
1199 |
+
|
1200 |
+
gaia_logger.warning(f"Gemini candidate did not finish successfully. Reason: {reason_name}. Safety Ratings: {safety_ratings_str if safety_ratings_str else 'N/A'}")
|
1201 |
+
|
1202 |
+
user_message = "LLM Error: Response incomplete"
|
1203 |
+
if candidate.finish_reason == FinishReason.SAFETY: user_message = "LLM Error: Response blocked for safety"
|
1204 |
+
elif candidate.finish_reason == FinishReason.MAX_TOKENS: user_message = "LLM Error: Response truncated (max tokens)"
|
1205 |
+
elif candidate.finish_reason == FinishReason.RECITATION: user_message = "LLM Error: Response blocked (recitation)"
|
1206 |
+
|
1207 |
+
return {
|
1208 |
+
"model_answer": user_message,
|
1209 |
+
"reasoning_trace": f"LLM generation stopped. Reason: {reason_name}. " + (f"Details: {safety_ratings_str}" if safety_ratings_str else "")
|
1210 |
+
}
|
1211 |
+
|
1212 |
+
llm_answer_text = response.text # Safe to access now
|
1213 |
gaia_logger.info(f"LLM Raw Full Answer (first 200 chars): {llm_answer_text[:200]}...")
|
1214 |
return self._parse_llm_output(llm_answer_text)
|
1215 |
+
|
1216 |
+
except ValueError as ve:
|
1217 |
+
if "finish_reason" in str(ve).lower() and ("part" in str(ve).lower() or "candidate" in str(ve).lower()):
|
1218 |
+
gaia_logger.error(f"ValueError accessing Gemini response.text, likely due to non-STOP finish_reason not caught explicitly: {ve}", exc_info=False) # exc_info=False as it's handled
|
1219 |
+
fr_from_ex = "Unknown (from ValueError)"
|
1220 |
+
match_fr = re.search(r"finish_reason.*?is\s*(\w+)", str(ve), re.IGNORECASE) # Try to get name or number
|
1221 |
+
if match_fr: fr_from_ex = match_fr.group(1)
|
1222 |
+
return {"model_answer": "LLM Error: Invalid response state",
|
1223 |
+
"reasoning_trace": f"Could not parse LLM response. Finish reason possibly {fr_from_ex}. Details: {str(ve)[:150]}"}
|
1224 |
+
else: # Other ValueErrors
|
1225 |
+
gaia_logger.error(f"ValueError during Gemini call or processing: {ve}", exc_info=True)
|
1226 |
+
return {"model_answer": "LLM Error: Value error", "reasoning_trace": f"A value error occurred: {str(ve)}"}
|
1227 |
except Exception as e:
|
1228 |
gaia_logger.error(f"Error calling Gemini API: {e}", exc_info=True)
|
1229 |
error_type_name = type(e).__name__
|
1230 |
error_message = str(e)
|
1231 |
reasoning = f"Error calling Gemini API: {error_type_name} - {error_message}"
|
1232 |
answer_val = "LLM API error"
|
1233 |
+
|
1234 |
if "API key" in error_message.lower() and ("invalid" in error_message.lower() or "not valid" in error_message.lower()):
|
1235 |
answer_val = "LLM Auth Error"
|
1236 |
reasoning = "LLM API key is invalid or not authorized."
|
|
|
1240 |
elif "InternalServerError" in error_type_name or "500" in error_message :
|
1241 |
answer_val = "LLM server error"
|
1242 |
reasoning = "Error: LLM experienced an internal server error."
|
1243 |
+
# Add specific handling for google.api_core.exceptions.ServiceUnavailable (503) if it occurs
|
1244 |
+
elif "ServiceUnavailable" in error_type_name or "503" in error_message:
|
1245 |
+
answer_val = "LLM service unavailable"
|
1246 |
+
reasoning = "Error: LLM service is temporarily unavailable (503)."
|
1247 |
+
|
1248 |
|
1249 |
return {"model_answer": answer_val, "reasoning_trace": reasoning}
|
1250 |
|
1251 |
def __call__(self, question: str, task_id: Optional[str] = None) -> Dict[str, str]:
|
1252 |
gaia_logger.info(f"Agent processing: '{question[:70]}...', TaskID: {task_id}")
|
1253 |
q_lower = question.lower().strip()
|
1254 |
+
|
1255 |
video_context_str: Optional[str] = None
|
1256 |
+
# Regex for YouTube URLs (watch, shorts, and youtu.be forms)
|
1257 |
+
video_url_match = re.search(r"(https?://(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/)|youtu\.be/)[\w\-=&%]+)", question)
|
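# Matches, for example (URLs illustrative): https://www.youtube.com/watch?v=abc123,
# https://youtube.com/shorts/abc123 and https://youtu.be/abc123
|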
1258 |
|
1259 |
|
1260 |
+
video_keywords = ["video", "youtube.com", "youtu.be", "clip", "recording"] # Broader keywords
|
1261 |
+
species_keywords = ["species", "bird", "birds", "type of bird", "kinds of bird", "different birds"]
|
1262 |
+
action_keywords = ["count", "how many", "number of", "simultaneously", "at the same time", "on camera", "identify", "list"]
|
|
|
1263 |
|
1264 |
+
# Trigger video analysis if a URL is found AND relevant keywords are present
|
1265 |
if video_url_match and \
|
1266 |
+
any(vk in q_lower for vk in video_keywords) and \
|
1267 |
any(sk in q_lower for sk in species_keywords) and \
|
1268 |
any(ak in q_lower for ak in action_keywords):
|
1269 |
video_url = video_url_match.group(0)
|
|
|
1277 |
return {"model_answer": "general AI assistant", "reasoning_trace": "User asked for my identity."}
|
1278 |
|
1279 |
file_ctx_str: Optional[str] = None
|
1280 |
+
file_indicators = ["document", "file", "text provided", "attachment", "content of the file", "data in the file", "excel sheet", ".pdf", ".csv", ".txt", "audio file", "code snippet", "log file", "spreadsheet"]
|
1281 |
+
if task_id and (any(fi in q_lower for fi in file_indicators) or "this task involves a file" in q_lower or "the provided" in q_lower or "attached" in q_lower):
|
1282 |
file_ctx_str = self._fetch_and_process_file_content(task_id)
|
1283 |
if file_ctx_str: gaia_logger.info(f"Processed file context ({len(file_ctx_str)} chars) for task {task_id}")
|
1284 |
else: gaia_logger.warning(f"No file content or failed to process for task {task_id}")
|
1285 |
+
|
1286 |
+
web_rag_ctx_str: Optional[str] = None
|
1287 |
needs_web_rag = True
|
1288 |
+
|
1289 |
+
# Logic to decide if RAG web search is needed
|
1290 |
+
if video_context_str:
|
1291 |
+
# If video analysis seems to directly answer a counting/identification question from video
|
1292 |
+
if "Video analysis result:" in video_context_str and not "download failed" in video_context_str.lower() and not "skipped" in video_context_str.lower():
|
1293 |
+
if (("count" in q_lower or "how many" in q_lower or "number of" in q_lower) and ("simultaneously" in q_lower or "at the same time" in q_lower or "distinct" in q_lower)) and any(sk_q in q_lower for sk_q in species_keywords):
|
1294 |
+
needs_web_rag = False # Video analysis likely sufficient
|
1295 |
+
gaia_logger.info("Video context seems primary for a specific video counting question; web RAG may be skipped.")
|
1296 |
+
|
1297 |
+
|
1298 |
+
if file_ctx_str and len(file_ctx_str) > 100 and not video_context_str: # Only consider file if no video context
|
1299 |
+
# Keywords suggesting the answer is likely within the document
|
1300 |
+
doc_can_answer_kws = ["summarize", "according to the document", "in the provided text", "based on the file content", "from this file", "in this data"]
|
1301 |
+
# Keywords suggesting external info is needed despite file
|
1302 |
+
web_still_needed_kws = ["what is the current", "latest news on", "public opinion of", "search for more about", "compare this to", "what happened after"]
|
1303 |
+
|
|
|
1304 |
if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
|
1305 |
needs_web_rag = False
|
1306 |
+
gaia_logger.info("File context seems primary; web RAG may be skipped.")
|
1307 |
+
# Weaker heuristic: a statement or a simple file query that doesn't ask for external comparisons/updates can usually be answered from the file alone
|
1308 |
+
elif not any(kw in q_lower for kw in web_still_needed_kws) and not question.strip().endswith("?"):
|
1309 |
+
if not any(qk in q_lower for qk in ["why is", "how does", "explain the impact of", "what if"]): # Questions often needing broader context
|
1310 |
needs_web_rag = False
|
1311 |
+
gaia_logger.info("File context seems sufficient for non-complex query; web RAG may be skipped.")
|
1312 |
+
|
1313 |
|
|
|
1314 |
if "don't search" in q_lower or "do not search" in q_lower or "without searching" in q_lower or "without using the internet" in q_lower:
|
1315 |
needs_web_rag = False
|
1316 |
gaia_logger.info("Web RAG explicitly disabled by user query.")
|
1317 |
+
|
1318 |
if needs_web_rag:
|
1319 |
+
search_q = question.replace("?", "").strip()
|
1320 |
+
# If video context failed, the question might still be about the video's topic, so RAG is useful.
|
1321 |
+
# If file context is present but RAG is still needed, LLM will have to reconcile.
|
1322 |
+
rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
|
1323 |
if rag_res:
|
1324 |
snippets = []
|
1325 |
for i, res_item in enumerate(rag_res):
|
1326 |
title = res_item.get('title','N/A')
|
1327 |
body = res_item.get('body','')
|
1328 |
href = res_item.get('href','#')
|
1329 |
+
provider_info = res_item.get('query_tag','WebSearch') # Can be refined if RAG provides more source details
|
1330 |
+
source_type = "EnrichedContent" if res_item.get('enriched') else "Snippet"
|
1331 |
+
body_preview = (body[:1500] + "...") if len(body) > 1500 else body
|
1332 |
+
snippets.append(f"Source [{i+1} - {provider_info}]: {title}\nURL: {href}\n{source_type}: {body_preview}\n---")
|
1333 |
web_rag_ctx_str = "\n\n".join(snippets)
|
1334 |
if web_rag_ctx_str: gaia_logger.info(f"RAG pipeline yielded web results ({len(web_rag_ctx_str)} chars).")
|
1335 |
else: gaia_logger.warning("RAG pipeline yielded no web results for the query.")
|
1336 |
+
|
|
|
1337 |
final_llm_external_context_parts = []
|
1338 |
if video_context_str:
|
1339 |
+
final_llm_external_context_parts.append(f"{video_context_str}") # Header already in video_context_str
|
1340 |
if web_rag_ctx_str:
|
1341 |
+
# No separate header needed if video_context_str already has "Video Analysis Context:"
|
1342 |
+
# and web_rag_ctx_str is structured with "Source [n]:"
|
1343 |
+
final_llm_external_context_parts.append(f"{web_rag_ctx_str}")
|
1344 |
+
|
1345 |
+
final_llm_external_context = "\n\n---\n\n".join(final_llm_external_context_parts).strip() if final_llm_external_context_parts else None
|
1346 |
+
|
1347 |
agent_response_dict = self._formulate_answer_with_llm(question, file_ctx_str, final_llm_external_context)
|
1348 |
gaia_logger.info(f"LLM-based model_answer (first 70 chars): {agent_response_dict.get('model_answer', '')[:70]}...")
|
1349 |
return agent_response_dict
|
|
|
1350 |
|
1351 |
+
|
1352 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
1353 |
space_id = os.getenv("SPACE_ID")
|
1354 |
if profile: username = f"{profile.username}"
|
1355 |
else: return "Please Login to Hugging Face.", None
|
|
|
1362 |
questions_data = response.json()
|
1363 |
if not questions_data or not isinstance(questions_data, list): return "Questions list empty/invalid.", None
|
1364 |
except Exception as e: return f"Error fetching questions: {e}", None
|
1365 |
+
|
1366 |
results_log_for_gradio, answers_for_api_submission = [], []
|
1367 |
+
# Use a more conservative default RPM if not set, matching common free-tier limits.
|
1368 |
+
GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "10")) # Default to 10 RPM if not set, as per common free tier
|
1369 |
+
# Add a small buffer to sleep time
|
1370 |
+
sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
|
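# e.g. GEMINI_RPM_LIMIT = 10 -> sleep_llm = 60/10 + 0.5 = 6.5s between calls,
# keeping the run comfortably under the configured requests-per-minute quota.
|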
1371 |
+
gaia_logger.info(f"Using Gemini RPM limit: {GEMINI_RPM_LIMIT}, LLM call sleep: {sleep_llm:.2f}s")
|
1372 |
+
|
1373 |
+
|
1374 |
for i, item in enumerate(questions_data):
|
1375 |
task_id, q_text = item.get("task_id"), item.get("question")
|
1376 |
model_answer_val = "AGENT ERROR"
|
|
|
1382 |
results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace": reasoning_trace_val})
|
1383 |
answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
|
1384 |
continue
|
1385 |
+
|
1386 |
gaia_logger.info(f"Q {i+1}/{len(questions_data)} - Task: {task_id}")
|
1387 |
try:
|
1388 |
agent_response_dict = agent(question=q_text, task_id=task_id)
|
|
|
1392 |
gaia_logger.error(f"Error during agent call for task {task_id}: {e}", exc_info=True)
|
1393 |
model_answer_val = "AGENT EXECUTION ERROR"
|
1394 |
reasoning_trace_val = f"Agent call failed: {type(e).__name__} - {str(e)}"
|
1395 |
+
|
1396 |
answers_for_api_submission.append({"task_id": task_id, "submitted_answer": model_answer_val})
|
1397 |
results_log_for_gradio.append({"Task ID": task_id, "Question": q_text, "Submitted Answer": model_answer_val, "Reasoning Trace (first 500 chars)": reasoning_trace_val[:500] + ("..." if len(reasoning_trace_val) > 500 else "")})
|
1398 |
+
|
1399 |
if i < len(questions_data) - 1: time.sleep(sleep_llm)
|
1400 |
+
|
1401 |
if not answers_for_api_submission: return "Agent produced no answers for API submission.", pd.DataFrame(results_log_for_gradio or [{"Info": "No questions processed"}])
|
1402 |
+
|
1403 |
submission_payload_for_api = {
|
1404 |
+
"username": username.strip(),
|
1405 |
+
"agent_code": agent_code,
|
1406 |
+
"answers": answers_for_api_submission
|
1407 |
}
|
1408 |
gaia_logger.info(f"Submitting {len(answers_for_api_submission)} answers for '{username}' to API...")
|
1409 |
gaia_logger.debug(f"API Submission Payload Sample: {json.dumps(submission_payload_for_api)[:500]}")
|
1410 |
|
1411 |
try:
|
1412 |
+
response = requests.post(submit_url, json=submission_payload_for_api, timeout=60)
|
1413 |
response.raise_for_status()
|
1414 |
result_data = response.json()
|
1415 |
status = (f"Submission Successful!\nUser: {result_data.get('username')}\nScore: {result_data.get('score','N/A')}% "
|
|
|
1421 |
return f"Submission Failed: {err_detail}", pd.DataFrame(results_log_for_gradio)
|
1422 |
except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)
|
1423 |
|
1424 |
+
with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
|
1425 |
+
gr.Markdown("# GAIA Level 1 Agent") # Updated Title
|
1426 |
gr.Markdown(
|
1427 |
"""
|
1428 |
**Instructions:**
|
1429 |
+
1. Ensure you are logged in via the Hugging Face Login button below.
|
1430 |
+
2. Click 'Run Evaluation & Submit All Answers' to process all questions from the GAIA benchmark and submit them.
|
1431 |
---
|
1432 |
+
This agent utilizes Retrieval-Augmented Generation (RAG) with multiple search providers, advanced file processing (CSV, JSON, Excel, PDF, Audio Transcription), and experimental video analysis capabilities (bird species identification/counting in YouTube videos) via Hugging Face Transformers. Answers are formulated by a Large Language Model (Google Gemini).
|
1433 |
"""
|
1434 |
)
|
1435 |
gr.LoginButton()
|
1436 |
run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
|
1437 |
status_output = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
|
1438 |
+
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, max_rows=20, height=500) # Added max_rows and height
|
1439 |
run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])
|
1440 |
|
1441 |
+
if __name__ == "__main__":
|
1442 |
+
print("\n" + "-"*30 + " GAIA Level 1 Agent - RAG, FileProc, Video Analysis " + "-"*30)
|
1443 |
required_env = {
|
1444 |
+
"GOOGLE_GEMINI_API_KEY": GOOGLE_GEMINI_API_KEY,
|
1445 |
+
"GOOGLE_API_KEY": GOOGLE_CUSTOM_SEARCH_API_KEY,
|
1446 |
+
"GOOGLE_CSE_ID": GOOGLE_CUSTOM_SEARCH_CSE_ID,
|
1447 |
"TAVILY_API_KEY": TAVILY_API_KEY,
|
1448 |
}
|
1449 |
missing_keys = [k for k, v in required_env.items() if not v]
|
1450 |
for k, v in required_env.items(): print(f"✅ {k} found." if v else f"⚠️ WARNING: {k} not set.")
|
1451 |
+
|
|
|
1452 |
libraries_to_check = [
|
1453 |
+
("transformers", hf_transformers_pipeline), ("torch", torch),
|
1454 |
("librosa", librosa), ("openpyxl", openpyxl), ("pdfplumber", pdfplumber),
|
1455 |
("yt_dlp", yt_dlp), ("cv2 (opencv-python)", cv2), ("BeautifulSoup", BeautifulSoup),
|
1456 |
("duckduckgo_search", DDGS), ("googleapiclient", build_google_search_service),
|
|
|
1461 |
|
1462 |
if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")
|
1463 |
else: print("\n--- All major API Key Environment Variables found. ---")
|
1464 |
+
|
1465 |
+
# Log the Gemini RPM limit being used
|
1466 |
+
gemini_rpm = os.getenv("GEMINI_RPM_LIMIT", "10 (defaulted)")
|
1467 |
+
print(f"--- Using GEMINI_RPM_LIMIT: {gemini_rpm} (Ensure this matches your Gemini API plan limits) ---")
|
1468 |
+
|
1469 |
+
|
1470 |
print("-"*(60 + len(" GAIA Level 1 Agent - RAG, FileProc, Video Analysis ")) + "\n")
|
1471 |
demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)
|