ChillThrills committed on
Commit 521480c · 1 Parent(s): 1098883

gemini-2.5-flash-preview-05-20

Files changed (1)
  1. app.py +59 -108
app.py CHANGED
@@ -15,7 +15,7 @@ from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from concurrent.futures import TimeoutError as FuturesTimeoutError
 from collections import defaultdict
-import tempfile # Added for robust temporary directory management
+import tempfile

 try:
     import google.generativeai as genai
@@ -23,10 +23,10 @@ try:
 except ImportError:
     genai = None
     GenerationConfig = None
-    HarmCategory = None # Added for safety settings/finish reason details
-    HarmBlockThreshold = None # Added for safety settings
-    FinishReason = None # Added for checking candidate finish reason
-    HarmProbability = None # Added for checking safety ratings probability
+    HarmCategory = None
+    HarmBlockThreshold = None
+    FinishReason = None
+    HarmProbability = None
     print("WARNING: google-generativeai library not found. Install with: pip install google-generativeai")

 try:
@@ -167,9 +167,8 @@ def _get_video_object_detector():
     global video_object_detector_pipeline, VIDEO_ANALYSIS_DEVICE
     if video_object_detector_pipeline is None and hf_transformers_pipeline and torch:
         try:
-            # Simplified device selection, consistent with FileProcessor's ASR
             device_id = 0 if torch.cuda.is_available() else -1
-            if VIDEO_ANALYSIS_DEVICE == -1 : VIDEO_ANALYSIS_DEVICE = device_id # Set global if not user-overridden
+            if VIDEO_ANALYSIS_DEVICE == -1 : VIDEO_ANALYSIS_DEVICE = device_id

             target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id

@@ -184,7 +183,7 @@ def _get_video_object_detector():
             return None
     return video_object_detector_pipeline

-def _get_video_vqa_pipeline(): # Renamed and changed to load VQA
+def _get_video_vqa_pipeline():
     global video_vqa_pipeline, VIDEO_ANALYSIS_DEVICE
     if video_vqa_pipeline is None and hf_transformers_pipeline and torch:
         try:
@@ -194,8 +193,8 @@ def _get_video_vqa_pipeline(): # Renamed and changed to load VQA
             target_device = VIDEO_ANALYSIS_DEVICE if VIDEO_ANALYSIS_DEVICE != -1 else device_id

             video_vqa_pipeline = hf_transformers_pipeline(
-                "visual-question-answering", # Task is VQA
-                model=VIDEO_ANALYSIS_VQA_MODEL, # VQA model
+                "visual-question-answering",
+                model=VIDEO_ANALYSIS_VQA_MODEL,
                 device=target_device
             )
             gaia_logger.info(f"Video VQA pipeline ('{VIDEO_ANALYSIS_VQA_MODEL}') initialized on {'cuda' if target_device==0 else 'cpu'}.")
@@ -371,7 +370,7 @@ class FileProcessor:
         if not df_list_for_fallback and xls:
             for sheet_name in xls.sheet_names:
                 df_list_for_fallback.append((sheet_name, xls.parse(sheet_name)))
-        elif not xls and not df_list_for_fallback: # Ensure df_list_for_fallback is populated if xls parsing failed early
+        elif not xls and not df_list_for_fallback:
             temp_xls = pd.ExcelFile(io.BytesIO(content), engine='openpyxl')
             for sheet_name in temp_xls.sheet_names:
                 df_list_for_fallback.append((sheet_name, temp_xls.parse(sheet_name)))
@@ -406,7 +405,7 @@
             page_text = page.extract_text()
             if page_text:
                 text_content += page_text + "\n"
-                if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2: # Check slightly over to allow truncation logic to handle it
+                if len(text_content) > MAX_FILE_CONTEXT_LENGTH * 1.2:
                     break
         if not text_content:
             return f"PDF Document: '{filename}'. No text could be extracted or PDF is empty."
@@ -495,13 +494,13 @@ class CacheManager:
             self.delete(key)
             return None
     def set(self, key: Any, value: Any):
-        if key in self._cache: self.delete(key) # Remove to update timestamp and order
+        if key in self._cache: self.delete(key)
         while len(self._cache) >= self.max_size and self._access_order:
             old_key = self._access_order.pop(0)
-            if old_key in self._cache: # Should always be true
+            if old_key in self._cache:
                 del self._cache[old_key]; del self._timestamps[old_key]
         try: self._cache[key] = copy.deepcopy(value)
-        except TypeError: self._cache[key] = value # For non-deep-copyable items
+        except TypeError: self._cache[key] = value
         self._timestamps[key] = time.time(); self._access_order.append(key)
     def delete(self, key: Any):
         if key in self._cache:
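set() deliberately re-inserts an existing key so its timestamp and eviction position refresh; a small self-contained sketch of the same LRU-with-TTL idea (class name and sizes are illustrative, not the app's CacheManager):

    import time

    class TinyLruTtlCache:
        def __init__(self, max_size=2, ttl=60.0):
            self.max_size, self.ttl = max_size, ttl
            self._data, self._stamps, self._order = {}, {}, []

        def set(self, key, value):
            if key in self._data:          # re-insert to refresh recency
                self._order.remove(key)
            while len(self._data) >= self.max_size and self._order:
                old = self._order.pop(0)   # evict the oldest entry first
                self._data.pop(old, None); self._stamps.pop(old, None)
            self._data[key] = value
            self._stamps[key] = time.time()
            self._order.append(key)

        def get(self, key):
            if key in self._data and time.time() - self._stamps[key] < self.ttl:
                return self._data[key]
            return None

    cache = TinyLruTtlCache(max_size=2)
    cache.set("a", 1); cache.set("b", 2); cache.set("c", 3)  # "a" is evicted
    print(cache.get("a"), cache.get("c"))  # None 3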
@@ -741,13 +740,13 @@ class GeneralRAGPipeline:
         max_r_pq = cfg_search.get('default_max_results', 3)
         cache_key = (q, max_r_pq, total_lim, enrich_en, enrich_cnt)
         if not force_refresh and (cached := self.pipeline_cache.get(cache_key)) is not None: return cached
-        if force_refresh: self.search_client.cache.clear(); # Clears underlying search client cache
-        if self.enricher and force_refresh: self.enricher.cache.clear() # Clear enricher cache if force_refresh
+        if force_refresh: self.search_client.cache.clear();
+        if self.enricher and force_refresh: self.enricher.cache.clear()
         all_res, res_proc = [], ResultProcessor(self.config)
         staged_qs = GaiaQueryBuilder(q, self.config).get_queries()
         for stage, qs_in_stage in staged_qs.items():
             for query_s, cat in qs_in_stage:
-                if len(all_res) >= total_lim * 2: break # Fetch more initially to allow for better selection
+                if len(all_res) >= total_lim * 2: break
                 s_res = self.search_client.search(query_s, max_results=max_r_pq, force_refresh=force_refresh)
                 all_res.extend(res_proc.process_batch(s_res or [], query_s, initial_cat=cat))
         all_res.sort(key=lambda x: x.get('combined_score', 0), reverse=True)
@@ -769,20 +768,11 @@ class GaiaLevel1Agent:
             try:
                 genai.configure(api_key=GOOGLE_GEMINI_API_KEY)
                 model_name = 'gemini-2.5-flash-preview-05-20'
-
-
                 self.llm_model = genai.GenerativeModel(model_name)
                 gaia_logger.info(f"Gemini LLM ('{model_name}') initialized.")
             except Exception as e:
-                gaia_logger.error(f"Error initializing Gemini LLM: {e}", exc_info=True)
-                # Attempt fallback if specific model fails (e.g. not available in region, or name typo)
-                try:
-                    gaia_logger.info("Attempting fallback to 'gemini-1.0-pro' for LLM.")
-                    self.llm_model = genai.GenerativeModel('gemini-1.0-pro') # A common, generally available model
-                    gaia_logger.info("Gemini LLM ('gemini-1.0-pro') initialized as fallback.")
-                except Exception as e_fallback:
-                    gaia_logger.error(f"Fallback LLM initialization also failed: {e_fallback}", exc_info=True)
-
+                gaia_logger.error(f"Error initializing Gemini LLM ('{model_name}'): {e}", exc_info=True)
+                # No fallback, as per user instruction.
         else:
             gaia_logger.warning("Gemini LLM dependencies or API key missing.")

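With the fallback gone, a failed model load now simply leaves the LLM handle unset. For orientation, a minimal sketch of the surviving init-and-call flow with google-generativeai (prompt and key handling illustrative):

    import os
    import google.generativeai as genai

    genai.configure(api_key=os.environ["GOOGLE_GEMINI_API_KEY"])
    model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
    response = model.generate_content("Reply with the single word: ready")
    print(response.text)  # only safe when generation finished normally; see the finish_reason hunk below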
@@ -798,12 +788,12 @@
     def _fetch_and_process_file_content(self, task_id: str) -> Optional[str]:

         file_url = f"{self.api_url}/files/{task_id}"
-        for attempt in range(2): # Retry once
+        for attempt in range(2):
             try:
                 response = requests.get(file_url, timeout=AGENT_DEFAULT_TIMEOUT)
                 response.raise_for_status()

-                filename = FileProcessor._get_filename_from_url(response.url) # Fallback from URL
+                filename = FileProcessor._get_filename_from_url(response.url)
                 content_disposition = response.headers.get('Content-Disposition')
                 if content_disposition:
                     header_filename = FileProcessor._get_filename_from_url(content_disposition)
@@ -816,7 +806,7 @@
             except requests.exceptions.HTTPError as e:
                 if e.response.status_code == 404:
                     gaia_logger.warning(f"File not found for task {task_id}: {file_url}")
-                    return None # No point retrying 404
+                    return None
                 gaia_logger.warning(f"HTTP error fetching file {task_id}: {e}")
             except requests.exceptions.Timeout:
                 gaia_logger.warning(f"Timeout fetching file {task_id}")
@@ -832,7 +822,6 @@

         cleaned = answer_text.lower().strip()

-        # Remove common prefixes
         prefixes_to_remove = [
             "a type of ", "a variety of ", "it's a ", "it is a ", "an ", "a ", "the ",
             "this is a ", "this bird is a ", "it appears to be a ", "looks like a ",
@@ -842,26 +831,19 @@
             if cleaned.startswith(prefix):
                 cleaned = cleaned[len(prefix):]

-        # Remove common suffixes
         suffixes_to_remove = [" bird", " species"]
         for suffix in suffixes_to_remove:
             if cleaned.endswith(suffix):
                 cleaned = cleaned[:-len(suffix)]

-        # Remove parenthetical content or descriptive clauses if simple
-        cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip() # e.g. "robin (american)" -> "robin"
-        cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip() # e.g. "sparrow, which is small" -> "sparrow"
-
-        # Basic character filtering (allow letters, numbers for things like "Type 2", spaces, hyphens)
+        cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip()
+        cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip()
         cleaned = re.sub(r"[^a-z0-9\s\-]", "", cleaned).strip()
-
-        # Normalize whitespace
         cleaned = " ".join(cleaned.split())

-        # Filter out very generic or uncertain answers post-cleaning
         uncertain_terms = ["unknown", "not sure", "unclear", "difficult to say", "generic", "common bird", "no bird", "not a bird"]
         if any(term in cleaned for term in uncertain_terms) or len(cleaned) < VIDEO_VQA_MIN_ANSWER_LENGTH:
-            return "" # Return empty if too generic or short
+            return ""

         return cleaned

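Traced on a sample VQA answer, the surviving cleanup steps behave like this (input string and abbreviated prefix list illustrative):

    import re

    answer = "It's a robin (American)"
    cleaned = answer.lower().strip()
    for prefix in ("it's a ", "an ", "a ", "the "):  # abbreviated prefix list
        if cleaned.startswith(prefix):
            cleaned = cleaned[len(prefix):]
    cleaned = re.sub(r"\s*\(.*\)\s*$", "", cleaned).strip()   # "robin (american)" -> "robin"
    cleaned = re.sub(r",\s*which is.*$", "", cleaned).strip()
    cleaned = re.sub(r"[^a-z0-9\s\-]", "", cleaned).strip()
    cleaned = " ".join(cleaned.split())
    print(cleaned)  # robin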
@@ -895,27 +877,19 @@
             'quiet': True,
             'max_filesize': 75 * 1024 * 1024,
             'overwrites': True, 'noprogress': True, 'noplaylist': True, 'socket_timeout': 20,
-            'merge_output_format': 'mp4', # Encourage mp4 output if merging
-            # Removed 'postprocessors': [{'key': 'FFmpegExtractAudio', ...}]
+            'merge_output_format': 'mp4',
         }
         gaia_logger.info(f"Attempting to download video: {video_url}")
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info_dict = ydl.extract_info(video_url, download=True)
-            video_file_path = ydl.prepare_filename(info_dict) # Get the final path
+            video_file_path = ydl.prepare_filename(info_dict)

-        # Check if downloaded file is indeed a video format recognised by OpenCV
-        # Common video extensions that OpenCV usually handles well.
-        # This check is made more robust by also trying to open it.
         if not video_file_path or not any(video_file_path.lower().endswith(ext) for ext in ['.mp4', '.webm', '.avi', '.mkv', '.mov', '.flv']):
             gaia_logger.warning(f"Downloaded file '{video_file_path}' might not be a standard video format or download failed to produce one. Will attempt to open.")
-            # Try to find a plausible video file if the main one looks suspicious
             possible_video_files = [f for f in os.listdir(temp_dir) if f.startswith(info_dict.get('id','')) and any(f.lower().endswith(ext) for ext in ['.mp4', '.webm'])]
             if possible_video_files:
                 video_file_path = os.path.join(temp_dir, possible_video_files[0])
                 gaia_logger.info(f"Using alternative video file from temp_dir: {video_file_path}")
-            # else: # The cap.isOpened() check below will handle if it's truly unusable
-            #     gaia_logger.error(f"No suitable video file found in temp_dir for {info_dict.get('id','')}")
-            #     return "Video download resulted in a non-video or unusable file."


         if not video_file_path or not os.path.exists(video_file_path):
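A self-contained sketch of the same yt-dlp flow (URL and output template illustrative; the app additionally caps file size and sets socket timeouts as shown above):

    import os
    import tempfile
    import yt_dlp

    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # illustrative
    with tempfile.TemporaryDirectory() as temp_dir:
        ydl_opts = {
            "quiet": True,
            "noplaylist": True,
            "outtmpl": os.path.join(temp_dir, "%(id)s.%(ext)s"),
            "merge_output_format": "mp4",
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            path = ydl.prepare_filename(info)  # final on-disk path of the download
        print(path, os.path.getsize(path))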
@@ -935,9 +909,9 @@

             total_frames_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             fps = cap.get(cv2.CAP_PROP_FPS)
-            if not fps or fps <= 0: fps = 25 # Default fps if detection fails or is zero
+            if not fps or fps <= 0: fps = 25

-            frame_interval = max(1, int(fps)) # Process ~1 frame per second
+            frame_interval = max(1, int(fps))

             frames_analyzed_count = 0
             current_frame_num = 0
@@ -945,11 +919,11 @@
             gaia_logger.info(f"Video Info: ~{total_frames_video // fps if fps > 0 else total_frames_video:.0f}s, {fps:.2f} FPS. Analyzing ~1 frame/sec up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames.")

             while cap.isOpened() and frames_analyzed_count < VIDEO_MAX_FRAMES_TO_PROCESS:
-                cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_num) # Jump to frame
+                cap.set(cv2.CAP_PROP_POS_FRAMES, current_frame_num)
                 ret, frame_data = cap.read()
                 if not ret: break

-                timestamp_sec = current_frame_num / fps if fps > 0 else frames_analyzed_count # Fallback timestamp if fps is bad
+                timestamp_sec = current_frame_num / fps if fps > 0 else frames_analyzed_count
                 gaia_logger.info(f"Processing frame {current_frame_num} (analyzed {frames_analyzed_count+1}/{VIDEO_MAX_FRAMES_TO_PROCESS}) at ~{timestamp_sec:.1f}s")

                 try:
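The loop seeks by frame index instead of decoding every frame; a standalone sketch of the ~1 frame-per-second sampling pattern (file path and frame cap illustrative):

    import cv2

    cap = cv2.VideoCapture("video.mp4")            # illustrative path
    fps = cap.get(cv2.CAP_PROP_FPS) or 25          # cap.get returns 0.0 when FPS metadata is missing
    frame_interval = max(1, int(fps))              # ~1 sampled frame per second
    frame_num, sampled = 0, 0
    while cap.isOpened() and sampled < 10:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)  # jump straight to the target frame
        ret, frame = cap.read()
        if not ret:
            break
        print(f"frame {frame_num} at ~{frame_num / fps:.1f}s, shape={frame.shape}")
        frame_num += frame_interval
        sampled += 1
    cap.release()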
@@ -962,11 +936,9 @@
                     detected_objects = detector(pil_image)
                     bird_crops_this_frame = []
                     for obj in detected_objects:
-                        # Check label case-insensitively
                         if obj['label'].lower() == 'bird' and obj['score'] > VIDEO_CONFIDENCE_THRESHOLD_BIRD:
                             box = obj['box']
                             xmin, ymin, xmax, ymax = box['xmin'], box['ymin'], box['xmax'], box['ymax']
-                            # Ensure box coordinates are valid
                             if not (0 <= xmin < xmax <= pil_image.width and 0 <= ymin < ymax <= pil_image.height):
                                 gaia_logger.debug(f"Invalid box for bird: {box}, img size: {pil_image.size}")
                                 continue
@@ -991,7 +963,7 @@
                             vqa_answer_list = vqa_model(bird_crop_img, question=vqa_question, top_k=1)

                             raw_vqa_answer_text = ""
-                            vqa_confidence = VIDEO_VQA_CONFIDENCE_THRESHOLD # Default
+                            vqa_confidence = VIDEO_VQA_CONFIDENCE_THRESHOLD

                             if isinstance(vqa_answer_list, list) and vqa_answer_list:
                                 raw_vqa_answer_text = vqa_answer_list[0].get('answer', "")
@@ -1023,7 +995,6 @@
                 current_frame_num += frame_interval
                 frames_analyzed_count += 1

-            # cap.release() should be in finally

             context_str = (f"Video analysis result: The highest number of distinct bird species types inferred simultaneously "
                            f"in the analyzed portion of the video (up to {VIDEO_MAX_FRAMES_TO_PROCESS} frames) was {max_simultaneous_species}. "
@@ -1034,7 +1005,7 @@
         except yt_dlp.utils.DownloadError as e:
             gaia_logger.error(f"yt-dlp download error for {video_url}: {str(e)}")
             msg_str = str(e)
-            clean_msg = msg_str # Default to full message
+            clean_msg = msg_str
             if "Unsupported URL" in msg_str: clean_msg = "Unsupported video URL."
             elif "video unavailable" in msg_str.lower(): clean_msg = "Video is unavailable."
             elif "private video" in msg_str.lower(): clean_msg = "Video is private."
@@ -1043,8 +1014,7 @@
                 clean_msg = "Video download failed due to YouTube restrictions (e.g., sign-in, cookies, or authentication required)."
             elif "HTTP Error 403" in msg_str or "Forbidden" in msg_str : clean_msg = "Access to video denied (Forbidden/403)."
             elif "HTTP Error 404" in msg_str or "Not Found" in msg_str : clean_msg = "Video not found (404)."
-            # Keep the message relatively concise for the LLM
-            return f"Video download failed: {clean_msg[:250] + '...' if len(clean_msg) > 250 else clean_msg}" # Limit length of detailed message
+            return f"Video download failed: {clean_msg[:250] + '...' if len(clean_msg) > 250 else clean_msg}"

         except Exception as e:
             gaia_logger.error(f"Error during video analysis for {video_url}: {e}", exc_info=True)
@@ -1054,7 +1024,7 @@
                 cap.release()
                 gaia_logger.info("Video capture released.")
             if temp_dir_obj:
-                temp_dir_path_for_log = temp_dir_obj.name # Store before cleanup for logging
+                temp_dir_path_for_log = temp_dir_obj.name
                 try:
                     temp_dir_obj.cleanup()
                     gaia_logger.info(f"Successfully cleaned up temp video directory: {temp_dir_path_for_log}")
@@ -1073,10 +1043,9 @@
             reasoning_trace = parts[0].strip()
             model_answer = parts[1].strip()
         else:
-            reasoning_trace = llm_text # If sentinel not found, assume whole output is reasoning
+            reasoning_trace = llm_text
             lines = llm_text.strip().split('\n')
-            # Try to take the last non-empty line as answer, or a default if all reasoning
-            model_answer = "Could not parse answer" # Default if no clear answer found
+            model_answer = "Could not parse answer"
             for line in reversed(lines):
                 if line.strip():
                     model_answer = line.strip()
@@ -1089,11 +1058,10 @@
         default_model_answer = "Information not available in provided context"
         default_reasoning = "LLM processing failed or context insufficient."

-        if not self.llm_model or not genai or not GenerationConfig or not FinishReason or not HarmCategory or not HarmBlockThreshold: # Added more checks
+        if not self.llm_model or not genai or not GenerationConfig or not FinishReason or not HarmCategory or not HarmBlockThreshold:
             gaia_logger.warning("LLM model (Gemini) or necessary enums/configs not available for answer formulation.")
             reasoning = "LLM model (Gemini) or its configuration components not available for answer formulation."
             answer_val = default_model_answer
-            # Provide some context indication even if LLM is down
             if web_context and file_context:
                 reasoning += " Context from file and web was found but not processed by LLM."
             elif web_context:
@@ -1125,7 +1093,7 @@
             file_header = "\n\nContext from Provided Document:\n---"
             file_footer = "\n---"
             len_web_ctx = len(web_context) if web_context else 0
-            max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer) - 500 # Buffer
+            max_len_for_file = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len_web_ctx - len(file_header) - len(file_footer) - 500

             if max_len_for_file > 100 :
                 truncated_file_context = file_context[:max_len_for_file]
@@ -1139,11 +1107,10 @@

         if web_context:
             header_text = "\n\nContext from External Sources (Web/Video):\n---"
-            if "Video analysis result:" in web_context and "Source [" not in web_context: # Only video
+            if "Video analysis result:" in web_context and "Source [" not in web_context:
                 header_text = "\n\nContext from Video Analysis:\n---"
-            elif "Source [" in web_context and "Video analysis result:" not in web_context: # Only web
+            elif "Source [" in web_context and "Video analysis result:" not in web_context:
                 header_text = "\n\nContext from Web Search Results:\n---"
-            # If both, the generic "External Sources" is fine.

             web_footer = "\n---"
             available_len_for_web = MAX_CONTEXT_LENGTH_LLM - current_prompt_text_len - len(header_text) - len(web_footer) - 300
@@ -1186,11 +1153,10 @@
                 return {"model_answer": "LLM Error: No response", "reasoning_trace": "LLM did not provide any response candidates."}

             candidate = response.candidates[0]
-            # Check candidate's finish_reason
             if candidate.finish_reason != FinishReason.STOP:
                 reason_name = candidate.finish_reason.name if hasattr(candidate.finish_reason, 'name') else str(candidate.finish_reason)
                 safety_ratings_str = ""
-                if candidate.safety_ratings: # Check if safety_ratings exist
+                if candidate.safety_ratings:
                     relevant_ratings = [
                         f"{sr.category.name.split('_')[-1] if hasattr(sr.category, 'name') else 'CAT?'}: {(sr.probability.name if hasattr(sr.probability, 'name') else 'PROB?')}"
                         for sr in candidate.safety_ratings if (hasattr(sr,'blocked') and sr.blocked) or (hasattr(sr,'probability') and HarmProbability and sr.probability.value >= HarmProbability.MEDIUM.value)
@@ -1198,7 +1164,7 @@
                     if relevant_ratings: safety_ratings_str = "; ".join(relevant_ratings)

                 gaia_logger.warning(f"Gemini candidate did not finish successfully. Reason: {reason_name}. Safety Ratings: {safety_ratings_str if safety_ratings_str else 'N/A'}")
-
+
                 user_message = "LLM Error: Response incomplete"
                 if candidate.finish_reason == FinishReason.SAFETY: user_message = "LLM Error: Response blocked for safety"
                 elif candidate.finish_reason == FinishReason.MAX_TOKENS: user_message = "LLM Error: Response truncated (max tokens)"
@@ -1209,19 +1175,19 @@
                     "reasoning_trace": f"LLM generation stopped. Reason: {reason_name}. " + (f"Details: {safety_ratings_str}" if safety_ratings_str else "")
                 }

-            llm_answer_text = response.text # Safe to access now
+            llm_answer_text = response.text
             gaia_logger.info(f"LLM Raw Full Answer (first 200 chars): {llm_answer_text[:200]}...")
             return self._parse_llm_output(llm_answer_text)

         except ValueError as ve:
             if "finish_reason" in str(ve).lower() and ("part" in str(ve).lower() or "candidate" in str(ve).lower()):
-                gaia_logger.error(f"ValueError accessing Gemini response.text, likely due to non-STOP finish_reason not caught explicitly: {ve}", exc_info=False) # exc_info=False as it's handled
+                gaia_logger.error(f"ValueError accessing Gemini response.text, likely due to non-STOP finish_reason not caught explicitly: {ve}", exc_info=False)
                 fr_from_ex = "Unknown (from ValueError)"
-                match_fr = re.search(r"finish_reason.*?is\s*(\w+)", str(ve), re.IGNORECASE) # Try to get name or number
+                match_fr = re.search(r"finish_reason.*?is\s*(\w+)", str(ve), re.IGNORECASE)
                 if match_fr: fr_from_ex = match_fr.group(1)
                 return {"model_answer": "LLM Error: Invalid response state",
                         "reasoning_trace": f"Could not parse LLM response. Finish reason possibly {fr_from_ex}. Details: {str(ve)[:150]}"}
-            else: # Other ValueErrors
+            else:
                 gaia_logger.error(f"ValueError during Gemini call or processing: {ve}", exc_info=True)
                 return {"model_answer": "LLM Error: Value error", "reasoning_trace": f"A value error occurred: {str(ve)}"}
         except Exception as e:
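The explicit finish_reason check exists because response.text raises a ValueError when no candidate completed normally; a minimal sketch of the safe-access pattern (prompt illustrative):

    import os
    import google.generativeai as genai

    genai.configure(api_key=os.environ["GOOGLE_GEMINI_API_KEY"])
    model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
    response = model.generate_content("Summarize: the sky is blue.")
    candidate = response.candidates[0]
    # STOP means normal completion; after SAFETY, MAX_TOKENS, etc., reading response.text can raise.
    if candidate.finish_reason.name == "STOP":
        print(response.text)
    else:
        print(f"generation stopped early: {candidate.finish_reason.name}")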
@@ -1240,7 +1206,6 @@
             elif "InternalServerError" in error_type_name or "500" in error_message :
                 answer_val = "LLM server error"
                 reasoning = "Error: LLM experienced an internal server error."
-            # Add specific handling for google.api_core.exceptions.ServiceUnavailable (503) if it occurs
             elif "ServiceUnavailable" in error_type_name or "503" in error_message:
                 answer_val = "LLM service unavailable"
                 reasoning = "Error: LLM service is temporarily unavailable (503)."
@@ -1253,15 +1218,13 @@
         q_lower = question.lower().strip()

         video_context_str: Optional[str] = None
-        # Regex for YouTube URLs (watch, short, and youtu.be forms)
         video_url_match = re.search(r"(https?://(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/)|youtu\.be/)[\w\-=&%]+)", question)


-        video_keywords = ["video", "youtube.com", "youtu.be", "clip", "recording"] # Broader keywords
+        video_keywords = ["video", "youtube.com", "youtu.be", "clip", "recording"]
         species_keywords = ["species", "bird", "birds", "type of bird", "kinds of bird", "different birds"]
         action_keywords = ["count", "how many", "number of", "simultaneously", "at the same time", "on camera", "identify", "list"]

-        # Trigger video analysis if a URL is found AND relevant keywords are present
         if video_url_match and \
            any(vk in q_lower for vk in video_keywords) and \
            any(sk in q_lower for sk in species_keywords) and \
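The URL pattern covers watch, Shorts, and youtu.be links; a quick check of what it matches (sample questions illustrative):

    import re

    pattern = r"(https?://(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/)|youtu\.be/)[\w\-=&%]+)"
    samples = [
        "How many bird species appear in https://www.youtube.com/watch?v=abc123 at the same time?",
        "Count the birds in this clip https://youtu.be/abc123",
        "Identify the species in https://www.youtube.com/shorts/abc123",
    ]
    for s in samples:
        m = re.search(pattern, s)
        print(m.group(1) if m else "no match")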
@@ -1286,27 +1249,22 @@
         web_rag_ctx_str: Optional[str] = None
         needs_web_rag = True

-        # Logic to decide if RAG web search is needed
         if video_context_str:
-            # If video analysis seems to directly answer a counting/identification question from video
             if "Video analysis result:" in video_context_str and not "download failed" in video_context_str.lower() and not "skipped" in video_context_str.lower():
                 if (("count" in q_lower or "how many" in q_lower or "number of" in q_lower) and ("simultaneously" in q_lower or "at the same time" in q_lower or "distinct" in q_lower)) and any(sk_q in q_lower for sk_q in species_keywords):
-                    needs_web_rag = False # Video analysis likely sufficient
+                    needs_web_rag = False
                     gaia_logger.info("Video context seems primary for a specific video counting question; web RAG may be skipped.")


-        if file_ctx_str and len(file_ctx_str) > 100 and not video_context_str: # Only consider file if no video context
-            # Keywords suggesting the answer is likely within the document
+        if file_ctx_str and len(file_ctx_str) > 100 and not video_context_str:
             doc_can_answer_kws = ["summarize", "according to the document", "in the provided text", "based on the file content", "from this file", "in this data"]
-            # Keywords suggesting external info is needed despite file
             web_still_needed_kws = ["what is the current", "latest news on", "public opinion of", "search for more about", "compare this to", "what happened after"]

             if any(kw in q_lower for kw in doc_can_answer_kws) and not any(kw in q_lower for kw in web_still_needed_kws):
                 needs_web_rag = False
                 gaia_logger.info("File context seems primary; web RAG may be skipped.")
-            # Less strong heuristic: if it's a statement or simple file query not asking for external comparison/update
             elif not any(kw in q_lower for kw in web_still_needed_kws) and not question.strip().endswith("?"):
-                if not any(qk in q_lower for qk in ["why is", "how does", "explain the impact of", "what if"]): # Questions often needing broader context
+                if not any(qk in q_lower for qk in ["why is", "how does", "explain the impact of", "what if"]):
                     needs_web_rag = False
                     gaia_logger.info("File context seems sufficient for non-complex query; web RAG may be skipped.")

@@ -1317,8 +1275,6 @@

         if needs_web_rag:
             search_q = question.replace("?", "").strip()
-            # If video context failed, the question might still be about the video's topic, so RAG is useful.
-            # If file context is present but RAG is still needed, LLM will have to reconcile.
             rag_res = self.rag_pipeline.analyze(query=search_q, force_refresh=False)
             if rag_res:
                 snippets = []
@@ -1326,7 +1282,7 @@
                     title = res_item.get('title','N/A')
                     body = res_item.get('body','')
                     href = res_item.get('href','#')
-                    provider_info = res_item.get('query_tag','WebSearch') # Can be refined if RAG provides more source details
+                    provider_info = res_item.get('query_tag','WebSearch')
                     source_type = "EnrichedContent" if res_item.get('enriched') else "Snippet"
                     body_preview = (body[:1500] + "...") if len(body) > 1500 else body
                     snippets.append(f"Source [{i+1} - {provider_info}]: {title}\nURL: {href}\n{source_type}: {body_preview}\n---")
@@ -1336,10 +1292,8 @@

         final_llm_external_context_parts = []
         if video_context_str:
-            final_llm_external_context_parts.append(f"{video_context_str}") # Header already in video_context_str
+            final_llm_external_context_parts.append(f"{video_context_str}")
         if web_rag_ctx_str:
-            # No separate header needed if video_context_str already has "Video Analysis Context:"
-            # and web_rag_ctx_str is structured with "Source [n]:"
             final_llm_external_context_parts.append(f"{web_rag_ctx_str}")

         final_llm_external_context = "\n\n---\n\n".join(final_llm_external_context_parts).strip() if final_llm_external_context_parts else None
@@ -1364,9 +1318,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e: return f"Error fetching questions: {e}", None

     results_log_for_gradio, answers_for_api_submission = [], []
-    # Use a more conservative default RPM if not set, matching free tier common limits.
-    GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "10")) # Default to 10 RPM if not set, as per common free tier
-    # Add a small buffer to sleep time
+    GEMINI_RPM_LIMIT = int(os.getenv("GEMINI_RPM_LIMIT", "10"))
     sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
     gaia_logger.info(f"Using Gemini RPM limit: {GEMINI_RPM_LIMIT}, LLM call sleep: {sleep_llm:.2f}s")

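With the 10 RPM default, the throttle works out to 6.5 seconds between LLM calls: a 6.0 s per-request budget plus the 0.5 s safety buffer.

    GEMINI_RPM_LIMIT = 10
    sleep_llm = (60.0 / GEMINI_RPM_LIMIT) + 0.5 if GEMINI_RPM_LIMIT > 0 else 0.2
    print(sleep_llm)  # 6.5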
@@ -1422,7 +1374,7 @@
     except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log_for_gradio)

 with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
-    gr.Markdown("# GAIA Agent")
+    gr.Markdown("# GAIA Level 1 Agent")
     gr.Markdown(
         """
         **Instructions:**
@@ -1435,11 +1387,11 @@ with gr.Blocks(title="GAIA RAG Agent - Advanced") as demo:
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
     status_output = gr.Textbox(label="Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, max_rows=20, height=500) # Added max_rows and height
+    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=500) # Removed max_rows
     run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])

 if __name__ == "__main__":
-    print("\n" + "-"*30 + " GAIA Agent - RAG, FileProc, Video Analysis " + "-"*30)
+    print("\n" + "-"*30 + " GAIA Level 1 Agent - RAG, FileProc, Video Analysis " + "-"*30)
     required_env = {
         "GOOGLE_GEMINI_API_KEY": GOOGLE_GEMINI_API_KEY,
         "GOOGLE_API_KEY": GOOGLE_CUSTOM_SEARCH_API_KEY,
@@ -1461,11 +1413,10 @@ if __name__ == "__main__":

     if missing_keys: print(f"\n--- PLEASE SET MISSING ENV VARS FOR FULL FUNCTIONALITY: {', '.join(missing_keys)} ---\n")
     else: print("\n--- All major API Key Environment Variables found. ---")
-
-    # Log the Gemini RPM limit being used
+
     gemini_rpm = os.getenv("GEMINI_RPM_LIMIT", "10 (defaulted)")
     print(f"--- Using GEMINI_RPM_LIMIT: {gemini_rpm} (Ensure this matches your Gemini API plan limits) ---")


-    print("-"*(60 + len(" GAIA Agent - RAG, FileProc, Video Analysis ")) + "\n")
+    print("-"*(60 + len(" GAIA Level 1 Agent - RAG, FileProc, Video Analysis ")) + "\n")
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=False, share=False)