diff --git "a/gaia_system.py" "b/gaia_system.py"
deleted file mode 100644
--- "a/gaia_system.py"
+++ /dev/null
@@ -1,2279 +0,0 @@
-#!/usr/bin/env python3
-"""
-🚀 GAIA Multi-Agent System - UNIVERSAL MULTIMODAL AI AGENT
-Enhanced with comprehensive multimodal capabilities for ANY type of question:
-- 🎥 Video Processing & Analysis
-- 🎵 Audio Processing & Speech Recognition
-- 🎨 Image Generation & Advanced Computer Vision
-- 📊 Data Visualization & Chart Generation
-- 🎙️ Speech Synthesis & Voice Generation
-- 🎬 Video Generation & Editing
-- 🧬 Scientific Computing & Analysis
-- 📈 Advanced Analytics & Modeling
-"""
-
-import os
-import sys
-import re
-import json
-import time
-import random
-import logging
-import requests
-import tempfile
-import base64
-import hashlib
-import subprocess
-from typing import Dict, List, Any, Optional, Tuple, Union
-from dataclasses import dataclass
-from enum import Enum
-from urllib.parse import urlparse, urljoin
-import math
-import statistics
-
-# Core AI and Web Libraries
-try:
-    from huggingface_hub import InferenceClient
-    HF_AVAILABLE = True
-except ImportError:
-    HF_AVAILABLE = False
-    print("⚠️ huggingface_hub not available. AI features limited.")
-
-try:
-    import openai
-    OPENAI_AVAILABLE = True
-except ImportError:
-    OPENAI_AVAILABLE = False
-    print("⚠️ OpenAI not available. GPT models unavailable.")
-
-# Web Scraping
-try:
-    from bs4 import BeautifulSoup
-    BS4_AVAILABLE = True
-except ImportError:
-    BS4_AVAILABLE = False
-    print("⚠️ BeautifulSoup not available. Web scraping limited.")
-
-# Image Processing
-try:
-    from PIL import Image, ImageDraw, ImageFont
-    PIL_AVAILABLE = True
-except ImportError:
-    PIL_AVAILABLE = False
-    print("⚠️ Pillow not available. Image processing limited.")
-
-# Video Processing
-try:
-    import cv2
-    CV2_AVAILABLE = True
-except ImportError:
-    CV2_AVAILABLE = False
-    print("⚠️ OpenCV not available. Video processing unavailable.")
-
-# Audio Processing
-try:
-    import librosa
-    import soundfile as sf
-    AUDIO_AVAILABLE = True
-except ImportError:
-    AUDIO_AVAILABLE = False
-    print("⚠️ Audio libraries not available. Audio processing unavailable.")
-
-# Speech Recognition
-try:
-    import speech_recognition as sr
-    SPEECH_AVAILABLE = True
-except ImportError:
-    SPEECH_AVAILABLE = False
-    print("⚠️ Speech recognition not available.")
-
-# Text-to-Speech
-try:
-    import pyttsx3
-    TTS_AVAILABLE = True
-except ImportError:
-    TTS_AVAILABLE = False
-    print("⚠️ Text-to-speech not available.")
-
-# Data Visualization
-try:
-    import matplotlib.pyplot as plt
-    import plotly.graph_objects as go
-    import plotly.express as px
-    VIZ_AVAILABLE = True
-    # Optional: seaborn
-    try:
-        import seaborn as sns
-        SEABORN_AVAILABLE = True
-    except ImportError:
-        SEABORN_AVAILABLE = False
-        sns = None
-except ImportError:
-    VIZ_AVAILABLE = False
-    SEABORN_AVAILABLE = False
-    plt = None
-    go = None
-    px = None
-    sns = None
-    print("⚠️ Visualization libraries not available.")
-
-# Scientific Computing
-try:
-    import numpy as np
-    import pandas as pd
-    import scipy.stats as stats
-    from sklearn.preprocessing import StandardScaler
-    from sklearn.cluster import KMeans
-    SCIENCE_AVAILABLE = True
-except ImportError:
-    SCIENCE_AVAILABLE = False
-    print("⚠️ Scientific computing libraries not available.")
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-class ToolType(Enum):
-    """🛠️ Universal tool types for any content type"""
-    # Original tools
-    WEB_SEARCH = "web_search"
-    BROWSE_URL = "browse_url"
-    DOWNLOAD_FILE = "download_file"
-    READ_PDF = "read_pdf"
-    ANALYZE_IMAGE = "analyze_image"
-    CALCULATOR = "calculator"
-
-    # New multimodal tools
-    PROCESS_VIDEO = "process_video"
-    ANALYZE_AUDIO = "analyze_audio"
-    GENERATE_IMAGE = "generate_image"
-    SYNTHESIZE_SPEECH = "synthesize_speech"
-    CREATE_VISUALIZATION = "create_visualization"
-    ANALYZE_DATA = "analyze_data"
-    GENERATE_VIDEO = "generate_video"
-    EXTRACT_AUDIO = "extract_audio"
-    TRANSCRIBE_SPEECH = "transcribe_speech"
-    DETECT_OBJECTS = "detect_objects"
-    FACE_RECOGNITION = "face_recognition"
-    SCIENTIFIC_COMPUTE = "scientific_compute"
-
-@dataclass
-class ToolCall:
-    tool: ToolType
-    parameters: Dict[str, Any]
-
-class UniversalMultimodalToolkit:
-    """🌟 Universal toolkit for processing ANY type of content"""
-
-    def __init__(self, hf_token: str = None, openai_key: str = None):
-        self.hf_token = hf_token
-        self.openai_key = openai_key
-        self.temp_dir = tempfile.mkdtemp()
-
-        # Initialize specialized clients
-        self._init_multimodal_clients()
-
-    def _init_multimodal_clients(self):
-        """Initialize all multimodal AI clients"""
-        self.clients = {}
-
-        if self.hf_token and HF_AVAILABLE:
-            # Vision models
-            self.clients['vision'] = InferenceClient(model="Salesforce/blip-image-captioning-large", token=self.hf_token)
-            self.clients['image_gen'] = InferenceClient(model="stabilityai/stable-diffusion-xl-base-1.0", token=self.hf_token)
-            self.clients['object_detection'] = InferenceClient(model="facebook/detr-resnet-50", token=self.hf_token)
-
-            # Audio models - Updated to use provider pattern for speech recognition
-            self.clients['speech_to_text'] = InferenceClient(
-                provider="hf-inference",
-                api_key=self.hf_token,
-            )
-            self.clients['audio_classification'] = InferenceClient(model="facebook/wav2vec2-base-960h", token=self.hf_token)
-
-            # Text generation for multimodal
-            self.clients['text_gen'] = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct", token=self.hf_token)
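-
-    # Usage sketch (illustrative only; the env-var token lookup and file names
-    # below are assumptions for the example, not part of the original API):
-    #   toolkit = UniversalMultimodalToolkit(hf_token=os.getenv("HF_TOKEN"))
-    #   print(toolkit.process_video("clip.mp4", task="extract_frames"))
-    #   print(toolkit.transcribe_speech("speech.wav"))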
-
-    # === VIDEO PROCESSING ===
-    def process_video(self, video_path: str, task: str = "analyze") -> str:
-        """🎥 Process and analyze video content"""
-        if not CV2_AVAILABLE:
-            return "❌ Video processing unavailable. Install opencv-python."
-
-        try:
-            logger.info(f"🎥 Processing video: {video_path} | Task: {task}")
-
-            cap = cv2.VideoCapture(video_path)
-            if not cap.isOpened():
-                return f"❌ Could not open video: {video_path}"
-
-            # Get video properties
-            fps = cap.get(cv2.CAP_PROP_FPS)
-            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-            duration = frame_count / fps if fps > 0 else 0
-            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
-            video_info = f"Video: {width}x{height}, {fps:.1f} FPS, {duration:.1f}s, {frame_count} frames"
-
-            if task == "extract_frames":
-                # Extract key frames for analysis
-                frames_extracted = []
-                frame_interval = max(1, frame_count // 10)  # Extract 10 frames max
-
-                for i in range(0, frame_count, frame_interval):
-                    cap.set(cv2.CAP_PROP_POS_FRAMES, i)
-                    ret, frame = cap.read()
-                    if ret:
-                        frame_path = os.path.join(self.temp_dir, f"frame_{i}.jpg")
-                        cv2.imwrite(frame_path, frame)
-                        frames_extracted.append(frame_path)
-
-                cap.release()
-
-                # Analyze extracted frames
-                frame_analyses = []
-                for frame_path in frames_extracted[:3]:  # Analyze first 3 frames
-                    analysis = self.analyze_image(frame_path, "Describe what you see in this video frame")
-                    frame_analyses.append(analysis)
-
-                return f"{video_info}. Frame analysis: {'; '.join(frame_analyses)}"
-
-            elif task == "motion_detection":
-                # Simple motion detection
-                ret, frame1 = cap.read()
-                if not ret:
-                    cap.release()
-                    return f"{video_info}. Motion detection failed."
-
-                frame1_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
-                motion_detected = 0
-
-                while True:
-                    ret, frame2 = cap.read()
-                    if not ret:
-                        break
-
-                    frame2_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
-                    diff = cv2.absdiff(frame1_gray, frame2_gray)
-
-                    if cv2.countNonZero(diff) > 5000:  # Threshold for motion
-                        motion_detected += 1
-
-                    frame1_gray = frame2_gray
-
-                cap.release()
-                motion_percentage = (motion_detected / frame_count) * 100
-
-                return f"{video_info}. Motion detected in {motion_percentage:.1f}% of frames."
-
-            else:
-                cap.release()
-                return f"{video_info}. Basic video analysis complete."
-
-        except Exception as e:
-            logger.error(f"❌ Video processing error: {e}")
-            return f"❌ Video processing failed: {e}"
-
-    # === AUDIO PROCESSING ===
-    def analyze_audio(self, audio_path: str, task: str = "analyze") -> str:
-        """🎵 Analyze audio content"""
-        if not AUDIO_AVAILABLE:
-            return "❌ Audio processing unavailable. Install librosa and soundfile."
-
-        try:
-            logger.info(f"🎵 Analyzing audio: {audio_path} | Task: {task}")
-
-            # Load audio
-            y, sr = librosa.load(audio_path, sr=None)
-            duration = len(y) / sr
-
-            audio_info = f"Audio: {duration:.1f}s, {sr} Hz, {len(y)} samples"
-
-            if task == "transcribe":
-                return self.transcribe_speech(audio_path)
-            elif task == "features":
-                # Extract audio features
-                tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
-                spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
-                spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
-                zero_crossing_rate = librosa.feature.zero_crossing_rate(y)[0]
-
-                features = {
-                    "tempo": float(tempo),
-                    "avg_spectral_centroid": float(np.mean(spectral_centroids)),
-                    "avg_spectral_rolloff": float(np.mean(spectral_rolloff)),
-                    "avg_zero_crossing_rate": float(np.mean(zero_crossing_rate))
-                }
-
-                return f"{audio_info}. Features: {json.dumps(features, indent=2)}"
-            else:
-                return f"{audio_info}. Basic audio analysis complete."
-
-        except Exception as e:
-            logger.error(f"❌ Audio analysis error: {e}")
-            return f"❌ Audio analysis failed: {e}"
-
-    def transcribe_speech(self, audio_path: str) -> str:
-        """🎙️ Convert speech to text using Whisper via HuggingFace Inference API"""
-        try:
-            logger.info(f"🎙️ Transcribing speech from: {audio_path}")
-
-            if self.hf_token and HF_AVAILABLE and 'speech_to_text' in self.clients:
-                # Use Whisper via HuggingFace Inference API with provider pattern
-                try:
-                    result = self.clients['speech_to_text'].automatic_speech_recognition(
-                        audio_path,
-                        model="openai/whisper-large-v3"
-                    )
-
-                    if isinstance(result, dict) and 'text' in result:
-                        transcription = result['text'].strip()
-                    elif isinstance(result, str):
-                        transcription = result.strip()
-                    else:
-                        transcription = str(result).strip()
-
-                    if transcription:
-                        return f"Transcription: {transcription}"
-                    else:
-                        return "❌ No transcription available"
-
-                except Exception as hf_error:
-                    logger.warning(f"⚠️ HuggingFace speech recognition failed: {hf_error}")
-                    # Fall through to local recognition
-
-            # Fallback to local speech recognition if available
-            if SPEECH_AVAILABLE:
-                try:
-                    r = sr.Recognizer()
-                    with sr.AudioFile(audio_path) as source:
-                        audio = r.record(source)
-                    text = r.recognize_google(audio)
-                    return f"Transcription: {text}"
-                except sr.UnknownValueError:
-                    return "❌ Could not understand audio"
-                except sr.RequestError as e:
-                    return f"❌ Speech recognition error: {e}"
-            else:
-                return "❌ Speech recognition unavailable. Need HuggingFace token or speech_recognition library."
-
-        except Exception as e:
-            logger.error(f"❌ Transcription error: {e}")
-            return f"❌ Transcription failed: {e}"
-
-    # === IMAGE GENERATION ===
-    def generate_image(self, prompt: str, style: str = "realistic") -> str:
-        """🎨 Generate images from text descriptions"""
-        try:
-            logger.info(f"🎨 Generating image: {prompt} | Style: {style}")
-
-            if self.hf_token and 'image_gen' in self.clients:
-                # Use Stable Diffusion via HuggingFace
-                enhanced_prompt = f"{prompt}, {style} style, high quality, detailed"
-
-                image = self.clients['image_gen'].text_to_image(enhanced_prompt)
-
-                # Save generated image
-                image_path = os.path.join(self.temp_dir, f"generated_{int(time.time())}.png")
-                image.save(image_path)
-
-                return f"✅ Image generated and saved to: {image_path}"
-
-            elif self.openai_key and OPENAI_AVAILABLE:
-                # Use DALL-E via OpenAI
-                client = openai.OpenAI(api_key=self.openai_key)
-                response = client.images.generate(
-                    model="dall-e-3",
-                    prompt=f"{prompt}, {style} style",
-                    size="1024x1024",
-                    quality="standard",
-                    n=1,
-                )
-
-                image_url = response.data[0].url
-
-                # Download and save image
-                img_response = requests.get(image_url)
-                image_path = os.path.join(self.temp_dir, f"dalle_generated_{int(time.time())}.png")
-                with open(image_path, 'wb') as f:
-                    f.write(img_response.content)
-
-                return f"✅ DALL-E image generated and saved to: {image_path}"
-            else:
-                return "❌ Image generation unavailable. Need HuggingFace token or OpenAI key."
-
-        except Exception as e:
-            logger.error(f"❌ Image generation error: {e}")
-            return f"❌ Image generation failed: {e}"
-
-    # === SPEECH SYNTHESIS ===
-    def synthesize_speech(self, text: str, voice: str = "default") -> str:
-        """🎙️ Convert text to speech"""
-        try:
-            logger.info(f"🎙️ Synthesizing speech: {text[:50]}... | Voice: {voice}")
-
-            if TTS_AVAILABLE:
-                engine = pyttsx3.init()
-
-                # Set voice properties
-                voices = engine.getProperty('voices')
-                if voices and len(voices) > 0:
-                    if voice == "female" and len(voices) > 1:
-                        engine.setProperty('voice', voices[1].id)
-                    else:
-                        engine.setProperty('voice', voices[0].id)
-
-                # Set speech rate and volume
-                engine.setProperty('rate', 150)
-                engine.setProperty('volume', 0.9)
-
-                # Generate speech file
-                speech_path = os.path.join(self.temp_dir, f"speech_{int(time.time())}.wav")
-                engine.save_to_file(text, speech_path)
-                engine.runAndWait()
-
-                return f"✅ Speech synthesized and saved to: {speech_path}"
-            else:
-                return "❌ Text-to-speech unavailable. Install pyttsx3."
-
-        except Exception as e:
-            logger.error(f"❌ Speech synthesis error: {e}")
-            return f"❌ Speech synthesis failed: {e}"
-
-    # === DATA VISUALIZATION ===
-    def create_visualization(self, data: Dict[str, Any], chart_type: str = "bar") -> str:
-        """📊 Create data visualizations and charts"""
-        try:
-            logger.info(f"📊 Creating {chart_type} chart")
-
-            if not VIZ_AVAILABLE:
-                return "❌ Visualization unavailable. Install matplotlib, seaborn, and plotly."
-
-            # Prepare data
-            if isinstance(data, dict) and 'x' in data and 'y' in data:
-                x_data = data['x']
-                y_data = data['y']
-                title = data.get('title', 'Data Visualization')
-            else:
-                return "❌ Invalid data format. Need dict with 'x' and 'y' keys."
-
-            # Create visualization
-            plt.figure(figsize=(10, 6))
-
-            if chart_type == "bar":
-                plt.bar(x_data, y_data)
-            elif chart_type == "line":
-                plt.plot(x_data, y_data, marker='o')
-            elif chart_type == "scatter":
-                plt.scatter(x_data, y_data)
-            elif chart_type == "pie":
-                plt.pie(y_data, labels=x_data, autopct='%1.1f%%')
-            else:
-                plt.plot(x_data, y_data)
-
-            plt.title(title)
-            plt.xlabel(data.get('xlabel', 'X'))
-            plt.ylabel(data.get('ylabel', 'Y'))
-            plt.grid(True, alpha=0.3)
-
-            # Save chart
-            chart_path = os.path.join(self.temp_dir, f"chart_{int(time.time())}.png")
-            plt.savefig(chart_path, dpi=300, bbox_inches='tight')
-            plt.close()
-
-            return f"✅ {chart_type.title()} chart created and saved to: {chart_path}"
-
-        except Exception as e:
-            logger.error(f"❌ Visualization error: {e}")
-            return f"❌ Visualization failed: {e}"
-
-    # === SCIENTIFIC COMPUTING ===
-    def scientific_compute(self, operation: str, data: Dict[str, Any]) -> str:
-        """🧬 Perform scientific computations and analysis"""
-        try:
-            if not SCIENCE_AVAILABLE:
-                return "❌ Scientific computing unavailable. Install numpy, pandas, scipy, sklearn."
-
-            logger.info(f"🧬 Scientific computation: {operation}")
-
-            if operation == "statistics":
-                values = data.get('values', [])
-                if not values:
-                    return "❌ No values provided for statistics"
-
-                result = {
-                    "mean": float(np.mean(values)),
-                    "median": float(np.median(values)),
-                    "std": float(np.std(values)),
-                    "min": float(np.min(values)),
-                    "max": float(np.max(values)),
-                    "variance": float(np.var(values)),
-                    "skewness": float(stats.skew(values)),
-                    "kurtosis": float(stats.kurtosis(values))
-                }
-
-                return f"Statistics: {json.dumps(result, indent=2)}"
-
-            elif operation == "correlation":
-                x = data.get('x', [])
-                y = data.get('y', [])
-                if not x or not y or len(x) != len(y):
-                    return "❌ Need equal length x and y arrays for correlation"
-
-                correlation = float(np.corrcoef(x, y)[0, 1])
-                p_value = float(stats.pearsonr(x, y)[1])
-
-                return f"Correlation: {correlation:.4f}, P-value: {p_value:.4f}"
-
-            elif operation == "clustering":
-                data_points = data.get('data', [])
-                n_clusters = data.get('clusters', 3)
-
-                if not data_points:
-                    return "❌ No data points provided for clustering"
-
-                # Perform K-means clustering
-                scaler = StandardScaler()
-                scaled_data = scaler.fit_transform(data_points)
-
-                kmeans = KMeans(n_clusters=n_clusters, random_state=42)
-                labels = kmeans.fit_predict(scaled_data)
-
-                return f"Clustering complete. Labels: {labels.tolist()}"
-
-            else:
-                return f"❌ Unknown scientific operation: {operation}"
-
-        except Exception as e:
-            logger.error(f"❌ Scientific computation error: {e}")
-            return f"❌ Scientific computation failed: {e}"
-
-    # === OBJECT DETECTION ===
-    def detect_objects(self, image_path: str) -> str:
-        """🔍 Detect and identify objects in images"""
-        try:
-            logger.info(f"🔍 Detecting objects in: {image_path}")
-
-            if self.hf_token and 'object_detection' in self.clients:
-                with open(image_path, 'rb') as img_file:
-                    result = self.clients['object_detection'].object_detection(img_file.read())
-
-                if result:
-                    objects = []
-                    for detection in result:
-                        label = detection.get('label', 'unknown')
-                        score = detection.get('score', 0)
-                        objects.append(f"{label} ({score:.2f})")
-
-                    return f"Objects detected: {', '.join(objects)}"
-                else:
-                    return "No objects detected"
-            else:
-                return "❌ Object detection unavailable. Need HuggingFace token."
-
-        except Exception as e:
-            logger.error(f"❌ Object detection error: {e}")
-            return f"❌ Object detection failed: {e}"
-
-    # Enhanced existing methods
-    def web_search(self, query: str, num_results: int = 5) -> str:
-        """🔍 Enhanced web search with comprehensive crawling and browsing"""
-        try:
-            logger.info(f"🔍 Web search: {query}")
-
-            # Enhanced DuckDuckGo search with better result extraction
-            search_url = f"https://duckduckgo.com/html/?q={requests.utils.quote(query)}"
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-            }
-
-            response = requests.get(search_url, headers=headers, timeout=15)
-            response.raise_for_status()
-
-            if not BS4_AVAILABLE:
-                return f"⚠️ Search completed but parsing limited. Raw response length: {len(response.text)}"
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-            results = []
-
-            # Enhanced result extraction with multiple patterns
-            result_selectors = [
-                'div.result',
-                'div[data-result-index]',
-                'article',
-                'li.result'
-            ]
-
-            for selector in result_selectors:
-                search_results = soup.select(selector)[:num_results]
-                if search_results:
-                    break
-            else:
-                search_results = []
-
-            for result in search_results:
-                # Extract title
-                title_elem = (result.find('a', class_='result__a') or
-                              result.find('h2') or
-                              result.find('h3') or
-                              result.find('a'))
-
-                # Extract snippet
-                snippet_elem = (result.find('a', class_='result__snippet') or
-                                result.find('span', class_='result__snippet') or
-                                result.find('p'))
-
-                if title_elem:
-                    title = title_elem.get_text(strip=True)
-                    url = title_elem.get('href', '')
-                    snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
-
-                    # Clean and format URL
-                    if url and not url.startswith('http'):
-                        if url.startswith('//'):
-                            url = 'https:' + url
-                        elif url.startswith('/'):
-                            url = 'https://duckduckgo.com' + url
-
-                    results.append({
-                        'title': title,
-                        'url': url,
-                        'snippet': snippet
-                    })
-
-            if results:
-                # Format results for AI consumption
-                formatted_results = []
-                for i, result in enumerate(results, 1):
-                    formatted_results.append(
-                        f"{i}. {result['title']}\n"
-                        f"   {result['snippet']}\n"
-                        f"   URL: {result['url']}"
-                    )
-
-                return "\n\n".join(formatted_results)
-            else:
-                # Fallback: Try alternative search approach
-                try:
-                    alt_url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"
-                    alt_response = requests.get(alt_url, headers=headers, timeout=10)
-                    if alt_response.status_code == 200:
-                        return f"Search completed for '{query}' - found {len(alt_response.text)} characters of content"
-                except Exception:
-                    pass
-
-                return f"🔍 No results found for '{query}'"
-
-        except Exception as e:
-            logger.error(f"❌ Web search error: {e}")
-            return f"❌ Web search failed: {e}"
-
-    def browse_url(self, url: str) -> str:
-        """🌐 Enhanced web browsing with content extraction"""
-        try:
-            logger.info(f"🌐 Browsing URL: {url}")
-
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-                'Accept-Language': 'en-US,en;q=0.5',
-                'Accept-Encoding': 'gzip, deflate',
-                'Connection': 'keep-alive'
-            }
-
-            response = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
-            response.raise_for_status()
-
-            if not BS4_AVAILABLE:
-                return f"⚠️ URL accessed but content parsing limited. Content length: {len(response.text)}"
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Remove script and style elements
-            for script in soup(["script", "style", "nav", "footer", "header"]):
-                script.decompose()
-
-            # Extract main content
-            content_selectors = [
-                'main',
-                'article',
-                'div[role="main"]',
-                'div.content',
-                'div.main-content',
-                'div.post-content',
-                'div.entry-content',
-                'div.article-body',
-                'section'
-            ]
-
-            main_content = None
-            for selector in content_selectors:
-                main_content = soup.select_one(selector)
-                if main_content:
-                    break
-
-            if not main_content:
-                main_content = soup.find('body') or soup
-
-            # Extract text content
-            text_content = main_content.get_text(separator=' ', strip=True)
-
-            # Clean up the text
-            lines = text_content.split('\n')
-            cleaned_lines = []
-            for line in lines:
-                line = line.strip()
-                if line and len(line) > 3:  # Filter out very short lines
-                    cleaned_lines.append(line)
-
-            content = '\n'.join(cleaned_lines)
-
-            # Truncate if too long (keep first 3000 characters)
-            if len(content) > 3000:
-                content = content[:3000] + "... [content truncated]"
-
-            return f"📄 Content from {url}:\n\n{content}"
-
-        except Exception as e:
-            logger.error(f"❌ URL browsing error: {e}")
-            return f"❌ Failed to browse {url}: {e}"
-
-    def download_file(self, url: str, task_id: str = None) -> str:
-        """📥 Download files from URLs or GAIA API"""
-        try:
-            logger.info(f"📥 Downloading file from: {url}")
-
-            # Handle GAIA API task file downloads
-            if task_id and not url:
-                gaia_url = f"https://huggingface.co/datasets/gaia-benchmark/GAIA/raw/main/2023/validation/{task_id}"
-                url = gaia_url
-
-            # Set up headers
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-            }
-
-            # Download the file
-            response = requests.get(url, headers=headers, timeout=30, stream=True)
-            response.raise_for_status()
-
-            # Determine file extension
-            content_type = response.headers.get('content-type', '').lower()
-            if 'pdf' in content_type:
-                extension = '.pdf'
-            elif 'image' in content_type:
-                if 'jpeg' in content_type or 'jpg' in content_type:
-                    extension = '.jpg'
-                elif 'png' in content_type:
-                    extension = '.png'
-                else:
-                    extension = '.img'
-            elif 'text' in content_type:
-                extension = '.txt'
-            else:
-                # Try to extract from URL
-                parsed_url = urlparse(url)
-                path = parsed_url.path
-                if '.' in path:
-                    extension = '.' + path.split('.')[-1]
-                else:
-                    extension = '.bin'
-
-            # Save to temp directory
-            filename = f"downloaded_file_{task_id or 'temp'}{extension}"
-            filepath = os.path.join(self.temp_dir, filename)
-
-            with open(filepath, 'wb') as f:
-                for chunk in response.iter_content(chunk_size=8192):
-                    f.write(chunk)
-
-            logger.info(f"📥 File downloaded to: {filepath}")
-            return filepath
-
-        except Exception as e:
-            logger.error(f"❌ File download error: {e}")
-            return f"❌ Download failed: {e}"
-
-    def read_pdf(self, file_path: str) -> str:
-        """📄 Read and extract text from PDF files"""
-        try:
-            logger.info(f"📄 Reading PDF: {file_path}")
-
-            # Try importing PyPDF2
-            try:
-                import PyPDF2
-                PDF_AVAILABLE = True
-            except ImportError:
-                PDF_AVAILABLE = False
-
-            if not PDF_AVAILABLE:
-                return "❌ PDF reading unavailable. Install PyPDF2."
-
-            with open(file_path, 'rb') as file:
-                pdf_reader = PyPDF2.PdfReader(file)
-                text_content = []
-
-                for page_num, page in enumerate(pdf_reader.pages):
-                    try:
-                        text = page.extract_text()
-                        if text.strip():
-                            text_content.append(f"[Page {page_num + 1}]\n{text}")
-                    except Exception as page_error:
-                        logger.warning(f"⚠️ Error reading page {page_num + 1}: {page_error}")
-                        text_content.append(f"[Page {page_num + 1}] - Error reading page")
-
-                full_text = "\n\n".join(text_content)
-
-                # Truncate if too long
-                if len(full_text) > 5000:
-                    full_text = full_text[:5000] + "... [content truncated]"
-
-                return full_text
-
-        except Exception as e:
-            logger.error(f"❌ PDF reading error: {e}")
-            return f"❌ Failed to read PDF: {e}"
-
-    def calculator(self, expression: str) -> str:
-        """🧮 Enhanced mathematical calculator with scientific functions"""
-        try:
-            logger.info(f"🧮 Calculating: {expression}")
-
-            # Import required math modules
-            import math
-            import statistics
-
-            # Clean the expression
-            expression = expression.strip()
-
-            # Allow common mathematical functions
-            safe_dict = {
-                "__builtins__": {},
-                "abs": abs,
-                "round": round,
-                "min": min,
-                "max": max,
-                "sum": sum,
-                "len": len,
-                "pow": pow,
-                "sqrt": math.sqrt,
-                "sin": math.sin,
-                "cos": math.cos,
-                "tan": math.tan,
-                "log": math.log,
-                "log10": math.log10,
-                "exp": math.exp,
-                "pi": math.pi,
-                "e": math.e,
-                "factorial": math.factorial,
-                "mean": statistics.mean,
-                "median": statistics.median,
-                "mode": statistics.mode,
-                "stdev": statistics.stdev,
-            }
-
-            # Evaluate the expression safely
-            result = eval(expression, safe_dict, {})
-
-            # Format the result appropriately
-            if isinstance(result, float):
-                if result.is_integer():
-                    return str(int(result))
-                else:
-                    return f"{result:.6f}".rstrip('0').rstrip('.')
-            else:
-                return str(result)
-
-        except Exception as e:
-            logger.error(f"❌ Calculation error: {e}")
-            return f"❌ Calculation failed: {e}"
-
-    def analyze_image(self, image_path: str, question: str = "") -> str:
-        """🖼️ Enhanced image analysis with multiple AI models"""
-        if not PIL_AVAILABLE:
-            return "❌ Image analysis unavailable. Install Pillow."
-
-        try:
-            logger.info(f"🖼️ Analyzing image: {image_path} | Question: {question}")
-
-            # Get basic image info
-            with Image.open(image_path) as img:
-                basic_info = f"Image: {img.size[0]}x{img.size[1]} pixels, format: {img.format}, mode: {img.mode}"
-
-            # Multi-model analysis
-            analyses = []
-
-            # 1. OpenAI GPT-4V (if available)
-            if self.openai_key and question:
-                try:
-                    with open(image_path, 'rb') as img_file:
-                        img_base64 = base64.b64encode(img_file.read()).decode('utf-8')
-
-                    client = openai.OpenAI(api_key=self.openai_key)
-                    response = client.chat.completions.create(
-                        model="gpt-4o",
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": [
-                                    {"type": "text", "text": f"Analyze this image and answer: {question}. Provide only the direct answer, no explanations."},
-                                    {
-                                        "type": "image_url",
-                                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}
-                                    }
-                                ]
-                            }
-                        ],
-                        max_tokens=300
-                    )
-
-                    gpt4v_result = response.choices[0].message.content.strip()
-                    analyses.append(f"GPT-4V: {gpt4v_result}")
-
-                except Exception as vision_error:
-                    logger.warning(f"⚠️ GPT-4V analysis failed: {vision_error}")
-
-            # 2. HuggingFace Vision Models (if available)
-            if self.hf_token and 'vision' in self.clients:
-                try:
-                    with open(image_path, 'rb') as img_file:
-                        caption = self.clients['vision'].image_to_text(img_file.read())
-                    if caption:
-                        analyses.append(f"BLIP: {caption[0].get('generated_text', 'No caption')}")
-                except Exception as hf_error:
-                    logger.warning(f"⚠️ HuggingFace vision analysis failed: {hf_error}")
-
-            # 3. Object Detection
-            if question and "object" in question.lower():
-                obj_result = self.detect_objects(image_path)
-                if not obj_result.startswith("❌"):
-                    analyses.append(f"Objects: {obj_result}")
-
-            # Combine all analyses
-            if analyses:
-                combined_analysis = "; ".join(analyses)
-                return f"{basic_info}. Analysis: {combined_analysis}"
-            else:
-                return f"{basic_info}. Advanced vision analysis requires API keys."
-
-        except Exception as e:
-            logger.error(f"❌ Image analysis error: {e}")
-            return f"❌ Image analysis failed: {e}"
-
-    # === ENHANCED DOCUMENT PROCESSING ===
-    def read_docx(self, file_path: str) -> str:
-        """📄 Read Microsoft Word documents"""
-        try:
-            import docx2txt
-            text = docx2txt.process(file_path)
-            logger.info(f"📄 DOCX read: {len(text)} characters")
-            return text
-        except ImportError:
-            logger.warning("⚠️ docx2txt not available. Install docx2txt.")
-            return "❌ DOCX reading unavailable. Install docx2txt."
-        except Exception as e:
-            logger.error(f"❌ DOCX reading error: {e}")
-            return f"❌ DOCX reading failed: {e}"
-
-    def read_excel(self, file_path: str, sheet_name: str = None) -> str:
-        """📊 Read Excel spreadsheets"""
-        try:
-            import pandas as pd
-            if sheet_name:
-                df = pd.read_excel(file_path, sheet_name=sheet_name)
-            else:
-                df = pd.read_excel(file_path)
-
-            # Convert to readable format
-            result = f"Excel data ({df.shape[0]} rows, {df.shape[1]} columns):\n"
-            result += df.to_string(max_rows=50, max_cols=10)
-
-            logger.info(f"📊 Excel read: {df.shape}")
-            return result
-        except ImportError:
-            logger.warning("⚠️ pandas not available for Excel reading.")
-            return "❌ Excel reading unavailable. Install pandas and openpyxl."
-        except Exception as e:
-            logger.error(f"❌ Excel reading error: {e}")
-            return f"❌ Excel reading failed: {e}"
-
-    def read_csv(self, file_path: str) -> str:
-        """📋 Read CSV files"""
-        try:
-            import pandas as pd
-            df = pd.read_csv(file_path)
-
-            # Convert to readable format
-            result = f"CSV data ({df.shape[0]} rows, {df.shape[1]} columns):\n"
-            result += df.head(20).to_string()
-
-            if df.shape[0] > 20:
-                result += f"\n... (showing first 20 of {df.shape[0]} rows)"
-
-            logger.info(f"📋 CSV read: {df.shape}")
-            return result
-        except ImportError:
-            logger.warning("⚠️ pandas not available for CSV reading.")
-            return "❌ CSV reading unavailable. Install pandas."
-        except Exception as e:
-            logger.error(f"❌ CSV reading error: {e}")
-            return f"❌ CSV reading failed: {e}"
-
-    def read_text_file(self, file_path: str, encoding: str = 'utf-8') -> str:
-        """📝 Read plain text files with encoding detection"""
-        try:
-            # Try UTF-8 first
-            try:
-                with open(file_path, 'r', encoding='utf-8') as f:
-                    content = f.read()
-            except UnicodeDecodeError:
-                # Try other common encodings
-                encodings = ['latin-1', 'cp1252', 'ascii']
-                content = None
-                for enc in encodings:
-                    try:
-                        with open(file_path, 'r', encoding=enc) as f:
-                            content = f.read()
-                        break
-                    except UnicodeDecodeError:
-                        continue
-
-                if content is None:
-                    return "❌ Unable to decode text file with common encodings"
-
-            logger.info(f"📝 Text file read: {len(content)} characters")
-            return content[:10000] + ("..." if len(content) > 10000 else "")
-        except Exception as e:
-            logger.error(f"❌ Text file reading error: {e}")
-            return f"❌ Text file reading failed: {e}"
-
-    def extract_archive(self, file_path: str) -> str:
-        """📦 Extract and list archive contents (ZIP, RAR, etc.)"""
-        try:
-            import zipfile
-            import os
-
-            if file_path.endswith('.zip'):
-                with zipfile.ZipFile(file_path, 'r') as zip_ref:
-                    file_list = zip_ref.namelist()
-                    extract_dir = os.path.join(os.path.dirname(file_path), 'extracted')
-                    os.makedirs(extract_dir, exist_ok=True)
-                    zip_ref.extractall(extract_dir)
-
-                result = f"📦 ZIP archive extracted to {extract_dir}\n"
-                result += f"Contents ({len(file_list)} files):\n"
-                result += "\n".join(file_list[:20])
-
-                if len(file_list) > 20:
-                    result += f"\n... (showing first 20 of {len(file_list)} files)"
-
-                logger.info(f"📦 ZIP extracted: {len(file_list)} files")
-                return result
-            else:
-                return f"❌ Unsupported archive format: {file_path}"
-        except Exception as e:
-            logger.error(f"❌ Archive extraction error: {e}")
-            return f"❌ Archive extraction failed: {e}"
-
-    # === ENHANCED WEB BROWSING ===
-    def browse_with_js(self, url: str) -> str:
-        """🌐 Enhanced web browsing with JavaScript support (when available)"""
-        try:
-            # Try playwright for dynamic content
-            from playwright.sync_api import sync_playwright
-
-            with sync_playwright() as p:
-                browser = p.chromium.launch(headless=True)
-                page = browser.new_page()
-                page.goto(url, timeout=15000)
-                page.wait_for_timeout(2000)  # Wait for JS to load
-                content = page.content()
-                browser.close()
-
-            # Parse content
-            from bs4 import BeautifulSoup
-            soup = BeautifulSoup(content, 'html.parser')
-
-            # Remove scripts and styles
-            for script in soup(["script", "style"]):
-                script.decompose()
-
-            text = soup.get_text()
-            # Clean up whitespace
-            lines = (line.strip() for line in text.splitlines())
-            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
-            clean_text = ' '.join(chunk for chunk in chunks if chunk)
-
-            logger.info(f"🌐 JS-enabled browsing: {url} - {len(clean_text)} chars")
-            return clean_text[:5000] + ("..." if len(clean_text) > 5000 else "")
-
-        except ImportError:
-            logger.info("⚠️ Playwright not available, falling back to requests")
-            return self.browse_url(url)
-        except Exception as e:
-            logger.warning(f"⚠️ JS browsing failed: {e}, falling back to basic")
-            return self.browse_url(url)
-
-    # === ENHANCED GAIA FILE HANDLING ===
-    def download_gaia_file(self, task_id: str, file_name: str = None) -> str:
-        """📥 Enhanced GAIA file download with comprehensive format support"""
-        try:
-            # GAIA API endpoint for file downloads
-            api_base = "https://agents-course-unit4-scoring.hf.space"
-            file_url = f"{api_base}/files/{task_id}"
-
-            logger.info(f"📥 Downloading GAIA file for task: {task_id}")
-
-            headers = {
-                'User-Agent': 'GAIA-Agent/1.0 (Enhanced)',
-                'Accept': '*/*',
-                'Accept-Encoding': 'gzip, deflate',
-            }
-
-            response = requests.get(file_url, headers=headers, timeout=30, stream=True)
-
-            if response.status_code == 200:
-                # Determine file extension from headers or filename
-                content_type = response.headers.get('content-type', '')
-                content_disposition = response.headers.get('content-disposition', '')
-
-                # Extract filename from Content-Disposition header
-                if file_name:
-                    filename = file_name
-                elif 'filename=' in content_disposition:
-                    filename = content_disposition.split('filename=')[1].strip('"\'')
-                else:
-                    # Guess extension from content type
-                    extension_map = {
-                        'image/jpeg': '.jpg',
-                        'image/png': '.png',
-                        'image/gif': '.gif',
-                        'application/pdf': '.pdf',
-                        'text/plain': '.txt',
-                        'application/json': '.json',
-                        'text/csv': '.csv',
-                        'application/vnd.ms-excel': '.xlsx',
-                        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
-                        'application/msword': '.docx',
-                        'video/mp4': '.mp4',
-                        'audio/mpeg': '.mp3',
-                        'audio/wav': '.wav',
-                        'application/zip': '.zip',
-                    }
-                    extension = extension_map.get(content_type, '.tmp')
-                    filename = f"gaia_file_{task_id}{extension}"
-
-                # Save file
-                import tempfile
-                import os
-
-                temp_dir = tempfile.gettempdir()
-                filepath = os.path.join(temp_dir, filename)
-
-                with open(filepath, 'wb') as f:
-                    for chunk in response.iter_content(chunk_size=8192):
-                        f.write(chunk)
-
-                file_size = os.path.getsize(filepath)
-                logger.info(f"📥 GAIA file downloaded: {filepath} ({file_size} bytes)")
-
-                # Automatically process based on file type
-                return self.process_downloaded_file(filepath, task_id)
-
-            else:
-                error_msg = f"❌ GAIA file download failed: HTTP {response.status_code}"
-                logger.error(error_msg)
-                return error_msg
-
-        except Exception as e:
-            error_msg = f"❌ GAIA file download error: {e}"
-            logger.error(error_msg)
-            return error_msg
-
-    def process_downloaded_file(self, filepath: str, task_id: str) -> str:
-        """📋 Process downloaded GAIA files based on their type"""
-        try:
-            import os
-            filename = os.path.basename(filepath)
-            file_ext = os.path.splitext(filename)[1].lower()
-
-            logger.info(f"📋 Processing GAIA file: {filename} (type: {file_ext})")
-
-            result = f"📁 GAIA File: {filename} (Task: {task_id})\n\n"
-
-            # Process based on file type
-            if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
-                # Image file
-                image_result = self.analyze_image(filepath, "Describe this image in detail")
-                result += f"🖼️ Image Analysis:\n{image_result}\n"
-
-            elif file_ext == '.pdf':
-                # PDF document
-                pdf_content = self.read_pdf(filepath)
-                result += f"📄 PDF Content:\n{pdf_content}\n"
-
-            elif file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css']:
-                # Text files
-                text_content = self.read_text_file(filepath)
-                result += f"📝 Text Content:\n{text_content}\n"
-
-            elif file_ext in ['.csv']:
-                # CSV files
-                csv_content = self.read_csv(filepath)
-                result += f"📊 CSV Data:\n{csv_content}\n"
-
-            elif file_ext in ['.xlsx', '.xls']:
-                # Excel files
-                excel_content = self.read_excel(filepath)
-                result += f"📈 Excel Data:\n{excel_content}\n"
-
-            elif file_ext in ['.docx']:
-                # Word documents
-                docx_content = self.read_docx(filepath)
-                result += f"📄 Word Document:\n{docx_content}\n"
-
-            elif file_ext in ['.mp4', '.avi', '.mov', '.wmv']:
-                # Video files
-                video_result = self.process_video(filepath, "analyze")
-                result += f"🎥 Video Analysis:\n{video_result}\n"
-
-            elif file_ext in ['.mp3', '.wav', '.m4a', '.flac']:
-                # Audio files
-                audio_result = self.analyze_audio(filepath, "transcribe")
-                result += f"🎵 Audio Analysis:\n{audio_result}\n"
-
-            elif file_ext in ['.zip', '.rar']:
-                # Archive files
-                archive_result = self.extract_archive(filepath)
-                result += f"📦 Archive Contents:\n{archive_result}\n"
-
-            elif file_ext in ['.json']:
-                # JSON files
-                try:
-                    import json
-                    with open(filepath, 'r') as f:
-                        json_data = json.load(f)
-                    result += f"📋 JSON Data:\n{json.dumps(json_data, indent=2)[:2000]}\n"
-                except Exception as e:
-                    result += f"❌ JSON parsing error: {e}\n"
-
-            else:
-                # Unknown file type - try as text
-                try:
-                    text_content = self.read_text_file(filepath)
-                    result += f"📄 Raw Content:\n{text_content}\n"
-                except Exception:
-                    result += f"❌ Unsupported file type: {file_ext}\n"
-
-            # Add file metadata
-            file_size = os.path.getsize(filepath)
-            result += f"\n📊 File Info: {file_size} bytes, Path: {filepath}"
-
-            return result
-
-        except Exception as e:
-            error_msg = f"❌ File processing error: {e}"
-            logger.error(error_msg)
-            return error_msg
-
-    # === ENHANCED REASONING CHAIN ===
-    def reasoning_chain(self, question: str, max_steps: int = 5) -> str:
-        """🧠 Explicit step-by-step reasoning for complex GAIA questions"""
-        try:
-            logger.info(f"🧠 Starting reasoning chain for: {question[:50]}...")
-
-            reasoning_steps = []
-            current_context = question
-
-            for step in range(1, max_steps + 1):
-                logger.info(f"🧠 Reasoning step {step}/{max_steps}")
-
-                # Analyze what we need to do next
-                analysis_prompt = f"""Analyze this question step by step:
-
-Question: {question}
-
-Previous context: {current_context}
-
-What is the next logical step to solve this question? Be specific about:
-1. What information do we need?
-2. What tool should we use?
-3. What specific action to take?
-
-Respond with just the next action needed."""
-
-                # Get next step from our best model
-                next_step = self.fast_qa_answer(analysis_prompt)
-                reasoning_steps.append(f"Step {step}: {next_step}")
-
-                # Execute the step if it mentions a specific tool
-                if any(tool in next_step.lower() for tool in ['search', 'download', 'calculate', 'analyze', 'read']):
-                    # Extract and execute tool call
-                    if 'search' in next_step.lower():
-                        search_query = self._extract_search_query(next_step, question)
-                        if search_query:
-                            search_result = self.web_search(search_query)
-                            current_context += f"\n\nSearch result: {search_result[:500]}"
-                            reasoning_steps.append(f"  → Executed search: {search_result[:100]}...")
-
-                    elif 'calculate' in next_step.lower():
-                        calc_expr = self._extract_calculation(next_step, question)
-                        if calc_expr:
-                            calc_result = self.calculator(calc_expr)
-                            current_context += f"\n\nCalculation: {calc_expr} = {calc_result}"
-                            reasoning_steps.append(f"  → Calculated: {calc_expr} = {calc_result}")
-
-                # Check if we have enough information
-                if self._has_sufficient_info(current_context, question):
-                    reasoning_steps.append(f"Step {step + 1}: Sufficient information gathered")
-                    break
-
-            # Generate final answer
-            final_prompt = f"""Based on this reasoning chain, provide the final answer:
-
-Question: {question}
-
-Reasoning steps:
-{chr(10).join(reasoning_steps)}
-
-Context: {current_context}
-
-Provide ONLY the final answer - no explanation."""
-
-            final_answer = self.fast_qa_answer(final_prompt)
-
-            logger.info(f"🧠 Reasoning chain complete: {len(reasoning_steps)} steps")
-            return final_answer
-
-        except Exception as e:
-            logger.error(f"❌ Reasoning chain error: {e}")
-            return self.query_with_tools(question)  # Fallback to regular processing
-
-    def _extract_search_query(self, step_text: str, question: str) -> str:
-        """Extract search query from reasoning step"""
-        # Simple extraction logic
-        if 'search for' in step_text.lower():
-            parts = step_text.lower().split('search for')[1].split('.')[0]
-            return parts.strip(' "\'')
-        return None
-
-    def _extract_calculation(self, step_text: str, question: str) -> str:
-        """Extract calculation from reasoning step"""
-        import re
-        # Look for mathematical expressions
-        math_patterns = [
-            r'[\d+\-*/().\s]+',
-            r'\d+\s*[+\-*/]\s*\d+',
-        ]
-        for pattern in math_patterns:
-            matches = re.findall(pattern, step_text)
-            if matches:
-                return matches[0].strip()
-        return None
-
-    def _has_sufficient_info(self, context: str, question: str) -> bool:
-        """Check if we have sufficient information to answer"""
-        # Simple heuristic - check if context is substantially longer than question
-        return len(context) > len(question) * 3 and len(context) > 200
-
-    # === ENHANCED TOOL ENUMERATION ===
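-
-# Example of the TOOL_CALL syntax that the GAIA system prompt below asks models
-# to emit and that EnhancedMultiModelGAIASystem.parse_tool_calls() recognises
-# (a sketch; the query and expression values are made-up illustrations):
-#   TOOL_CALL: web_search(query="GAIA benchmark leaderboard")
-#   TOOL_CALL: calculator(expression="sqrt(144) + 2**3")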
-
-# === MAIN SYSTEM CLASSES ===
-
-class EnhancedMultiModelGAIASystem:
-    """🚀 Complete GAIA system with advanced tool calling and multi-modal capabilities"""
-
-    def __init__(self, hf_token: str = None, openai_key: str = None):
-        # Initialize enhanced toolkit
-        self.toolkit = UniversalMultimodalToolkit(hf_token, openai_key)
-
-        # Initialize AI clients
-        self.hf_token = hf_token or os.getenv('HF_TOKEN')
-        self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')
-
-        # 🚀 SPEED OPTIMIZATION: Response cache for instant answers
-        self.response_cache = {}
-        self.qa_cache = {}
-
-        # Initialize clients with comprehensive model support
-        self.clients = self._initialize_clients()
-        # 🎯 PRIORITY ORDER: Qwen3-235B-A22B as TOP model for best performance
-        available_models = list(self.clients.keys())
-
-        # Preferred order (only include models that are actually available)
-        preferred_order = [
-            "fireworks_qwen3_235b",  # 🥇 PRIORITY 1: Qwen3-235B-A22B (Best reasoning)
-            "together_deepseek_r1",  # 🥈 PRIORITY 2: DeepSeek-R1 (Strong reasoning)
-            "openai_gpt4o",          # 🥉 PRIORITY 3: GPT-4o (Vision capabilities)
-            "together_llama",        # PRIORITY 4: Llama-3.3-70B (Large context)
-            "novita_minimax",        # PRIORITY 5: MiniMax (Extended context)
-            "featherless_kimi",      # PRIORITY 6: Moonshot (Specialized tasks)
-            "fallback_basic"         # PRIORITY 7: Local fallback
-        ]
-
-        # Only include available models in priority list
-        self.model_priority = [model for model in preferred_order if model in available_models]
-
-        if not self.model_priority:
-            logger.error("❌ No models available for processing")
-        else:
-            logger.info(f"🎯 Model priority: {self.model_priority[0]} (top priority)")
-
-        logger.info("🚀 Enhanced Multi-Model GAIA System initialized")
-
-    def _initialize_clients(self) -> Dict[str, Any]:
-        """Initialize all AI model clients with SPEED OPTIMIZATION for 100% GAIA performance"""
-        clients = {}
-
-        if self.hf_token and HF_AVAILABLE:
-            # 🚀 ULTRA-FAST QA MODEL (Priority 0 - for instant answers)
-            clients["ultra_fast_qa"] = {
-                "client": InferenceClient(
-                    provider="hf-inference",
-                    api_key=self.hf_token,
-                ),
-                "model": "deepset/roberta-base-squad2",
-                "priority": 0,
-                "provider": "HuggingFace QA",
-                "type": "question_answering",
-                "speed": "ultra_fast",
-                "use_for": ["factual", "simple", "direct"]
-            }
-
-            # ⚡ FAST BERT QA (Priority 0.5)
-            clients["fast_bert_qa"] = {
-                "client": InferenceClient(
-                    provider="hf-inference",
-                    api_key=self.hf_token,
-                ),
-                "model": "deepset/bert-base-cased-squad2",
-                "priority": 0.5,
-                "provider": "HuggingFace QA",
-                "type": "question_answering",
-                "speed": "very_fast",
-                "use_for": ["reading_comprehension", "context_based"]
-            }
-
-            # 🔥 Together AI models (Priority: DeepSeek-R1)
-            clients["together_deepseek_r1"] = {
-                "client": InferenceClient(model="deepseek-ai/DeepSeek-R1", token=self.hf_token),
-                "priority": 1,
-                "provider": "Together AI",
-                "type": "chat",
-                "speed": "fast"
-            }
-            clients["together_llama"] = {
-                "client": InferenceClient(model="meta-llama/Llama-3.3-70B-Instruct", token=self.hf_token),
-                "priority": 2,
-                "provider": "Together AI",
-                "type": "chat",
-                "speed": "medium"
-            }
-
-            # 🌟 Novita AI models (Enhanced Speed)
-            clients["novita_minimax"] = {
-                "client": InferenceClient(model="MiniMax/MiniMax-M1-80k", token=self.hf_token),
-                "priority": 3,
-                "provider": "Novita AI",
-                "type": "chat",
-                "speed": "fast"
-            }
-            clients["novita_deepseek_chat"] = {
-                "client": InferenceClient(model="deepseek-ai/deepseek-chat", token=self.hf_token),
-                "priority": 4,
-                "provider": "Novita AI",
-                "type": "chat",
-                "speed": "fast"
-            }
-
-            # 🪶 Featherless AI models
-            clients["featherless_kimi"] = {
-                "client": InferenceClient(model="moonshot-ai/moonshot-v1-8k", token=self.hf_token),
-                "priority": 5,
-                "provider": "Featherless AI",
-                "type": "chat",
-                "speed": "medium"
-            }
-            clients["featherless_jan"] = {
-                "client": InferenceClient(model="janhq/jan-nano", token=self.hf_token),
-                "priority": 6,
-                "provider": "Featherless AI",
-                "type": "chat",
-                "speed": "very_fast"
-            }
-
-            # 🚀 Fireworks AI models - TOP PRIORITY MODEL
-            clients["fireworks_qwen3_235b"] = {
-                "client": InferenceClient(
-                    provider="fireworks-ai",
-                    api_key=self.hf_token,
-                ),
-                "model": "Qwen/Qwen3-235B-A22B",
-                "priority": 0.1,  # 🥇 HIGHEST PRIORITY - Best reasoning model
-                "provider": "Fireworks AI",
-                "type": "chat",
-                "speed": "fast"
-            }
-            clients["fireworks_llama"] = {
-                "client": InferenceClient(model="accounts/fireworks/models/llama-v3p1-8b-instruct", token=self.hf_token),
-                "priority": 7,
-                "provider": "Fireworks AI",
-                "type": "chat",
-                "speed": "very_fast"
-            }
-
-            # 🤗 HuggingFace Inference models (Specialized)
-            clients["hf_mistral"] = {
-                "client": InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.1", token=self.hf_token),
-                "priority": 8,
-                "provider": "HuggingFace",
-                "type": "chat",
-                "speed": "fast"
-            }
-            clients["hf_phi"] = {
-                "client": InferenceClient(model="microsoft/Phi-3-mini-4k-instruct", token=self.hf_token),
-                "priority": 9,
-                "provider": "HuggingFace",
-                "type": "chat",
-                "speed": "ultra_fast"
-            }
-
-        # 🤖 OpenAI models (if API key available)
-        if self.openai_key and OPENAI_AVAILABLE:
-            clients["openai_gpt4o"] = {
-                "client": "openai_gpt4o",
-                "model": "gpt-4o",
-                "priority": 1.5,
-                "provider": "OpenAI",
-                "type": "chat",
-                "speed": "medium"
-            }
-            clients["openai_gpt35"] = {
-                "client": "openai_gpt35",
-                "model": "gpt-3.5-turbo",
-                "priority": 10,
-                "provider": "OpenAI",
-                "type": "chat",
-                "speed": "fast"
-            }
-
-        # 🛡️ Fallback client for when external services are unavailable
-        if not clients:
-            clients["fallback_basic"] = {
-                "client": "fallback",
-                "model": "basic",
-                "priority": 999,
-                "provider": "Local Fallback",
-                "type": "fallback",
-                "speed": "instant"
-            }
-            logger.warning("⚠️ No external AI services available, using fallback mode")
-
-        logger.info(f"✅ Initialized {len(clients)} AI clients with speed optimization")
-        return clients
-
-    def parse_tool_calls(self, response: str) -> List[ToolCall]:
-        """🔧 Parse advanced tool calls from AI response"""
-        tool_calls = []
-
-        # Enhanced patterns for tool calls
-        patterns = [
-            r'TOOL_CALL:\s*(\w+)\((.*?)\)',  # TOOL_CALL: web_search(query="...")
-            r'<(\w+)>(.*?)</\1>',  # XML-style <tool>parameters</tool>
-            r'```(\w+)\n(.*?)\n```',  # Code block style
-        ]
-
-        for pattern in patterns:
-            matches = re.findall(pattern, response, re.DOTALL | re.IGNORECASE)
-            for tool_name, params_str in matches:
-                try:
-                    params = self._parse_parameters(params_str)
-                    tool_type = ToolType(tool_name.lower())
-                    tool_calls.append(ToolCall(tool=tool_type, parameters=params))
-                    logger.info(f"🔧 Parsed tool call: {tool_name} with params: {params}")
-                except (ValueError, Exception) as e:
-                    logger.warning(f"⚠️ Failed to parse tool call {tool_name}: {e}")
-
-        return tool_calls
-
-    def _parse_parameters(self, params_str: str) -> Dict[str, Any]:
-        """Parse parameters from various formats"""
-        params = {}
-        if not params_str.strip():
-            return params
-
-        # Try JSON parsing first
-        try:
-            return json.loads(params_str)
-        except (json.JSONDecodeError, TypeError):
-            pass
-
-        # Try key=value parsing
-        param_matches = re.findall(r'(\w+)=(["\'])(.*?)\2', params_str)
-        for param_name, quote, param_value in param_matches:
-            params[param_name] = param_value
-
-        # Try simple text for single parameter
-        if not params and params_str.strip():
-            # Remove quotes if present
-            clean_param = params_str.strip().strip('"\'')
-            params['query'] = clean_param  # Default to query parameter
-
-        return params
-
-    def execute_tool_call(self, tool_call: ToolCall) -> str:
-        """⚡ Execute a single tool call with comprehensive error handling"""
-        try:
-            logger.info(f"⚡ Executing {tool_call.tool.value} with params: {tool_call.parameters}")
-
-            if tool_call.tool == ToolType.WEB_SEARCH:
-                query = tool_call.parameters.get('query', '')
-                results = self.toolkit.web_search(query)
-                return f"🔍 Web search results:\n{results}"
-
-            elif tool_call.tool == ToolType.BROWSE_URL:
-                url = tool_call.parameters.get('url', '')
-                result = self.toolkit.browse_url(url)
-                return result
-
-            elif tool_call.tool == ToolType.DOWNLOAD_FILE:
-                task_id = tool_call.parameters.get('task_id', '')
-                url = tool_call.parameters.get('url', '')
-                filepath = self.toolkit.download_file(url, task_id)
-                return f"📥 Downloaded file to: {filepath}"
-
-            elif tool_call.tool == ToolType.READ_PDF:
-                file_path = tool_call.parameters.get('file_path', '')
-                text = self.toolkit.read_pdf(file_path)
-                return f"📄 PDF content:\n{text}"
-
-            elif tool_call.tool == ToolType.ANALYZE_IMAGE:
-                image_path = tool_call.parameters.get('image_path', '')
-                question = tool_call.parameters.get('question', '')
-                result = self.toolkit.analyze_image(image_path, question)
-                return f"🖼️ Image analysis: {result}"
-
-            elif tool_call.tool == ToolType.CALCULATOR:
-                expression = tool_call.parameters.get('expression', '')
-                result = self.toolkit.calculator(expression)
-                return f"🧮 Calculation result: {result}"
-
-            elif tool_call.tool == ToolType.PROCESS_VIDEO:
-                video_path = tool_call.parameters.get('video_path', '')
-                task = tool_call.parameters.get('task', 'analyze')
-                result = self.toolkit.process_video(video_path, task)
-                return f"🎥 Video analysis: {result}"
-
-            elif tool_call.tool == ToolType.ANALYZE_AUDIO:
-                audio_path = tool_call.parameters.get('audio_path', '')
-                task = tool_call.parameters.get('task', 'analyze')
-                result = self.toolkit.analyze_audio(audio_path, task)
-                return f"🎵 Audio analysis: {result}"
-
-            elif tool_call.tool == ToolType.GENERATE_IMAGE:
-                prompt = tool_call.parameters.get('prompt', '')
-                style = tool_call.parameters.get('style', 'realistic')
-                result = self.toolkit.generate_image(prompt, style)
-                return f"🎨 Image generation: {result}"
-
-            elif tool_call.tool == ToolType.SYNTHESIZE_SPEECH:
-                text = tool_call.parameters.get('text', '')
-                voice = tool_call.parameters.get('voice', 'default')
-                result = self.toolkit.synthesize_speech(text, voice)
-                return f"🎙️ Speech synthesis: {result}"
-
-            elif tool_call.tool == ToolType.CREATE_VISUALIZATION:
-                data = tool_call.parameters.get('data', {})
-                chart_type = tool_call.parameters.get('chart_type', 'bar')
-                result = self.toolkit.create_visualization(data, chart_type)
-                return f"📊 Data visualization: {result}"
-
-            elif tool_call.tool == ToolType.ANALYZE_DATA:
-                data = tool_call.parameters.get('data', {})
-                operation = tool_call.parameters.get('operation', 'statistics')
-                result = self.toolkit.scientific_compute(operation, data)
-                return f"🧬 Scientific computation: {result}"
-
-            elif tool_call.tool == ToolType.GENERATE_VIDEO:
-                video_path = tool_call.parameters.get('video_path', '')
-                result = self.toolkit.process_video(video_path, 'generate')
-                return f"🎬 Video generation: {result}"
-
-            elif tool_call.tool == ToolType.EXTRACT_AUDIO:
-                audio_path = tool_call.parameters.get('audio_path', '')
-                result = self.toolkit.analyze_audio(audio_path, 'extract')
-                return f"🎵 Audio extraction: {result}"
-
-            elif tool_call.tool == ToolType.TRANSCRIBE_SPEECH:
-                audio_path = tool_call.parameters.get('audio_path', '')
-                result = self.toolkit.transcribe_speech(audio_path)
-                return f"🎙️ Speech transcription: {result}"
-
-            elif tool_call.tool == ToolType.DETECT_OBJECTS:
-                image_path = tool_call.parameters.get('image_path', '')
-                result = self.toolkit.detect_objects(image_path)
-                return f"🔍 Object detection: {result}"
-
-            elif tool_call.tool == ToolType.FACE_RECOGNITION:
-                image_path = tool_call.parameters.get('image_path', '')
-                result = self.toolkit.analyze_image(image_path, "Identify the person in this image")
-                return f"👤 Face recognition: {result}"
-
-            elif tool_call.tool == ToolType.SCIENTIFIC_COMPUTE:
-                operation = tool_call.parameters.get('operation', 'statistics')
-                data = tool_call.parameters.get('data', {})
-                result = self.toolkit.scientific_compute(operation, data)
-                return f"🧬 Scientific computation: {result}"
-
-            else:
-                return f"❌ Unknown tool: {tool_call.tool}"
-
-        except Exception as e:
-            error_msg = f"❌ Tool execution failed: {str(e)}"
-            logger.error(error_msg)
-            return error_msg
-
-    def fast_qa_answer(self, question: str, context: str = "") -> str:
-        """🚀 Ultra-fast question answering using optimized models"""
-        try:
-            # Check cache first
-            cache_key = hashlib.md5(f"{question}:{context}".encode()).hexdigest()
-            if cache_key in self.qa_cache:
-                logger.info("🚀 Cache hit - instant answer!")
-                return self.qa_cache[cache_key]
-
-            # Try ultra-fast QA model first
-            if "ultra_fast_qa" in self.clients:
-                try:
-                    client_info = self.clients["ultra_fast_qa"]
-                    client = client_info["client"]
-
-                    # Use question-answering endpoint with correct model parameter
-                    if context:
-                        result = client.question_answering(
-                            question=question,
-                            context=context,
-                            model=client_info["model"]
-                        )
-                        answer = result.get("answer", "").strip()
-                    else:
-                        # For questions without context, use web search for context
-                        search_result = self.toolkit.web_search(question, num_results=2)
-                        result = client.question_answering(
-                            question=question,
-                            context=search_result[:500],
-                            model=client_info["model"]
-                        )
-                        answer = result.get("answer", "").strip()
-
-                    if answer:
-                        # Cache the result
-                        self.qa_cache[cache_key] = answer
-                        return answer
-
-                except Exception as e:
-                    logger.warning(f"⚠️ Fast QA failed: {e}")
-
-            # Fallback to regular processing
-            return None
-
-        except Exception as e:
-            logger.error(f"❌ Fast QA error: {e}")
-            return None
-
-    def query_with_tools(self, question: str, model_name: str = None, max_iterations: int = 3) -> str:
-        """🧠 Enhanced query processing with SPEED-OPTIMIZED capabilities for 100% GAIA performance"""
-
-        # 🚀 FIRST: Try ultra-fast QA for instant answers
-        fast_answer = self.fast_qa_answer(question)
-        if fast_answer:
-            logger.info("⚡ Ultra-fast QA answer found!")
-            return self._clean_final_answer(fast_answer)
-
-        # Check response cache
-        cache_key = hashlib.md5(question.encode()).hexdigest()
-        if cache_key in self.response_cache:
-            logger.info("🚀 Cache hit - instant answer!")
-            return self.response_cache[cache_key]
-
-        if not model_name:
-            model_name = self.model_priority[0]
-
-        logger.info(f"🧠 Processing question with {model_name}: {question[:100]}...")
-
-        # Ultra-enhanced system prompt for GAIA benchmark
-        system_prompt = f"""You are an advanced AI agent optimized for the GAIA benchmark with access to powerful tools.
- -๐Ÿ› ๏ธ AVAILABLE TOOLS: -- TOOL_CALL: web_search(query="search term") - Search the web for current information -- TOOL_CALL: browse_url(url="https://example.com") - Browse and extract content from specific URLs -- TOOL_CALL: download_file(task_id="123") - Download files from GAIA tasks or URLs -- TOOL_CALL: read_pdf(file_path="document.pdf") - Read and extract text from PDF files -- TOOL_CALL: analyze_image(image_path="image.jpg", question="what to analyze") - Analyze images with vision AI -- TOOL_CALL: calculator(expression="2+2*3") - Perform mathematical calculations and scientific functions -- TOOL_CALL: process_video(video_path="video.mp4", task="analyze") - Analyze video content -- TOOL_CALL: analyze_audio(audio_path="audio.wav", task="analyze") - Analyze audio content -- TOOL_CALL: generate_image(prompt="description", style="realistic") - Generate images from text descriptions -- TOOL_CALL: synthesize_speech(text="Hello, world!", voice="default") - Convert text to speech -- TOOL_CALL: create_visualization(data="chart_data", chart_type="bar") - Create data visualizations and charts -- TOOL_CALL: analyze_data(data="statistical_data") - Perform scientific computations and analysis -- TOOL_CALL: generate_video(video_path="output.mp4") - Generate videos from video content -- TOOL_CALL: extract_audio(audio_path="audio.wav") - Extract audio from video content -- TOOL_CALL: transcribe_speech(audio_path="audio.wav") - Convert speech to text -- TOOL_CALL: detect_objects(image_path="image.jpg") - Detect and identify objects in images -- TOOL_CALL: face_recognition(image_path="image.jpg") - Identify the person in images -- TOOL_CALL: scientific_compute(operation="statistics", data="numerical_data") - Perform scientific computations and analysis - -๐ŸŽฏ GAIA BENCHMARK INSTRUCTIONS: -1. For research questions, ALWAYS use web_search first to get current information -2. If files are mentioned or task IDs given, use download_file then read_pdf/analyze_image -3. For multi-step problems, break down systematically and use tools in logical order -4. For image questions, use analyze_image with specific question about what to find -5. CRITICAL: Provide DIRECT, CONCISE answers ONLY - no explanations or reasoning -6. 
Format response as just the final answer - nothing else - -Question: {question} - -Think step by step about what tools you need, use them, then provide ONLY the final answer.""" - - conversation_history = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": question} - ] - - # Iterative tool calling loop - for iteration in range(max_iterations): - try: - client_info = self.clients.get(model_name) - if not client_info: - logger.warning(f"โš ๏ธ Model {model_name} unavailable, using fallback") - return self._fallback_response(question) - - # Handle fallback client - if model_name == "fallback_basic": - logger.info("๐Ÿ›ก๏ธ Using local fallback processing") - return self._fallback_response(question) - - # Get AI response - if "openai" in model_name: - response = client_info["client"].chat.completions.create( - model=client_info["model"], - messages=conversation_history, - max_tokens=1500, - temperature=0.0 - ) - ai_response = response.choices[0].message.content - elif model_name == "fireworks_qwen3_235b": - # Use the specific Qwen model implementation - response = client_info["client"].chat.completions.create( - model=client_info["model"], - messages=conversation_history, - max_tokens=1500, - temperature=0.0 - ) - ai_response = response.choices[0].message.content - else: - response = client_info["client"].chat_completion( - messages=conversation_history, - max_tokens=1500, - temperature=0.0 - ) - ai_response = response.choices[0].message.content - - # Clean thinking process from response (critical for GAIA compliance) - ai_response = self._remove_thinking_process(ai_response) - - logger.info(f"๐Ÿค– AI Response (iteration {iteration + 1}): {ai_response[:200]}...") - - # Check for tool calls - tool_calls = self.parse_tool_calls(ai_response) - - if tool_calls: - # Execute tools and collect results - tool_results = [] - for tool_call in tool_calls: - result = self.execute_tool_call(tool_call) - tool_results.append(f"Tool {tool_call.tool.value}: {result}") - - # Add tool results to conversation - conversation_history.append({"role": "assistant", "content": ai_response}) - - tool_context = f"TOOL RESULTS:\n" + "\n\n".join(tool_results) - tool_context += f"\n\nBased on these tool results, provide the final answer to: {question}\nProvide ONLY the direct answer - no explanations:" - - conversation_history.append({"role": "user", "content": tool_context}) - - logger.info(f"๐Ÿ”ง Executed {len(tool_calls)} tools, continuing to iteration {iteration + 2}") - - else: - # No tools needed, extract final answer - final_answer = self._extract_final_answer(ai_response) - logger.info(f"โœ… Final answer extracted: {final_answer}") - return final_answer - - except Exception as e: - logger.error(f"โŒ Query iteration {iteration + 1} failed for {model_name}: {e}") - - # Try next model in priority list - current_index = self.model_priority.index(model_name) if model_name in self.model_priority else 0 - if current_index + 1 < len(self.model_priority): - model_name = self.model_priority[current_index + 1] - logger.info(f"๐Ÿ”„ Switching to model: {model_name}") - else: - break - - # Final attempt with tool results if we have them - if len(conversation_history) > 2: - try: - client_info = self.clients.get(model_name) - if client_info: - if "openai" in model_name: - final_response = client_info["client"].chat.completions.create( - model=client_info["model"], - messages=conversation_history, - max_tokens=300, - temperature=0.0 - ) - final_answer = final_response.choices[0].message.content - else: 
- final_response = client_info["client"].chat_completion( - messages=conversation_history, - max_tokens=300, - temperature=0.0 - ) - final_answer = final_response.choices[0].message.content - - return self._extract_final_answer(final_answer) - except Exception as e: - logger.error(f"โŒ Final answer extraction failed: {e}") - - # Ultimate fallback - logger.warning(f"โš ๏ธ Using fallback response for: {question}") - return self._fallback_response(question) - - def _extract_final_answer(self, response: str) -> str: - """โœจ Ultra-aggressive answer extraction for perfect GAIA compliance""" - if not response: - return "Unknown" - - logger.info(f"โœจ Extracting final answer from: {response[:100]}...") - - # Remove tool calls completely - response = re.sub(r'TOOL_CALL:.*?\n', '', response, flags=re.DOTALL) - response = re.sub(r'.*?', '', response, flags=re.DOTALL | re.IGNORECASE) - response = re.sub(r'.*?', '', response, flags=re.DOTALL | re.IGNORECASE) - - # Remove thinking blocks aggressively - response = re.sub(r'.*?', '', response, flags=re.DOTALL | re.IGNORECASE) - response = re.sub(r'\*\*Think\*\*.*?\*\*Answer\*\*', '', response, flags=re.DOTALL | re.IGNORECASE) - - # Remove reasoning phrases more comprehensively - reasoning_patterns = [ - r'let me.*?[.!?]\s*', - r'i need to.*?[.!?]\s*', - r'first,?\s*i.*?[.!?]\s*', - r'to solve this.*?[.!?]\s*', - r'based on.*?[,.]?\s*', - r'the answer is[:\s]*', - r'therefore[,:\s]*', - r'so[,:\s]*the answer[,:\s]*', - r'thus[,:\s]*', - r'in conclusion[,:\s]*', - r'after.*?analysis[,:\s]*', - r'from.*?search[,:\s]*' - ] - - for pattern in reasoning_patterns: - response = re.sub(pattern, '', response, flags=re.IGNORECASE) - - # Extract core answer patterns - answer_patterns = [ - r'(?:answer|result)[:\s]*([^\n.!?]+)', - r'(?:final|conclusion)[:\s]*([^\n.!?]+)', - r'^([A-Z][^.!?]*)', # First capitalized sentence - r'(\d+(?:\.\d+)?)', # Numbers - r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)' # Proper nouns - ] - - for pattern in answer_patterns: - match = re.search(pattern, response, re.IGNORECASE) - if match: - answer = match.group(1).strip() - if len(answer) > 2: # Avoid single characters - return self._clean_final_answer(answer) - - # Take the last substantial line - lines = [line.strip() for line in response.split('\n') if line.strip()] - if lines: - # Filter out obvious non-answers - for line in reversed(lines): - if len(line) > 2 and not any(word in line.lower() for word in ['tool', 'search', 'analysis', 'extract']): - return self._clean_final_answer(line) - - # Final cleanup of the entire response - return self._clean_final_answer(response.strip()) - - def _remove_thinking_process(self, response: str) -> str: - """๐Ÿง  Remove thinking process from responses to ensure only final answers""" - try: - # Remove common thinking indicators - thinking_patterns = [ - r'.*?', - r'.*?', - r'.*?', - r'Let me think.*?(?=\n\n|\.|$)', - r'I need to.*?(?=\n\n|\.|$)', - r'First, I.*?(?=\n\n|\.|$)', - r'Step \d+:.*?(?=\n|\.|$)', - r'Thinking step by step.*?(?=\n\n|\.|$)', - r'^.*?Let me analyze.*?(?=\n\n)', - r'^.*?I should.*?(?=\n\n)', - r'To solve this.*?(?=\n\n)', - ] - - cleaned = response - for pattern in thinking_patterns: - cleaned = re.sub(pattern, '', cleaned, flags=re.DOTALL | re.IGNORECASE) - - # Remove multiple newlines and clean up - cleaned = re.sub(r'\n\s*\n', '\n', cleaned).strip() - - # If response starts with reasoning words, extract the final answer - if any(cleaned.lower().startswith(word) for word in ['let me', 'first', 'i need to', 'to solve', 
'thinking']): - # Look for final answer patterns - final_patterns = [ - r'(?:the answer is|answer:|final answer:|therefore|so|thus|hence)[:\s]*(.+?)(?:\.|$)', - r'(?:^|\n)([^.\n]+?)(?:\.|$)' # Last sentence - ] - - for pattern in final_patterns: - match = re.search(pattern, cleaned, re.IGNORECASE | re.MULTILINE) - if match: - potential_answer = match.group(1).strip() - if potential_answer and len(potential_answer) < 200: # Reasonable answer length - return potential_answer - - return cleaned - - except Exception as e: - logger.warning(f"โš ๏ธ Error removing thinking process: {e}") - return response - - def _clean_final_answer(self, answer: str) -> str: - """๐Ÿงน Enhanced answer cleaning that preserves meaning and completeness""" - if not answer: - return "Unable to determine answer" - - # Quality validation - reject broken/incomplete responses - answer = answer.strip() - - # Reject clearly broken responses but allow valid short answers - broken_patterns = [ - r'^s,?\s*$', # Just "s," or "s" - r'^s\s+\w+$', # "s something" - r'^(think|right|Unable to)$', # Single incomplete words - r'^Jagged$', # Random single words - ] - - # Don't reject numbers or valid single words - if answer.isdigit() or answer.replace('.', '').replace('-', '').isdigit(): - # Valid number - keep it - pass - elif len(answer) == 1 and answer.isalpha(): - # Single letter might be valid (like "A", "B" for multiple choice) - pass - else: - # Apply broken pattern checks for other cases - for pattern in broken_patterns: - if re.match(pattern, answer, re.IGNORECASE): - return "Unable to provide complete answer" - - # Remove common prefixes but preserve content - prefixes = ['answer:', 'result:', 'final:', 'conclusion:', 'the answer is', 'it is', 'this is'] - for prefix in prefixes: - if answer.lower().startswith(prefix): - answer = answer[len(prefix):].strip() - - # Remove tool call artifacts - answer = re.sub(r'^TOOL_CALL:.*$', '', answer, flags=re.MULTILINE) - answer = re.sub(r'from \d+ tool calls?', '', answer) - - # Clean whitespace but preserve structure - answer = re.sub(r'\s+', ' ', answer).strip() - - # Remove quotes if they wrap the entire answer - if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")): - answer = answer[1:-1] - - # Final validation - but allow valid single character answers - if len(answer) < 1: - return "Unable to provide complete answer" - elif len(answer) == 1: - # Single character is OK if it's a digit or capital letter - if answer.isdigit() or answer.isupper(): - return answer.strip() - else: - return "Unable to provide complete answer" - - return answer.strip() - - def _fallback_response(self, question: str) -> str: - """๐Ÿ›ก๏ธ Enhanced fallback responses optimized for GAIA benchmark""" - question_lower = question.lower() - logger.info(f"๐Ÿ›ก๏ธ Using enhanced fallback for: {question[:50]}...") - - # Enhanced mathematical operations - if any(word in question_lower for word in ['calculate', 'compute', 'math', '+', '-', '*', '/', 'sum', 'product']): - numbers = re.findall(r'-?\d+(?:\.\d+)?', question) - if len(numbers) >= 2: - try: - a, b = float(numbers[0]), float(numbers[1]) - if '+' in question or 'add' in question_lower or 'sum' in question_lower: - return str(int(a + b) if (a + b).is_integer() else a + b) - elif '-' in question or 'subtract' in question_lower or 'minus' in question_lower: - return str(int(a - b) if (a - b).is_integer() else a - b) - elif '*' in question or 'multiply' in question_lower or 'times' in question_lower or 
'product' in question_lower: - return str(int(a * b) if (a * b).is_integer() else a * b) - elif '/' in question or 'divide' in question_lower: - return str(int(a / b) if (a / b).is_integer() else round(a / b, 6)) - except: - pass - - # Enhanced geography and capitals - if any(word in question_lower for word in ['capital', 'country', 'city']): - capitals = { - 'france': 'Paris', 'germany': 'Berlin', 'italy': 'Rome', 'spain': 'Madrid', - 'japan': 'Tokyo', 'china': 'Beijing', 'usa': 'Washington D.C.', 'united states': 'Washington D.C.', - 'uk': 'London', 'united kingdom': 'London', 'canada': 'Ottawa', 'australia': 'Canberra', - 'brazil': 'Brasรญlia', 'india': 'New Delhi', 'russia': 'Moscow', 'mexico': 'Mexico City' - } - for country, capital in capitals.items(): - if country in question_lower: - return capital - - # Enhanced political and current affairs - if 'president' in question_lower: - if any(country in question_lower for country in ['united states', 'usa', 'america']): - return 'Joe Biden' - elif 'france' in question_lower: - return 'Emmanuel Macron' - elif 'russia' in question_lower: - return 'Vladimir Putin' - - # Enhanced counting questions - if 'how many' in question_lower: - counting_map = { - 'planets': '8', 'continents': '7', 'days in year': '365', 'days in week': '7', - 'months': '12', 'seasons': '4', 'oceans': '5', 'great lakes': '5' - } - for item, count in counting_map.items(): - if item in question_lower: - return count - - # Enhanced scientific formulas - if 'chemical formula' in question_lower or 'formula' in question_lower: - formulas = { - 'water': 'H2O', 'carbon dioxide': 'CO2', 'methane': 'CH4', 'ammonia': 'NH3', - 'salt': 'NaCl', 'sugar': 'C12H22O11', 'alcohol': 'C2H5OH', 'oxygen': 'O2' - } - for compound, formula in formulas.items(): - if compound in question_lower: - return formula - - # Enhanced units and conversions - if any(word in question_lower for word in ['meter', 'kilogram', 'second', 'celsius', 'fahrenheit']): - if 'freezing point' in question_lower and 'water' in question_lower: - if 'celsius' in question_lower: - return '0' - elif 'fahrenheit' in question_lower: - return '32' - - # Enhanced colors and basic facts - if 'color' in question_lower or 'colour' in question_lower: - if 'sun' in question_lower: - return 'yellow' - elif 'grass' in question_lower: - return 'green' - elif 'sky' in question_lower: - return 'blue' - - # GAIA-specific fallback for research questions - if any(word in question_lower for word in ['when', 'where', 'who', 'what', 'which', 'how']): - return "Information not available without web search" - - # Default fallback with instruction - return "Unable to determine answer without additional tools" - - def cleanup(self): - """๐Ÿงน Cleanup temporary resources""" - pass - -# Backward compatibility aliases -class MultiModelGAIASystem(EnhancedMultiModelGAIASystem): - """Alias for backward compatibility""" - pass - -def create_gaia_system(hf_token: str = None, openai_key: str = None) -> EnhancedMultiModelGAIASystem: - """๐Ÿš€ Create an enhanced GAIA system with all advanced capabilities""" - return EnhancedMultiModelGAIASystem(hf_token=hf_token, openai_key=openai_key) - -class BasicAgent: - """๐Ÿค– GAIA-compatible agent interface with comprehensive tool calling""" - - def __init__(self, hf_token: str = None, openai_key: str = None): - self.system = create_gaia_system(hf_token, openai_key) - logger.info("๐Ÿค– BasicAgent with enhanced GAIA capabilities initialized") - - def query(self, question: str) -> str: - """Process GAIA question with 
full tool calling support""" - try: - result = self.system.query_with_tools(question) - return result - except Exception as e: - logger.error(f"โŒ Agent query failed: {e}") - return self.system._fallback_response(question) - - def clean_for_api_submission(self, response: str) -> str: - """Clean response for GAIA API submission""" - return self.system._extract_final_answer(response) - - def __call__(self, question: str) -> str: - """Callable interface for backward compatibility""" - return self.query(question) - - def cleanup(self): - """Cleanup resources""" - self.system.cleanup() - -# Test function for comprehensive validation -def test_enhanced_gaia_system(): - """๐Ÿงช Test the enhanced GAIA system with tool calling""" - print("๐Ÿงช Testing Enhanced GAIA System with Tool Calling") - - # Initialize the system - agent = BasicAgent() - - # Test questions requiring different tools - test_questions = [ - "What is 15 + 27?", # Calculator - "What is the capital of France?", # Fallback knowledge - "Search for the current weather in Paris", # Web search - "How many planets are in our solar system?", # Fallback knowledge - "What is 2 * 3 + 4?", # Calculator - ] - - print("\n" + "="*50) - print("๐ŸŽฏ ENHANCED GAIA COMPLIANCE TEST") - print("="*50) - - for question in test_questions: - print(f"\nQ: {question}") - response = agent.query(question) - print(f"A: {response}") # Should be clean, direct answers with tool usage - - # Cleanup - agent.cleanup() - print("\nโœ… Enhanced GAIA system test complete!") - -if __name__ == "__main__": - test_enhanced_gaia_system() \ No newline at end of file