#!/usr/bin/env python3
"""
πŸš€ Enhanced GAIA Tools - Complete Tool Arsenal
Additional specialized tools for 100% GAIA benchmark compliance
"""
import os
import logging
import tempfile
import requests
from typing import Dict, Any, List, Optional
logger = logging.getLogger(__name__)
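# Note on dependencies: several tools below import their third-party packages lazily
# and degrade gracefully when one is missing (docx2txt, pandas/openpyxl, PyPDF2,
# beautifulsoup4, playwright); only requests is required at import time.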
class EnhancedGAIATools:
"""πŸ› οΈ Complete toolkit for GAIA benchmark excellence"""
def __init__(self, hf_token: str = None, openai_key: str = None):
self.hf_token = hf_token or os.getenv('HF_TOKEN')
self.openai_key = openai_key or os.getenv('OPENAI_API_KEY')

    # === ENHANCED DOCUMENT PROCESSING ===

    def read_docx(self, file_path: str) -> str:
        """📄 Read Microsoft Word documents"""
        try:
            import docx2txt
            text = docx2txt.process(file_path)
            logger.info(f"📄 DOCX read: {len(text)} characters")
            return text
        except ImportError:
            logger.warning("⚠️ docx2txt not available. Install docx2txt.")
            return "❌ DOCX reading unavailable. Install docx2txt."
        except Exception as e:
            logger.error(f"❌ DOCX reading error: {e}")
            return f"❌ DOCX reading failed: {e}"

    def read_excel(self, file_path: str, sheet_name: str = None) -> str:
        """📊 Read Excel spreadsheets"""
        try:
            import pandas as pd
            if sheet_name:
                df = pd.read_excel(file_path, sheet_name=sheet_name)
            else:
                df = pd.read_excel(file_path)
            # Convert to readable format
            result = f"Excel data ({df.shape[0]} rows, {df.shape[1]} columns):\n"
            result += df.to_string(max_rows=50, max_cols=10)
            logger.info(f"📊 Excel read: {df.shape}")
            return result
        except ImportError:
            logger.warning("⚠️ pandas not available for Excel reading.")
            return "❌ Excel reading unavailable. Install pandas and openpyxl."
        except Exception as e:
            logger.error(f"❌ Excel reading error: {e}")
            return f"❌ Excel reading failed: {e}"

    def read_csv(self, file_path: str) -> str:
        """📋 Read CSV files"""
        try:
            import pandas as pd
            df = pd.read_csv(file_path)
            # Convert to readable format
            result = f"CSV data ({df.shape[0]} rows, {df.shape[1]} columns):\n"
            result += df.head(20).to_string()
            if df.shape[0] > 20:
                result += f"\n... (showing first 20 of {df.shape[0]} rows)"
            logger.info(f"📋 CSV read: {df.shape}")
            return result
        except ImportError:
            logger.warning("⚠️ pandas not available for CSV reading.")
            return "❌ CSV reading unavailable. Install pandas."
        except Exception as e:
            logger.error(f"❌ CSV reading error: {e}")
            return f"❌ CSV reading failed: {e}"

    def read_text_file(self, file_path: str, encoding: str = 'utf-8') -> str:
        """📝 Read plain text files with encoding detection"""
        try:
            # Try the requested encoding first (UTF-8 by default)
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                # Fall back to other common encodings
                encodings = ['latin-1', 'cp1252', 'ascii']
                content = None
                for enc in encodings:
                    try:
                        with open(file_path, 'r', encoding=enc) as f:
                            content = f.read()
                        break
                    except UnicodeDecodeError:
                        continue
                if content is None:
                    return "❌ Unable to decode text file with common encodings"
            logger.info(f"📝 Text file read: {len(content)} characters")
            return content[:10000] + ("..." if len(content) > 10000 else "")
        except Exception as e:
            logger.error(f"❌ Text file reading error: {e}")
            return f"❌ Text file reading failed: {e}"

    def extract_archive(self, file_path: str) -> str:
        """📦 Extract and list archive contents (ZIP, RAR, etc.)"""
        try:
            import zipfile
            if file_path.endswith('.zip'):
                with zipfile.ZipFile(file_path, 'r') as zip_ref:
                    file_list = zip_ref.namelist()
                    extract_dir = os.path.join(os.path.dirname(file_path), 'extracted')
                    os.makedirs(extract_dir, exist_ok=True)
                    zip_ref.extractall(extract_dir)
                result = f"📦 ZIP archive extracted to {extract_dir}\n"
                result += f"Contents ({len(file_list)} files):\n"
                result += "\n".join(file_list[:20])
                if len(file_list) > 20:
                    result += f"\n... (showing first 20 of {len(file_list)} files)"
                logger.info(f"📦 ZIP extracted: {len(file_list)} files")
                return result
            else:
                return f"❌ Unsupported archive format: {file_path}"
        except Exception as e:
            logger.error(f"❌ Archive extraction error: {e}")
            return f"❌ Archive extraction failed: {e}"

    # === ENHANCED WEB BROWSING ===

    def browse_with_js(self, url: str) -> str:
        """🌐 Enhanced web browsing with JavaScript support (when available)"""
        try:
            # Try playwright for dynamic content
            from playwright.sync_api import sync_playwright
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                page = browser.new_page()
                page.goto(url, timeout=15000)
                page.wait_for_timeout(2000)  # Wait for JS to load
                content = page.content()
                browser.close()
            # Parse content
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(content, 'html.parser')
            # Remove scripts and styles
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text()
            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            clean_text = ' '.join(chunk for chunk in chunks if chunk)
            logger.info(f"🌐 JS-enabled browsing: {url} - {len(clean_text)} chars")
            return clean_text[:5000] + ("..." if len(clean_text) > 5000 else "")
        except ImportError:
            logger.info("⚠️ Playwright not available, using requests fallback")
            return self._fallback_browse(url)
        except Exception as e:
            logger.warning(f"⚠️ JS browsing failed: {e}, falling back to basic")
            return self._fallback_browse(url)

    def _fallback_browse(self, url: str) -> str:
        """🌐 Fallback web browsing using requests"""
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'Connection': 'keep-alive',
            }
            response = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
            response.raise_for_status()
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')
            # Remove scripts and styles
            for script in soup(["script", "style"]):
                script.decompose()
            text = soup.get_text()
            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            clean_text = ' '.join(chunk for chunk in chunks if chunk)
            logger.info(f"🌐 Basic browsing: {url} - {len(clean_text)} chars")
            return clean_text[:5000] + ("..." if len(clean_text) > 5000 else "")
        except Exception as e:
            logger.error(f"❌ Web browsing error: {e}")
            return f"❌ Web browsing failed: {e}"

    # === ENHANCED GAIA FILE HANDLING ===

    def download_gaia_file(self, task_id: str, file_name: str = None) -> str:
        """📥 Enhanced GAIA file download with comprehensive format support"""
        try:
            # GAIA API endpoint for file downloads
            api_base = "https://agents-course-unit4-scoring.hf.space"
            file_url = f"{api_base}/files/{task_id}"
            logger.info(f"📥 Downloading GAIA file for task: {task_id}")
            headers = {
                'User-Agent': 'GAIA-Agent/1.0 (Enhanced)',
                'Accept': '*/*',
                'Accept-Encoding': 'gzip, deflate',
            }
            response = requests.get(file_url, headers=headers, timeout=30, stream=True)
            if response.status_code == 200:
                # Determine file extension from headers or filename
                content_type = response.headers.get('content-type', '')
                content_disposition = response.headers.get('content-disposition', '')
                # Extract filename from Content-Disposition header
                if file_name:
                    filename = file_name
                elif 'filename=' in content_disposition:
                    filename = content_disposition.split('filename=')[1].strip('"\'')
                else:
                    # Guess extension from content type
                    extension_map = {
                        'image/jpeg': '.jpg',
                        'image/png': '.png',
                        'image/gif': '.gif',
                        'application/pdf': '.pdf',
                        'text/plain': '.txt',
                        'application/json': '.json',
                        'text/csv': '.csv',
                        'application/vnd.ms-excel': '.xls',
                        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
                        'application/msword': '.doc',
                        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
                        'video/mp4': '.mp4',
                        'audio/mpeg': '.mp3',
                        'audio/wav': '.wav',
                        'application/zip': '.zip',
                    }
                    extension = extension_map.get(content_type, '.tmp')
                    filename = f"gaia_file_{task_id}{extension}"
                # Save file to a temporary directory
                temp_dir = tempfile.gettempdir()
                filepath = os.path.join(temp_dir, filename)
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                file_size = os.path.getsize(filepath)
                logger.info(f"📥 GAIA file downloaded: {filepath} ({file_size} bytes)")
                # Automatically process based on file type
                return self.process_downloaded_file(filepath, task_id)
            else:
                error_msg = f"❌ GAIA file download failed: HTTP {response.status_code}"
                logger.error(error_msg)
                return error_msg
        except Exception as e:
            error_msg = f"❌ GAIA file download error: {e}"
            logger.error(error_msg)
            return error_msg

    def process_downloaded_file(self, filepath: str, task_id: str) -> str:
        """📋 Process downloaded GAIA files based on their type"""
        try:
            filename = os.path.basename(filepath)
            file_ext = os.path.splitext(filename)[1].lower()
            logger.info(f"📋 Processing GAIA file: {filename} (type: {file_ext})")
            result = f"📁 GAIA File: {filename} (Task: {task_id})\n\n"
            # Process based on file type
            if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
                # Image file - return file path for image analysis
                result += f"🖼️ Image file ready for analysis: {filepath}\n"
                result += f"File type: {file_ext}, Path: {filepath}"
            elif file_ext == '.pdf':
                # PDF document
                pdf_content = self.read_pdf(filepath)
                result += f"📄 PDF Content:\n{pdf_content}\n"
            elif file_ext in ['.txt', '.md', '.py', '.js', '.html', '.css']:
                # Text files
                text_content = self.read_text_file(filepath)
                result += f"📝 Text Content:\n{text_content}\n"
            elif file_ext in ['.csv']:
                # CSV files
                csv_content = self.read_csv(filepath)
                result += f"📊 CSV Data:\n{csv_content}\n"
            elif file_ext in ['.xlsx', '.xls']:
                # Excel files
                excel_content = self.read_excel(filepath)
                result += f"📈 Excel Data:\n{excel_content}\n"
            elif file_ext in ['.docx']:
                # Word documents
                docx_content = self.read_docx(filepath)
                result += f"📄 Word Document:\n{docx_content}\n"
            elif file_ext in ['.mp4', '.avi', '.mov', '.wmv']:
                # Video files - return path for video analysis
                result += f"🎥 Video file ready for analysis: {filepath}\n"
                result += f"File type: {file_ext}, Path: {filepath}"
            elif file_ext in ['.mp3', '.wav', '.m4a', '.flac']:
                # Audio files - return path for audio analysis
                result += f"🎵 Audio file ready for analysis: {filepath}\n"
                result += f"File type: {file_ext}, Path: {filepath}"
            elif file_ext in ['.zip', '.rar']:
                # Archive files
                archive_result = self.extract_archive(filepath)
                result += f"📦 Archive Contents:\n{archive_result}\n"
            elif file_ext in ['.json']:
                # JSON files
                try:
                    import json
                    with open(filepath, 'r') as f:
                        json_data = json.load(f)
                    result += f"📋 JSON Data:\n{json.dumps(json_data, indent=2)[:2000]}\n"
                except Exception as e:
                    result += f"❌ JSON parsing error: {e}\n"
            else:
                # Unknown file type - try as text
                try:
                    text_content = self.read_text_file(filepath)
                    result += f"📄 Raw Content:\n{text_content}\n"
                except Exception:
                    result += f"❌ Unsupported file type: {file_ext}\n"
            # Add file metadata
            file_size = os.path.getsize(filepath)
            result += f"\n📊 File Info: {file_size} bytes, Path: {filepath}"
            return result
        except Exception as e:
            error_msg = f"❌ File processing error: {e}"
            logger.error(error_msg)
            return error_msg

    def read_pdf(self, file_path: str) -> str:
        """📄 Read PDF with fallback to raw text"""
        try:
            import PyPDF2
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                text = ""
                for page_num, page in enumerate(pdf_reader.pages):
                    try:
                        page_text = page.extract_text()
                        text += page_text + "\n"
                    except Exception as e:
                        text += f"[Page {page_num + 1} extraction failed: {e}]\n"
                logger.info(f"📄 PDF read: {len(pdf_reader.pages)} pages, {len(text)} chars")
                return text
        except ImportError:
            return "❌ PDF reading unavailable. Install PyPDF2."
        except Exception as e:
            logger.error(f"❌ PDF reading error: {e}")
            return f"❌ PDF reading failed: {e}"

    # === UTILITY METHODS ===

    def get_available_tools(self) -> List[str]:
        """📋 List all available enhanced tools"""
        return [
            "read_docx", "read_excel", "read_csv", "read_text_file", "extract_archive",
            "browse_with_js", "download_gaia_file", "process_downloaded_file",
            "read_pdf"
        ]

    def tool_description(self, tool_name: str) -> str:
        """📖 Get description of a specific tool"""
        descriptions = {
            "read_docx": "📄 Read Microsoft Word documents (.docx)",
            "read_excel": "📊 Read Excel spreadsheets (.xlsx, .xls)",
            "read_csv": "📋 Read CSV files with pandas",
            "read_text_file": "📝 Read text files with encoding detection",
            "extract_archive": "📦 Extract ZIP archives and list contents",
            "browse_with_js": "🌐 Enhanced web browsing with JavaScript support",
            "download_gaia_file": "📥 Download GAIA benchmark files via API",
            "process_downloaded_file": "📋 Automatically process files by type",
            "read_pdf": "📄 Read PDF documents with PyPDF2",
        }
        return descriptions.get(tool_name, f"❓ Unknown tool: {tool_name}")
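

# A minimal dispatch sketch (not part of the original toolkit): it shows one way an
# agent loop could route a tool call by name to the methods listed by
# get_available_tools(). The helper name `call_enhanced_tool` and its signature
# are assumptions for illustration only.
def call_enhanced_tool(tools: "EnhancedGAIATools", tool_name: str, *args, **kwargs) -> str:
    """🔧 Hypothetical helper: dispatch a named tool to the matching method."""
    if tool_name not in tools.get_available_tools():
        return f"❓ Unknown tool: {tool_name}"
    try:
        # Each tool name corresponds 1:1 to a method on EnhancedGAIATools
        return getattr(tools, tool_name)(*args, **kwargs)
    except Exception as e:
        return f"❌ Tool '{tool_name}' failed: {e}"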


# Test function
def test_enhanced_tools():
    """🧪 Test enhanced GAIA tools"""
    print("🧪 Testing Enhanced GAIA Tools")
    tools = EnhancedGAIATools()
    print("\n📋 Available tools:")
    for tool in tools.get_available_tools():
        print(f"  - {tool}: {tools.tool_description(tool)}")
    print("\n✅ Enhanced tools ready for GAIA benchmark!")


if __name__ == "__main__":
    test_enhanced_tools()
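

# Usage sketch (not executed automatically). Assumptions: the file path and task id
# below are hypothetical placeholders, and network access is available for the web
# and GAIA API calls.
def example_usage():
    """🧪 Hypothetical end-to-end example of the enhanced toolkit."""
    tools = EnhancedGAIATools()
    # Local file processing (placeholder path)
    print(tools.read_csv("/tmp/example.csv"))
    # Web browsing: uses Playwright when installed, falls back to requests otherwise
    print(tools.browse_with_js("https://example.com"))
    # GAIA file download + automatic type-based processing (placeholder task id)
    print(tools.download_gaia_file("00000000-0000-0000-0000-000000000000"))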