Spaces:
Runtime error
Runtime error
from typing import Dict, List, Tuple, Optional | |
import json | |
import sys | |
import glob | |
from pathlib import Path | |
from collections import defaultdict | |
def get_latest_log() -> str:
    """Return the newest ``api_usage_*.json`` file in the working directory.

    "Newest" is decided by filesystem modification time.

    Returns:
        str: Path of the matching file with the latest modification time.

    Raises:
        SystemExit: With exit code 1 when no file matches the pattern.
    """
    log_pattern = "api_usage_*.json"
    # ``default=None`` lets a single pass cover the "nothing matched" case.
    newest = max(
        Path(".").glob(log_pattern),
        key=lambda candidate: candidate.stat().st_mtime,
        default=None,
    )
    if newest is None:
        print(f"No files matching pattern '{log_pattern}' found in current directory")
        sys.exit(1)
    return str(newest)
def _entry_has_image(entry: Dict) -> bool:
    """Return True when any message content item in *entry* has type ``image_url``."""
    # The ``or`` fallbacks tolerate keys that are present but null in the JSON.
    messages = (entry.get("input") or {}).get("messages") or []
    for msg in messages:
        content = msg.get("content", [])
        # Only list-valued content is inspected; other content types are ignored.
        if isinstance(content, list) and any(
            isinstance(item, dict) and item.get("type") == "image_url" for item in content
        ):
            return True
    return False


def _question_preview(entry: Dict, limit: int = 100) -> str:
    """Return the first *limit* characters of the entry's question followed by ``...``."""
    question_data = (entry.get("input") or {}).get("question_data") or {}
    question = question_data.get("question")
    if question is None:
        # A null question still yields a preview instead of raising on slicing.
        question = ""
    return question[:limit] + "..."


def analyze_log_file(filename: str) -> Tuple[List[Dict], List[Dict], Dict[str, List[str]]]:
    """Analyze a log file for entries missing images and errors.

    The file is read line by line. Only lines whose stripped text starts with
    ``{`` are parsed as JSON; everything else (blank lines, ``HTTP Request:``
    lines, other text) is ignored. Parsed entries lacking a truthy ``case_id``
    or ``question_id`` are ignored as well.

    Args:
        filename: Path to the log file to analyze.

    Returns:
        Tuple containing:
        - List of entries with no images; each dict has ``case_id``,
          ``question_id`` and ``question`` (a preview of at most 100
          characters followed by ``...``).
        - List of entries whose ``status`` is ``skipped`` or ``error``; each
          dict has ``case_id``, ``question_id``, ``reason`` and ``status``.
        - Dict of processing errors by type: ``json_decode`` for lines that
          are not valid JSON, ``other`` for any other per-line failure.

    Raises:
        SystemExit: If the file cannot be found or read (exit code 1).
    """
    no_images: List[Dict] = []
    skipped: List[Dict] = []
    errors: Dict[str, List[str]] = defaultdict(list)

    try:
        # Decode explicitly as UTF-8 so results do not depend on the platform
        # locale; undecodable bytes are replaced rather than aborting the run.
        with open(filename, "r", encoding="utf-8", errors="replace") as f:
            for line_num, line in enumerate(f, 1):
                stripped = line.strip()
                # Blank lines and HTTP request logs never start with "{",
                # so this single check filters out every non-entry line.
                if not stripped.startswith("{"):
                    continue

                try:
                    entry = json.loads(stripped)
                    case_id = entry.get("case_id")
                    question_id = entry.get("question_id")

                    # Skip if we can't identify the question
                    if not case_id or not question_id:
                        continue

                    # Explicit skip/error entries are reported separately and
                    # are not checked for images.
                    status = entry.get("status")
                    if status in ("skipped", "error"):
                        skipped.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "reason": entry.get("reason"),
                                "status": status,
                            }
                        )
                        continue

                    if not _entry_has_image(entry):
                        no_images.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "question": _question_preview(entry),
                            }
                        )
                except json.JSONDecodeError:
                    errors["json_decode"].append(f"Line {line_num}: Invalid JSON")
                except Exception as e:
                    # Best effort: one malformed entry must not stop the scan.
                    errors["other"].append(f"Line {line_num}: Error processing entry: {str(e)}")
    except FileNotFoundError:
        print(f"Error: Could not find log file: {filename}")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file {filename}: {str(e)}")
        sys.exit(1)

    return no_images, skipped, errors
def print_results(
    filename: str, no_images: List[Dict], skipped: List[Dict], errors: Dict[str, List[str]]
) -> None:
    """Print analysis results to standard output.

    Three sections are written: entries without images, skipped/error
    entries, and (only when ``errors`` is non-empty) processing errors
    grouped by type. The first two sections always end with a total.

    Args:
        filename: Name of the analyzed log file.
        no_images: Entries with no images; each needs ``case_id``,
            ``question_id`` and ``question`` keys.
        skipped: Skipped/error entries; each needs ``case_id``,
            ``question_id`` and ``status`` keys, ``reason`` is optional.
        errors: Processing error messages keyed by error type.
    """
    print(f"\nAnalyzing log file: {filename}")

    print("\n=== Questions with No Images ===")
    for entry in no_images:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Question Preview: {entry['question']}")
    print(f"\nTotal questions without images: {len(no_images)}")

    print("\n=== Skipped/Error Questions ===")
    for entry in skipped:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Status: {entry['status']}")
        # The key may be present with a null value, so a plain ``.get``
        # default would never apply; fall back on any empty reason.
        print(f"Reason: {entry.get('reason') or 'unknown'}")
    print(f"\nTotal skipped/error questions: {len(skipped)}")

    if errors:
        print("\n=== Processing Errors ===")
        for error_type, messages in errors.items():
            # Error types with no recorded messages get no heading.
            if messages:
                print(f"\n{error_type}:")
                for msg in messages:
                    print(f" {msg}")
def main() -> None:
    """Validate one log file and print the findings.

    The first command-line argument is used as the log path when supplied;
    otherwise the path returned by ``get_latest_log`` is used.
    """
    cli_args = sys.argv[1:]
    log_file = cli_args[0] if cli_args else get_latest_log()
    # analyze_log_file returns (no_images, skipped, errors), in the order
    # print_results expects after the filename.
    findings = analyze_log_file(log_file)
    print_results(log_file, *findings)
# Run the validation only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()