Spaces:
Runtime error
Runtime error
File size: 5,890 Bytes
d7a7846 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
from typing import Dict, List, Tuple, Optional
import json
import sys
import glob
from pathlib import Path
from collections import defaultdict
def get_latest_log() -> str:
    """Find the most recently modified log file in the current directory.

    Only regular files whose names match ``api_usage_*.json`` are considered;
    directories that happen to match the pattern are ignored because they
    cannot be opened as logs.

    Returns:
        str: Path to the most recently modified log file

    Raises:
        SystemExit: If no log files are found in current directory
    """
    log_pattern = "api_usage_*.json"
    # Filter to real files so the caller never receives a directory path.
    logs = [path for path in Path(".").glob(log_pattern) if path.is_file()]
    if not logs:
        print(f"No files matching pattern '{log_pattern}' found in current directory")
        sys.exit(1)
    # Newest modification time wins.
    return str(max(logs, key=lambda p: p.stat().st_mtime))
def _has_image(messages: List[Dict]) -> bool:
    """Return True if any message contains an ``image_url`` content item."""
    for msg in messages:
        content = msg.get("content", [])
        # Only list-style content can carry typed items such as images.
        if isinstance(content, list) and any(
            isinstance(item, dict) and item.get("type") == "image_url"
            for item in content
        ):
            return True
    return False


def analyze_log_file(filename: str) -> Tuple[List[Dict], List[Dict], Dict[str, List[str]]]:
    """Analyze a log file for entries missing images and errors.

    The file is read line by line. Lines whose stripped text does not start
    with ``{`` (HTTP request logs, blank lines, other noise) are ignored.
    Entries lacking ``case_id`` or ``question_id`` are ignored as well.

    Args:
        filename: Path to the log file to analyze

    Returns:
        Tuple containing:
            - List of entries with no images (``case_id``, ``question_id`` and
              a ``question`` preview of at most 100 characters, followed by
              ``...`` only when the question was actually truncated)
            - List of skipped/error entries
            - Dict of processing errors by type (``json_decode`` / ``other``)

    Raises:
        SystemExit: If file cannot be found or read
    """
    no_images: List[Dict] = []
    skipped: List[Dict] = []
    errors: Dict[str, List[str]] = defaultdict(list)

    try:
        # JSON logs are read as UTF-8 rather than the platform default encoding.
        with open(filename, "r", encoding="utf-8") as f:
            for line_num, raw_line in enumerate(f, 1):
                line = raw_line.strip()
                # Skip HTTP request logs, blank lines and anything else that
                # is not a JSON object.
                if not line.startswith("{"):
                    continue

                try:
                    entry = json.loads(line)
                    case_id = entry.get("case_id")
                    question_id = entry.get("question_id")

                    # Skip if we can't identify the question
                    if not case_id or not question_id:
                        continue

                    # Check for explicit skip/error status
                    status = entry.get("status")
                    if status in ["skipped", "error"]:
                        skipped.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "reason": entry.get("reason"),
                                "status": status,
                            }
                        )
                        continue

                    # Check user content for images
                    payload = entry.get("input", {})
                    if _has_image(payload.get("messages", [])):
                        continue

                    question = payload.get("question_data", {}).get("question", "")
                    # Only mark the preview as truncated when it really is.
                    preview = question[:100] + ("..." if len(question) > 100 else "")
                    no_images.append(
                        {
                            "case_id": case_id,
                            "question_id": question_id,
                            "question": preview,
                        }
                    )
                except json.JSONDecodeError:
                    errors["json_decode"].append(f"Line {line_num}: Invalid JSON")
                    continue
                except Exception as e:
                    # Best-effort per-line boundary: a malformed entry is
                    # recorded and must not abort the whole analysis.
                    errors["other"].append(f"Line {line_num}: Error processing entry: {str(e)}")
    except FileNotFoundError:
        print(f"Error: Could not find log file: {filename}")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading file {filename}: {str(e)}")
        sys.exit(1)

    return no_images, skipped, errors
def print_results(
    filename: str, no_images: List[Dict], skipped: List[Dict], errors: Dict[str, List[str]]
) -> None:
    """Print analysis results.

    Writes three sections to stdout: questions without images, skipped/error
    questions, and (only when at least one error message exists) processing
    errors grouped by type.

    Args:
        filename: Name of the analyzed log file
        no_images: List of entries with no images
        skipped: List of skipped/error entries
        errors: Dict of processing errors by type
    """
    print(f"\nAnalyzing log file: {filename}")

    print("\n=== Questions with No Images ===")
    for entry in no_images:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Question Preview: {entry['question']}")
    print(f"\nTotal questions without images: {len(no_images)}")

    print("\n=== Skipped/Error Questions ===")
    for entry in skipped:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Status: {entry['status']}")
        # The "reason" key may be present with a None value, in which case a
        # plain .get() default would never apply; fall back explicitly.
        print(f"Reason: {entry.get('reason') or 'unknown'}")
    print(f"\nTotal skipped/error questions: {len(skipped)}")

    # Drop error types with no messages so the header is not printed alone.
    reported = {error_type: messages for error_type, messages in errors.items() if messages}
    if reported:
        print("\n=== Processing Errors ===")
        for error_type, messages in reported.items():
            print(f"\n{error_type}:")
            for msg in messages:
                print(f" {msg}")
def main() -> None:
    """Run the log validation: analyze one log file and print the findings."""
    cli_args = sys.argv[1:]
    # An explicit path on the command line wins; otherwise use the newest log.
    log_file = cli_args[0] if cli_args else get_latest_log()
    print_results(log_file, *analyze_log_file(log_file))


if __name__ == "__main__":
    main()
|