Spaces:
Sleeping
Sleeping
from pathlib import Path | |
from typing import List, Dict, Optional, Set | |
from dataclasses import dataclass | |
import chardet | |
@dataclass
class FileInfo:
    """Metadata and (optionally) decoded text for a single file.

    The class is a dataclass so that it can be built with keyword arguments
    (``FileInfo(path=..., size=..., extension=...)``) and compared by value.

    Attributes:
        path: Location of the file.
        size: File size in bytes.
        extension: File suffix, e.g. ``".py"``.
        content: Decoded text of the file, or ``None`` when not loaded.
        encoding: Name of the encoding used to decode ``content``, or ``None``.
    """

    path: Path
    size: int
    extension: str
    content: Optional[str] = None
    encoding: Optional[str] = None

    # NOTE(review): decorator lines appear to have been lost from this file;
    # confirm whether callers expect ``formatted_size`` to be a @property.
    def formatted_size(self) -> str:
        """Return ``size`` as a human-readable string in B, KB or MB.

        Sizes below 1 KiB are shown as whole bytes; larger sizes are shown
        with one decimal place using 1024-based units.
        """
        if self.size < 1024:
            return f"{self.size} B"
        if self.size < 1024 * 1024:
            return f"{self.size/1024:.1f} KB"
        return f"{self.size/(1024*1024):.1f} MB"
class FileScanner:
    """Recursively collect text files with selected extensions under a directory.

    Files located inside any directory named in ``EXCLUDED_DIRS`` are skipped,
    as are files whose content cannot be read or decoded.
    """

    # Path component names (relative to ``base_dir``) that disqualify a file.
    EXCLUDED_DIRS = {
        '.git', '__pycache__', 'node_modules', 'venv',
        '.env', 'build', 'dist', 'target', 'bin', 'obj'
    }

    # Encodings tried, in order, after the detected one (if any) has failed.
    _FALLBACK_ENCODINGS = ('utf-8', 'cp932')

    def __init__(self, base_dir: Path, target_extensions: Set[str]):
        """Store the scan root and the set of accepted file suffixes.

        Args:
            base_dir: Directory to scan recursively.
            target_extensions: Accepted suffixes. They are compared against
                the lower-cased file suffix, so they should be lower-case
                and include the leading dot (e.g. ``{".py", ".md"}``).
        """
        self.base_dir = base_dir
        self.target_extensions = target_extensions

    def _should_scan_file(self, path: Path) -> bool:
        """Return True when ``path`` is not excluded and has a target suffix.

        Exclusion is decided from the path components *below* ``base_dir``,
        so a scan root that itself lives under e.g. ``build/`` or ``bin/``
        does not cause every file to be rejected.
        """
        try:
            relative = path.relative_to(self.base_dir)
        except ValueError:
            # ``path`` is not under ``base_dir``; fall back to the full path.
            relative = path
        if not self.EXCLUDED_DIRS.isdisjoint(relative.parts):
            return False
        return path.suffix.lower() in self.target_extensions

    def _read_file_content(self, file_path: Path) -> tuple[Optional[str], Optional[str]]:
        """Read ``file_path`` as text, choosing an encoding heuristically.

        The first 4096 bytes are passed to ``chardet.detect``. The detected
        encoding is tried first when its confidence exceeds 0.7, followed by
        UTF-8 and then cp932.

        Returns:
            ``(text, encoding_name)`` on success, ``(None, None)`` when the
            file cannot be opened or no candidate encoding decodes it.
        """
        try:
            with file_path.open('rb') as f:
                raw_data = f.read(4096)
        except (OSError, ValueError):
            return None, None

        result = chardet.detect(raw_data)
        detected = result.get('encoding')
        confidence = result.get('confidence') or 0.0

        candidates = []
        if detected and confidence > 0.7:
            candidates.append(detected)
        known = {name.lower() for name in candidates}
        for fallback in self._FALLBACK_ENCODINGS:
            if fallback not in known:
                candidates.append(fallback)

        for encoding in candidates:
            try:
                with file_path.open('r', encoding=encoding) as f:
                    return f.read(), encoding
            except (UnicodeDecodeError, LookupError):
                # Wrong guess (detection only saw a prefix of the file) or a
                # codec name Python does not know: try the next candidate.
                continue
            except (OSError, ValueError):
                return None, None
        return None, None

    def scan_files(self) -> List["FileInfo"]:
        """Scan ``base_dir`` recursively and return the matching files.

        Returns:
            ``FileInfo`` records (absolute path, size, lower-cased suffix,
            decoded content and encoding) sorted by path string. Files that
            cannot be read or decoded are omitted.

        Raises:
            FileNotFoundError: If ``base_dir`` does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"ディレクトリが見つかりません: {self.base_dir}")

        files = []
        for entry in self.base_dir.glob("**/*"):
            # Apply the pure path filter first; is_file() hits the filesystem.
            if not (self._should_scan_file(entry) and entry.is_file()):
                continue
            content, encoding = self._read_file_content(entry)
            if content is None:
                continue
            try:
                size = entry.stat().st_size
            except OSError:
                # The file vanished or became unreadable mid-scan; skip it
                # like any other unreadable file.
                continue
            files.append(FileInfo(
                path=entry.absolute(),
                size=size,
                extension=entry.suffix.lower(),
                content=content,
                encoding=encoding,
            ))
        return sorted(files, key=lambda x: str(x.path))