Repository_Scaner / core /file_scanner.py
DeL-TaiseiOzaki's picture
Update core/file_scanner.py
cefab8e verified
raw
history blame
1.42 kB
from pathlib import Path
from typing import List, Dict, Optional, Set
from dataclasses import dataclass
import chardet
@dataclass
class FileInfo:
    """Record describing a single file.

    Attributes:
        path: Location of the file.
        size: File size in bytes.
        extension: The file's extension string.
        content: Text of the file, or None when not set.
        encoding: Name of the text encoding, or None when not set.
    """

    path: Path
    size: int
    extension: str
    content: Optional[str] = None
    encoding: Optional[str] = None

    @property
    def formatted_size(self) -> str:
        """Return `size` as a short human-readable string in B, KB or MB.

        Values below 1024 are shown as whole bytes; larger values are
        divided by 1024 (KB) or 1024 * 1024 (MB) and shown with one decimal.
        """
        kib = 1024
        mib = kib * kib

        if self.size < kib:
            return f"{self.size} B"
        if self.size < mib:
            return f"{self.size/kib:.1f} KB"
        # MB is the largest unit used, so anything bigger stays in MB.
        return f"{self.size/mib:.1f} MB"
class FileScanner:
# Names that _should_scan_file looks for among a path's components;
# a path containing any of them is not scanned.
EXCLUDED_DIRS = {
    '.env',
    '.git',
    '__pycache__',
    'bin',
    'build',
    'dist',
    'node_modules',
    'obj',
    'target',
    'venv',
}
def __init__(self, base_dir: Path, target_extensions: Set[str]):
self.base_dir = base_dir
self.target_extensions = target_extensions
def _should_scan_file(self, path: Path) -> bool:
if any(excluded in path.parts for excluded in self.EXCLUDED_DIRS):
return False
return path.suffix.lower() in self.target_extensions
def _read_file_content(self, file_path: Path) -> tuple[Optional[str], Optional[str]]:
try:
with file_path.open('rb') as f:
raw_data = f.read(4096)
result = chardet.detect(raw_data)
encoding = result['encoding'] if result['confidence'] > 0.7 else 'utf-8'