|
from pathlib import Path |
|
from typing import List, Dict, Optional |
|
from dataclasses import dataclass |
|
import chardet |
|
|
|
@dataclass
class FileInfo:
    """Metadata, and optionally the decoded text, of a single file."""

    # Location of the file.
    path: Path
    # Size of the file in bytes.
    size: int
    # File suffix (for example ".py").
    extension: str
    # Decoded text of the file, when it has been read.
    content: Optional[str] = None
    # Name of the encoding used to decode ``content``, when known.
    encoding: Optional[str] = None

    @property
    def formatted_size(self) -> str:
        """Return ``size`` as a human-readable string in B, KB or MB.

        Values below 1024 are shown as whole bytes; larger values are
        divided by 1024 (KB) or 1024 * 1024 (MB) and shown with one decimal.
        """
        kib = 1024
        mib = kib * kib
        byte_count = self.size

        if byte_count < kib:
            return f"{byte_count} B"
        if byte_count < mib:
            return f"{byte_count / kib:.1f} KB"
        return f"{byte_count / mib:.1f} MB"
|
|
|
class FileScanner:
    """Collect text/source files below a base directory as ``FileInfo`` records.

    A file is picked up when its lower-cased suffix is in
    ``TARGET_EXTENSIONS``, none of the directories between ``base_dir`` and
    the file is named in ``EXCLUDED_DIRS``, and it is no larger than
    ``MAX_FILE_SIZE`` bytes.
    """

    # Lower-cased suffixes (including the leading dot) that are scanned.
    TARGET_EXTENSIONS = {'.py', '.sh', '.rb', '.js', '.ts', '.java', '.cpp',
                         '.hpp', '.c', '.h', '.go', '.rs', '.php', '.json',
                         '.yml', '.yaml', '.toml', '.ini', '.md', '.txt'}

    # Directory names whose contents are skipped.
    EXCLUDED_DIRS = {'.git', '__pycache__', 'node_modules', 'venv', '.env'}

    # Upper size limit in bytes (1 MiB); larger files are skipped.
    MAX_FILE_SIZE = 1 * 1024 * 1024

    def __init__(self, base_dir: Path):
        """Store the directory that :meth:`scan_files` walks.

        Args:
            base_dir: Root directory of the scan.
        """
        self.base_dir = base_dir

    def scan_files(self) -> List[FileInfo]:
        """Walk ``base_dir`` recursively and read every eligible file.

        Files that cannot be stat'ed, opened or decoded are skipped rather
        than aborting the scan.

        Returns:
            One ``FileInfo`` per file that was read, sorted by the string
            form of its absolute path.

        Raises:
            FileNotFoundError: If ``base_dir`` does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"ディレクトリが見つかりません: {self.base_dir}")

        files: List[FileInfo] = []
        for entry in self.base_dir.glob("**/*"):
            if not self._is_candidate(entry):
                continue
            info = self._read_file(entry)
            if info is not None:
                files.append(info)

        return sorted(files, key=lambda info: str(info.path))

    def _is_candidate(self, entry: Path) -> bool:
        """Return True if *entry* passes the suffix, directory and file-type filters."""
        # Cheap string checks first; is_file() needs a filesystem call.
        if entry.suffix.lower() not in self.TARGET_EXTENSIONS:
            return False
        # Only directories *below* base_dir count. Matching against the full
        # path would drop every file whenever base_dir itself lives under a
        # directory called e.g. "venv" or "node_modules".
        parent_dirs = entry.relative_to(self.base_dir).parent.parts
        if not self.EXCLUDED_DIRS.isdisjoint(parent_dirs):
            return False
        return entry.is_file()

    def _read_file(self, entry: Path) -> Optional[FileInfo]:
        """Read *entry* into a ``FileInfo``; return None if it must be skipped."""
        try:
            # Single stat() so the size that is checked is the size recorded,
            # and a file that vanishes mid-scan is skipped instead of crashing.
            size = entry.stat().st_size
            if size > self.MAX_FILE_SIZE:
                return None

            # The encoding is guessed from the first 4 KiB only; if the guess
            # does not fit the rest of the file, decoding fails and the file
            # is skipped below.
            with entry.open('rb') as f:
                raw_data = f.read(4096)
            encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

            with entry.open('r', encoding=encoding) as f:
                content = f.read()

            return FileInfo(
                path=entry.absolute(),
                size=size,
                extension=entry.suffix.lower(),
                content=content,
                encoding=encoding,
            )
        except (OSError, UnicodeError, LookupError):
            # OSError: unreadable/vanished file; UnicodeError: wrong encoding
            # guess; LookupError: codec name unknown to Python. Anything else
            # (KeyboardInterrupt, programming errors) is allowed to propagate.
            return None
|
|