|
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
|
|
|
@dataclass
class FileInfo:
    """A source file picked up by a scan: where it is and what it contains."""

    # Location of the file. FileScanner.scan_files stores it relative to the
    # scanner's base directory.
    path: Path
    # Decoded text of the file; None when no content has been attached.
    content: Optional[str] = None
|
|
|
class FileScanner:
    """Collect the text of source-code files found beneath a base directory.

    A file is picked up when its suffix is listed in ``TARGET_EXTENSIONS``
    and none of the directories between the base directory and the file is
    named in ``EXCLUDED_DIRS``.
    """

    # Suffixes (lower-case, leading dot included) of the files to collect.
    TARGET_EXTENSIONS = {
        '.py', '.js', '.java', '.cpp', '.hpp', '.c', '.h',
        '.go', '.rs', '.php', '.rb', '.ts', '.scala', '.kt',
        '.cs', '.swift', '.m', '.sh', '.pl', '.r',
    }

    # Directory names whose contents are never collected.
    EXCLUDED_DIRS = {
        '.git', '__pycache__', 'node_modules', 'venv', '.env',
        'build', 'dist', 'target', 'bin', 'obj',
    }

    # Encodings tried, in order, when decoding a file.
    ENCODINGS = ('utf-8', 'cp932')

    def __init__(self, base_dir: Path):
        """Remember *base_dir* as the root of every subsequent scan."""
        self.base_dir = base_dir

    def _should_scan_file(self, path: Path) -> bool:
        """Return True if *path* is a wanted file outside every excluded directory.

        Only the directory components below ``base_dir`` are compared with
        ``EXCLUDED_DIRS``. Comparing the full path would reject every file
        whenever the base directory itself happens to live under a directory
        called e.g. ``build`` or ``bin``.
        """
        try:
            relative = path.relative_to(self.base_dir)
        except ValueError:
            # Not located under base_dir: fall back to the path as given.
            relative = path

        # parts[:-1] leaves out the file name itself.
        if not self.EXCLUDED_DIRS.isdisjoint(relative.parts[:-1]):
            return False
        return path.suffix.lower() in self.TARGET_EXTENSIONS

    def _read_file_content(self, file_path: Path) -> Optional[str]:
        """Return the decoded text of *file_path*, or None if it cannot be read.

        Each encoding in ``ENCODINGS`` is tried in turn. None is returned
        when the file cannot be opened or read (OSError) or when no encoding
        decodes it.
        """
        for encoding in self.ENCODINGS:
            try:
                with file_path.open('r', encoding=encoding) as handle:
                    return handle.read()
            except UnicodeDecodeError:
                continue  # wrong guess; try the next encoding
            except OSError:
                return None
        return None

    def scan_files(self) -> List["FileInfo"]:
        """Return a FileInfo for every readable target file under ``base_dir``.

        Returns:
            FileInfo objects whose ``path`` is relative to ``base_dir``,
            sorted by the string form of that path. Files whose content
            cannot be read or decoded are left out.

        Raises:
            FileNotFoundError: if ``base_dir`` does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"Directory not found: {self.base_dir}")

        files = []
        for dirpath, dirnames, filenames in os.walk(self.base_dir):
            # Prune in place so os.walk never descends into excluded
            # directories (node_modules, .git, ...) instead of walking them
            # and discarding every file afterwards.
            dirnames[:] = [d for d in dirnames if d not in self.EXCLUDED_DIRS]

            for name in filenames:
                entry = Path(dirpath, name)
                # is_file() drops broken symlinks and special files.
                if not (entry.is_file() and self._should_scan_file(entry)):
                    continue
                content = self._read_file_content(entry)
                if content is not None:
                    files.append(FileInfo(
                        path=entry.relative_to(self.base_dir),
                        content=content,
                    ))

        return sorted(files, key=lambda info: str(info.path))
|
|