File size: 2,289 Bytes
230b1a5
 
 
560aacd
230b1a5
 
 
 
560aacd
 
230b1a5
560aacd
 
 
 
 
 
 
 
 
 
230b1a5
 
560aacd
 
 
230b1a5
560aacd
 
230b1a5
 
 
 
 
 
560aacd
230b1a5
 
 
560aacd
 
 
 
 
 
 
 
 
 
 
 
 
 
230b1a5
560aacd
 
 
 
 
230b1a5
560aacd
 
230b1a5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from pathlib import Path
from typing import List, Dict, Optional
from dataclasses import dataclass
import chardet

@dataclass
class FileInfo:
    """Metadata (and optionally the decoded text) of a single file.

    Attributes:
        path: Location of the file.
        size: File size in bytes.
        extension: File suffix (``FileScanner`` stores it lower-cased,
            including the leading dot).
        content: Decoded text of the file, or ``None`` if not loaded.
        encoding: Name of the encoding used to decode ``content``, or
            ``None`` if unknown.
    """

    path: Path
    size: int
    extension: str
    content: Optional[str] = None
    encoding: Optional[str] = None

    @property
    def formatted_size(self) -> str:
        """Return ``size`` as a short human-readable string.

        Values below 1024 are shown as whole bytes (``"512 B"``); larger
        values are shown with one decimal place in ``KB`` (units of 1024
        bytes) or, from 1024 * 1024 bytes upward, in ``MB``.
        """
        kib = 1024
        mib = kib * kib
        byte_count = self.size

        if byte_count < kib:
            return f"{byte_count} B"
        if byte_count < mib:
            return f"{byte_count / kib:.1f} KB"
        return f"{byte_count / mib:.1f} MB"

class FileScanner:
    """Recursively collect readable text/source files below a directory.

    A file is returned only if its suffix is listed in
    ``TARGET_EXTENSIONS``, it is not located inside a directory named in
    ``EXCLUDED_DIRS`` (checked relative to the base directory), it is at
    most ``MAX_FILE_SIZE`` bytes large, and its content can be decoded.
    """

    TARGET_EXTENSIONS = {'.py', '.sh', '.rb', '.js', '.ts', '.java', '.cpp',
                         '.hpp', '.c', '.h', '.go', '.rs', '.php', '.json',
                         '.yml', '.yaml', '.toml', '.ini', '.md', '.txt'}

    EXCLUDED_DIRS = {'.git', '__pycache__', 'node_modules', 'venv', '.env'}
    MAX_FILE_SIZE = 1 * 1024 * 1024  # upper size limit in bytes

    # Number of leading bytes handed to chardet for encoding detection.
    _SNIFF_BYTES = 4096

    def __init__(self, base_dir: Path):
        """Remember the directory to scan.

        Args:
            base_dir: Root directory of the scan. A plain string is
                accepted as well and converted to ``Path``.
        """
        self.base_dir = Path(base_dir)

    def scan_files(self) -> List[FileInfo]:
        """Scan ``base_dir`` recursively and load every matching file.

        Files that cannot be read or decoded are skipped rather than
        aborting the whole scan.

        Returns:
            ``FileInfo`` objects (absolute path, size, lower-cased suffix,
            decoded content, encoding used), sorted by path string.

        Raises:
            FileNotFoundError: If ``base_dir`` does not exist.
        """
        if not self.base_dir.exists():
            raise FileNotFoundError(f"ディレクトリが見つかりません: {self.base_dir}")

        files = []
        for entry in self.base_dir.glob("**/*"):
            if not self._is_candidate(entry):
                continue
            info = self._load(entry)
            if info is not None:
                files.append(info)

        return sorted(files, key=lambda x: str(x.path))

    def _is_candidate(self, entry: Path) -> bool:
        """Return True if *entry* is a regular file that passes the name filters."""
        if entry.suffix.lower() not in self.TARGET_EXTENSIONS:
            return False

        # Only path components *below* base_dir count for the exclusion
        # test; otherwise a project that itself lives under e.g. ".../venv/"
        # would yield no files at all.
        try:
            relative_parts = entry.relative_to(self.base_dir).parts
        except ValueError:
            relative_parts = entry.parts
        if any(part in self.EXCLUDED_DIRS for part in relative_parts):
            return False

        return entry.is_file()

    def _load(self, entry: Path) -> Optional[FileInfo]:
        """Read and decode *entry*; return None if it is too large or unreadable."""
        try:
            size = entry.stat().st_size
            if size > self.MAX_FILE_SIZE:
                return None

            with entry.open('rb') as f:
                head = f.read(self._SNIFF_BYTES)
            # chardet reports None when it cannot decide (e.g. empty file).
            encoding = chardet.detect(head)['encoding'] or 'utf-8'

            try:
                content = entry.read_text(encoding=encoding)
            except UnicodeDecodeError:
                if encoding.lower() == 'utf-8':
                    raise
                # Detection only saw the first bytes (an all-ASCII prefix is
                # reported as "ascii"); retry with UTF-8 before giving up.
                encoding = 'utf-8'
                content = entry.read_text(encoding=encoding)
        except (OSError, UnicodeError, LookupError):
            # Unreadable, undecodable, or unknown codec name: best-effort
            # scan, so skip this file.
            return None

        return FileInfo(
            path=entry.absolute(),
            size=size,
            extension=entry.suffix.lower(),
            content=content,
            encoding=encoding,
        )