DeL-TaiseiOzaki committed on
Commit
1820fc3
·
verified ·
1 Parent(s): cefab8e

Update core/file_scanner.py

Browse files
Files changed (1) hide show
  1. core/file_scanner.py +29 -1
core/file_scanner.py CHANGED
@@ -41,4 +41,32 @@ class FileScanner:
41
  raw_data = f.read(4096)
42
  result = chardet.detect(raw_data)
43
  encoding = result['encoding'] if result['confidence'] > 0.7 else 'utf-8'
44
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  raw_data = f.read(4096)
42
  result = chardet.detect(raw_data)
43
  encoding = result['encoding'] if result['confidence'] > 0.7 else 'utf-8'
44
+ try:
45
+ with file_path.open('r', encoding=encoding) as f:
46
+ return f.read(), encoding
47
+ except UnicodeDecodeError:
48
+ try:
49
+ with file_path.open('r', encoding='cp932') as f:
50
+ return f.read(), 'cp932'
51
+ except UnicodeDecodeError:
52
+ return None, None
53
+ except (OSError, ValueError):
54
+ return None, None
55
+
56
+ def scan_files(self) -> List[FileInfo]:
57
+ if not self.base_dir.exists():
58
+ raise FileNotFoundError(f"ディレクトリが見つかりません: {self.base_dir}")
59
+
60
+ files = []
61
+ for entry in self.base_dir.glob("**/*"):
62
+ if entry.is_file() and self._should_scan_file(entry):
63
+ content, encoding = self._read_file_content(entry)
64
+ if content is not None:
65
+ files.append(FileInfo(
66
+ path=entry.absolute(),
67
+ size=entry.stat().st_size,
68
+ extension=entry.suffix.lower(),
69
+ content=content,
70
+ encoding=encoding
71
+ ))
72
+ return sorted(files, key=lambda x: str(x.path))