File size: 13,342 Bytes
d26280a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 |
"""
file_selector.py
This module offers interactive file selection for projects. It presents the project's
files in a terminal-based, tree-structured display so that users can navigate and select
files for editing or processing. It integrates with system editors for direct file
modification and supports saving selections for later use. Designed for efficient
workflows in file-intensive environments, it offers customizable file filtering and
seamless editor integration.
Key Components:
- FileSelector: Manages file selection and interaction.
- DisplayablePath: Provides a structured view of file paths.
Usage:
Typically used in project setup or management phases for selecting specific files.
It operates within the GPT-Engineer environment, relying on core functionalities for
file handling and persistence.
"""
import os
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Union
import toml
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.default.paths import metadata_path
from gpt_engineer.core.files_dict import FilesDict
class FileSelector:
    """
    Lets the user pick project files by editing a TOML file in a text editor.

    The selection is stored in ``FILE_LIST_NAME`` inside the project's metadata
    directory. Every candidate file is listed under a ``[files]`` table; lines
    that are commented out with ``#`` are treated as not selected.
    """

    # Path components that exclude a file from the candidate list.
    IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}
    # Name of the TOML file that stores the selection.
    FILE_LIST_NAME = "file_selection.toml"
    # Header written at the top of the TOML file to explain how to use it.
    COMMENT = (
        "# Remove '#' to select a file.\n\n"
        "# gpt-engineer can only read selected files. "
        "Including irrelevant files will degrade performance, "
        "cost additional tokens and potentially overflow token limit.\n\n"
    )

    def __init__(self, project_path: Union[str, Path]):
        """
        Bind the selector to a project and locate its selection file.

        Parameters
        ----------
        project_path : Union[str, Path]
            Root directory of the project whose files are offered for selection.
        """
        self.project_path = project_path
        self.metadata_db = DiskMemory(metadata_path(self.project_path))
        self.toml_path = self.metadata_db.path / self.FILE_LIST_NAME

    def ask_for_files(self) -> "FilesDict":
        """
        Obtain the user's file selection and load the selected files' contents.

        When the ``GPTE_TEST_MODE`` environment variable is set, the existing
        selection file is read without opening an editor. Otherwise the editor
        workflow runs, starting from a fresh list if no selection file exists.

        Returns
        -------
        FilesDict
            Maps each selected path (relative to the project) to its text.
            Files that are missing or not valid UTF-8 are skipped with a warning.
        """
        if os.getenv("GPTE_TEST_MODE"):
            # In test mode the selection file must already exist.
            assert self.FILE_LIST_NAME in self.metadata_db
            selected_files = self.get_files_from_toml(self.project_path, self.toml_path)
        else:
            if self.FILE_LIST_NAME in self.metadata_db:
                print(
                    f"File list detected at {self.toml_path}. Edit or delete it if you want to select new files."
                )
                selected_files = self.editor_file_selector(self.project_path, False)
            else:
                selected_files = self.editor_file_selector(self.project_path, True)

        content_dict = {}
        for file_path in selected_files:
            # Selected paths are relative to the project path, so join them
            # with it to get a path that can be opened from the cwd.
            try:
                # Decode explicitly as UTF-8 so the result does not depend on
                # the platform's default encoding.
                with open(
                    Path(self.project_path) / file_path, "r", encoding="utf-8"
                ) as content:
                    content_dict[str(file_path)] = content.read()
            except FileNotFoundError:
                print(f"Warning: File not found {file_path}")
            except UnicodeDecodeError:
                # A single undecodable file should not abort the whole run.
                print(f"Warning: File not UTF-8 encoded {file_path}, skipping")
        return FilesDict(content_dict)

    def editor_file_selector(
        self, input_path: Union[str, Path], init: bool = True
    ) -> List[str]:
        """
        Write the candidate file list to the TOML file, open it in an editor,
        and return what the user left selected.

        Parameters
        ----------
        input_path : Union[str, Path]
            Project root to scan for candidate files.
        init : bool
            If True, every file starts commented out (unselected). If False,
            files listed in the existing TOML file stay selected and all other
            files are commented out.

        Returns
        -------
        List[str]
            The selected paths, as returned by ``get_files_from_toml``.
        """
        root_path = Path(input_path)
        toml_file = DiskMemory(metadata_path(input_path)).path / self.FILE_LIST_NAME

        all_files = self.get_current_files(root_path)
        lines = toml.dumps({"files": {x: "selected" for x in all_files}}).split("\n")

        if init:
            # Initial state: comment out every entry so nothing is selected.
            lines = [
                "# " + line if line.endswith(' = "selected"') else line
                for line in lines
            ]
        else:
            # Keep the previous selection; comment out everything else.
            with open(toml_file, "r", encoding="utf-8") as file:
                previously_selected = toml.load(file).get("files", {})
            merged = lines[:1]  # the "[files]" table header stays untouched
            for line in lines[1:]:
                # NOTE(review): the key is recovered by stripping quotes only;
                # keys that toml escapes (e.g. backslashes) may not match the
                # loaded key and would end up commented out - confirm.
                key = line.split(" = ")[0].strip('"')
                if not line or key in previously_selected:
                    merged.append(line)
                else:
                    merged.append("# " + line)
            lines = merged

        # toml.load() decodes files as UTF-8 when given a path, so write UTF-8.
        with open(toml_file, "w", encoding="utf-8") as file:
            file.write(self.COMMENT)
            file.write("\n".join(lines))

        print(
            "Please select and deselect (add # in front) files, save it, and close it to continue..."
        )
        # Blocks until the editor process exits.
        self.open_with_default_editor(toml_file)
        return self.get_files_from_toml(input_path, toml_file)

    def open_with_default_editor(self, file_path):
        """
        Open ``file_path`` in a text editor and wait for the editor to exit.

        The command in the ``EDITOR`` environment variable is tried first,
        followed by a fixed list of common editors. The first one that can be
        launched is used. If none can be launched, a message asks the user to
        edit the file manually.
        """
        editors = [
            "gedit",
            "notepad",
            "nvim",
            "write",
            "nano",
            "vim",
            "emacs",
        ]  # Beginner-friendly editors come first
        chosen_editor = os.environ.get("EDITOR")
        candidates = ([chosen_editor] if chosen_editor else []) + editors

        for editor in candidates:
            try:
                subprocess.run([editor, file_path])
                return
            except (OSError, ValueError):
                # Editor missing, not executable, or an invalid command:
                # fall through to the next candidate.
                continue
        print("No suitable text editor found. Please edit the file manually.")

    def is_utf8(self, file_path):
        """
        Return True if the whole file at ``file_path`` decodes as UTF-8.

        The file is read in binary mode and decoded in one pass.
        """
        try:
            with open(file_path, "rb") as file:
                file.read().decode("utf-8")
                return True
        except UnicodeDecodeError:
            return False

    def get_files_from_toml(
        self, input_path: Union[str, Path], toml_file: Union[str, Path]
    ) -> List[str]:
        """
        Read the selected files from the TOML file and print them as a tree.

        Parameters
        ----------
        input_path : Union[str, Path]
            Project root that the selected paths are relative to.
        toml_file : Union[str, Path]
            Location of the selection TOML file.

        Returns
        -------
        List[str]
            Keys of the ``[files]`` table, in file order.

        Raises
        ------
        Exception
            If the ``[files]`` table is missing or has no uncommented entries.
        """
        edited_tree = toml.load(toml_file)
        # Commented-out lines are not parsed, so the keys are the selection.
        selected_files = list(edited_tree.get("files", {}))

        if not selected_files:
            raise Exception(
                "No files were selected. Please select at least one file to proceed."
            )

        print(f"\nYou have selected the following files:\n{input_path}")

        project_path = Path(input_path).resolve()
        all_paths = {
            project_path.joinpath(file).resolve(strict=False)
            for file in selected_files
        }

        # The tree display is informational only, so failures are reported
        # rather than raised.
        try:
            for displayable_path in DisplayablePath.make_tree(project_path):
                if displayable_path.path in all_paths:
                    print(displayable_path.displayable())
        except FileNotFoundError:
            print("Specified path does not exist: ", project_path)
        except Exception as e:
            print("An error occurred while trying to display the file tree:", e)

        print("\n")
        return selected_files

    def merge_file_lists(
        self, existing_files: Dict[str, Any], new_files: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Add entries from ``new_files`` that are missing from ``existing_files``.

        Entries already in ``existing_files`` keep their current value.
        ``existing_files`` is updated in place and also returned.
        """
        for file, properties in new_files.items():
            if file not in existing_files:
                existing_files[file] = properties
        return existing_files

    def get_current_files(self, project_path: Union[str, Path]) -> List[str]:
        """
        List every selectable file under ``project_path``.

        A file is skipped when any component of its relative path starts with
        "." or is named in ``IGNORE_FOLDERS``.

        Returns
        -------
        List[str]
            Paths relative to ``project_path``, sorted so that the generated
            selection file has a stable order between runs.
        """
        project_path = Path(project_path).resolve()
        all_files = []
        for path in project_path.glob("**/*"):  # recursive walk
            if not path.is_file():
                continue
            relpath = path.relative_to(project_path)
            parts = relpath.parts
            if any(part.startswith(".") for part in parts):
                continue  # hidden file, or inside a hidden directory
            if any(part in self.IGNORE_FOLDERS for part in parts):
                continue
            all_files.append(str(relpath))
        return sorted(all_files)

    def is_in_ignoring_extensions(self, path: Path) -> bool:
        """
        Return True when ``path`` should be kept.

        A path is kept when its final component does not start with "." and
        does not contain "__pycache__". Despite the method name, True means
        the path is NOT ignored.
        """
        is_not_hidden = not path.name.startswith(".")
        is_not_pycache = "__pycache__" not in path.name
        return is_not_hidden and is_not_pycache
class DisplayablePath(object):
    """
    A file-system path that can render itself as one line of a tree listing.

    Each instance knows its parent node and whether it is the last child of
    that parent, which is enough to draw its branch and indentation.
    """

    # Branch drawn directly before an entry's name.
    display_filename_prefix_middle = "├── "
    display_filename_prefix_last = "└── "
    # One indentation column per ancestor: blank below an ancestor that was
    # the last child, a vertical bar below one that has later siblings.
    display_parent_prefix_middle = "    "
    display_parent_prefix_last = "│   "

    def __init__(
        self,
        path: Union[str, Path],
        parent_path: Union["DisplayablePath", None],
        is_last: bool,
    ):
        """
        Create a tree node.

        Parameters
        ----------
        path : Union[str, Path]
            The file or directory this node represents.
        parent_path : Union[DisplayablePath, None]
            The parent node, or None for the root of the tree.
        is_last : bool
            Whether this node is the last child of its parent.
        """
        self.path = Path(str(path))
        self.parent = parent_path
        self.is_last = is_last
        # The root has depth 0; every other node is one deeper than its parent.
        self.depth = self.parent.depth + 1 if self.parent else 0

    @property
    def display_name(self) -> str:
        """
        The final path component, with a trailing "/" for directories.
        """
        if self.path.is_dir():
            return self.path.name + "/"
        return self.path.name

    @classmethod
    def make_tree(
        cls, root: Union[str, Path], parent=None, is_last=False, criteria=None
    ):
        """
        Yield a node for ``root`` and then, depth-first, for everything below it.

        Children are visited in case-insensitive order of their full path.
        If ``root`` is not a directory, only its own node is yielded.

        Parameters
        ----------
        root : Union[str, Path]
            Path to start from.
        parent : DisplayablePath, optional
            Parent node of ``root``; None for the top of the tree.
        is_last : bool
            Whether ``root`` is the last child of ``parent``.
        criteria : callable, optional
            Predicate taking a Path; children for which it returns a falsy
            value are skipped. Defaults to accepting every path.
        """
        root = Path(str(root))
        criteria = criteria or cls._default_criteria

        displayable_root = cls(root, parent, is_last)
        yield displayable_root

        if root.is_dir():
            children = sorted(
                (path for path in root.iterdir() if criteria(path)),
                key=lambda s: str(s).lower(),
            )
            last_index = len(children) - 1
            for index, path in enumerate(children):
                yield from cls.make_tree(
                    path,
                    parent=displayable_root,
                    is_last=index == last_index,
                    criteria=criteria,
                )

    @classmethod
    def _default_criteria(cls, path: Path) -> bool:
        """
        Default filter for ``make_tree``: accept every path.
        """
        return True

    def displayable(self) -> str:
        """
        Render this node as one line of the tree, with branch and indentation.

        The root is rendered as its bare display name. Any other node gets its
        own branch prefix, preceded by one indentation column for each
        ancestor below the root.
        """
        if self.parent is None:
            return self.display_name

        _filename_prefix = (
            self.display_filename_prefix_last
            if self.is_last
            else self.display_filename_prefix_middle
        )
        parts = ["{!s} {!s}".format(_filename_prefix, self.display_name)]

        # Walk up to, but not including, the root. Columns are collected
        # innermost-first and reversed at the end.
        parent = self.parent
        while parent and parent.parent is not None:
            parts.append(
                self.display_parent_prefix_middle
                if parent.is_last
                else self.display_parent_prefix_last
            )
            parent = parent.parent

        return "".join(reversed(parts))
|