|
""" |
|
file_selector.py |
|
|
|
This module offers interactive file selection for projects. Leveraging a terminal-based, |
|
tree-structured display, users can navigate and select files for editing or processing. |
|
It integrates with system editors for direct file modification and supports saving |
|
selections for later use. Designed for efficient workflow enhancement in file-intensive |
|
environments, it offers customizable file filtering and seamless editor integration. |
|
|
|
Key Components: |
|
- FileSelector: Manages file selection and interaction. |
|
- DisplayablePath: Provides a structured view of file paths. |
|
|
|
Usage: |
|
Typically used in project setup or management phases for selecting specific files. |
|
It operates within the GPT-Engineer environment, relying on core functionalities for |
|
file handling and persistence. |
|
|
|
""" |
|
|
|
import os |
|
import subprocess |
|
|
|
from pathlib import Path |
|
from typing import Any, Dict, List, Union |
|
|
|
import toml |
|
|
|
from gpt_engineer.core.default.disk_memory import DiskMemory |
|
from gpt_engineer.core.default.paths import metadata_path |
|
from gpt_engineer.core.files_dict import FilesDict |
|
|
|
|
|
class FileSelector:
    """
    Lets the user pick which project files are loaded, via an editable TOML list.

    The selection is stored in ``FILE_LIST_NAME`` inside the project's metadata
    directory. Every candidate file is written as a key of a ``[files]`` table;
    lines prefixed with ``#`` are deselected. After the user edits the list, every
    key that is still present under ``[files]`` counts as selected.

    Attributes
    ----------
    IGNORE_FOLDERS : set of str
        Path components that exclude a file from the candidate list.
    FILE_LIST_NAME : str
        File name of the TOML selection list inside the metadata directory.
    COMMENT : str
        Instruction header written at the top of the selection list.
    """

    IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}
    FILE_LIST_NAME = "file_selection.toml"
    COMMENT = (
        "# Remove '#' to select a file.\n\n"
        "# gpt-engineer can only read selected files. "
        "Including irrelevant files will degrade performance, "
        "cost additional tokens and potentially overflow token limit.\n\n"
    )

    def __init__(self, project_path: Union[str, Path]):
        """
        Bind the selector to a project directory.

        Parameters
        ----------
        project_path : str or Path
            Root directory of the project whose files are offered for selection.
        """
        self.project_path = project_path
        self.metadata_db = DiskMemory(metadata_path(self.project_path))
        self.toml_path = self.metadata_db.path / self.FILE_LIST_NAME

    def ask_for_files(self) -> "FilesDict":
        """
        Ask the user which files to use and load their contents.

        When the environment variable ``GPTE_TEST_MODE`` is set, no editor is
        opened and the existing selection list is read as-is. Otherwise the list
        is (re)generated and opened in an editor; a previously saved list keeps
        its selection.

        Returns
        -------
        FilesDict
            Built from a mapping of each selected relative path (as ``str``) to
            the file's text content. Files that are missing or cannot be decoded
            as text are skipped with a warning.

        Raises
        ------
        AssertionError
            If ``GPTE_TEST_MODE`` is set but no selection list exists.
        """
        list_exists = self.FILE_LIST_NAME in self.metadata_db

        if os.getenv("GPTE_TEST_MODE"):
            # Explicit raise instead of `assert`, which is stripped under `python -O`.
            if not list_exists:
                raise AssertionError(
                    f"{self.FILE_LIST_NAME} must exist in the metadata directory "
                    "when GPTE_TEST_MODE is set."
                )
            selected_files = self.get_files_from_toml(self.project_path, self.toml_path)
        else:
            if list_exists:
                print(
                    f"File list detected at {self.toml_path}. Edit or delete it if you want to select new files."
                )
            # Only initialise a fresh (fully deselected) list when none exists yet.
            selected_files = self.editor_file_selector(
                self.project_path, not list_exists
            )

        project_root = Path(self.project_path)
        content_dict = {}
        for file_path in selected_files:
            try:
                with open(project_root / file_path, "r") as content:
                    content_dict[str(file_path)] = content.read()
            except FileNotFoundError:
                print(f"Warning: File not found {file_path}")
            except UnicodeDecodeError:
                # A binary (or otherwise undecodable) file must not abort the run.
                print(
                    f"Warning: File could not be decoded as text {file_path}, skipping"
                )
        return FilesDict(content_dict)

    def editor_file_selector(
        self, input_path: Union[str, Path], init: bool = True
    ) -> List[str]:
        """
        Write the selection list, open it in an editor and return the selection.

        Parameters
        ----------
        input_path : str or Path
            Project root whose files are listed.
        init : bool, optional
            If True, write a fresh list with every file deselected. If False,
            read the existing list first and keep its selected files selected.

        Returns
        -------
        list of str
            The files selected in the list after the editor step.

        Raises
        ------
        FileNotFoundError
            If ``init`` is False and no selection list exists yet.
        Exception
            If no file is selected (raised by ``get_files_from_toml``).
        """
        root_path = Path(input_path)
        toml_file = DiskMemory(metadata_path(input_path)).path / self.FILE_LIST_NAME

        if init:
            previously_selected: Dict[str, Any] = {}
        else:
            with open(toml_file, "r", encoding="utf-8") as file:
                # A list without a [files] table simply has nothing selected.
                previously_selected = toml.load(file).get("files", {})

        listing = self._render_file_list(
            self.get_current_files(root_path), previously_selected
        )

        # TOML documents are UTF-8; write explicitly so the later read agrees
        # regardless of the platform's locale encoding.
        with open(toml_file, "w", encoding="utf-8") as file:
            file.write(self.COMMENT)
            file.write(listing)

        print(
            "Please select and deselect (add # in front) files, save it, and close it to continue..."
        )
        self.open_with_default_editor(toml_file)
        return self.get_files_from_toml(input_path, toml_file)

    def _render_file_list(self, all_files, selected) -> str:
        """
        Render the ``[files]`` table, commenting out every unselected file.

        Each entry is serialised on its own and membership is tested against the
        raw file name, so names that TOML has to quote or escape (backslashes,
        quotes, ...) keep their selection state.
        """
        rendered = ["[files]\n"]
        for name in all_files:
            entry = toml.dumps({name: "selected"})  # '<key> = "selected"\n'
            rendered.append(entry if name in selected else "# " + entry)
        return "".join(rendered)

    def open_with_default_editor(self, file_path):
        """
        Open ``file_path`` in a text editor and wait for the editor to exit.

        The editor named by the ``EDITOR`` environment variable is tried first,
        followed by a list of common editors. If none can be started, a hint to
        edit the file manually is printed instead.

        Parameters
        ----------
        file_path : str or Path
            The file to open.
        """
        editors = [
            "gedit",
            "notepad",
            "nvim",
            "write",
            "nano",
            "vim",
            "emacs",
        ]
        if os.name != "nt":
            # `notepad` and `write` (WordPad) are Windows programs. On POSIX
            # systems `write` is a terminal-messaging utility, which would "run"
            # without ever opening an editor.
            editors = [name for name in editors if name not in ("notepad", "write")]

        chosen_editor = os.environ.get("EDITOR")
        candidates = [chosen_editor] + editors if chosen_editor else editors

        for editor in candidates:
            try:
                subprocess.run([editor, str(file_path)])
            except OSError:
                # Not installed or not executable: try the next candidate.
                continue
            return
        print("No suitable text editor found. Please edit the file manually.")

    def is_utf8(self, file_path) -> bool:
        """
        Report whether the whole file decodes as UTF-8.

        Parameters
        ----------
        file_path : str or Path
            The file to check.

        Returns
        -------
        bool
            True if the file's bytes are valid UTF-8, False otherwise.
        """
        try:
            with open(file_path, "rb") as file:
                file.read().decode("utf-8")
        except UnicodeDecodeError:
            return False
        return True

    def get_files_from_toml(self, input_path, toml_file) -> List[str]:
        """
        Read the selected files from the selection list and print them as a tree.

        Parameters
        ----------
        input_path : str or Path
            Project root; used to resolve the selected paths for display.
        toml_file : str or Path
            Path of the TOML selection list.

        Returns
        -------
        list of str
            Every key found under the ``[files]`` table, in file order.

        Raises
        ------
        Exception
            If no file is selected (including a missing ``[files]`` table).
        """
        edited_tree = toml.load(toml_file)
        # Any key still present under [files] is selected; its value is ignored.
        selected_files = list(edited_tree.get("files", {}))

        if not selected_files:
            raise Exception(
                "No files were selected. Please select at least one file to proceed."
            )

        print(f"\nYou have selected the following files:\n{input_path}")

        project_path = Path(input_path).resolve()
        all_paths = {
            project_path.joinpath(file).resolve(strict=False)
            for file in selected_files
        }

        # Displaying the tree is purely informational, so failures are reported
        # but never prevent the selection from being returned.
        try:
            for displayable_path in DisplayablePath.make_tree(project_path):
                if displayable_path.path in all_paths:
                    print(displayable_path.displayable())
        except FileNotFoundError:
            print("Specified path does not exist: ", project_path)
        except Exception as e:
            print("An error occurred while trying to display the file tree:", e)

        print("\n")
        return selected_files

    def merge_file_lists(
        self, existing_files: Dict[str, Any], new_files: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Add entries from ``new_files`` that ``existing_files`` does not have yet.

        Entries already present keep their current properties. ``existing_files``
        is updated in place and also returned.

        Parameters
        ----------
        existing_files : dict
            Mapping of file name to properties; modified in place.
        new_files : dict
            Mapping of file name to properties to merge in.

        Returns
        -------
        dict
            The updated ``existing_files`` mapping.
        """
        for file, properties in new_files.items():
            existing_files.setdefault(file, properties)
        return existing_files

    def get_current_files(self, project_path: Union[str, Path]) -> List[str]:
        """
        List the selectable files below ``project_path``.

        A file is skipped when any component of its relative path starts with
        ``.`` or is listed in ``IGNORE_FOLDERS``.

        Parameters
        ----------
        project_path : str or Path
            Root directory to scan recursively.

        Returns
        -------
        list of str
            Paths relative to ``project_path``, sorted so that the generated
            list does not depend on filesystem enumeration order.
        """
        project_path = Path(project_path).resolve()
        relative_paths = []

        for path in project_path.glob("**/*"):
            if not path.is_file():
                continue
            relpath = path.relative_to(project_path)
            parts = relpath.parts
            if any(part.startswith(".") for part in parts):
                continue
            if any(part in self.IGNORE_FOLDERS for part in parts):
                continue
            relative_paths.append(relpath)

        relative_paths.sort(key=lambda rel: rel.as_posix())
        return [str(rel) for rel in relative_paths]

    def is_in_ignoring_extensions(self, path: Path) -> bool:
        """
        Report whether ``path`` should be kept, despite what the name suggests.

        Parameters
        ----------
        path : Path
            The path to check; only its final component is inspected.

        Returns
        -------
        bool
            True if the final component neither starts with ``.`` nor contains
            ``__pycache__``; False if the path should be filtered out.
        """
        not_hidden = not path.name.startswith(".")
        not_pycache = "__pycache__" not in path.name
        return not_hidden and not_pycache
|
|
|
|
|
class DisplayablePath(object):
    """
    A file-system path that can render itself as one line of a directory tree.

    Each instance knows its parent node and whether it is the last child of that
    parent, which is all that is needed to draw the connector glyphs in front of
    its name.
    """

    # Connector drawn directly in front of an entry's name.
    display_filename_prefix_middle = "├── "
    display_filename_prefix_last = "└── "
    # Filler drawn for each ancestor level: blank below an ancestor that was the
    # last child, a vertical bar below one that still has siblings following.
    display_parent_prefix_middle = "    "
    display_parent_prefix_last = "│   "

    def __init__(
        self,
        path: Union[str, Path],
        parent_path: "Union[DisplayablePath, None]",
        is_last: bool,
    ):
        """
        Initialize a DisplayablePath object.

        Parameters
        ----------
        path : str or Path
            The file-system path this node represents.
        parent_path : DisplayablePath or None
            The node of the containing directory, or None for the tree root.
        is_last : bool
            Whether this node is the last child of its parent.
        """
        self.path = Path(str(path))
        self.parent = parent_path
        self.is_last = is_last
        # The root has depth 0; every other node is one deeper than its parent.
        self.depth = self.parent.depth + 1 if self.parent else 0

    @property
    def display_name(self) -> str:
        """
        Get the display name of the file or directory.

        Returns
        -------
        str
            The final path component, with a trailing ``/`` for directories.
        """
        if self.path.is_dir():
            return self.path.name + "/"
        return self.path.name

    @classmethod
    def make_tree(
        cls, root: Union[str, Path], parent=None, is_last=False, criteria=None
    ):
        """
        Yield a DisplayablePath for ``root`` and, depth first, for everything below it.

        Parameters
        ----------
        root : str or Path
            The path to start from.
        parent : DisplayablePath, optional
            Node of the directory containing ``root``; None for the tree root.
        is_last : bool, optional
            Whether ``root`` is the last child of ``parent``.
        criteria : callable, optional
            Predicate taking a Path; only children for which it returns a truthy
            value are included. Defaults to including everything.

        Yields
        ------
        DisplayablePath
            The node for ``root`` first, then the nodes of its children sorted
            case-insensitively by path.
        """
        root = Path(str(root))
        criteria = criteria or cls._default_criteria

        displayable_root = cls(root, parent, is_last)
        yield displayable_root

        if not root.is_dir():
            return

        children = sorted(
            (child for child in root.iterdir() if criteria(child)),
            key=lambda child: str(child).lower(),
        )
        last_index = len(children) - 1
        for index, child in enumerate(children):
            yield from cls.make_tree(
                child,
                parent=displayable_root,
                is_last=index == last_index,
                criteria=criteria,
            )

    @classmethod
    def _default_criteria(cls, path: Path) -> bool:
        """
        The default criteria function to filter the paths: accept every path.
        """
        return True

    def displayable(self) -> str:
        """
        Get the displayable string representation of the file or directory.

        Returns
        -------
        str
            The bare display name for the tree root; otherwise one filler per
            non-root ancestor (outermost first), then the connector and the name.
        """
        if self.parent is None:
            return self.display_name

        _filename_prefix = (
            self.display_filename_prefix_last
            if self.is_last
            else self.display_filename_prefix_middle
        )

        # Built innermost-first and reversed at the end.
        parts = ["{!s} {!s}".format(_filename_prefix, self.display_name)]

        # Walk up to, but not including, the root: the root gets no filler column.
        ancestor = self.parent
        while ancestor and ancestor.parent is not None:
            parts.append(
                self.display_parent_prefix_middle
                if ancestor.is_last
                else self.display_parent_prefix_last
            )
            ancestor = ancestor.parent

        return "".join(reversed(parts))
|
|