Spaces:

Backup-bdg
/

OpenHands

Build error

App Files Files Community

OpenHands / openhands /events /observation /files.py

Backup-bdg

Upload 964 files

51ff9e5 verified 6 days ago

raw

history blame contribute delete

7.75 kB

	"""File-related observation classes for tracking file operations."""

	from dataclasses import dataclass
	from difflib import SequenceMatcher

	from openhands.core.schema import ObservationType
	from openhands.events.event import FileEditSource, FileReadSource
	from openhands.events.observation.observation import Observation


	@dataclass
	class FileReadObservation(Observation):
	    """Observation produced when a file has been read.

	    Attributes:
	        path: Path of the file that was read.
	        observation: Observation type tag; defaults to ``ObservationType.READ``.
	        impl_source: Which reader implementation produced this observation;
	            defaults to ``FileReadSource.DEFAULT``.
	    """

	    path: str
	    observation: str = ObservationType.READ
	    impl_source: FileReadSource = FileReadSource.DEFAULT

	    @property
	    def message(self) -> str:
	        """Return a one-sentence, first-person summary naming the file read."""
	        summary = f'I read the file {self.path}.'
	        return summary

	    def __str__(self) -> str:
	        """Return a success banner line followed by the observation's content."""
	        banner = f'[Read from {self.path} is successful.]'
	        return f'{banner}\n{self.content}'


	@dataclass
	class FileWriteObservation(Observation):
	    """Observation produced when a file has been written.

	    Attributes:
	        path: Path of the file that was written.
	        observation: Observation type tag; defaults to ``ObservationType.WRITE``.
	    """

	    path: str
	    observation: str = ObservationType.WRITE

	    @property
	    def message(self) -> str:
	        """Return a one-sentence, first-person summary naming the file written."""
	        summary = f'I wrote to the file {self.path}.'
	        return summary

	    def __str__(self) -> str:
	        """Return a success banner line followed by the observation's content."""
	        banner = f'[Write to {self.path} is successful.]'
	        return f'{banner}\n{self.content}'


	@dataclass
	class FileEditObservation(Observation):
	    """This data class represents a file edit operation.

	    The observation includes both the old and new content of the file, and can
	    generate a diff visualization showing the changes. The diff is computed lazily
	    and cached; the cache remembers which ``visualize_diff`` arguments produced
	    it, so a call with different arguments is re-rendered instead of returning
	    a stale result.

	    The .content property can either be:
	      - Git diff in LLM-based editing mode
	      - the rendered message sent to the LLM in OH_ACI mode (e.g., "The file /path/to/file.txt is created with the provided content.")
	    """

	    path: str = ''
	    prev_exist: bool = False
	    old_content: str | None = None
	    new_content: str | None = None
	    observation: str = ObservationType.EDIT
	    impl_source: FileEditSource = FileEditSource.LLM_BASED_EDIT
	    diff: str | None = (
	        None  # The raw diff between old and new content, used in OH_ACI mode
	    )
	    _diff_cache: str | None = (
	        None  # Cache for the diff visualization, used in LLM-based editing mode
	    )

	    @property
	    def message(self) -> str:
	        """Get a human-readable message describing the file edit operation."""
	        return f'I edited the file {self.path}.'

	    @staticmethod
	    def _numbered(
	        lines: list[str], first_line_num: int, width: int, marker: str = ''
	    ) -> list[str]:
	        """Format lines as ``<marker><right-aligned line number>|<text>``.

	        Args:
	            lines: The text lines to format.
	            first_line_num: 1-based line number assigned to ``lines[0]``.
	            width: Total width of the marker plus line-number column.
	            marker: Optional prefix (``'-'`` or ``'+'``); it takes its own
	                length out of ``width`` so all rows stay the same width.

	        Returns:
	            One formatted string per input line.
	        """
	        num_width = width - len(marker)
	        return [
	            f'{marker}{num:>{num_width}}|{text}'
	            for num, text in enumerate(lines, start=first_line_num)
	        ]

	    def get_edit_groups(self, n_context_lines: int = 2) -> list[dict[str, list[str]]]:
	        """Get the edit groups showing changes between old and new content.

	        Args:
	            n_context_lines: Number of context lines to show around each change.

	        Returns:
	            A list of edit groups, where each group is a dict with the keys
	            ``'before_edits'`` and ``'after_edits'``, each holding formatted,
	            line-numbered strings. Empty if either content is ``None``.
	        """
	        if self.old_content is None or self.new_content is None:
	            return []
	        old_lines = self.old_content.split('\n')
	        new_lines = self.new_content.split('\n')
	        # Borrowed from difflib.unified_diff to directly parse into structured format
	        matcher = SequenceMatcher(None, old_lines, new_lines)
	        edit_groups: list[dict] = []
	        for group in matcher.get_grouped_opcodes(n_context_lines):
	            # Column width: digits of the last opcode's new-file start index
	            # (group[-1][3]), +1 for the "-"/"+" prefix column.
	            _indent_pad_size = len(str(group[-1][3])) + 1
	            before_edits: list[str] = []
	            after_edits: list[str] = []
	            for tag, i1, i2, j1, j2 in group:
	                old_chunk = old_lines[i1:i2]
	                new_chunk = new_lines[j1:j2]
	                if tag == 'equal':
	                    # Context lines appear unmarked on both sides.
	                    before_edits.extend(
	                        self._numbered(old_chunk, i1 + 1, _indent_pad_size)
	                    )
	                    after_edits.extend(
	                        self._numbered(new_chunk, j1 + 1, _indent_pad_size)
	                    )
	                    continue
	                if tag in {'replace', 'delete'}:
	                    before_edits.extend(
	                        self._numbered(old_chunk, i1 + 1, _indent_pad_size, '-')
	                    )
	                if tag in {'replace', 'insert'}:
	                    after_edits.extend(
	                        self._numbered(new_chunk, j1 + 1, _indent_pad_size, '+')
	                    )
	            edit_groups.append(
	                {'before_edits': before_edits, 'after_edits': after_edits}
	            )
	        return edit_groups

	    def visualize_diff(
	        self,
	        n_context_lines: int = 2,
	        change_applied: bool = True,
	    ) -> str:
	        """Visualize the diff of the file edit. Used in the LLM-based editing mode.

	        Instead of showing the diff line by line, this function shows each hunk
	        of changes as a separate entity.

	        Args:
	            n_context_lines: Number of context lines to show before/after changes.
	            change_applied: Whether changes are applied. If false, shows as
	                attempted edit.

	        Returns:
	            A string containing the formatted diff visualization.
	        """
	        cache_key = (n_context_lines, change_applied)
	        if self._diff_cache is not None:
	            # The key lives in a plain instance attribute (not a dataclass
	            # field) so the constructor signature and field set are unchanged.
	            # A cache with no recorded key was provided from outside this
	            # method (e.g. through the constructor) and is trusted as-is.
	            cached_for = getattr(self, '_diff_cache_key', None)
	            if cached_for is None or cached_for == cache_key:
	                return self._diff_cache

	        # Check if there are any changes
	        if change_applied and self.old_content == self.new_content:
	            rendered = (
	                '(no changes detected. Please make sure your edits change '
	                'the content of the existing file.)\n'
	            )
	        else:
	            edit_groups = self.get_edit_groups(n_context_lines=n_context_lines)
	            total = len(edit_groups)

	            if change_applied:
	                header = f'[Existing file {self.path} is edited with '
	                header += f'{total} changes.]'
	            else:
	                header = f"[Changes are NOT applied to {self.path} - Here's how "
	                header += 'the file looks like if changes are applied.]'
	            result = [header]

	            op_type = 'edit' if change_applied else 'ATTEMPTED edit'
	            for i, cur_edit_group in enumerate(edit_groups):
	                if i != 0:
	                    result.append('-------------------------')
	                result.append(f'[begin of {op_type} {i + 1} / {total}]')
	                result.append(f'(content before {op_type})')
	                result.extend(cur_edit_group['before_edits'])
	                result.append(f'(content after {op_type})')
	                result.extend(cur_edit_group['after_edits'])
	                result.append(f'[end of {op_type} {i + 1} / {total}]')
	            rendered = '\n'.join(result)

	        # Cache the result together with the arguments that produced it
	        self._diff_cache = rendered
	        self._diff_cache_key = cache_key
	        return rendered

	    def __str__(self) -> str:
	        """Get a string representation of the file edit observation.

	        In OH_ACI mode the observation's content is returned unchanged. For a
	        file that did not previously exist a fixed creation message is
	        returned. Otherwise the (cached) diff visualization is returned with
	        exactly one trailing newline.
	        """
	        if self.impl_source == FileEditSource.OH_ACI:
	            return self.content

	        if not self.prev_exist:
	            assert self.old_content == '', (
	                'old_content should be empty if the file is new (prev_exist=False).'
	            )
	            return f'[New file {self.path} is created with the provided content.]\n'

	        # Use cached diff if available, otherwise compute it
	        return self.visualize_diff().rstrip() + '\n'