Spaces:

Backup-bdg
/

OpenHands

Build error

App Files Files Community

OpenHands / openhands /resolver /send_pull_request.py

Backup-bdg

Upload 964 files

51ff9e5 verified 7 days ago

raw

history blame

24.7 kB

	import argparse
	import json
	import os
	import shutil
	import subprocess

	import jinja2
	from pydantic import SecretStr

	from openhands.core.config import LLMConfig
	from openhands.core.logger import openhands_logger as logger
	from openhands.integrations.service_types import ProviderType
	from openhands.llm.llm import LLM
	from openhands.resolver.interfaces.github import GithubIssueHandler
	from openhands.resolver.interfaces.gitlab import GitlabIssueHandler
	from openhands.resolver.interfaces.issue import Issue
	from openhands.resolver.interfaces.issue_definitions import ServiceContextIssue
	from openhands.resolver.io_utils import (
	load_single_resolver_output,
	)
	from openhands.resolver.patching import apply_diff, parse_patch
	from openhands.resolver.resolver_output import ResolverOutput
	from openhands.resolver.utils import identify_token
	from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync


	def apply_patch(repo_dir: str, patch: str) -> None:
	    """Apply a patch to a repository.

	    Every file diff parsed from ``patch`` is handled as exactly one of:

	    * a deletion (the new path is ``/dev/null``): the old file is removed;
	    * a rename: the file is moved and emptied parent directories are pruned;
	    * a content change: the hunks are applied to the current file content
	      and the result is written back with the line ending detected in the
	      original file (``\\n`` for newly created files).

	    Args:
	        repo_dir: The directory containing the repository
	        patch: The patch to apply
	    """
	    diffs = parse_patch(patch)
	    for diff in diffs:
	        if not diff.header.new_path:
	            logger.warning('Could not determine file to patch')
	            continue

	        # Remove both "a/" and "b/" prefixes from paths
	        old_path = (
	            os.path.join(
	                repo_dir, diff.header.old_path.removeprefix('a/').removeprefix('b/')
	            )
	            if diff.header.old_path and diff.header.old_path != '/dev/null'
	            else None
	        )
	        new_path = os.path.join(
	            repo_dir, diff.header.new_path.removeprefix('a/').removeprefix('b/')
	        )

	        # Check if the file is being deleted
	        if diff.header.new_path == '/dev/null':
	            assert old_path is not None
	            if os.path.exists(old_path):
	                os.remove(old_path)
	                logger.info(f'Deleted file: {old_path}')
	            continue

	        # Handle file rename
	        # NOTE(review): 'rename from' is searched in the *whole* patch text,
	        # not only in this diff, so every diff with an old path is treated as
	        # a rename as soon as the patch contains one rename anywhere.
	        if old_path and new_path and 'rename from' in patch:
	            # Create parent directory of new path
	            os.makedirs(os.path.dirname(new_path), exist_ok=True)
	            try:
	                # Try to move the file directly
	                shutil.move(old_path, new_path)
	            except shutil.SameFileError:
	                # If it's the same file (can happen with directory renames),
	                # copy first then remove
	                shutil.copy2(old_path, new_path)
	                os.remove(old_path)

	            # Try to remove empty parent directories
	            old_dir = os.path.dirname(old_path)
	            while old_dir and old_dir.startswith(repo_dir):
	                try:
	                    os.rmdir(old_dir)
	                    old_dir = os.path.dirname(old_dir)
	                except OSError:
	                    # Directory not empty or other error, stop trying to
	                    # remove parents
	                    break
	            continue

	        if old_path:
	            # Open the file in binary mode to detect line endings
	            with open(old_path, 'rb') as f:
	                original_content = f.read()

	            # Detect line endings
	            if b'\r\n' in original_content:
	                newline = '\r\n'
	            elif b'\n' in original_content:
	                newline = '\n'
	            else:
	                newline = None  # Let Python decide

	            # When no line ending was detected ``newline`` is None, and
	            # ``str.strip(None)`` would strip *all* surrounding whitespace
	            # (including indentation). Only strip line-break characters then.
	            strip_chars = newline if newline is not None else '\r\n'

	            try:
	                with open(old_path, 'r', newline=newline) as f:
	                    split_content = [x.strip(strip_chars) for x in f.readlines()]
	            except UnicodeDecodeError as e:
	                logger.error(f'Error reading file {old_path}: {e}')
	                split_content = []
	        else:
	            # New file: nothing to read, default to Unix line endings
	            newline = '\n'
	            split_content = []

	        if diff.changes is None:
	            logger.warning(f'No changes to apply for {old_path}')
	            continue

	        new_content = apply_diff(diff, split_content)

	        # Ensure the directory exists before writing the file
	        os.makedirs(os.path.dirname(new_path), exist_ok=True)

	        # Write the new content using the detected line endings
	        with open(new_path, 'w', newline=newline) as f:
	            for line in new_content:
	                print(line, file=f)

	    logger.info('Patch applied successfully')


	def initialize_repo(
	    output_dir: str, issue_number: int, issue_type: str, base_commit: str | None = None
	) -> str:
	    """Initialize the repository.

	    Copies ``<output_dir>/repo`` to
	    ``<output_dir>/patches/<issue_type>_<issue_number>`` (replacing any
	    previous copy) and optionally checks out ``base_commit`` in the copy.

	    Args:
	        output_dir: The output directory to write the repository to
	        issue_number: The issue number to fix
	        issue_type: The type of the issue
	        base_commit: The base commit to checkout (if issue_type is pr)

	    Returns:
	        The path of the freshly copied repository.

	    Raises:
	        ValueError: If ``<output_dir>/repo`` does not exist.
	        RuntimeError: If checking out ``base_commit`` fails.
	    """
	    src_dir = os.path.join(output_dir, 'repo')
	    dest_dir = os.path.join(output_dir, 'patches', f'{issue_type}_{issue_number}')

	    if not os.path.exists(src_dir):
	        raise ValueError(f'Source directory {src_dir} does not exist.')

	    # Start from a clean copy so leftovers of an earlier run cannot leak in
	    if os.path.exists(dest_dir):
	        shutil.rmtree(dest_dir)

	    shutil.copytree(src_dir, dest_dir)
	    logger.info(f'Copied repository to {dest_dir}')

	    # Checkout the base commit if provided
	    if base_commit:
	        # Pass an argument list instead of a shell string: ``base_commit`` can
	        # be a remote-supplied branch name and the path may contain spaces.
	        result = subprocess.run(
	            ['git', '-C', dest_dir, 'checkout', base_commit],
	            capture_output=True,
	            text=True,
	        )
	        if result.returncode != 0:
	            logger.error(f'Error checking out commit: {result.stderr}')
	            raise RuntimeError('Failed to check out commit')

	    return dest_dir


	def make_commit(repo_dir: str, issue: Issue, issue_type: str) -> None:
	    """Make a commit with the changes to the repository.

	    Configures a default git identity when ``user.name`` is unset, stages all
	    changes and commits them with the message
	    ``Fix <issue_type> #<number>: <title>``.

	    Args:
	        repo_dir: The directory containing the repository
	        issue: The issue to fix
	        issue_type: The type of the issue

	    Raises:
	        RuntimeError: If staging fails, there is nothing to commit, or the
	            commit itself fails.
	        subprocess.CalledProcessError: If configuring the git identity fails.
	    """
	    # All commands are passed as argument lists (no shell), so a ``repo_dir``
	    # containing spaces or shell metacharacters is handled correctly.
	    git = ['git', '-C', repo_dir]

	    # Check if git username is set
	    result = subprocess.run(
	        [*git, 'config', 'user.name'],
	        capture_output=True,
	        text=True,
	    )

	    if not result.stdout.strip():
	        # If username is not set, configure git. ``check=True`` aborts on the
	        # first failing setting.
	        for key, value in (
	            ('user.name', 'openhands'),
	            ('user.email', 'openhands@all-hands.dev'),
	            ('alias.git', 'git --no-pager'),
	        ):
	            subprocess.run([*git, 'config', key, value], check=True)
	        logger.info('Git user configured as openhands')

	    # Add all changes to the git index
	    result = subprocess.run([*git, 'add', '.'], capture_output=True, text=True)
	    if result.returncode != 0:
	        logger.error(f'Error adding files: {result.stderr}')
	        raise RuntimeError('Failed to add files to git')

	    # Check the status of the git index
	    status_result = subprocess.run(
	        [*git, 'status', '--porcelain'],
	        capture_output=True,
	        text=True,
	    )

	    # If there are no changes, raise an error
	    if not status_result.stdout.strip():
	        logger.error(
	            f'No changes to commit for issue #{issue.number}. Skipping commit.'
	        )
	        raise RuntimeError('ERROR: Openhands failed to make code changes.')

	    # Prepare the commit message
	    commit_message = f'Fix {issue_type} #{issue.number}: {issue.title}'

	    # Commit the changes
	    result = subprocess.run(
	        [*git, 'commit', '-m', commit_message],
	        capture_output=True,
	        text=True,
	    )
	    if result.returncode != 0:
	        raise RuntimeError(f'Failed to commit changes: {result}')


	def send_pull_request(
	    issue: Issue,
	    token: str,
	    username: str | None,
	    platform: ProviderType,
	    patch_dir: str,
	    pr_type: str,
	    fork_owner: str | None = None,
	    additional_message: str | None = None,
	    target_branch: str | None = None,
	    reviewer: str | None = None,
	    pr_title: str | None = None,
	    base_domain: str | None = None,
	) -> str:
	    """Send a pull request to a GitHub or Gitlab repository.

	    Creates a new branch in ``patch_dir``, pushes it to the original
	    repository (or to ``fork_owner``'s fork) and, unless ``pr_type`` is
	    ``'branch'``, opens a pull request for it.

	    Args:
	        issue: The issue to send the pull request for
	        token: The GitHub or Gitlab token to use for authentication
	        username: The GitHub or Gitlab username, if provided
	        platform: The platform of the repository.
	        patch_dir: The directory containing the patches to apply
	        pr_type: The type: branch (no PR created), draft or ready (regular PR created)
	        fork_owner: The owner of the fork to push changes to (if different from the original repo owner)
	        additional_message: The additional messages to post as a comment on the PR in json list format
	        target_branch: The target branch to create the pull request against (defaults to repository default branch)
	        reviewer: The GitHub or Gitlab username of the reviewer to assign
	        pr_title: Custom title for the pull request (optional)
	        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

	    Returns:
	        The URL of the created pull request, or the compare URL of the pushed
	        branch when ``pr_type`` is ``'branch'``.

	    Raises:
	        ValueError: If ``pr_type`` is invalid or ``target_branch`` does not exist.
	        RuntimeError: If creating or pushing the branch fails.
	    """
	    if pr_type not in ['branch', 'draft', 'ready']:
	        raise ValueError(f'Invalid pr_type: {pr_type}')

	    is_github = platform == ProviderType.GITHUB

	    # Determine default base_domain based on platform
	    if base_domain is None:
	        base_domain = 'github.com' if is_github else 'gitlab.com'

	    # Any platform other than GitHub is treated as GitLab
	    handler_cls = GithubIssueHandler if is_github else GitlabIssueHandler
	    handler = ServiceContextIssue(
	        handler_cls(issue.owner, issue.repo, token, username, base_domain),
	        None,
	    )

	    # Create a new branch with a unique name
	    base_branch_name = f'openhands-fix-issue-{issue.number}'
	    branch_name = handler.get_branch_name(
	        base_branch_name=base_branch_name,
	    )

	    # Get the default branch or use specified target branch
	    logger.info('Getting base branch...')
	    if target_branch:
	        base_branch = target_branch
	        exists = handler.branch_exists(branch_name=target_branch)
	        if not exists:
	            raise ValueError(f'Target branch {target_branch} does not exist')
	    else:
	        base_branch = handler.get_default_branch_name()
	    logger.info(f'Base branch: {base_branch}')

	    # Create and checkout the new branch
	    logger.info('Creating new branch...')
	    result = subprocess.run(
	        ['git', '-C', patch_dir, 'checkout', '-b', branch_name],
	        capture_output=True,
	        text=True,
	    )
	    if result.returncode != 0:
	        logger.error(f'Error creating new branch: {result.stderr}')
	        # Include git's own message so the exception is self-explanatory
	        raise RuntimeError(
	            f'Failed to create a new branch {branch_name} in {patch_dir}: '
	            f'{result.stderr.strip()}'
	        )

	    # Determine the repository to push to (original or fork)
	    push_owner = fork_owner if fork_owner else issue.owner

	    handler._strategy.set_owner(push_owner)

	    logger.info('Pushing changes...')
	    push_url = handler.get_clone_url()
	    result = subprocess.run(
	        ['git', '-C', patch_dir, 'push', push_url, branch_name],
	        capture_output=True,
	        text=True,
	    )
	    if result.returncode != 0:
	        logger.error(f'Error pushing changes: {result.stderr}')
	        raise RuntimeError('Failed to push changes to the remote repository')

	    # Prepare the PR data: title and body
	    final_pr_title = (
	        pr_title if pr_title else f'Fix issue #{issue.number}: {issue.title}'
	    )
	    pr_body = f'This pull request fixes #{issue.number}.'
	    if additional_message:
	        pr_body += f'\n\n{additional_message}'
	    pr_body += '\n\nAutomatic fix generated by [OpenHands](https://github.com/All-Hands-AI/OpenHands/) 🙌'

	    # For cross repo pull request, we need to send head parameter like fork_owner:branch as per git documentation here : https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#create-a-pull-request
	    # head parameter usage : The name of the branch where your changes are implemented. For cross-repository pull requests in the same network, namespace head with a user like this: username:branch.
	    if fork_owner and is_github:
	        head_branch = f'{fork_owner}:{branch_name}'
	    else:
	        head_branch = branch_name

	    # If we are not sending a PR, we can finish early and return the
	    # URL for the user to open a PR manually
	    if pr_type == 'branch':
	        url = handler.get_compare_url(branch_name)
	    else:
	        # Prepare the PR payload; the field names differ between the
	        # GitHub and GitLab APIs
	        data = {
	            'title': final_pr_title,
	            ('body' if is_github else 'description'): pr_body,
	            ('head' if is_github else 'source_branch'): head_branch,
	            ('base' if is_github else 'target_branch'): base_branch,
	            'draft': pr_type == 'draft',
	        }

	        pr_data = handler.create_pull_request(data)
	        url = pr_data['html_url']

	        # Request review if a reviewer was specified
	        if reviewer:
	            number = pr_data['number']
	            handler.request_reviewers(reviewer, number)

	    logger.info(
	        f'{pr_type} created: {url}\n\n--- Title: {final_pr_title}\n\n--- Body:\n{pr_body}'
	    )

	    return url


	def update_existing_pull_request(
	    issue: Issue,
	    token: str,
	    username: str | None,
	    platform: ProviderType,
	    patch_dir: str,
	    llm_config: LLMConfig,
	    comment_message: str | None = None,
	    additional_message: str | None = None,
	    base_domain: str | None = None,
	) -> str:
	    """Update an existing pull request with the new patches.

	    Pushes ``patch_dir`` to the pull request's head branch, posts a summary
	    comment and replies to the review threads listed in ``issue.thread_ids``.

	    Args:
	        issue: The issue to update.
	        token: The token to use for authentication.
	        username: The username to use for authentication.
	        platform: The platform of the repository.
	        patch_dir: The directory containing the patches to apply.
	        llm_config: The LLM configuration to use for summarizing changes.
	        comment_message: The main message to post as a comment on the PR.
	        additional_message: The additional messages to post as a comment on the PR in json list format.
	        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

	    Returns:
	        The URL of the updated pull request.

	    Raises:
	        ValueError: If the issue has no head branch to push to.
	        RuntimeError: If pushing to the remote fails.
	    """
	    is_github = platform == ProviderType.GITHUB

	    # Determine default base_domain based on platform
	    if base_domain is None:
	        base_domain = 'github.com' if is_github else 'gitlab.com'

	    # Any platform other than GitHub is treated as GitLab
	    handler_cls = GithubIssueHandler if is_github else GitlabIssueHandler
	    handler = ServiceContextIssue(
	        handler_cls(issue.owner, issue.repo, token, username, base_domain),
	        llm_config,
	    )

	    branch_name = issue.head_branch
	    if not branch_name:
	        raise ValueError(
	            f'Pull request #{issue.number} has no head branch to push to'
	        )

	    # Push the changes to the existing branch. The command is an argument
	    # list (no shell) because the branch name comes from the remote pull
	    # request and must never be interpreted by a shell.
	    remote_url = f'{handler.get_authorize_url()}{issue.owner}/{issue.repo}.git'
	    result = subprocess.run(
	        ['git', '-C', patch_dir, 'push', remote_url, branch_name],
	        capture_output=True,
	        text=True,
	    )
	    if result.returncode != 0:
	        logger.error(f'Error pushing changes: {result.stderr}')
	        raise RuntimeError('Failed to push changes to the remote repository')

	    pr_url = handler.get_pull_url(issue.number)
	    logger.info(f'Updated pull request {pr_url} with new patches.')

	    # Generate a summary of all comment success indicators for PR message
	    if not comment_message and additional_message:
	        try:
	            explanations = json.loads(additional_message)
	            if explanations:
	                comment_message = (
	                    'OpenHands made the following changes to resolve the issues:\n\n'
	                )
	                for explanation in explanations:
	                    comment_message += f'- {explanation}\n'

	                # Summarize with LLM if provided
	                if llm_config is not None:
	                    llm = LLM(llm_config)
	                    with open(
	                        os.path.join(
	                            os.path.dirname(__file__),
	                            'prompts/resolve/pr-changes-summary.jinja',
	                        ),
	                        'r',
	                    ) as f:
	                        template = jinja2.Template(f.read())
	                    prompt = template.render(comment_message=comment_message)
	                    response = llm.completion(
	                        messages=[{'role': 'user', 'content': prompt}],
	                    )
	                    comment_message = response.choices[0].message.content.strip()

	        except (json.JSONDecodeError, TypeError):
	            comment_message = f'A new OpenHands update is available, but failed to parse or summarize the changes:\n{additional_message}'

	    # Post a comment on the PR
	    if comment_message:
	        handler.send_comment_msg(issue.number, comment_message)

	    # Reply to each unresolved comment thread
	    if additional_message and issue.thread_ids:
	        try:
	            explanations = json.loads(additional_message)
	            # Pair threads with explanations positionally; ``zip`` stops at
	            # the shorter of the two, so surplus explanations cannot cause
	            # an out-of-range thread lookup.
	            for comment_id, reply_comment in zip(issue.thread_ids, explanations):
	                handler.reply_to_comment(issue.number, comment_id, reply_comment)
	        except (json.JSONDecodeError, TypeError):
	            msg = f'Error occurred when replying to threads; success explanations {additional_message}'
	            handler.send_comment_msg(issue.number, msg)

	    return pr_url


	def process_single_issue(
	    output_dir: str,
	    resolver_output: ResolverOutput,
	    token: str,
	    username: str,
	    platform: ProviderType,
	    pr_type: str,
	    llm_config: LLMConfig,
	    fork_owner: str | None,
	    send_on_failure: bool,
	    target_branch: str | None = None,
	    reviewer: str | None = None,
	    pr_title: str | None = None,
	    base_domain: str | None = None,
	) -> None:
	    """Apply one resolver result to a fresh repo copy and publish it.

	    The repository copy is initialized, the resolver's git patch is applied
	    and committed, and then either the existing pull request is updated
	    (issue type ``'pr'``) or a new branch / pull request is sent (issue type
	    ``'issue'``). Unsuccessful results are skipped unless ``send_on_failure``
	    is set.

	    Args:
	        output_dir: The output directory that contains the ``repo`` checkout.
	        resolver_output: The resolver result to publish.
	        token: The token to use for authentication.
	        username: The username to use for authentication.
	        platform: The platform of the repository.
	        pr_type: The type: branch (no PR created), draft or ready.
	        llm_config: The LLM configuration used when updating an existing PR.
	        fork_owner: The owner of the fork to push changes to, if any.
	        send_on_failure: Publish even if the issue was not resolved successfully.
	        target_branch: The target branch to create the pull request against.
	        reviewer: The username of the reviewer to assign.
	        pr_title: Custom title for the pull request.
	        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

	    Raises:
	        ValueError: If ``resolver_output.issue_type`` is neither ``'issue'``
	            nor ``'pr'``.
	    """
	    # Determine default base_domain based on platform
	    if base_domain is None:
	        base_domain = 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com'

	    if not resolver_output.success and not send_on_failure:
	        logger.info(
	            f'Issue {resolver_output.issue.number} was not successfully resolved. Skipping PR creation.'
	        )
	        return

	    issue_type = resolver_output.issue_type

	    # Issues start from the recorded base commit; pull requests start from
	    # the PR's head branch.
	    if issue_type == 'issue':
	        checkout_ref = resolver_output.base_commit
	    elif issue_type == 'pr':
	        checkout_ref = resolver_output.issue.head_branch
	    else:
	        raise ValueError(f'Invalid issue type: {issue_type}')

	    patched_repo_dir = initialize_repo(
	        output_dir,
	        resolver_output.issue.number,
	        issue_type,
	        checkout_ref,
	    )

	    apply_patch(patched_repo_dir, resolver_output.git_patch)

	    make_commit(patched_repo_dir, resolver_output.issue, issue_type)

	    if issue_type == 'pr':
	        update_existing_pull_request(
	            issue=resolver_output.issue,
	            token=token,
	            username=username,
	            platform=platform,
	            patch_dir=patched_repo_dir,
	            additional_message=resolver_output.result_explanation,
	            llm_config=llm_config,
	            base_domain=base_domain,
	        )
	    else:
	        send_pull_request(
	            issue=resolver_output.issue,
	            token=token,
	            username=username,
	            platform=platform,
	            patch_dir=patched_repo_dir,
	            pr_type=pr_type,
	            fork_owner=fork_owner,
	            additional_message=resolver_output.result_explanation,
	            target_branch=target_branch,
	            reviewer=reviewer,
	            pr_title=pr_title,
	            base_domain=base_domain,
	        )

	def main() -> None:
	    """Command-line entry point: publish the resolver output of one issue.

	    Parses the command line, resolves the access token (``--token``,
	    ``GITHUB_TOKEN`` or ``GITLAB_TOKEN``) and username (``--username`` or
	    ``GIT_USERNAME``), identifies the platform from the token, builds the LLM
	    configuration, loads the resolver output for ``--issue-number`` from
	    ``<output-dir>/output.jsonl`` and hands it to ``process_single_issue``.

	    Raises:
	        ValueError: If the token or username is missing, the output directory
	            does not exist, or ``--issue-number`` is not a number.
	        KeyError: If no model is given via ``--llm-model`` or ``LLM_MODEL``.
	    """
	    parser = argparse.ArgumentParser(
	        description='Send a pull request to Github or Gitlab.'
	    )
	    parser.add_argument(
	        '--selected-repo',
	        type=str,
	        default=None,
	        help='repository to send pull request in form of `owner/repo`.',
	    )
	    parser.add_argument(
	        '--token',
	        type=str,
	        default=None,
	        help='token to access the repository.',
	    )
	    parser.add_argument(
	        '--username',
	        type=str,
	        default=None,
	        help='username to access the repository.',
	    )
	    parser.add_argument(
	        '--output-dir',
	        type=str,
	        default='output',
	        help='Output directory to write the results.',
	    )
	    parser.add_argument(
	        '--pr-type',
	        type=str,
	        default='draft',
	        choices=['branch', 'draft', 'ready'],
	        help='Type of the pull request to send [branch, draft, ready]',
	    )
	    # Only numeric issue numbers are accepted below, so the help text must
	    # not advertise any other value.
	    parser.add_argument(
	        '--issue-number',
	        type=str,
	        required=True,
	        help='Issue number to send the pull request for.',
	    )
	    parser.add_argument(
	        '--fork-owner',
	        type=str,
	        default=None,
	        help='Owner of the fork to push changes to (if different from the original repo owner).',
	    )
	    parser.add_argument(
	        '--send-on-failure',
	        action='store_true',
	        help='Send a pull request even if the issue was not successfully resolved.',
	    )
	    parser.add_argument(
	        '--llm-model',
	        type=str,
	        default=None,
	        help='LLM model to use for summarizing changes.',
	    )
	    parser.add_argument(
	        '--llm-api-key',
	        type=str,
	        default=None,
	        help='API key for the LLM model.',
	    )
	    parser.add_argument(
	        '--llm-base-url',
	        type=str,
	        default=None,
	        help='Base URL for the LLM model.',
	    )
	    parser.add_argument(
	        '--target-branch',
	        type=str,
	        default=None,
	        help='Target branch to create the pull request against (defaults to repository default branch)',
	    )
	    parser.add_argument(
	        '--reviewer',
	        type=str,
	        help='GitHub or GitLab username of the person to request review from',
	        default=None,
	    )
	    parser.add_argument(
	        '--pr-title',
	        type=str,
	        help='Custom title for the pull request',
	        default=None,
	    )
	    parser.add_argument(
	        '--base-domain',
	        type=str,
	        default=None,
	        help='Base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)',
	    )
	    my_args = parser.parse_args()

	    token = my_args.token or os.getenv('GITHUB_TOKEN') or os.getenv('GITLAB_TOKEN')
	    if not token:
	        raise ValueError(
	            'token is not set, set via --token or GITHUB_TOKEN or GITLAB_TOKEN environment variable.'
	        )
	    username = my_args.username if my_args.username else os.getenv('GIT_USERNAME')

	    platform = call_async_from_sync(
	        identify_token,
	        GENERAL_TIMEOUT,
	        token,
	        my_args.base_domain,
	    )

	    # The API key is optional (``None`` is passed on when it is absent), so
	    # a missing environment variable must not raise KeyError.
	    api_key = my_args.llm_api_key or os.environ.get('LLM_API_KEY')
	    llm_config = LLMConfig(
	        model=my_args.llm_model or os.environ['LLM_MODEL'],
	        api_key=SecretStr(api_key) if api_key else None,
	        base_url=my_args.llm_base_url or os.environ.get('LLM_BASE_URL', None),
	    )

	    if not os.path.exists(my_args.output_dir):
	        raise ValueError(f'Output directory {my_args.output_dir} does not exist.')

	    if not my_args.issue_number.isdigit():
	        raise ValueError(f'Issue number {my_args.issue_number} is not a number.')
	    issue_number = int(my_args.issue_number)
	    output_path = os.path.join(my_args.output_dir, 'output.jsonl')
	    resolver_output = load_single_resolver_output(output_path, issue_number)
	    if not username:
	        raise ValueError('username is required.')
	    process_single_issue(
	        my_args.output_dir,
	        resolver_output,
	        token,
	        username,
	        platform,
	        my_args.pr_type,
	        llm_config,
	        my_args.fork_owner,
	        my_args.send_on_failure,
	        my_args.target_branch,
	        my_args.reviewer,
	        my_args.pr_title,
	        my_args.base_domain,
	    )


	# Run the command-line interface only when executed as a script
	if __name__ == '__main__':
	    main()