# Source: OpenHands / openhands/resolver/send_pull_request.py
# (Residue from the hosting page: "Upload 964 files", commit 51ff9e5, 24.7 kB.)
import argparse
import json
import os
import shutil
import subprocess
import jinja2
from pydantic import SecretStr
from openhands.core.config import LLMConfig
from openhands.core.logger import openhands_logger as logger
from openhands.integrations.service_types import ProviderType
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.github import GithubIssueHandler
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.interfaces.issue_definitions import ServiceContextIssue
from openhands.resolver.io_utils import (
load_single_resolver_output,
)
from openhands.resolver.patching import apply_diff, parse_patch
from openhands.resolver.resolver_output import ResolverOutput
from openhands.resolver.utils import identify_token
from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync
def apply_patch(repo_dir: str, patch: str) -> None:
    """Apply a patch to a repository.

    Each diff parsed from ``patch`` is handled as one of: a deletion (new path
    is ``/dev/null``), a rename (old and new paths differ and the patch
    contains a ``rename from`` marker), or a content change that is written to
    the new path using the line endings detected in the old file.

    Args:
        repo_dir: The directory containing the repository
        patch: The patch to apply
    """
    diffs = parse_patch(patch)
    for diff in diffs:
        if not diff.header.new_path:
            logger.warning('Could not determine file to patch')
            continue

        # Remove both "a/" and "b/" prefixes from paths
        old_path = (
            os.path.join(
                repo_dir, diff.header.old_path.removeprefix('a/').removeprefix('b/')
            )
            if diff.header.old_path and diff.header.old_path != '/dev/null'
            else None
        )
        new_path = os.path.join(
            repo_dir, diff.header.new_path.removeprefix('a/').removeprefix('b/')
        )

        # Check if the file is being deleted
        if diff.header.new_path == '/dev/null':
            assert old_path is not None
            if os.path.exists(old_path):
                os.remove(old_path)
                logger.info(f'Deleted file: {old_path}')
            continue

        # Handle file rename. The 'rename from' marker is searched in the whole
        # patch text, so also require that this particular diff changes the
        # path; otherwise every plain modification that shares a patch with a
        # rename would be "moved" onto itself and its changes dropped.
        if old_path and old_path != new_path and 'rename from' in patch:
            # Create parent directory of new path
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            try:
                # Try to move the file directly
                shutil.move(old_path, new_path)
            except shutil.SameFileError:
                # If it's the same file (can happen with directory renames),
                # copy first then remove
                shutil.copy2(old_path, new_path)
                os.remove(old_path)

            # Try to remove empty parent directories
            old_dir = os.path.dirname(old_path)
            while old_dir and old_dir.startswith(repo_dir):
                try:
                    os.rmdir(old_dir)
                    old_dir = os.path.dirname(old_dir)
                except OSError:
                    # Directory not empty or other error, stop removing parents
                    break
            continue

        if old_path:
            # Open the file in binary mode to detect line endings
            with open(old_path, 'rb') as f:
                original_content = f.read()

            # Detect line endings
            if b'\r\n' in original_content:
                newline = '\r\n'
            elif b'\n' in original_content:
                newline = '\n'
            else:
                newline = None  # Let Python decide

            # With newline=None the file is read in universal-newline mode and
            # lines end in '\n'. Strip only that terminator: str.strip(None)
            # would also eat leading/trailing spaces and tabs (indentation).
            line_ending = newline if newline is not None else '\n'
            try:
                with open(old_path, 'r', newline=newline) as f:
                    split_content = [line.strip(line_ending) for line in f]
            except UnicodeDecodeError as e:
                logger.error(f'Error reading file {old_path}: {e}')
                split_content = []
        else:
            # New file: nothing to read, default to Unix line endings
            newline = '\n'
            split_content = []

        if diff.changes is None:
            logger.warning(f'No changes to apply for {old_path}')
            continue

        new_content = apply_diff(diff, split_content)

        # Ensure the directory exists before writing the file
        os.makedirs(os.path.dirname(new_path), exist_ok=True)

        # Write the new content using the detected line endings
        with open(new_path, 'w', newline=newline) as f:
            for line in new_content:
                print(line, file=f)

    logger.info('Patch applied successfully')
def initialize_repo(
    output_dir: str, issue_number: int, issue_type: str, base_commit: str | None = None
) -> str:
    """Initialize the repository.

    Copies ``<output_dir>/repo`` to ``<output_dir>/patches/<issue_type>_<issue_number>``
    (replacing any previous copy) and optionally checks out ``base_commit`` there.

    Args:
        output_dir: The output directory to write the repository to
        issue_number: The issue number to fix
        issue_type: The type of the issue
        base_commit: The base commit to checkout (if issue_type is pr)

    Returns:
        The path of the freshly copied repository.

    Raises:
        ValueError: If the source repository directory does not exist.
        RuntimeError: If checking out ``base_commit`` fails.
    """
    src_dir = os.path.join(output_dir, 'repo')
    dest_dir = os.path.join(output_dir, 'patches', f'{issue_type}_{issue_number}')

    if not os.path.exists(src_dir):
        raise ValueError(f'Source directory {src_dir} does not exist.')

    # Start from a clean copy every time
    if os.path.exists(dest_dir):
        shutil.rmtree(dest_dir)

    shutil.copytree(src_dir, dest_dir)
    logger.info(f'Copied repository to {dest_dir}')

    # Checkout the base commit if provided
    if base_commit:
        # Pass an argument list instead of a shell string: the path may contain
        # spaces and the ref may come from a remote branch name, so neither
        # should ever be interpreted by a shell.
        result = subprocess.run(
            ['git', '-C', dest_dir, 'checkout', base_commit],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            logger.error(f'Error checking out commit: {result.stderr}')
            raise RuntimeError('Failed to check out commit')

    return dest_dir
def make_commit(repo_dir: str, issue: Issue, issue_type: str) -> None:
    """Make a commit with the changes to the repository.

    Args:
        repo_dir: The directory containing the repository
        issue: The issue to fix
        issue_type: The type of the issue

    Raises:
        subprocess.CalledProcessError: If configuring the git identity fails.
        RuntimeError: If staging fails, there is nothing to commit, or the
            commit itself fails.
    """
    # All git invocations use argument lists (no shell) so that a repo_dir
    # containing spaces or shell metacharacters is passed through verbatim.
    git = ['git', '-C', repo_dir]

    # Check if git username is set
    result = subprocess.run(
        [*git, 'config', 'user.name'],
        capture_output=True,
        text=True,
    )

    if not result.stdout.strip():
        # If username is not set, configure git. The e-mail literal in the
        # scraped source was the "[email protected]" obfuscation placeholder;
        # NOTE(review): restored to the project address -- confirm.
        git_settings = (
            ('user.name', 'openhands'),
            ('user.email', 'openhands@all-hands.dev'),
            ('alias.git', 'git --no-pager'),
        )
        for key, value in git_settings:
            # check=True stops at the first failing setting
            subprocess.run([*git, 'config', key, value], check=True)
        logger.info('Git user configured as openhands')

    # Add all changes to the git index
    result = subprocess.run([*git, 'add', '.'], capture_output=True, text=True)
    if result.returncode != 0:
        logger.error(f'Error adding files: {result.stderr}')
        raise RuntimeError('Failed to add files to git')

    # Check the status of the git index
    status_result = subprocess.run(
        [*git, 'status', '--porcelain'],
        capture_output=True,
        text=True,
    )

    # If there are no changes, raise an error
    if not status_result.stdout.strip():
        logger.error(
            f'No changes to commit for issue #{issue.number}. Skipping commit.'
        )
        raise RuntimeError('ERROR: Openhands failed to make code changes.')

    # Prepare the commit message
    commit_message = f'Fix {issue_type} #{issue.number}: {issue.title}'

    # Commit the changes
    result = subprocess.run(
        [*git, 'commit', '-m', commit_message],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f'Failed to commit changes: {result}')
def send_pull_request(
    issue: Issue,
    token: str,
    username: str | None,
    platform: ProviderType,
    patch_dir: str,
    pr_type: str,
    fork_owner: str | None = None,
    additional_message: str | None = None,
    target_branch: str | None = None,
    reviewer: str | None = None,
    pr_title: str | None = None,
    base_domain: str | None = None,
) -> str:
    """Push the patched branch and open a pull request on GitHub or GitLab.

    Args:
        issue: The issue the pull request resolves
        token: The GitHub or GitLab token used for authentication
        username: The GitHub or GitLab username, if provided
        platform: The platform hosting the repository
        patch_dir: The local repository directory holding the committed fix
        pr_type: One of ``branch`` (push only, no PR), ``draft`` or ``ready``
        fork_owner: Owner of the fork to push to (if not the original owner)
        additional_message: Extra text appended to the pull request body
        target_branch: Branch to open the pull request against (defaults to
            the repository default branch)
        reviewer: Username of the reviewer to request a review from
        pr_title: Custom title for the pull request (optional)
        base_domain: Base domain of the git server (defaults to "github.com"
            for GitHub and "gitlab.com" for GitLab)

    Returns:
        The URL of the created pull request, or the compare URL when
        ``pr_type`` is ``branch``.

    Raises:
        ValueError: If ``pr_type`` is invalid or ``target_branch`` is missing.
        RuntimeError: If creating or pushing the branch fails.
    """
    if pr_type not in ('branch', 'draft', 'ready'):
        raise ValueError(f'Invalid pr_type: {pr_type}')

    is_github = platform == ProviderType.GITHUB

    # Fall back to the public host of the selected platform
    if base_domain is None:
        base_domain = 'github.com' if is_github else 'gitlab.com'

    # Anything that is not GitHub is treated as GitLab
    strategy_cls = GithubIssueHandler if is_github else GitlabIssueHandler
    handler = ServiceContextIssue(
        strategy_cls(issue.owner, issue.repo, token, username, base_domain),
        None,
    )

    # Ask the handler for a unique branch name derived from the issue number
    branch_name = handler.get_branch_name(
        base_branch_name=f'openhands-fix-issue-{issue.number}',
    )

    # Resolve the branch the pull request will target
    logger.info('Getting base branch...')
    if not target_branch:
        base_branch = handler.get_default_branch_name()
    else:
        if not handler.branch_exists(branch_name=target_branch):
            raise ValueError(f'Target branch {target_branch} does not exist')
        base_branch = target_branch
    logger.info(f'Base branch: {base_branch}')

    # Create the new branch locally and switch to it
    logger.info('Creating new branch...')
    checkout = subprocess.run(
        ['git', '-C', patch_dir, 'checkout', '-b', branch_name],
        capture_output=True,
        text=True,
    )
    if checkout.returncode != 0:
        logger.error(f'Error creating new branch: {checkout.stderr}')
        raise RuntimeError(
            f'Failed to create a new branch {branch_name} in {patch_dir}:'
        )

    # Push either to the fork (when given) or to the original repository
    handler._strategy.set_owner(fork_owner or issue.owner)

    logger.info('Pushing changes...')
    remote_url = handler.get_clone_url()
    push = subprocess.run(
        ['git', '-C', patch_dir, 'push', remote_url, branch_name],
        capture_output=True,
        text=True,
    )
    if push.returncode != 0:
        logger.error(f'Error pushing changes: {push.stderr}')
        raise RuntimeError('Failed to push changes to the remote repository')

    # Assemble title and body of the pull request
    final_pr_title = pr_title or f'Fix issue #{issue.number}: {issue.title}'
    extra_section = f'\n\n{additional_message}' if additional_message else ''
    pr_body = (
        f'This pull request fixes #{issue.number}.'
        + extra_section
        + '\n\nAutomatic fix generated by [OpenHands](https://github.com/All-Hands-AI/OpenHands/) 🙌'
    )

    # GitHub cross-repository pull requests need the head namespaced with the
    # fork owner ("owner:branch"); see the "Create a pull request" REST docs.
    head_branch = (
        f'{fork_owner}:{branch_name}' if fork_owner and is_github else branch_name
    )

    if pr_type == 'branch':
        # No PR requested: hand back a compare URL so one can be opened manually
        url = handler.get_compare_url(branch_name)
    else:
        # GitHub and GitLab name the same payload fields differently
        if is_github:
            body_key, head_key, base_key = 'body', 'head', 'base'
        else:
            body_key, head_key, base_key = (
                'description',
                'source_branch',
                'target_branch',
            )
        data = {
            'title': final_pr_title,
            body_key: pr_body,
            head_key: head_branch,
            base_key: base_branch,
            'draft': pr_type == 'draft',
        }
        pr_data = handler.create_pull_request(data)
        url = pr_data['html_url']

        # A review can only be requested on an actual pull request
        if reviewer:
            handler.request_reviewers(reviewer, pr_data['number'])

    logger.info(
        f'{pr_type} created: {url}\n\n--- Title: {final_pr_title}\n\n--- Body:\n{pr_body}'
    )
    return url
def update_existing_pull_request(
    issue: Issue,
    token: str,
    username: str | None,
    platform: ProviderType,
    patch_dir: str,
    llm_config: LLMConfig,
    comment_message: str | None = None,
    additional_message: str | None = None,
    base_domain: str | None = None,
) -> str:
    """Update an existing pull request with the new patches.

    Pushes the local commits to the pull request's head branch, posts a summary
    comment, and replies to the review threads listed in ``issue.thread_ids``.

    Args:
        issue: The issue to update.
        token: The token to use for authentication.
        username: The username to use for authentication.
        platform: The platform of the repository.
        patch_dir: The directory containing the patches to apply.
        llm_config: The LLM configuration to use for summarizing changes.
        comment_message: The main message to post as a comment on the PR.
        additional_message: The additional messages to post as a comment on the PR in json list format.
        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

    Returns:
        The URL of the updated pull request.

    Raises:
        RuntimeError: If pushing to the remote branch fails.
    """
    # Determine default base_domain based on platform
    if base_domain is None:
        base_domain = 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com'

    if platform == ProviderType.GITHUB:
        handler = ServiceContextIssue(
            GithubIssueHandler(issue.owner, issue.repo, token, username, base_domain),
            llm_config,
        )
    else:  # platform == Platform.GITLAB
        handler = ServiceContextIssue(
            GitlabIssueHandler(issue.owner, issue.repo, token, username, base_domain),
            llm_config,
        )

    branch_name = issue.head_branch

    # Push the changes to the existing branch. The branch name comes from the
    # remote pull request, so pass an argument list rather than a shell string
    # to keep it (and patch_dir) from being interpreted by a shell.
    remote_url = f'{handler.get_authorize_url()}{issue.owner}/{issue.repo}.git'
    result = subprocess.run(
        ['git', '-C', patch_dir, 'push', remote_url, branch_name],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        logger.error(f'Error pushing changes: {result.stderr}')
        raise RuntimeError('Failed to push changes to the remote repository')

    pr_url = handler.get_pull_url(issue.number)
    logger.info(f'Updated pull request {pr_url} with new patches.')

    # Generate a summary of all comment success indicators for PR message
    if not comment_message and additional_message:
        try:
            explanations = json.loads(additional_message)
            if explanations:
                comment_message = (
                    'OpenHands made the following changes to resolve the issues:\n\n'
                )
                for explanation in explanations:
                    comment_message += f'- {explanation}\n'

                # Summarize with LLM if provided
                if llm_config is not None:
                    llm = LLM(llm_config)
                    with open(
                        os.path.join(
                            os.path.dirname(__file__),
                            'prompts/resolve/pr-changes-summary.jinja',
                        ),
                        'r',
                    ) as f:
                        template = jinja2.Template(f.read())
                    prompt = template.render(comment_message=comment_message)
                    response = llm.completion(
                        messages=[{'role': 'user', 'content': prompt}],
                    )
                    comment_message = response.choices[0].message.content.strip()
        except (json.JSONDecodeError, TypeError):
            comment_message = f'A new OpenHands update is available, but failed to parse or summarize the changes:\n{additional_message}'

    # Post a comment on the PR
    if comment_message:
        handler.send_comment_msg(issue.number, comment_message)

    # Reply to each unresolved comment thread
    if additional_message and issue.thread_ids:
        try:
            explanations = json.loads(additional_message)
            # zip() pairs explanations with threads positionally and stops at
            # the shorter sequence, so a surplus explanation no longer raises
            # an uncaught IndexError on thread_ids.
            for reply_comment, comment_id in zip(explanations, issue.thread_ids):
                handler.reply_to_comment(issue.number, comment_id, reply_comment)
        except (json.JSONDecodeError, TypeError):
            msg = f'Error occurred when replying to threads; success explanations {additional_message}'
            handler.send_comment_msg(issue.number, msg)

    return pr_url
def process_single_issue(
    output_dir: str,
    resolver_output: ResolverOutput,
    token: str,
    username: str,
    platform: ProviderType,
    pr_type: str,
    llm_config: LLMConfig,
    fork_owner: str | None,
    send_on_failure: bool,
    target_branch: str | None = None,
    reviewer: str | None = None,
    pr_title: str | None = None,
    base_domain: str | None = None,
) -> None:
    """Apply one resolver result to a fresh repo copy and publish it.

    For an ``issue`` result a new pull request (or branch) is sent; for a
    ``pr`` result the existing pull request is updated. Unsuccessful results
    are skipped unless ``send_on_failure`` is set.

    Raises:
        ValueError: If the resolver output has an unknown issue type.
    """
    # Nothing to publish for a failed resolution unless explicitly requested
    if not (resolver_output.success or send_on_failure):
        logger.info(
            f'Issue {resolver_output.issue.number} was not successfully resolved. Skipping PR creation.'
        )
        return

    # Fall back to the public host of the selected platform
    if base_domain is None:
        base_domain = 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com'

    issue = resolver_output.issue
    issue_type = resolver_output.issue_type

    # Issues start from the recorded base commit, PRs from their head branch
    if issue_type == 'issue':
        checkout_ref = resolver_output.base_commit
    elif issue_type == 'pr':
        checkout_ref = issue.head_branch
    else:
        raise ValueError(f'Invalid issue type: {issue_type}')

    patched_repo_dir = initialize_repo(
        output_dir,
        issue.number,
        issue_type,
        checkout_ref,
    )

    apply_patch(patched_repo_dir, resolver_output.git_patch)
    make_commit(patched_repo_dir, issue, issue_type)

    if issue_type == 'pr':
        update_existing_pull_request(
            issue=issue,
            token=token,
            username=username,
            platform=platform,
            patch_dir=patched_repo_dir,
            additional_message=resolver_output.result_explanation,
            llm_config=llm_config,
            base_domain=base_domain,
        )
        return

    send_pull_request(
        issue=issue,
        token=token,
        username=username,
        platform=platform,
        patch_dir=patched_repo_dir,
        pr_type=pr_type,
        fork_owner=fork_owner,
        additional_message=resolver_output.result_explanation,
        target_branch=target_branch,
        reviewer=reviewer,
        pr_title=pr_title,
        base_domain=base_domain,
    )
def main() -> None:
    """Command-line entry point: publish the resolver output for one issue.

    Reads credentials and LLM settings from the command line or, as a
    fallback, from the ``GITHUB_TOKEN``/``GITLAB_TOKEN``, ``GIT_USERNAME``,
    ``LLM_MODEL``, ``LLM_API_KEY`` and ``LLM_BASE_URL`` environment variables,
    loads the matching entry from ``<output-dir>/output.jsonl`` and hands it to
    ``process_single_issue``.

    Raises:
        ValueError: If the token or username is missing, the output directory
            does not exist, or the issue number is not numeric.
        KeyError: If no LLM model is given and ``LLM_MODEL`` is unset.
    """
    parser = argparse.ArgumentParser(
        description='Send a pull request to Github or Gitlab.'
    )
    parser.add_argument(
        '--selected-repo',
        type=str,
        default=None,
        help='repository to send pull request in form of `owner/repo`.',
    )
    parser.add_argument(
        '--token',
        type=str,
        default=None,
        help='token to access the repository.',
    )
    parser.add_argument(
        '--username',
        type=str,
        default=None,
        help='username to access the repository.',
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        default='output',
        help='Output directory to write the results.',
    )
    parser.add_argument(
        '--pr-type',
        type=str,
        default='draft',
        choices=['branch', 'draft', 'ready'],
        help='Type of the pull request to send [branch, draft, ready]',
    )
    parser.add_argument(
        '--issue-number',
        type=str,
        required=True,
        # Only numeric values are accepted below, so the help text no longer
        # advertises an 'all_successful' mode that would be rejected.
        help='Issue number to send the pull request for.',
    )
    parser.add_argument(
        '--fork-owner',
        type=str,
        default=None,
        help='Owner of the fork to push changes to (if different from the original repo owner).',
    )
    parser.add_argument(
        '--send-on-failure',
        action='store_true',
        help='Send a pull request even if the issue was not successfully resolved.',
    )
    parser.add_argument(
        '--llm-model',
        type=str,
        default=None,
        help='LLM model to use for summarizing changes.',
    )
    parser.add_argument(
        '--llm-api-key',
        type=str,
        default=None,
        help='API key for the LLM model.',
    )
    parser.add_argument(
        '--llm-base-url',
        type=str,
        default=None,
        help='Base URL for the LLM model.',
    )
    parser.add_argument(
        '--target-branch',
        type=str,
        default=None,
        help='Target branch to create the pull request against (defaults to repository default branch)',
    )
    parser.add_argument(
        '--reviewer',
        type=str,
        help='GitHub or GitLab username of the person to request review from',
        default=None,
    )
    parser.add_argument(
        '--pr-title',
        type=str,
        help='Custom title for the pull request',
        default=None,
    )
    parser.add_argument(
        '--base-domain',
        type=str,
        default=None,
        help='Base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)',
    )
    my_args = parser.parse_args()

    token = my_args.token or os.getenv('GITHUB_TOKEN') or os.getenv('GITLAB_TOKEN')
    if not token:
        raise ValueError(
            'token is not set, set via --token or GITHUB_TOKEN or GITLAB_TOKEN environment variable.'
        )
    username = my_args.username if my_args.username else os.getenv('GIT_USERNAME')

    # The token itself tells us which platform we are talking to
    platform = call_async_from_sync(
        identify_token,
        GENERAL_TIMEOUT,
        token,
        my_args.base_domain,
    )

    # The API key is optional (None is passed on when it is absent), so a
    # missing LLM_API_KEY variable must not raise KeyError.
    api_key = my_args.llm_api_key or os.environ.get('LLM_API_KEY')
    llm_config = LLMConfig(
        model=my_args.llm_model or os.environ['LLM_MODEL'],
        api_key=SecretStr(api_key) if api_key else None,
        base_url=my_args.llm_base_url or os.environ.get('LLM_BASE_URL', None),
    )

    if not os.path.exists(my_args.output_dir):
        raise ValueError(f'Output directory {my_args.output_dir} does not exist.')

    if not my_args.issue_number.isdigit():
        raise ValueError(f'Issue number {my_args.issue_number} is not a number.')
    issue_number = int(my_args.issue_number)
    output_path = os.path.join(my_args.output_dir, 'output.jsonl')
    resolver_output = load_single_resolver_output(output_path, issue_number)

    if not username:
        raise ValueError('username is required.')

    process_single_issue(
        my_args.output_dir,
        resolver_output,
        token,
        username,
        platform,
        my_args.pr_type,
        llm_config,
        my_args.fork_owner,
        my_args.send_on_failure,
        my_args.target_branch,
        my_args.reviewer,
        my_args.pr_title,
        my_args.base_domain,
    )


if __name__ == '__main__':
    main()