import argparse
import json
import os
import shutil
import subprocess

import jinja2
from pydantic import SecretStr

from openhands.core.config import LLMConfig
from openhands.core.logger import openhands_logger as logger
from openhands.integrations.service_types import ProviderType
from openhands.llm.llm import LLM
from openhands.resolver.interfaces.github import GithubIssueHandler
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler
from openhands.resolver.interfaces.issue import Issue
from openhands.resolver.interfaces.issue_definitions import ServiceContextIssue
from openhands.resolver.io_utils import (
    load_single_resolver_output,
)
from openhands.resolver.patching import apply_diff, parse_patch
from openhands.resolver.resolver_output import ResolverOutput
from openhands.resolver.utils import identify_token
from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync


def _resolve_base_domain(platform: ProviderType, base_domain: str | None) -> str:
    """Return ``base_domain``, or the platform's public default when it is None.

    Args:
        platform: The platform of the repository.
        base_domain: An explicitly configured domain, or None.

    Returns:
        ``base_domain`` when given; otherwise ``'github.com'`` for GitHub and
        ``'gitlab.com'`` for every other platform.
    """
    if base_domain is not None:
        return base_domain
    return 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com'


def _create_handler(
    issue: Issue,
    token: str,
    username: str | None,
    platform: ProviderType,
    base_domain: str | None,
    llm_config: LLMConfig | None,
) -> ServiceContextIssue:
    """Build the issue-service wrapper for the repository that owns ``issue``.

    Args:
        issue: The issue whose ``owner``/``repo`` identify the repository.
        token: The token to use for authentication.
        username: The username to use for authentication, if provided.
        platform: The platform of the repository.
        base_domain: The git server domain, or None for the platform default.
        llm_config: The LLM configuration handed to ``ServiceContextIssue``
            (may be None).

    Returns:
        A ``ServiceContextIssue`` wrapping a ``GithubIssueHandler`` for GitHub
        and a ``GitlabIssueHandler`` for any other platform.
    """
    domain = _resolve_base_domain(platform, base_domain)
    handler_cls = (
        GithubIssueHandler if platform == ProviderType.GITHUB else GitlabIssueHandler
    )
    return ServiceContextIssue(
        handler_cls(issue.owner, issue.repo, token, username, domain),
        llm_config,
    )


def apply_patch(repo_dir: str, patch: str) -> None:
    """Apply a patch to a repository.

    Every diff found in ``patch`` is handled in turn: deletions remove the old
    file, renames move it, and everything else is rewritten with the result of
    ``apply_diff`` using the line endings detected in the original file.

    Args:
        repo_dir: The directory containing the repository
        patch: The patch to apply
    """
    diffs = parse_patch(patch)
    for diff in diffs:
        if not diff.header.new_path:
            logger.warning('Could not determine file to patch')
            continue

        # Remove both "a/" and "b/" prefixes from paths
        old_path = (
            os.path.join(
                repo_dir, diff.header.old_path.removeprefix('a/').removeprefix('b/')
            )
            if diff.header.old_path and diff.header.old_path != '/dev/null'
            else None
        )
        new_path = os.path.join(
            repo_dir, diff.header.new_path.removeprefix('a/').removeprefix('b/')
        )

        # Check if the file is being deleted
        if diff.header.new_path == '/dev/null':
            assert old_path is not None
            if os.path.exists(old_path):
                os.remove(old_path)
                logger.info(f'Deleted file: {old_path}')
            continue

        # Handle file rename. 'rename from' is searched in the WHOLE patch, so
        # the paths must also differ; otherwise an ordinary modification that
        # merely shares a patch with a rename would be skipped here.
        # NOTE(review): content changes that accompany a rename are not
        # applied by this branch - confirm whether that is intended.
        if old_path and old_path != new_path and 'rename from' in patch:
            # Create parent directory of new path
            os.makedirs(os.path.dirname(new_path), exist_ok=True)
            try:
                # Try to move the file directly
                shutil.move(old_path, new_path)
            except shutil.SameFileError:
                # If it's the same file (can happen with directory renames),
                # copy first then remove
                shutil.copy2(old_path, new_path)
                os.remove(old_path)

            # Try to remove empty parent directories
            old_dir = os.path.dirname(old_path)
            while old_dir and old_dir.startswith(repo_dir):
                try:
                    os.rmdir(old_dir)
                    old_dir = os.path.dirname(old_dir)
                except OSError:
                    # Directory not empty or other error, stop trying to
                    # remove parents
                    break
            continue

        newline: str | None
        if old_path:
            # Open the file in binary mode to detect line endings
            with open(old_path, 'rb') as f:
                original_content = f.read()

            # Detect line endings
            if b'\r\n' in original_content:
                newline = '\r\n'
            elif b'\n' in original_content:
                newline = '\n'
            else:
                newline = None  # Let Python decide

            # Only trailing line terminators are removed, so leading
            # whitespace (indentation) is always preserved.
            terminator_chars = newline if newline is not None else '\r\n'
            try:
                with open(old_path, 'r', newline=newline) as f:
                    split_content = [x.rstrip(terminator_chars) for x in f.readlines()]
            except UnicodeDecodeError as e:
                logger.error(f'Error reading file {old_path}: {e}')
                split_content = []
        else:
            newline = '\n'
            split_content = []

        if diff.changes is None:
            logger.warning(f'No changes to apply for {old_path}')
            continue

        new_content = apply_diff(diff, split_content)

        # Ensure the directory exists before writing the file
        os.makedirs(os.path.dirname(new_path), exist_ok=True)

        # Write the new content using the detected line endings
        with open(new_path, 'w', newline=newline) as f:
            for line in new_content:
                print(line, file=f)

    logger.info('Patch applied successfully')


def initialize_repo(
    output_dir: str, issue_number: int, issue_type: str, base_commit: str | None = None
) -> str:
    """Initialize the repository.

    Copies ``<output_dir>/repo`` to ``<output_dir>/patches/<issue_type>_<issue_number>``
    (replacing any previous copy) and optionally checks out ``base_commit``.

    Args:
        output_dir: The output directory to write the repository to
        issue_number: The issue number to fix
        issue_type: The type of the issue
        base_commit: The commit or branch to check out in the copy, if any

    Returns:
        The path of the freshly copied repository.

    Raises:
        ValueError: If ``<output_dir>/repo`` does not exist.
        RuntimeError: If the checkout of ``base_commit`` fails.
    """
    src_dir = os.path.join(output_dir, 'repo')
    dest_dir = os.path.join(output_dir, 'patches', f'{issue_type}_{issue_number}')

    if not os.path.exists(src_dir):
        raise ValueError(f'Source directory {src_dir} does not exist.')

    if os.path.exists(dest_dir):
        shutil.rmtree(dest_dir)

    shutil.copytree(src_dir, dest_dir)
    logger.info(f'Copied repository to {dest_dir}')

    # Checkout the base commit if provided
    if base_commit:
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        result = subprocess.run(
            ['git', '-C', dest_dir, 'checkout', base_commit],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            logger.error(f'Error checking out commit: {result.stderr}')
            raise RuntimeError('Failed to check out commit')

    return dest_dir


def make_commit(repo_dir: str, issue: Issue, issue_type: str) -> None:
    """Make a commit with the changes to the repository.

    Args:
        repo_dir: The directory containing the repository
        issue: The issue to fix
        issue_type: The type of the issue

    Raises:
        subprocess.CalledProcessError: If configuring the git identity fails.
        RuntimeError: If staging fails, there is nothing to commit, or the
            commit itself fails.
    """
    # Check if git username is set
    result = subprocess.run(
        ['git', '-C', repo_dir, 'config', 'user.name'],
        capture_output=True,
        text=True,
    )

    if not result.stdout.strip():
        # If username is not set, configure git. check=True stops at the
        # first failing setting.
        git_settings = (
            ('user.name', 'openhands'),
            ('user.email', 'openhands@all-hands.dev'),
            ('alias.git', 'git --no-pager'),
        )
        for key, value in git_settings:
            subprocess.run(['git', '-C', repo_dir, 'config', key, value], check=True)
        logger.info('Git user configured as openhands')

    # Add all changes to the git index
    result = subprocess.run(
        ['git', '-C', repo_dir, 'add', '.'], capture_output=True, text=True
    )
    if result.returncode != 0:
        logger.error(f'Error adding files: {result.stderr}')
        raise RuntimeError('Failed to add files to git')

    # Check the status of the git index
    status_result = subprocess.run(
        ['git', '-C', repo_dir, 'status', '--porcelain'],
        capture_output=True,
        text=True,
    )

    # If there are no changes, raise an error
    if not status_result.stdout.strip():
        logger.error(
            f'No changes to commit for issue #{issue.number}. Skipping commit.'
        )
        raise RuntimeError('ERROR: Openhands failed to make code changes.')

    # Prepare the commit message
    commit_message = f'Fix {issue_type} #{issue.number}: {issue.title}'

    # Commit the changes
    result = subprocess.run(
        ['git', '-C', repo_dir, 'commit', '-m', commit_message],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f'Failed to commit changes: {result}')


def send_pull_request(
    issue: Issue,
    token: str,
    username: str | None,
    platform: ProviderType,
    patch_dir: str,
    pr_type: str,
    fork_owner: str | None = None,
    additional_message: str | None = None,
    target_branch: str | None = None,
    reviewer: str | None = None,
    pr_title: str | None = None,
    base_domain: str | None = None,
) -> str:
    """Send a pull request to a GitHub or Gitlab repository.

    Args:
        issue: The issue to send the pull request for
        token: The GitHub or Gitlab token to use for authentication
        username: The GitHub or Gitlab username, if provided
        platform: The platform of the repository.
        patch_dir: The directory containing the patches to apply
        pr_type: The type: branch (no PR created), draft or ready (regular PR created)
        fork_owner: The owner of the fork to push changes to (if different from the original repo owner)
        additional_message: Extra text appended to the pull request body
        target_branch: The target branch to create the pull request against (defaults to repository default branch)
        reviewer: The GitHub or Gitlab username of the reviewer to assign
        pr_title: Custom title for the pull request (optional)
        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

    Returns:
        The URL of the created pull request, or the compare URL when
        ``pr_type`` is ``'branch'``.

    Raises:
        ValueError: If ``pr_type`` is invalid or ``target_branch`` does not exist.
        RuntimeError: If creating the local branch or pushing it fails.
    """
    if pr_type not in ['branch', 'draft', 'ready']:
        raise ValueError(f'Invalid pr_type: {pr_type}')

    is_github = platform == ProviderType.GITHUB
    handler = _create_handler(issue, token, username, platform, base_domain, None)

    # Create a new branch with a unique name
    base_branch_name = f'openhands-fix-issue-{issue.number}'
    branch_name = handler.get_branch_name(
        base_branch_name=base_branch_name,
    )

    # Get the default branch or use specified target branch
    logger.info('Getting base branch...')
    if target_branch:
        base_branch = target_branch
        if not handler.branch_exists(branch_name=target_branch):
            raise ValueError(f'Target branch {target_branch} does not exist')
    else:
        base_branch = handler.get_default_branch_name()
    logger.info(f'Base branch: {base_branch}')

    # Create and checkout the new branch
    logger.info('Creating new branch...')
    result = subprocess.run(
        ['git', '-C', patch_dir, 'checkout', '-b', branch_name],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        logger.error(f'Error creating new branch: {result.stderr}')
        raise RuntimeError(
            f'Failed to create a new branch {branch_name} in {patch_dir}:'
        )

    # Determine the repository to push to (original or fork)
    push_owner = fork_owner if fork_owner else issue.owner
    handler._strategy.set_owner(push_owner)

    logger.info('Pushing changes...')
    push_url = handler.get_clone_url()
    result = subprocess.run(
        ['git', '-C', patch_dir, 'push', push_url, branch_name],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        logger.error(f'Error pushing changes: {result.stderr}')
        raise RuntimeError('Failed to push changes to the remote repository')

    # Prepare the PR data: title and body
    final_pr_title = (
        pr_title if pr_title else f'Fix issue #{issue.number}: {issue.title}'
    )
    pr_body = f'This pull request fixes #{issue.number}.'
    if additional_message:
        pr_body += f'\n\n{additional_message}'
    pr_body += '\n\nAutomatic fix generated by [OpenHands](https://github.com/All-Hands-AI/OpenHands/) 🙌'

    # For cross repo pull request, we need to send head parameter like
    # fork_owner:branch as per git documentation here:
    # https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#create-a-pull-request
    # head parameter usage: The name of the branch where your changes are
    # implemented. For cross-repository pull requests in the same network,
    # namespace head with a user like this: username:branch.
    if fork_owner and is_github:
        head_branch = f'{fork_owner}:{branch_name}'
    else:
        head_branch = branch_name

    # If we are not sending a PR, we can finish early and return the
    # URL for the user to open a PR manually
    if pr_type == 'branch':
        url = handler.get_compare_url(branch_name)
    else:
        # Prepare the PR payload; the field names differ between the two
        # platforms.
        data = {
            'title': final_pr_title,
            ('body' if is_github else 'description'): pr_body,
            ('head' if is_github else 'source_branch'): head_branch,
            ('base' if is_github else 'target_branch'): base_branch,
            'draft': pr_type == 'draft',
        }

        pr_data = handler.create_pull_request(data)
        url = pr_data['html_url']

        # Request review if a reviewer was specified (pr_type is never
        # 'branch' in this branch of the conditional)
        if reviewer:
            number = pr_data['number']
            handler.request_reviewers(reviewer, number)

    logger.info(
        f'{pr_type} created: {url}\n\n--- Title: {final_pr_title}\n\n--- Body:\n{pr_body}'
    )

    return url


def update_existing_pull_request(
    issue: Issue,
    token: str,
    username: str | None,
    platform: ProviderType,
    patch_dir: str,
    llm_config: LLMConfig,
    comment_message: str | None = None,
    additional_message: str | None = None,
    base_domain: str | None = None,
) -> str:
    """Update an existing pull request with the new patches.

    Args:
        issue: The issue to update.
        token: The token to use for authentication.
        username: The username to use for authentication.
        platform: The platform of the repository.
        patch_dir: The directory containing the patches to apply.
        llm_config: The LLM configuration to use for summarizing changes.
        comment_message: The main message to post as a comment on the PR.
        additional_message: The additional messages to post as a comment on the PR in json list format.
        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

    Returns:
        The URL of the updated pull request.

    Raises:
        RuntimeError: If the issue has no head branch or the push fails.
    """
    handler = _create_handler(
        issue, token, username, platform, base_domain, llm_config
    )

    branch_name = issue.head_branch
    if not branch_name:
        # An argument list cannot carry None; fail with the same exception
        # type a failed push raises.
        raise RuntimeError(
            f'Failed to push changes: no head branch known for #{issue.number}'
        )

    # Prepare the push command as an argument list (no shell involved)
    push_url = f'{handler.get_authorize_url()}{issue.owner}/{issue.repo}.git'
    push_command = ['git', '-C', patch_dir, 'push', push_url, branch_name]

    # Push the changes to the existing branch
    result = subprocess.run(push_command, capture_output=True, text=True)
    if result.returncode != 0:
        logger.error(f'Error pushing changes: {result.stderr}')
        raise RuntimeError('Failed to push changes to the remote repository')

    pr_url = handler.get_pull_url(issue.number)
    logger.info(f'Updated pull request {pr_url} with new patches.')

    # Generate a summary of all comment success indicators for PR message
    if not comment_message and additional_message:
        try:
            explanations = json.loads(additional_message)
            if explanations:
                comment_message = (
                    'OpenHands made the following changes to resolve the issues:\n\n'
                )
                comment_message += ''.join(
                    f'- {explanation}\n' for explanation in explanations
                )

                # Summarize with LLM if provided
                if llm_config is not None:
                    llm = LLM(llm_config)
                    template_path = os.path.join(
                        os.path.dirname(__file__),
                        'prompts/resolve/pr-changes-summary.jinja',
                    )
                    with open(template_path, 'r') as f:
                        template = jinja2.Template(f.read())
                    prompt = template.render(comment_message=comment_message)
                    response = llm.completion(
                        messages=[{'role': 'user', 'content': prompt}],
                    )
                    comment_message = response.choices[0].message.content.strip()

        except (json.JSONDecodeError, TypeError):
            comment_message = f'A new OpenHands update is available, but failed to parse or summarize the changes:\n{additional_message}'

    # Post a comment on the PR
    if comment_message:
        handler.send_comment_msg(issue.number, comment_message)

    # Reply to each unresolved comment thread
    if additional_message and issue.thread_ids:
        try:
            explanations = json.loads(additional_message)
            # zip() pairs explanations with threads positionally and stops at
            # the shorter sequence, so surplus explanations cannot index past
            # the end of thread_ids.
            for comment_id, reply_comment in zip(issue.thread_ids, explanations):
                handler.reply_to_comment(issue.number, comment_id, reply_comment)
        except (json.JSONDecodeError, TypeError):
            msg = f'Error occurred when replying to threads; success explanations {additional_message}'
            handler.send_comment_msg(issue.number, msg)

    return pr_url


def process_single_issue(
    output_dir: str,
    resolver_output: ResolverOutput,
    token: str,
    username: str,
    platform: ProviderType,
    pr_type: str,
    llm_config: LLMConfig,
    fork_owner: str | None,
    send_on_failure: bool,
    target_branch: str | None = None,
    reviewer: str | None = None,
    pr_title: str | None = None,
    base_domain: str | None = None,
) -> None:
    """Apply one resolver result to a repository copy and publish it.

    The patch in ``resolver_output`` is applied to a fresh copy of the
    repository and committed. For issue type ``'pr'`` the existing pull
    request is updated; for ``'issue'`` a new pull request (or branch) is sent.

    Args:
        output_dir: The directory that contains the ``repo`` checkout.
        resolver_output: The resolver result to publish.
        token: The token to use for authentication.
        username: The username to use for authentication.
        platform: The platform of the repository.
        pr_type: The type: branch (no PR created), draft or ready.
        llm_config: The LLM configuration to use for summarizing changes.
        fork_owner: The owner of the fork to push changes to, if any.
        send_on_failure: Publish even when the issue was not resolved.
        target_branch: The branch to open the pull request against, if any.
        reviewer: The username of the reviewer to assign, if any.
        pr_title: Custom title for the pull request, if any.
        base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)

    Raises:
        ValueError: If the resolver output has an unknown issue type.
    """
    # Determine default base_domain based on platform
    base_domain = _resolve_base_domain(platform, base_domain)

    if not resolver_output.success and not send_on_failure:
        logger.info(
            f'Issue {resolver_output.issue.number} was not successfully resolved. Skipping PR creation.'
        )
        return

    issue = resolver_output.issue
    issue_type = resolver_output.issue_type

    # Issues start from the recorded base commit; PRs from their head branch.
    if issue_type == 'issue':
        checkout_ref = resolver_output.base_commit
    elif issue_type == 'pr':
        checkout_ref = issue.head_branch
    else:
        raise ValueError(f'Invalid issue type: {issue_type}')

    patched_repo_dir = initialize_repo(
        output_dir,
        issue.number,
        issue_type,
        checkout_ref,
    )

    apply_patch(patched_repo_dir, resolver_output.git_patch)

    make_commit(patched_repo_dir, issue, issue_type)

    if issue_type == 'pr':
        update_existing_pull_request(
            issue=issue,
            token=token,
            username=username,
            platform=platform,
            patch_dir=patched_repo_dir,
            additional_message=resolver_output.result_explanation,
            llm_config=llm_config,
            base_domain=base_domain,
        )
    else:
        send_pull_request(
            issue=issue,
            token=token,
            username=username,
            platform=platform,
            patch_dir=patched_repo_dir,
            pr_type=pr_type,
            fork_owner=fork_owner,
            additional_message=resolver_output.result_explanation,
            target_branch=target_branch,
            reviewer=reviewer,
            pr_title=pr_title,
            base_domain=base_domain,
        )


def main() -> None:
    """Parse command-line arguments and publish a single resolver result.

    Raises:
        ValueError: If the token, username or output directory is missing, or
            the issue number is not numeric.
        KeyError: If no LLM model is given via ``--llm-model`` or ``LLM_MODEL``.
    """
    parser = argparse.ArgumentParser(
        description='Send a pull request to Github or Gitlab.'
    )
    parser.add_argument(
        '--selected-repo',
        type=str,
        default=None,
        help='repository to send pull request in form of `owner/repo`.',
    )
    parser.add_argument(
        '--token',
        type=str,
        default=None,
        help='token to access the repository.',
    )
    parser.add_argument(
        '--username',
        type=str,
        default=None,
        help='username to access the repository.',
    )
    parser.add_argument(
        '--output-dir',
        type=str,
        default='output',
        help='Output directory to write the results.',
    )
    parser.add_argument(
        '--pr-type',
        type=str,
        default='draft',
        choices=['branch', 'draft', 'ready'],
        help='Type of the pull request to send [branch, draft, ready]',
    )
    # NOTE(review): the help text mentions 'all_successful', but only numeric
    # values pass the isdigit() check below - confirm which one is intended.
    parser.add_argument(
        '--issue-number',
        type=str,
        required=True,
        help="Issue number to send the pull request for, or 'all_successful' to process all successful issues.",
    )
    parser.add_argument(
        '--fork-owner',
        type=str,
        default=None,
        help='Owner of the fork to push changes to (if different from the original repo owner).',
    )
    parser.add_argument(
        '--send-on-failure',
        action='store_true',
        help='Send a pull request even if the issue was not successfully resolved.',
    )
    parser.add_argument(
        '--llm-model',
        type=str,
        default=None,
        help='LLM model to use for summarizing changes.',
    )
    parser.add_argument(
        '--llm-api-key',
        type=str,
        default=None,
        help='API key for the LLM model.',
    )
    parser.add_argument(
        '--llm-base-url',
        type=str,
        default=None,
        help='Base URL for the LLM model.',
    )
    parser.add_argument(
        '--target-branch',
        type=str,
        default=None,
        help='Target branch to create the pull request against (defaults to repository default branch)',
    )
    parser.add_argument(
        '--reviewer',
        type=str,
        help='GitHub or GitLab username of the person to request review from',
        default=None,
    )
    parser.add_argument(
        '--pr-title',
        type=str,
        help='Custom title for the pull request',
        default=None,
    )
    parser.add_argument(
        '--base-domain',
        type=str,
        default=None,
        help='Base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)',
    )
    my_args = parser.parse_args()

    token = my_args.token or os.getenv('GITHUB_TOKEN') or os.getenv('GITLAB_TOKEN')
    if not token:
        raise ValueError(
            'token is not set, set via --token or GITHUB_TOKEN or GITLAB_TOKEN environment variable.'
        )
    username = my_args.username if my_args.username else os.getenv('GIT_USERNAME')

    platform = call_async_from_sync(
        identify_token,
        GENERAL_TIMEOUT,
        token,
        my_args.base_domain,
    )

    # .get() keeps a missing LLM_API_KEY from raising, so the
    # `if api_key else None` fallback below is actually reachable.
    api_key = my_args.llm_api_key or os.environ.get('LLM_API_KEY')
    llm_config = LLMConfig(
        model=my_args.llm_model or os.environ['LLM_MODEL'],
        api_key=SecretStr(api_key) if api_key else None,
        base_url=my_args.llm_base_url or os.environ.get('LLM_BASE_URL', None),
    )

    if not os.path.exists(my_args.output_dir):
        raise ValueError(f'Output directory {my_args.output_dir} does not exist.')

    if not my_args.issue_number.isdigit():
        raise ValueError(f'Issue number {my_args.issue_number} is not a number.')
    issue_number = int(my_args.issue_number)
    output_path = os.path.join(my_args.output_dir, 'output.jsonl')
    resolver_output = load_single_resolver_output(output_path, issue_number)
    if not username:
        raise ValueError('username is required.')
    process_single_issue(
        my_args.output_dir,
        resolver_output,
        token,
        username,
        platform,
        my_args.pr_type,
        llm_config,
        my_args.fork_owner,
        my_args.send_on_failure,
        my_args.target_branch,
        my_args.reviewer,
        my_args.pr_title,
        my_args.base_domain,
    )


if __name__ == '__main__':
    main()