Spaces:
Build error
Build error
import argparse | |
import json | |
import os | |
import shutil | |
import subprocess | |
import jinja2 | |
from pydantic import SecretStr | |
from openhands.core.config import LLMConfig | |
from openhands.core.logger import openhands_logger as logger | |
from openhands.integrations.service_types import ProviderType | |
from openhands.llm.llm import LLM | |
from openhands.resolver.interfaces.github import GithubIssueHandler | |
from openhands.resolver.interfaces.gitlab import GitlabIssueHandler | |
from openhands.resolver.interfaces.issue import Issue | |
from openhands.resolver.interfaces.issue_definitions import ServiceContextIssue | |
from openhands.resolver.io_utils import ( | |
load_single_resolver_output, | |
) | |
from openhands.resolver.patching import apply_diff, parse_patch | |
from openhands.resolver.resolver_output import ResolverOutput | |
from openhands.resolver.utils import identify_token | |
from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync | |
def apply_patch(repo_dir: str, patch: str) -> None: | |
"""Apply a patch to a repository. | |
Args: | |
repo_dir: The directory containing the repository | |
patch: The patch to apply | |
""" | |
diffs = parse_patch(patch) | |
for diff in diffs: | |
if not diff.header.new_path: | |
logger.warning('Could not determine file to patch') | |
continue | |
# Remove both "a/" and "b/" prefixes from paths | |
old_path = ( | |
os.path.join( | |
repo_dir, diff.header.old_path.removeprefix('a/').removeprefix('b/') | |
) | |
if diff.header.old_path and diff.header.old_path != '/dev/null' | |
else None | |
) | |
new_path = os.path.join( | |
repo_dir, diff.header.new_path.removeprefix('a/').removeprefix('b/') | |
) | |
# Check if the file is being deleted | |
if diff.header.new_path == '/dev/null': | |
assert old_path is not None | |
if os.path.exists(old_path): | |
os.remove(old_path) | |
logger.info(f'Deleted file: {old_path}') | |
continue | |
# Handle file rename | |
if old_path and new_path and 'rename from' in patch: | |
# Create parent directory of new path | |
os.makedirs(os.path.dirname(new_path), exist_ok=True) | |
try: | |
# Try to move the file directly | |
shutil.move(old_path, new_path) | |
except shutil.SameFileError: | |
# If it's the same file (can happen with directory renames), copy first then remove | |
shutil.copy2(old_path, new_path) | |
os.remove(old_path) | |
# Try to remove empty parent directories | |
old_dir = os.path.dirname(old_path) | |
while old_dir and old_dir.startswith(repo_dir): | |
try: | |
os.rmdir(old_dir) | |
old_dir = os.path.dirname(old_dir) | |
except OSError: | |
# Directory not empty or other error, stop trying to remove parents | |
break | |
continue | |
if old_path: | |
# Open the file in binary mode to detect line endings | |
with open(old_path, 'rb') as f: | |
original_content = f.read() | |
# Detect line endings | |
if b'\r\n' in original_content: | |
newline = '\r\n' | |
elif b'\n' in original_content: | |
newline = '\n' | |
else: | |
newline = None # Let Python decide | |
try: | |
with open(old_path, 'r', newline=newline) as f: | |
split_content = [x.strip(newline) for x in f.readlines()] | |
except UnicodeDecodeError as e: | |
logger.error(f'Error reading file {old_path}: {e}') | |
split_content = [] | |
else: | |
newline = '\n' | |
split_content = [] | |
if diff.changes is None: | |
logger.warning(f'No changes to apply for {old_path}') | |
continue | |
new_content = apply_diff(diff, split_content) | |
# Ensure the directory exists before writing the file | |
os.makedirs(os.path.dirname(new_path), exist_ok=True) | |
# Write the new content using the detected line endings | |
with open(new_path, 'w', newline=newline) as f: | |
for line in new_content: | |
print(line, file=f) | |
logger.info('Patch applied successfully') | |
def initialize_repo( | |
output_dir: str, issue_number: int, issue_type: str, base_commit: str | None = None | |
) -> str: | |
"""Initialize the repository. | |
Args: | |
output_dir: The output directory to write the repository to | |
issue_number: The issue number to fix | |
issue_type: The type of the issue | |
base_commit: The base commit to checkout (if issue_type is pr) | |
""" | |
src_dir = os.path.join(output_dir, 'repo') | |
dest_dir = os.path.join(output_dir, 'patches', f'{issue_type}_{issue_number}') | |
if not os.path.exists(src_dir): | |
raise ValueError(f'Source directory {src_dir} does not exist.') | |
if os.path.exists(dest_dir): | |
shutil.rmtree(dest_dir) | |
shutil.copytree(src_dir, dest_dir) | |
logger.info(f'Copied repository to {dest_dir}') | |
# Checkout the base commit if provided | |
if base_commit: | |
result = subprocess.run( | |
f'git -C {dest_dir} checkout {base_commit}', | |
shell=True, | |
capture_output=True, | |
text=True, | |
) | |
if result.returncode != 0: | |
logger.info(f'Error checking out commit: {result.stderr}') | |
raise RuntimeError('Failed to check out commit') | |
return dest_dir | |
def make_commit(repo_dir: str, issue: Issue, issue_type: str) -> None: | |
"""Make a commit with the changes to the repository. | |
Args: | |
repo_dir: The directory containing the repository | |
issue: The issue to fix | |
issue_type: The type of the issue | |
""" | |
# Check if git username is set | |
result = subprocess.run( | |
f'git -C {repo_dir} config user.name', | |
shell=True, | |
capture_output=True, | |
text=True, | |
) | |
if not result.stdout.strip(): | |
# If username is not set, configure git | |
subprocess.run( | |
f'git -C {repo_dir} config user.name "openhands" && ' | |
f'git -C {repo_dir} config user.email "[email protected]" && ' | |
f'git -C {repo_dir} config alias.git "git --no-pager"', | |
shell=True, | |
check=True, | |
) | |
logger.info('Git user configured as openhands') | |
# Add all changes to the git index | |
result = subprocess.run( | |
f'git -C {repo_dir} add .', shell=True, capture_output=True, text=True | |
) | |
if result.returncode != 0: | |
logger.error(f'Error adding files: {result.stderr}') | |
raise RuntimeError('Failed to add files to git') | |
# Check the status of the git index | |
status_result = subprocess.run( | |
f'git -C {repo_dir} status --porcelain', | |
shell=True, | |
capture_output=True, | |
text=True, | |
) | |
# If there are no changes, raise an error | |
if not status_result.stdout.strip(): | |
logger.error( | |
f'No changes to commit for issue #{issue.number}. Skipping commit.' | |
) | |
raise RuntimeError('ERROR: Openhands failed to make code changes.') | |
# Prepare the commit message | |
commit_message = f'Fix {issue_type} #{issue.number}: {issue.title}' | |
# Commit the changes | |
result = subprocess.run( | |
['git', '-C', repo_dir, 'commit', '-m', commit_message], | |
capture_output=True, | |
text=True, | |
) | |
if result.returncode != 0: | |
raise RuntimeError(f'Failed to commit changes: {result}') | |
def send_pull_request( | |
issue: Issue, | |
token: str, | |
username: str | None, | |
platform: ProviderType, | |
patch_dir: str, | |
pr_type: str, | |
fork_owner: str | None = None, | |
additional_message: str | None = None, | |
target_branch: str | None = None, | |
reviewer: str | None = None, | |
pr_title: str | None = None, | |
base_domain: str | None = None, | |
) -> str: | |
"""Send a pull request to a GitHub or Gitlab repository. | |
Args: | |
issue: The issue to send the pull request for | |
token: The GitHub or Gitlab token to use for authentication | |
username: The GitHub or Gitlab username, if provided | |
platform: The platform of the repository. | |
patch_dir: The directory containing the patches to apply | |
pr_type: The type: branch (no PR created), draft or ready (regular PR created) | |
fork_owner: The owner of the fork to push changes to (if different from the original repo owner) | |
additional_message: The additional messages to post as a comment on the PR in json list format | |
target_branch: The target branch to create the pull request against (defaults to repository default branch) | |
reviewer: The GitHub or Gitlab username of the reviewer to assign | |
pr_title: Custom title for the pull request (optional) | |
base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab) | |
""" | |
if pr_type not in ['branch', 'draft', 'ready']: | |
raise ValueError(f'Invalid pr_type: {pr_type}') | |
# Determine default base_domain based on platform | |
if base_domain is None: | |
base_domain = 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com' | |
handler = None | |
if platform == ProviderType.GITHUB: | |
handler = ServiceContextIssue( | |
GithubIssueHandler(issue.owner, issue.repo, token, username, base_domain), | |
None, | |
) | |
else: # platform == Platform.GITLAB | |
handler = ServiceContextIssue( | |
GitlabIssueHandler(issue.owner, issue.repo, token, username, base_domain), | |
None, | |
) | |
# Create a new branch with a unique name | |
base_branch_name = f'openhands-fix-issue-{issue.number}' | |
branch_name = handler.get_branch_name( | |
base_branch_name=base_branch_name, | |
) | |
# Get the default branch or use specified target branch | |
logger.info('Getting base branch...') | |
if target_branch: | |
base_branch = target_branch | |
exists = handler.branch_exists(branch_name=target_branch) | |
if not exists: | |
raise ValueError(f'Target branch {target_branch} does not exist') | |
else: | |
base_branch = handler.get_default_branch_name() | |
logger.info(f'Base branch: {base_branch}') | |
# Create and checkout the new branch | |
logger.info('Creating new branch...') | |
result = subprocess.run( | |
['git', '-C', patch_dir, 'checkout', '-b', branch_name], | |
capture_output=True, | |
text=True, | |
) | |
if result.returncode != 0: | |
logger.error(f'Error creating new branch: {result.stderr}') | |
raise RuntimeError( | |
f'Failed to create a new branch {branch_name} in {patch_dir}:' | |
) | |
# Determine the repository to push to (original or fork) | |
push_owner = fork_owner if fork_owner else issue.owner | |
handler._strategy.set_owner(push_owner) | |
logger.info('Pushing changes...') | |
push_url = handler.get_clone_url() | |
result = subprocess.run( | |
['git', '-C', patch_dir, 'push', push_url, branch_name], | |
capture_output=True, | |
text=True, | |
) | |
if result.returncode != 0: | |
logger.error(f'Error pushing changes: {result.stderr}') | |
raise RuntimeError('Failed to push changes to the remote repository') | |
# Prepare the PR data: title and body | |
final_pr_title = ( | |
pr_title if pr_title else f'Fix issue #{issue.number}: {issue.title}' | |
) | |
pr_body = f'This pull request fixes #{issue.number}.' | |
if additional_message: | |
pr_body += f'\n\n{additional_message}' | |
pr_body += '\n\nAutomatic fix generated by [OpenHands](https://github.com/All-Hands-AI/OpenHands/) 🙌' | |
# For cross repo pull request, we need to send head parameter like fork_owner:branch as per git documentation here : https://docs.github.com/en/rest/pulls/pulls?apiVersion=2022-11-28#create-a-pull-request | |
# head parameter usage : The name of the branch where your changes are implemented. For cross-repository pull requests in the same network, namespace head with a user like this: username:branch. | |
if fork_owner and platform == ProviderType.GITHUB: | |
head_branch = f'{fork_owner}:{branch_name}' | |
else: | |
head_branch = branch_name | |
# If we are not sending a PR, we can finish early and return the | |
# URL for the user to open a PR manually | |
if pr_type == 'branch': | |
url = handler.get_compare_url(branch_name) | |
else: | |
# Prepare the PR for the GitHub API | |
data = { | |
'title': final_pr_title, | |
('body' if platform == ProviderType.GITHUB else 'description'): pr_body, | |
( | |
'head' if platform == ProviderType.GITHUB else 'source_branch' | |
): head_branch, | |
( | |
'base' if platform == ProviderType.GITHUB else 'target_branch' | |
): base_branch, | |
'draft': pr_type == 'draft', | |
} | |
pr_data = handler.create_pull_request(data) | |
url = pr_data['html_url'] | |
# Request review if a reviewer was specified | |
if reviewer and pr_type != 'branch': | |
number = pr_data['number'] | |
handler.request_reviewers(reviewer, number) | |
logger.info( | |
f'{pr_type} created: {url}\n\n--- Title: {final_pr_title}\n\n--- Body:\n{pr_body}' | |
) | |
return url | |
def update_existing_pull_request( | |
issue: Issue, | |
token: str, | |
username: str | None, | |
platform: ProviderType, | |
patch_dir: str, | |
llm_config: LLMConfig, | |
comment_message: str | None = None, | |
additional_message: str | None = None, | |
base_domain: str | None = None, | |
) -> str: | |
"""Update an existing pull request with the new patches. | |
Args: | |
issue: The issue to update. | |
token: The token to use for authentication. | |
username: The username to use for authentication. | |
platform: The platform of the repository. | |
patch_dir: The directory containing the patches to apply. | |
llm_config: The LLM configuration to use for summarizing changes. | |
comment_message: The main message to post as a comment on the PR. | |
additional_message: The additional messages to post as a comment on the PR in json list format. | |
base_domain: The base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab) | |
""" | |
# Set up headers and base URL for GitHub or GitLab API | |
# Determine default base_domain based on platform | |
if base_domain is None: | |
base_domain = 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com' | |
handler = None | |
if platform == ProviderType.GITHUB: | |
handler = ServiceContextIssue( | |
GithubIssueHandler(issue.owner, issue.repo, token, username, base_domain), | |
llm_config, | |
) | |
else: # platform == Platform.GITLAB | |
handler = ServiceContextIssue( | |
GitlabIssueHandler(issue.owner, issue.repo, token, username, base_domain), | |
llm_config, | |
) | |
branch_name = issue.head_branch | |
# Prepare the push command | |
push_command = ( | |
f'git -C {patch_dir} push ' | |
f'{handler.get_authorize_url()}' | |
f'{issue.owner}/{issue.repo}.git {branch_name}' | |
) | |
# Push the changes to the existing branch | |
result = subprocess.run(push_command, shell=True, capture_output=True, text=True) | |
if result.returncode != 0: | |
logger.error(f'Error pushing changes: {result.stderr}') | |
raise RuntimeError('Failed to push changes to the remote repository') | |
pr_url = handler.get_pull_url(issue.number) | |
logger.info(f'Updated pull request {pr_url} with new patches.') | |
# Generate a summary of all comment success indicators for PR message | |
if not comment_message and additional_message: | |
try: | |
explanations = json.loads(additional_message) | |
if explanations: | |
comment_message = ( | |
'OpenHands made the following changes to resolve the issues:\n\n' | |
) | |
for explanation in explanations: | |
comment_message += f'- {explanation}\n' | |
# Summarize with LLM if provided | |
if llm_config is not None: | |
llm = LLM(llm_config) | |
with open( | |
os.path.join( | |
os.path.dirname(__file__), | |
'prompts/resolve/pr-changes-summary.jinja', | |
), | |
'r', | |
) as f: | |
template = jinja2.Template(f.read()) | |
prompt = template.render(comment_message=comment_message) | |
response = llm.completion( | |
messages=[{'role': 'user', 'content': prompt}], | |
) | |
comment_message = response.choices[0].message.content.strip() | |
except (json.JSONDecodeError, TypeError): | |
comment_message = f'A new OpenHands update is available, but failed to parse or summarize the changes:\n{additional_message}' | |
# Post a comment on the PR | |
if comment_message: | |
handler.send_comment_msg(issue.number, comment_message) | |
# Reply to each unresolved comment thread | |
if additional_message and issue.thread_ids: | |
try: | |
explanations = json.loads(additional_message) | |
for count, reply_comment in enumerate(explanations): | |
comment_id = issue.thread_ids[count] | |
handler.reply_to_comment(issue.number, comment_id, reply_comment) | |
except (json.JSONDecodeError, TypeError): | |
msg = f'Error occurred when replying to threads; success explanations {additional_message}' | |
handler.send_comment_msg(issue.number, msg) | |
return pr_url | |
def process_single_issue( | |
output_dir: str, | |
resolver_output: ResolverOutput, | |
token: str, | |
username: str, | |
platform: ProviderType, | |
pr_type: str, | |
llm_config: LLMConfig, | |
fork_owner: str | None, | |
send_on_failure: bool, | |
target_branch: str | None = None, | |
reviewer: str | None = None, | |
pr_title: str | None = None, | |
base_domain: str | None = None, | |
) -> None: | |
# Determine default base_domain based on platform | |
if base_domain is None: | |
base_domain = 'github.com' if platform == ProviderType.GITHUB else 'gitlab.com' | |
if not resolver_output.success and not send_on_failure: | |
logger.info( | |
f'Issue {resolver_output.issue.number} was not successfully resolved. Skipping PR creation.' | |
) | |
return | |
issue_type = resolver_output.issue_type | |
if issue_type == 'issue': | |
patched_repo_dir = initialize_repo( | |
output_dir, | |
resolver_output.issue.number, | |
issue_type, | |
resolver_output.base_commit, | |
) | |
elif issue_type == 'pr': | |
patched_repo_dir = initialize_repo( | |
output_dir, | |
resolver_output.issue.number, | |
issue_type, | |
resolver_output.issue.head_branch, | |
) | |
else: | |
raise ValueError(f'Invalid issue type: {issue_type}') | |
apply_patch(patched_repo_dir, resolver_output.git_patch) | |
make_commit(patched_repo_dir, resolver_output.issue, issue_type) | |
if issue_type == 'pr': | |
update_existing_pull_request( | |
issue=resolver_output.issue, | |
token=token, | |
username=username, | |
platform=platform, | |
patch_dir=patched_repo_dir, | |
additional_message=resolver_output.result_explanation, | |
llm_config=llm_config, | |
base_domain=base_domain, | |
) | |
else: | |
send_pull_request( | |
issue=resolver_output.issue, | |
token=token, | |
username=username, | |
platform=platform, | |
patch_dir=patched_repo_dir, | |
pr_type=pr_type, | |
fork_owner=fork_owner, | |
additional_message=resolver_output.result_explanation, | |
target_branch=target_branch, | |
reviewer=reviewer, | |
pr_title=pr_title, | |
base_domain=base_domain, | |
) | |
def main() -> None: | |
parser = argparse.ArgumentParser( | |
description='Send a pull request to Github or Gitlab.' | |
) | |
parser.add_argument( | |
'--selected-repo', | |
type=str, | |
default=None, | |
help='repository to send pull request in form of `owner/repo`.', | |
) | |
parser.add_argument( | |
'--token', | |
type=str, | |
default=None, | |
help='token to access the repository.', | |
) | |
parser.add_argument( | |
'--username', | |
type=str, | |
default=None, | |
help='username to access the repository.', | |
) | |
parser.add_argument( | |
'--output-dir', | |
type=str, | |
default='output', | |
help='Output directory to write the results.', | |
) | |
parser.add_argument( | |
'--pr-type', | |
type=str, | |
default='draft', | |
choices=['branch', 'draft', 'ready'], | |
help='Type of the pull request to send [branch, draft, ready]', | |
) | |
parser.add_argument( | |
'--issue-number', | |
type=str, | |
required=True, | |
help="Issue number to send the pull request for, or 'all_successful' to process all successful issues.", | |
) | |
parser.add_argument( | |
'--fork-owner', | |
type=str, | |
default=None, | |
help='Owner of the fork to push changes to (if different from the original repo owner).', | |
) | |
parser.add_argument( | |
'--send-on-failure', | |
action='store_true', | |
help='Send a pull request even if the issue was not successfully resolved.', | |
) | |
parser.add_argument( | |
'--llm-model', | |
type=str, | |
default=None, | |
help='LLM model to use for summarizing changes.', | |
) | |
parser.add_argument( | |
'--llm-api-key', | |
type=str, | |
default=None, | |
help='API key for the LLM model.', | |
) | |
parser.add_argument( | |
'--llm-base-url', | |
type=str, | |
default=None, | |
help='Base URL for the LLM model.', | |
) | |
parser.add_argument( | |
'--target-branch', | |
type=str, | |
default=None, | |
help='Target branch to create the pull request against (defaults to repository default branch)', | |
) | |
parser.add_argument( | |
'--reviewer', | |
type=str, | |
help='GitHub or GitLab username of the person to request review from', | |
default=None, | |
) | |
parser.add_argument( | |
'--pr-title', | |
type=str, | |
help='Custom title for the pull request', | |
default=None, | |
) | |
parser.add_argument( | |
'--base-domain', | |
type=str, | |
default=None, | |
help='Base domain for the git server (defaults to "github.com" for GitHub and "gitlab.com" for GitLab)', | |
) | |
my_args = parser.parse_args() | |
token = my_args.token or os.getenv('GITHUB_TOKEN') or os.getenv('GITLAB_TOKEN') | |
if not token: | |
raise ValueError( | |
'token is not set, set via --token or GITHUB_TOKEN or GITLAB_TOKEN environment variable.' | |
) | |
username = my_args.username if my_args.username else os.getenv('GIT_USERNAME') | |
platform = call_async_from_sync( | |
identify_token, | |
GENERAL_TIMEOUT, | |
token, | |
my_args.base_domain, | |
) | |
api_key = my_args.llm_api_key or os.environ['LLM_API_KEY'] | |
llm_config = LLMConfig( | |
model=my_args.llm_model or os.environ['LLM_MODEL'], | |
api_key=SecretStr(api_key) if api_key else None, | |
base_url=my_args.llm_base_url or os.environ.get('LLM_BASE_URL', None), | |
) | |
if not os.path.exists(my_args.output_dir): | |
raise ValueError(f'Output directory {my_args.output_dir} does not exist.') | |
if not my_args.issue_number.isdigit(): | |
raise ValueError(f'Issue number {my_args.issue_number} is not a number.') | |
issue_number = int(my_args.issue_number) | |
output_path = os.path.join(my_args.output_dir, 'output.jsonl') | |
resolver_output = load_single_resolver_output(output_path, issue_number) | |
if not username: | |
raise ValueError('username is required.') | |
process_single_issue( | |
my_args.output_dir, | |
resolver_output, | |
token, | |
username, | |
platform, | |
my_args.pr_type, | |
llm_config, | |
my_args.fork_owner, | |
my_args.send_on_failure, | |
my_args.target_branch, | |
my_args.reviewer, | |
my_args.pr_title, | |
my_args.base_domain, | |
) | |
if __name__ == '__main__': | |
main() | |