# GitLab issue/merge-request handlers for the OpenHands resolver.
from typing import Any
from urllib.parse import quote
import httpx
from openhands.core.logger import openhands_logger as logger
from openhands.resolver.interfaces.issue import (
Issue,
IssueHandlerInterface,
ReviewThread,
)
from openhands.resolver.utils import extract_issue_references
class GitlabIssueHandler(IssueHandlerInterface):
    """Download, convert, and comment on GitLab issues via the REST API."""

    def __init__(
        self,
        owner: str,
        repo: str,
        token: str,
        username: str | None = None,
        base_domain: str = 'gitlab.com',
    ):
        """Initialize a GitLab issue handler.

        Args:
            owner: The owner of the repository
            repo: The name of the repository
            token: The GitLab personal access token
            username: Optional GitLab username
            base_domain: The domain for GitLab Enterprise (default: "gitlab.com")
        """
        self.owner = owner
        self.repo = repo
        self.token = token
        self.username = username
        self.base_domain = base_domain
        # Derived values are computed once here; see the NOTE in set_owner.
        self.base_url = self.get_base_url()
        self.download_url = self.get_download_url()
        self.clone_url = self.get_clone_url()
        self.headers = self.get_headers()

    def set_owner(self, owner: str) -> None:
        """Change the repository owner.

        NOTE(review): the derived URLs (base_url, download_url, clone_url)
        computed in __init__ are NOT refreshed here and keep pointing at the
        original owner — confirm whether callers rely on that before changing
        it (the subclass assigns download_url manually, so a blind refresh
        here would clobber it).
        """
        self.owner = owner

    def get_headers(self) -> dict[str, str]:
        """Return the HTTP headers (bearer auth + JSON accept) for API calls."""
        return {
            'Authorization': f'Bearer {self.token}',
            'Accept': 'application/json',
        }

    def get_base_url(self) -> str:
        """Return the v4 REST API base URL for this project.

        The 'owner/repo' path is URL-encoded (safe='') because GitLab expects
        the project path as a single, fully-escaped path segment.
        """
        project_path = quote(f'{self.owner}/{self.repo}', safe='')
        return f'https://{self.base_domain}/api/v4/projects/{project_path}'

    def get_authorize_url(self) -> str:
        """Return a base URL carrying username:token basic-auth credentials."""
        return f'https://{self.username}:{self.token}@{self.base_domain}/'

    def get_branch_url(self, branch_name: str) -> str:
        """Return the REST URL for a specific repository branch."""
        return self.get_base_url() + f'/repository/branches/{branch_name}'

    def get_download_url(self) -> str:
        """Return the REST URL used to list this project's issues."""
        return f'{self.base_url}/issues'

    def get_clone_url(self) -> str:
        """Return an HTTPS clone URL with embedded credentials.

        Uses 'username:token' when a username is configured, otherwise the
        token alone.
        """
        username_and_token = self.token
        if self.username:
            username_and_token = f'{self.username}:{self.token}'
        return f'https://{username_and_token}@{self.base_domain}/{self.owner}/{self.repo}.git'

    def get_graphql_url(self) -> str:
        """Return the GraphQL endpoint for this GitLab instance."""
        return f'https://{self.base_domain}/api/graphql'

    def get_compare_url(self, branch_name: str) -> str:
        """Return the web URL comparing the default branch to branch_name."""
        return f'https://{self.base_domain}/{self.owner}/{self.repo}/-/compare/{self.get_default_branch_name()}...{branch_name}'

    def get_converted_issues(
        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
    ) -> list[Issue]:
        """Download issues from Gitlab.

        Args:
            issue_numbers: The numbers of the issues to download
            comment_id: The ID of a single comment, if provided, otherwise all comments

        Returns:
            List of Gitlab issues.

        Raises:
            ValueError: If no issue numbers are given, or a single requested
                issue cannot be found.
        """
        if not issue_numbers:
            raise ValueError('Unspecified issue number')

        all_issues = self.download_issues()
        logger.info(f'Limiting resolving to issues {issue_numbers}.')
        all_issues = [
            issue
            for issue in all_issues
            # TODO: restore the merge_requests_count == 0 filter once testing
            # is done, so issues that already have an MR are skipped:
            # if issue['iid'] in issue_numbers and issue['merge_requests_count'] == 0
            if issue['iid'] in issue_numbers  # TODO for testing
        ]

        # Only report "not found" when exactly one issue was requested; for a
        # batch request an empty result simply yields an empty list.
        if len(issue_numbers) == 1 and not all_issues:
            raise ValueError(f'Issue {issue_numbers[0]} not found')

        converted_issues = []
        for issue in all_issues:
            # Skip malformed payloads that lack the fields Issue requires.
            if any(issue.get(key) is None for key in ['iid', 'title']):
                logger.warning(f'Skipping issue {issue} as it is missing iid or title.')
                continue

            # Handle empty body by using empty string
            if issue.get('description') is None:
                issue['description'] = ''

            # Get issue thread comments
            thread_comments = self.get_issue_comments(
                issue['iid'], comment_id=comment_id
            )

            issue_details = Issue(
                owner=self.owner,
                repo=self.repo,
                number=issue['iid'],
                title=issue['title'],
                body=issue['description'],
                thread_comments=thread_comments,
                review_comments=None,  # Initialize review comments as None for regular issues
            )
            converted_issues.append(issue_details)
        return converted_issues

    def download_issues(self) -> list[Any]:
        """Download all open issues for the project, following pagination.

        Returns:
            A flat list of raw issue dicts as returned by the REST API.

        Raises:
            ValueError: If the API returns something other than a list of dicts.
            httpx.HTTPStatusError: On a non-2xx API response.
        """
        # Track the page in a plain int local instead of asserting on the
        # params dict each iteration (assert is stripped under `python -O`).
        page = 1
        params: dict[str, int | str] = {
            'state': 'opened',
            'scope': 'all',
            'per_page': 100,
        }
        all_issues: list[Any] = []
        while True:
            params['page'] = page
            response = httpx.get(self.download_url, headers=self.headers, params=params)
            response.raise_for_status()
            issues = response.json()

            # An empty page means pagination is exhausted.
            if not issues:
                break

            if not isinstance(issues, list) or any(
                not isinstance(issue, dict) for issue in issues
            ):
                raise ValueError(
                    'Expected list of dictionaries from Service Gitlab API.'
                )

            all_issues.extend(issues)
            page += 1

        return all_issues

    def get_issue_comments(
        self, issue_number: int, comment_id: int | None = None
    ) -> list[str] | None:
        """Download comments for a specific issue from Gitlab.

        Args:
            issue_number: The issue iid whose notes are fetched.
            comment_id: If given, return only the body of that single note.

        Returns:
            A list of comment bodies, a single-element list when comment_id
            matched, or None when nothing was collected.
        """
        url = f'{self.download_url}/{issue_number}/notes'
        params = {'per_page': 100, 'page': 1}
        all_comments = []

        while True:
            response = httpx.get(url, headers=self.headers, params=params)
            response.raise_for_status()
            comments = response.json()
            if not comments:
                break

            # Explicit None check (rather than truthiness) so a comment id of
            # 0 would still be honored, matching get_pr_comments.
            if comment_id is not None:
                matching_comment = next(
                    (
                        comment['body']
                        for comment in comments
                        if comment['id'] == comment_id
                    ),
                    None,
                )
                if matching_comment:
                    return [matching_comment]
            else:
                all_comments.extend([comment['body'] for comment in comments])

            params['page'] += 1

        return all_comments if all_comments else None

    def branch_exists(self, branch_name: str) -> bool:
        """Return True if branch_name exists in the repository."""
        logger.info(f'Checking if branch {branch_name} exists...')
        response = httpx.get(
            f'{self.base_url}/repository/branches/{branch_name}', headers=self.headers
        )
        exists = response.status_code == 200
        logger.info(f'Branch {branch_name} exists: {exists}')
        return exists

    def get_branch_name(self, base_branch_name: str) -> str:
        """Return a branch name that does not yet exist on the remote.

        Starts with base_branch_name and appends '-try2', '-try3', ... until a
        free name is found.
        """
        branch_name = base_branch_name
        attempt = 1
        while self.branch_exists(branch_name):
            attempt += 1
            branch_name = f'{base_branch_name}-try{attempt}'
        return branch_name

    def reply_to_comment(self, pr_number: int, comment_id: str, reply: str) -> None:
        """Append a reply note to an existing merge-request discussion.

        Args:
            pr_number: The merge request iid.
            comment_id: Discussion identifier; only the last '/'-separated
                segment is used (presumably a GraphQL gid — TODO confirm).
            reply: The reply text; posted below a fixed summary header.
        """
        response = httpx.get(
            f'{self.base_url}/merge_requests/{pr_number}/discussions/{comment_id.split("/")[-1]}',
            headers=self.headers,
        )
        response.raise_for_status()
        discussions = response.json()
        if len(discussions.get('notes', [])) > 0:
            # Reply to the most recent note in the discussion.
            data = {
                'body': f'Openhands fix success summary\n\n\n{reply}',
                'note_id': discussions.get('notes', [])[-1]['id'],
            }

            response = httpx.post(
                f'{self.base_url}/merge_requests/{pr_number}/discussions/{comment_id.split("/")[-1]}/notes',
                headers=self.headers,
                json=data,
            )
            response.raise_for_status()

    def get_pull_url(self, pr_number: int) -> str:
        """Return the web URL of a merge request."""
        return f'https://{self.base_domain}/{self.owner}/{self.repo}/-/merge_requests/{pr_number}'

    def get_default_branch_name(self) -> str:
        """Return the repository's default branch name from the project API."""
        response = httpx.get(f'{self.base_url}', headers=self.headers)
        response.raise_for_status()
        data = response.json()
        return str(data['default_branch'])

    def create_pull_request(self, data: dict[str, Any] | None = None) -> dict[str, Any]:
        """Create a merge request and normalize the response to GitHub-style keys.

        Args:
            data: JSON payload for the merge-request creation endpoint.

        Returns:
            The MR payload, with 'html_url' mirroring 'web_url' and 'number'
            mirroring 'iid' so downstream code can treat it like a GitHub PR.

        Raises:
            RuntimeError: On a 403 (token lacks push permission).
            httpx.HTTPStatusError: On any other non-2xx response.
        """
        if data is None:
            data = {}
        response = httpx.post(
            f'{self.base_url}/merge_requests', headers=self.headers, json=data
        )
        if response.status_code == 403:
            raise RuntimeError(
                'Failed to create pull request due to missing permissions. '
                'Make sure that the provided token has push permissions for the repository.'
            )
        response.raise_for_status()
        pr_data = response.json()

        if 'web_url' in pr_data:
            pr_data['html_url'] = pr_data['web_url']
        if 'iid' in pr_data:
            pr_data['number'] = pr_data['iid']

        return dict(pr_data)

    def request_reviewers(self, reviewer: str, pr_number: int) -> None:
        """Assign a reviewer (looked up by username) to a merge request.

        A failed assignment is logged as a warning, not raised — review
        requests are best-effort.
        """
        response = httpx.get(
            f'https://{self.base_domain}/api/v4/users?username={reviewer}',
            headers=self.headers,
        )
        response.raise_for_status()
        user_data = response.json()
        if len(user_data) > 0:
            review_data = {'reviewer_ids': [user_data[0]['id']]}
            review_response = httpx.put(
                f'{self.base_url}/merge_requests/{pr_number}',
                headers=self.headers,
                json=review_data,
            )
            if review_response.status_code != 200:
                logger.warning(
                    f'Failed to request review from {reviewer}: {review_response.text}'
                )

    def send_comment_msg(self, issue_number: int, msg: str) -> None:
        """Send a comment message to a GitLab issue or merge request.

        Args:
            issue_number: The issue or pull request number
            msg: The message content to post as a comment
        """
        # Post a comment on the PR
        comment_url = f'{self.base_url}/issues/{issue_number}/notes'
        comment_data = {'body': msg}
        comment_response = httpx.post(
            comment_url, headers=self.headers, json=comment_data
        )
        if comment_response.status_code != 201:
            logger.error(
                f'Failed to post comment: {comment_response.status_code} {comment_response.text}'
            )
        else:
            logger.info(f'Comment added to the PR: {msg}')

    def get_context_from_external_issues_references(
        self,
        closing_issues: list[str],
        closing_issue_numbers: list[int],
        issue_body: str,
        review_comments: list[str] | None,
        review_threads: list[ReviewThread],
        thread_comments: list[str] | None,
    ) -> list[str]:
        """Plain issues carry no external references; the PR handler overrides this."""
        return []
class GitlabPRHandler(GitlabIssueHandler):
    """Download and convert GitLab merge requests, including review context."""

    def __init__(
        self,
        owner: str,
        repo: str,
        token: str,
        username: str | None = None,
        base_domain: str = 'gitlab.com',
    ):
        """Initialize a GitLab PR handler.

        Args:
            owner: The owner of the repository
            repo: The name of the repository
            token: The GitLab personal access token
            username: Optional GitLab username
            base_domain: The domain for GitLab Enterprise (default: "gitlab.com")
        """
        super().__init__(owner, repo, token, username, base_domain)
        # Re-point the listing endpoint from issues to merge requests.
        self.download_url = f'{self.base_url}/merge_requests'

    def download_pr_metadata(
        self, pull_number: int, comment_id: int | None = None
    ) -> tuple[list[str], list[int], list[str] | None, list[ReviewThread], list[str]]:
        """Collect review/closing-issue context for a merge request.

        Retrieves information about:
        1. unresolved review comments (via GraphQL, since the REST API does
           not expose resolved status for discussions)
        2. referenced issues the pull request would close (via REST)

        Args:
            pull_number: The number of the pull request to query.
            comment_id: Optional ID of a specific comment to focus on; when
                given, only threads containing that comment are returned.

        Returns:
            A tuple of (closing issue bodies, closing issue numbers,
            review comment bodies or None, unresolved ReviewThreads,
            thread IDs the agent can reply to).
        """
        # TODO: grabbing the first 100 review threads and 100 notes per
        # thread; add pagination to retrieve all.
        response = httpx.get(
            f'{self.base_url}/merge_requests/{pull_number}/related_issues',
            headers=self.headers,
        )
        response.raise_for_status()
        closing_issues = response.json()
        closing_issues_bodies = [issue['description'] for issue in closing_issues]
        closing_issue_numbers = [
            issue['iid'] for issue in closing_issues
        ]  # Extract issue numbers

        query = """
                query($projectPath: ID!, $pr: String!) {
                    project(fullPath: $projectPath) {
                        mergeRequest(iid: $pr) {
                            webUrl
                            discussions(first: 100) {
                                edges {
                                    node {
                                        id
                                        resolved
                                        resolvable
                                        notes(first: 100) {
                                            nodes {
                                                body
                                                id
                                                position {
                                                    filePath
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            """

        project_path = f'{self.owner}/{self.repo}'
        variables = {'projectPath': project_path, 'pr': str(pull_number)}

        response = httpx.post(
            self.get_graphql_url(),
            json={'query': query, 'variables': variables},
            headers=self.headers,
        )
        response.raise_for_status()
        response_json = response.json()

        # Parse the response to get closing issue references and unresolved review comments
        pr_data = (
            response_json.get('data', {}).get('project', {}).get('mergeRequest', {})
        )

        # Review comment bodies are not collected separately here; threads
        # carry the text. Kept as None to match the declared return shape.
        review_bodies = None

        # Get unresolved review threads
        review_threads = []
        thread_ids = []  # Store thread IDs; agent replies to the thread

        raw_review_threads = pr_data.get('discussions', {}).get('edges', [])
        for thread in raw_review_threads:
            node = thread.get('node', {})
            if not node.get('resolved', True) and node.get(
                'resolvable', True
            ):  # Check if the review thread is unresolved
                thread_id = node.get('id')  # renamed from `id` to avoid shadowing the builtin
                thread_contains_comment_id = False
                my_review_threads = node.get('notes', {}).get('nodes', [])
                message = ''
                files = []
                for i, review_thread in enumerate(my_review_threads):
                    # Note ids are GraphQL gids; the numeric tail is compared
                    # against the REST comment_id.
                    if (
                        comment_id is not None
                        and int(review_thread['id'].split('/')[-1]) == comment_id
                    ):
                        thread_contains_comment_id = True

                    if (
                        i == len(my_review_threads) - 1
                    ):  # Check if it's the last thread in the thread
                        if len(my_review_threads) > 1:
                            message += '---\n'  # Add "---" before the last message if there's more than one thread
                        message += 'latest feedback:\n' + review_thread['body'] + '\n'
                    else:
                        message += (
                            review_thread['body'] + '\n'
                        )  # Add each thread in a new line

                    # Collect the distinct files the thread's notes point at.
                    file = review_thread.get('position', {})
                    file = file.get('filePath') if file is not None else None
                    if file and file not in files:
                        files.append(file)

                # Keep the thread when no specific comment was requested, or
                # when this thread contains the requested comment.
                if comment_id is None or thread_contains_comment_id:
                    unresolved_thread = ReviewThread(comment=message, files=files)
                    review_threads.append(unresolved_thread)
                    thread_ids.append(thread_id)

        return (
            closing_issues_bodies,
            closing_issue_numbers,
            review_bodies,
            review_threads,
            thread_ids,
        )

    # Override processing of downloaded issues
    def get_pr_comments(
        self, pr_number: int, comment_id: int | None = None
    ) -> list[str] | None:
        """Download comments for a specific pull request from Gitlab.

        Args:
            pr_number: The merge request iid whose notes are fetched.
            comment_id: If given, return only the body of that single note.

        Returns:
            A list of comment bodies, a single-element list when comment_id
            matched, or None when nothing was collected.
        """
        url = f'{self.base_url}/merge_requests/{pr_number}/notes'
        params = {'per_page': 100, 'page': 1}
        all_comments = []

        while True:
            response = httpx.get(url, headers=self.headers, params=params)
            response.raise_for_status()
            raw_comments = response.json()

            # BUGFIX: test the *raw* page for emptiness before filtering.
            # Previously the filtered list was tested, so a page made up
            # entirely of system notes ended pagination early and silently
            # dropped every later page.
            if not raw_comments:
                break

            # Keep only human, resolvable notes; drop system-generated ones.
            comments = [
                comment
                for comment in raw_comments
                if comment.get('resolvable', True) and not comment.get('system', True)
            ]

            if comment_id is not None:
                matching_comment = next(
                    (
                        comment['body']
                        for comment in comments
                        if comment['id'] == comment_id
                    ),
                    None,
                )
                if matching_comment:
                    return [matching_comment]
            else:
                all_comments.extend([comment['body'] for comment in comments])

            params['page'] += 1

        return all_comments if all_comments else None

    def get_context_from_external_issues_references(
        self,
        closing_issues: list[str],
        closing_issue_numbers: list[int],
        issue_body: str,
        review_comments: list[str] | None,
        review_threads: list[ReviewThread],
        thread_comments: list[str] | None,
    ) -> list[str]:
        """Append bodies of externally referenced issues to closing_issues.

        Scans the MR body, review comments, review threads, and thread
        comments for issue references, drops those already listed in
        closing_issue_numbers, fetches each remaining issue, and appends its
        description. Fetch failures are logged and skipped (best-effort).

        Returns:
            The (mutated) closing_issues list.
        """
        new_issue_references = []

        if issue_body:
            new_issue_references.extend(extract_issue_references(issue_body))

        if review_comments:
            for comment in review_comments:
                new_issue_references.extend(extract_issue_references(comment))

        if review_threads:
            for review_thread in review_threads:
                new_issue_references.extend(
                    extract_issue_references(review_thread.comment)
                )

        if thread_comments:
            for thread_comment in thread_comments:
                new_issue_references.extend(extract_issue_references(thread_comment))

        # De-duplicate and exclude issues the MR already closes.
        non_duplicate_references = set(new_issue_references)
        unique_issue_references = non_duplicate_references.difference(
            closing_issue_numbers
        )

        for issue_number in unique_issue_references:
            try:
                url = f'{self.base_url}/issues/{issue_number}'
                response = httpx.get(url, headers=self.headers)
                response.raise_for_status()
                issue_data = response.json()
                issue_body = issue_data.get('description', '')
                if issue_body:
                    closing_issues.append(issue_body)
            except httpx.HTTPError as e:
                logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')

        return closing_issues

    def get_converted_issues(
        self, issue_numbers: list[int] | None = None, comment_id: int | None = None
    ) -> list[Issue]:
        """Download the requested merge requests and convert them to Issues.

        Args:
            issue_numbers: The MR iids to download (required).
            comment_id: Optional single comment to focus thread collection on.

        Returns:
            List of Issue objects enriched with PR metadata (closing issues,
            review threads, thread ids, head branch, thread comments).

        Raises:
            ValueError: If no issue numbers are given.
        """
        if not issue_numbers:
            raise ValueError('Unspecified issue numbers')

        all_issues = self.download_issues()
        logger.info(f'Limiting resolving to issues {issue_numbers}.')
        all_issues = [issue for issue in all_issues if issue['iid'] in issue_numbers]

        converted_issues = []
        for issue in all_issues:
            # For PRs, body can be None
            if any(issue.get(key) is None for key in ['iid', 'title']):
                logger.warning(f'Skipping #{issue} as it is missing iid or title.')
                continue

            # Handle None body for PRs
            body = (
                issue.get('description') if issue.get('description') is not None else ''
            )
            (
                closing_issues,
                closing_issues_numbers,
                review_comments,
                review_threads,
                thread_ids,
            ) = self.download_pr_metadata(issue['iid'], comment_id=comment_id)
            head_branch = issue['source_branch']

            # Get PR thread comments
            thread_comments = self.get_pr_comments(issue['iid'], comment_id=comment_id)

            closing_issues = self.get_context_from_external_issues_references(
                closing_issues,
                closing_issues_numbers,
                body,
                review_comments,
                review_threads,
                thread_comments,
            )

            issue_details = Issue(
                owner=self.owner,
                repo=self.repo,
                number=issue['iid'],
                title=issue['title'],
                body=body,
                closing_issues=closing_issues,
                review_comments=review_comments,
                review_threads=review_threads,
                thread_ids=thread_ids,
                head_branch=head_branch,
                thread_comments=thread_comments,
            )

            converted_issues.append(issue_details)

        return converted_issues