import json
from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import PRHandler
@pytest.fixture
def pr_handler():
llm_config = LLMConfig(model='test-model')
return PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
@pytest.fixture
def mock_llm_success_response():
return MagicMock(
choices=[
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes look good"""
)
)
]
)
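
# The module-level fixtures above are not exercised by the tests that follow.
# The sketch below (an illustrative addition, not part of the original suite)
# shows how they could be combined; the GithubIssue fields mirror those used in
# test_guess_success_thread_comments_litellm_call, and the assertion mirrors the
# explanation check used there.
def test_guess_success_with_module_fixtures(pr_handler, mock_llm_success_response):
    issue = GithubIssue(
        owner='test-owner',
        repo='test-repo',
        number=1,
        title='Test PR',
        body='Test Body',
        thread_comments=['Please fix the formatting'],
        closing_issues=['Issue 1 description'],
        review_comments=None,
        thread_ids=None,
        head_branch='test-branch',
    )
    history = [MessageAction(content='I have fixed the formatting')]
    with patch.object(LLM, 'completion', return_value=mock_llm_success_response):
        success, success_list, explanation = pr_handler.guess_success(issue, history)
    # A single thread-comments group yields a single explanation entry.
    assert len(json.loads(explanation)) == 1
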
def test_guess_success_review_threads_litellm_call():
"""Test that the completion() call for review threads contains the expected content."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create a mock issue with review threads
issue = GithubIssue(
owner='test-owner',
repo='test-repo',
number=1,
title='Test PR',
body='Test Body',
thread_comments=None,
closing_issues=['Issue 1 description', 'Issue 2 description'],
review_comments=None,
review_threads=[
ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
),
ReviewThread(
comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
files=['/tests/test_file.py'],
),
],
thread_ids=['1', '2'],
head_branch='test-branch',
)
# Create mock history with a detailed response
history = [
MessageAction(
content="""I have made the following changes:
1. Fixed formatting in file1.py and file2.py
2. Added docstrings to all functions
3. Added test cases in test_file.py"""
)
]
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes successfully address the feedback."""
)
)
]
# Test the guess_success method
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, success_list, explanation = handler.guess_success(issue, history)
# Verify the completion() calls
assert mock_completion.call_count == 2 # One call per review thread
# Check first call
first_call = mock_completion.call_args_list[0]
first_prompt = first_call[1]['messages'][0]['content']
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in first_prompt
)
assert (
'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
in first_prompt
)
assert (
'Files locations:\n'
+ json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
in first_prompt
)
assert 'Last message from AI agent:\n' + history[0].content in first_prompt
# Check second call
second_call = mock_completion.call_args_list[1]
second_prompt = second_call[1]['messages'][0]['content']
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in second_prompt
)
assert (
'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
in second_prompt
)
assert (
'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
in second_prompt
)
assert 'Last message from AI agent:\n' + history[0].content in second_prompt
assert len(json.loads(explanation)) == 2
def test_guess_success_thread_comments_litellm_call():
"""Test that the completion() call for thread comments contains the expected content."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create a mock issue with thread comments
issue = GithubIssue(
owner='test-owner',
repo='test-repo',
number=1,
title='Test PR',
body='Test Body',
thread_comments=[
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
],
closing_issues=['Issue 1 description', 'Issue 2 description'],
review_comments=None,
thread_ids=None,
head_branch='test-branch',
)
# Create mock history with a detailed response
history = [
MessageAction(
content="""I have made the following changes:
1. Added try/catch blocks for error handling
2. Added input validation checks
3. Added handling for edge cases"""
)
]
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes successfully address the feedback."""
)
)
]
# Test the guess_success method
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, success_list, explanation = handler.guess_success(issue, history)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in prompt
)
assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
assert 'Last message from AI agent:\n' + history[0].content in prompt
assert len(json.loads(explanation)) == 1
def test_check_feedback_with_llm():
"""Test the _check_feedback_with_llm helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Test cases for different LLM responses
test_cases = [
{
'response': '--- success\ntrue\n--- explanation\nChanges look good',
'expected': (True, 'Changes look good'),
},
{
'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
'expected': (False, 'Not all issues fixed'),
},
{
'response': 'Invalid response format',
'expected': (
False,
'Failed to decode answer from LLM response: Invalid response format',
),
},
{
'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
'expected': (True, 'Multiline\nexplanation\nhere'),
},
]
for case in test_cases:
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]
# Test the function
with patch.object(LLM, 'completion', return_value=mock_response):
success, explanation = handler._check_feedback_with_llm('test prompt')
assert (success, explanation) == case['expected']
def test_check_review_thread_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_thread = ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
)
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the formatting and added docstrings'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""Added docstring."""\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_thread(
review_thread, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'Feedback:\n' + review_thread.comment in prompt
assert (
'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
)
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_thread():
"""Test the _check_review_thread helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_thread = ReviewThread(
comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
files=['/src/file1.py', '/src/file2.py'],
)
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the formatting and added docstrings'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_thread(
review_thread, issues_context, last_message
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'Feedback:\n' + review_thread.comment in prompt
assert (
'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
)
assert 'Last message from AI agent:\n' + last_message in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_thread_comments_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
thread_comments = [
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have added error handling and input validation'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+try:\n+ validate_input()\n+except ValueError:\n+ handle_error()\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_thread_comments(
thread_comments, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_thread_comments():
"""Test the _check_thread_comments helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
thread_comments = [
'Please improve error handling',
'Add input validation',
'latest feedback:\nHandle edge cases',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have added error handling and input validation'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_thread_comments(
thread_comments, issues_context, last_message
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_comments_with_git_patch():
"""Test that git patch from complete_runtime is included in the prompt."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_comments = [
'Please fix the code style',
'Add more test cases',
'latest feedback:\nImprove documentation',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have fixed the code style and added tests'
git_patch = 'diff --git a/src/file1.py b/src/file1.py\n+"""This module does X."""\n+def func():\n+ """Do Y."""\n'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_comments(
review_comments, issues_context, last_message, git_patch
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
assert 'Changes made (git patch):\n' + git_patch in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_check_review_comments():
"""Test the _check_review_comments helper function."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create test data
review_comments = [
'Please improve code readability',
'Add comments to complex functions',
'Follow PEP 8 style guide',
]
issues_context = json.dumps(
['Issue 1 description', 'Issue 2 description'], indent=4
)
last_message = 'I have improved code readability and added comments'
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
Changes look good"""
)
)
]
# Test the function
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, explanation = handler._check_review_comments(
review_comments, issues_context, last_message
)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert 'Issue descriptions:\n' + issues_context in prompt
assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
assert 'Last message from AI agent:\n' + last_message in prompt
# Check result
assert success is True
assert explanation == 'Changes look good'
def test_guess_success_review_comments_litellm_call():
"""Test that the completion() call for review comments contains the expected content."""
# Create a PR handler instance
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
# Create a mock issue with review comments
issue = GithubIssue(
owner='test-owner',
repo='test-repo',
number=1,
title='Test PR',
body='Test Body',
thread_comments=None,
closing_issues=['Issue 1 description', 'Issue 2 description'],
review_comments=[
'Please improve code readability',
'Add comments to complex functions',
'Follow PEP 8 style guide',
],
thread_ids=None,
head_branch='test-branch',
)
# Create mock history with a detailed response
history = [
MessageAction(
content="""I have made the following changes:
1. Improved code readability by breaking down complex functions
2. Added detailed comments to all complex functions
3. Fixed code style to follow PEP 8"""
)
]
# Mock the LLM response
mock_response = MagicMock()
mock_response.choices = [
MagicMock(
message=MagicMock(
content="""--- success
true
--- explanation
The changes successfully address the feedback."""
)
)
]
# Test the guess_success method
with patch.object(LLM, 'completion') as mock_completion:
mock_completion.return_value = mock_response
success, success_list, explanation = handler.guess_success(issue, history)
# Verify the completion() call
mock_completion.assert_called_once()
call_args = mock_completion.call_args
prompt = call_args[1]['messages'][0]['content']
# Check prompt content
assert (
'Issue descriptions:\n'
+ json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
in prompt
)
assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
assert 'Last message from AI agent:\n' + history[0].content in prompt
assert len(json.loads(explanation)) == 1