"""
AI Review Service

This module provides functionality for AI-powered code review using the
Nebius Qwen2.5-72B-Instruct model.
"""

import concurrent.futures
import json
import logging
import os
import re

from dotenv import load_dotenv
from openai import OpenAI

# Module-level logger; handler and level configuration is left to the application.
logger = logging.getLogger(__name__)

# Load variables from a .env file (if one is found) into the environment
# before any service instance reads NEBIUS_API_KEY.
load_dotenv()


class AIReviewService:
    """
    Service for AI-powered code review using Nebius Qwen2.5-72B-Instruct model.

    Requests go through the OpenAI client pointed at the Nebius endpoint.
    If NEBIUS_API_KEY is not set, the instance is still created, but every
    review method returns an error result instead of calling the API.
    """

    # Connection and request settings shared by every API call.
    _BASE_URL = "https://api.studio.nebius.com/v1/"
    _MODEL = "Qwen/Qwen2.5-72B-Instruct"
    _MAX_TOKENS = 4000

    # Limits applied by review_repository().
    _MAX_FILES = 20
    _MAX_WORKERS = 5

    _UNAVAILABLE_ERROR = (
        'AI review service is not available. '
        'Please set NEBIUS_API_KEY in environment variables.'
    )

    # A markdown header of level two or deeper at the start of a line.
    _SECTION_HEADER_RE = re.compile(r'^ {0,3}#{2,}[ \t]*', re.MULTILINE)
    # "line 12" / "Lines 10-12" as whole words; captures the first number.
    _LINE_NUMBER_RE = re.compile(r'\blines?\s+(\d+)', re.IGNORECASE)

    _EXTENSION_TO_LANGUAGE = {
        '.py': 'Python',
        '.js': 'JavaScript',
        '.jsx': 'JavaScript',
        '.ts': 'TypeScript',
        '.tsx': 'TypeScript',
        '.java': 'Java',
        '.go': 'Go',
        '.rs': 'Rust',
        '.cpp': 'C++',
        '.cc': 'C++',
        '.c': 'C',
        '.h': 'C',
        '.hpp': 'C++',
        '.cs': 'C#',
        '.php': 'PHP',
        '.rb': 'Ruby',
    }

    # Extra guidance appended to the system prompt for specific languages.
    _LANGUAGE_HINTS = {
        'Python': "For Python code, pay special attention to PEP 8 compliance, proper exception handling, and Pythonic idioms.",
        'JavaScript': "For JavaScript/TypeScript code, focus on modern ES6+ practices, proper async handling, and potential type issues.",
        'TypeScript': "For JavaScript/TypeScript code, focus on modern ES6+ practices, proper async handling, and potential type issues.",
        'Java': "For Java code, examine object-oriented design, proper exception handling, and resource management.",
        'Go': "For Go code, check for idiomatic Go patterns, proper error handling, and concurrency issues.",
        'Rust': "For Rust code, verify memory safety, proper use of ownership/borrowing, and idiomatic Rust patterns.",
    }

    _BASE_SYSTEM_PROMPT = (
        "You are an expert code reviewer with deep knowledge of software development best practices, design patterns, and security.\n"
        "Your task is to review code and provide constructive, actionable feedback.\n"
        "Be thorough but prioritize the most important issues.\n"
        "Format your response in markdown with clear sections.\n"
        "For each suggestion, include the line number, the issue, and a recommended solution.\n"
        "Focus on:\n"
        "- Code quality and readability\n"
        "- Potential bugs and edge cases\n"
        "- Security vulnerabilities\n"
        "- Performance optimizations\n"
        "- Adherence to best practices\n"
        "\n"
        "Your feedback should be specific, actionable, and educational. Explain why each suggestion matters.\n"
        "Do not hallucinate vulnerabilities. Base claims on code patterns.\n"
    )

    _SUMMARY_SYSTEM_PROMPT = (
        "You are an expert code reviewer providing a summary of a repository review. "
        "Be concise, insightful, and actionable in your feedback. "
        "Format your response in markdown with clear sections."
    )

    _REVIEW_SECTIONS = (
        "\nPlease provide your review with the following sections:\n"
        "1. Code Quality: Assess the overall quality, readability, and maintainability.\n"
        "2. Potential Issues: Identify any bugs, edge cases, or potential problems.\n"
        "3. Security Concerns: Highlight any security vulnerabilities or risks.\n"
        "4. Performance Considerations: Note any performance bottlenecks or inefficiencies.\n"
        "5. Specific Suggestions: Provide concrete, actionable suggestions for improvement.\n"
    )

    _SUMMARY_SECTIONS = (
        "\nPlease provide a comprehensive summary of the code review, including:\n"
        "1. Overall code quality assessment\n"
        "2. Common patterns and issues found across the codebase\n"
        "3. Strengths of the codebase\n"
        "4. Areas for improvement\n"
        "5. Prioritized recommendations\n"
    )

    def __init__(self):
        """
        Initialize the AIReviewService.

        Reads NEBIUS_API_KEY from the environment and, when it is present,
        creates the OpenAI client for the Nebius endpoint. Without a key,
        ``client`` stays None and a warning is logged.
        """
        self.api_key = os.getenv('NEBIUS_API_KEY')
        # Always define the attribute so later code never hits AttributeError.
        self.client = None

        if not self.api_key:
            logger.warning("NEBIUS_API_KEY not found in environment variables. AI review will not be available.")
        else:
            self.client = OpenAI(base_url=self._BASE_URL, api_key=self.api_key)

        logger.info("Initialized AIReviewService")

    def is_available(self):
        """
        Check if the AI review service is available.

        Returns:
            bool: True if a non-empty API key was configured, False otherwise.
        """
        # bool() rather than "is not None": __init__ treats an empty key as
        # missing, so availability must agree with it.
        return bool(self.api_key)

    def review_code(self, file_path, file_content, language, context=None):
        """
        Review code using Qwen.

        Args:
            file_path (str): The path to the file being reviewed.
            file_content (str): The content of the file being reviewed.
            language (str): The programming language of the file.
            context (dict, optional): Additional context for the review
                (``'issues'`` and/or ``'vulnerabilities'`` lists).

        Returns:
            dict: ``{'status': 'success', 'review_text': str, 'suggestions': list}``
            on success, or ``{'status': 'error', 'error': str, 'suggestions': []}``
            when the service is unavailable or the request fails.
        """
        if not self.is_available():
            return {
                'status': 'error',
                'error': self._UNAVAILABLE_ERROR,
                'suggestions': [],
            }

        logger.info("Reviewing %s code in %s", language, file_path)

        try:
            prompt = self._prepare_prompt(file_path, file_content, language, context)
            review_text = self._chat(self._get_system_prompt(language), prompt)
            return {
                'status': 'success',
                'review_text': review_text,
                'suggestions': self._parse_review(review_text),
            }
        except Exception as e:
            # Boundary handler: callers expect an error dict, never an exception.
            logger.exception("Error calling Qwen API: %s", e)
            return {
                'status': 'error',
                'error': str(e),
                'suggestions': [],
            }

    def review_repository(self, repo_path, files, languages, analysis_results=None):
        """
        Review a repository using Qwen with parallel processing.

        At most ``_MAX_FILES`` files are reviewed, using up to ``_MAX_WORKERS``
        threads. A summary is generated from the per-file results afterwards.

        Args:
            repo_path (str): The path to the repository.
            files (list): A list of files to review.
            languages (list): A list of programming languages in the repository.
            analysis_results (dict, optional): Results from other analysis tools.

        Returns:
            dict: ``{'status', 'reviews', 'summary'}``; ``reviews`` maps each
            file path to the result of reviewing it.
        """
        if not self.is_available():
            return {
                'status': 'error',
                'error': self._UNAVAILABLE_ERROR,
                'reviews': {},
                'summary': '',
            }

        # Materialize so len()/slicing work for any iterable (and for None).
        files = list(files or [])
        logger.info("Reviewing repository at %s with %d files", repo_path, len(files))

        if len(files) > self._MAX_FILES:
            logger.warning(
                "Too many files to review (%d). Limiting to %d files.",
                len(files), self._MAX_FILES,
            )
            files = files[:self._MAX_FILES]

        reviews = {}
        # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely
        # when there is nothing to review.
        if files:
            worker_count = min(self._MAX_WORKERS, len(files))
            with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
                future_to_file = {
                    executor.submit(self._review_file, file_path, analysis_results): file_path
                    for file_path in files
                }
                for future in concurrent.futures.as_completed(future_to_file):
                    file_path = future_to_file[future]
                    try:
                        reviews[file_path] = future.result()
                        logger.info("Completed review for %s", file_path)
                    except Exception as e:
                        logger.exception("Exception occurred during review of %s: %s", file_path, e)
                        reviews[file_path] = {
                            'status': 'error',
                            'error': str(e),
                            'suggestions': [],
                        }

        summary = self._generate_repository_summary(repo_path, reviews, languages, analysis_results)

        return {
            'status': 'success',
            'reviews': reviews,
            'summary': summary,
        }

    def _review_file(self, file_path, analysis_results=None):
        """
        Read one file from disk and review it.

        Args:
            file_path (str): The path to the file to review.
            analysis_results (dict, optional): Results from other analysis tools.

        Returns:
            dict: The review result; failures are reported as an error dict
            rather than raised.
        """
        try:
            _, ext = os.path.splitext(file_path)
            language = self._get_language_from_extension(ext)
            # Decide on the language first so unsupported files are never read.
            if not language:
                return {
                    'status': 'error',
                    'error': f'Unsupported language for file {file_path}',
                    'suggestions': [],
                }

            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                file_content = f.read()

            context = None
            if analysis_results:
                context = self._extract_context_for_file(file_path, analysis_results)

            return self.review_code(file_path, file_content, language, context)
        except Exception as e:
            logger.exception("Error reviewing file %s: %s", file_path, e)
            return {
                'status': 'error',
                'error': str(e),
                'suggestions': [],
            }

    def _chat(self, system_prompt, user_prompt):
        """
        Send one system + user exchange to the model.

        Args:
            system_prompt (str): The system message.
            user_prompt (str): The user message.

        Returns:
            str: The reply text ('' if the reply carries no content).
        """
        response = self.client.chat.completions.create(
            model=self._MODEL,
            max_tokens=self._MAX_TOKENS,
            temperature=0,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        # The content field may be None; normalize so callers always get a str.
        return response.choices[0].message.content or ''

    @staticmethod
    def _format_finding(finding, default_title):
        """Render one tool finding as a bullet line for the prompt."""
        return (
            f"- {finding.get('issue', default_title)} at line {finding.get('line', 'unknown')}: "
            f"{finding.get('description', '')}\n"
        )

    def _prepare_prompt(self, file_path, file_content, language, context=None):
        """
        Prepare a prompt for Qwen.

        Args:
            file_path (str): The path to the file being reviewed.
            file_content (str): The content of the file being reviewed.
            language (str): The programming language of the file.
            context (dict, optional): Additional context for the review.

        Returns:
            str: The prompt for Qwen.
        """
        parts = [
            f"Please review the following {language} code and provide constructive feedback:\n",
            "\n",
            f"File: {file_path}\n",
            "\n",
            f"```{language}\n",
            f"{file_content}\n",
            "```\n",
            "\n",
        ]

        issues = (context or {}).get('issues') or []
        vulnerabilities = (context or {}).get('vulnerabilities') or []

        # Only mention tool findings that actually exist; empty headers add
        # noise to the prompt without giving the model anything to use.
        if issues or vulnerabilities:
            parts.append("Additional context:\n")
            if issues:
                parts.append("\nIssues detected by other tools:\n")
                parts.extend(self._format_finding(issue, 'Unknown issue') for issue in issues)
            if vulnerabilities:
                parts.append("\nSecurity vulnerabilities detected:\n")
                parts.extend(
                    self._format_finding(vuln, 'Unknown vulnerability') for vuln in vulnerabilities
                )

        parts.append(self._REVIEW_SECTIONS)
        return ''.join(parts)

    def _get_system_prompt(self, language):
        """
        Get the system prompt for Qwen based on the programming language.

        Args:
            language (str): The programming language.

        Returns:
            str: The base system prompt, followed by a language-specific
            hint when one is defined for ``language``.
        """
        hint = self._LANGUAGE_HINTS.get(language)
        if hint is None:
            return self._BASE_SYSTEM_PROMPT
        return f"{self._BASE_SYSTEM_PROMPT}\n{hint}"

    def _parse_review(self, review_text):
        """
        Parse the review text from Qwen to extract structured suggestions.

        The text is split into sections at markdown headers of level two or
        deeper. Within a section, a line that starts with ``- `` or ``* ``,
        or that mentions "line N", opens a new suggestion; other non-blank
        lines are appended to the current suggestion's details.

        Args:
            review_text (str): The review text from Qwen.

        Returns:
            list: Dicts with ``section``, ``line`` (int or None),
            ``description`` and ``details`` keys. Empty for empty input.
        """
        if not review_text:
            return []

        suggestions = []
        for section in self._SECTION_HEADER_RE.split(review_text):
            section = section.strip()
            if not section:
                continue

            title, *body = (line.strip() for line in section.split('\n'))

            current = None
            for line in body:
                if not line:
                    continue

                line_match = self._LINE_NUMBER_RE.search(line)
                if line_match or line.startswith(('- ', '* ')):
                    if current is not None:
                        suggestions.append(current)
                    current = {
                        'section': title,
                        'line': int(line_match.group(1)) if line_match else None,
                        'description': line,
                        'details': '',
                    }
                elif current is not None:
                    current['details'] += line + '\n'

            if current is not None:
                suggestions.append(current)

        return suggestions

    def _get_language_from_extension(self, extension):
        """
        Get the programming language from a file extension.

        Args:
            extension (str): The file extension, including the leading dot.
                Matching is case-insensitive.

        Returns:
            str: The programming language, or None if unknown.
        """
        if not extension:
            return None
        return self._EXTENSION_TO_LANGUAGE.get(extension.lower())

    @staticmethod
    def _findings_for_file(results_by_language, key, file_path):
        """Collect entries under ``key`` whose 'file' equals ``file_path``."""
        return [
            finding
            for language_results in (results_by_language or {}).values()
            for finding in language_results.get(key, [])
            if finding.get('file', '') == file_path
        ]

    def _extract_context_for_file(self, file_path, analysis_results):
        """
        Extract relevant context for a file from analysis results.

        Args:
            file_path (str): The path to the file. It is compared verbatim
                with each finding's ``'file'`` value.
            analysis_results (dict): Results from other analysis tools.

        Returns:
            dict: ``{'issues': [...], 'vulnerabilities': [...]}``. Code-analysis
            issues come first, followed by performance issues.
        """
        context = {
            'issues': [],
            'vulnerabilities': [],
        }

        if 'code_analysis' in analysis_results:
            context['issues'].extend(
                self._findings_for_file(analysis_results['code_analysis'], 'issues', file_path)
            )

        if 'security_scan' in analysis_results:
            context['vulnerabilities'].extend(
                self._findings_for_file(analysis_results['security_scan'], 'vulnerabilities', file_path)
            )

        if 'performance_analysis' in analysis_results:
            performance = analysis_results['performance_analysis'] or {}
            context['issues'].extend(
                self._findings_for_file(performance.get('language_results', {}), 'issues', file_path)
            )

        return context

    @staticmethod
    def _sum_counts(results_by_language, key):
        """Add up the ``key`` counter across all per-language results."""
        return sum(result.get(key, 0) for result in (results_by_language or {}).values())

    def _generate_repository_summary(self, repo_path, reviews, languages, analysis_results=None):
        """
        Generate a summary of the repository review.

        Args:
            repo_path (str): The path to the repository.
            reviews (dict): The review results for each file.
            languages (list): A list of programming languages in the repository.
            analysis_results (dict, optional): Results from other analysis tools.

        Returns:
            str: A summary of the repository review, or a message describing
            why no summary could be produced.
        """
        if not self.is_available():
            return self._UNAVAILABLE_ERROR

        # str() each entry so a non-string language cannot break the join.
        language_list = ', '.join(str(language) for language in (languages or []))
        parts = [
            f"Please provide a summary of the code review for the repository at {repo_path}.\n",
            "\n",
            f"Languages used in the repository: {language_list}\n",
            "\n",
            "\nFiles reviewed:\n",
        ]

        for file_path, review in reviews.items():
            if review.get('status') == 'success':
                suggestion_count = len(review.get('suggestions', []))
                parts.append(f"- {file_path}: {suggestion_count} suggestions\n")
            else:
                parts.append(f"- {file_path}: Error - {review.get('error', 'Unknown error')}\n")

        if analysis_results:
            parts.append("\nAnalysis results summary:\n")

            if 'code_analysis' in analysis_results:
                total_issues = self._sum_counts(analysis_results['code_analysis'], 'issue_count')
                parts.append(f"- Code quality issues: {total_issues}\n")

            if 'security_scan' in analysis_results:
                total_vulns = self._sum_counts(analysis_results['security_scan'], 'vulnerability_count')
                parts.append(f"- Security vulnerabilities: {total_vulns}\n")

            if 'performance_analysis' in analysis_results:
                performance = analysis_results['performance_analysis'] or {}
                total_perf_issues = self._sum_counts(performance.get('language_results', {}), 'issue_count')
                parts.append(f"- Performance issues: {total_perf_issues}\n")

        parts.append(self._SUMMARY_SECTIONS)

        try:
            return self._chat(self._SUMMARY_SYSTEM_PROMPT, ''.join(parts))
        except Exception as e:
            # Boundary handler: the summary is best-effort and must not abort
            # the repository review that has already completed.
            logger.exception("Error generating repository summary: %s", e)
            return f"Error generating repository summary: {e}"