#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Report Generator Service

This module provides functionality for generating comprehensive code review
reports in various formats (JSON, HTML, CSV) based on the analysis results.
"""

import csv
import datetime
import html
import json
import logging
import os
import re
from pathlib import Path  # noqa: F401  (unused here; kept so existing importers keep working)

try:
    import markdown
except ImportError:  # Only HTML output needs it; JSON/CSV reports still work.
    markdown = None

logger = logging.getLogger(__name__)

# Report formats understood by ReportGenerator.generate_report, in write order.
_SUPPORTED_FORMATS = ("json", "html", "csv")

# Severities that count as "critical" in metrics, recommendations and the
# critical-vulnerability listing.
_CRITICAL_SEVERITIES = frozenset({"critical", "high"})

# Sort rank per severity (lower sorts first); unknown severities sort last.
_SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
_UNKNOWN_SEVERITY_RANK = 5

# Maximum number of recommendations kept per priority bucket.
_MAX_RECOMMENDATIONS = 15

# (metric name, maximum deduction, weight applied to the per-file ratio).
_SCORE_DEDUCTIONS = (
    ("total_code_issues", 30, 100),
    ("critical_code_issues", 20, 200),
    ("total_vulnerabilities", 30, 150),
    ("critical_vulnerabilities", 40, 300),
    ("total_performance_issues", 20, 80),
    ("performance_hotspots", 10, 100),
)

# (minimum score, rating), checked top-down; anything lower is "Poor".
_QUALITY_RATINGS = (
    (90, "Excellent"),
    (80, "Good"),
    (70, "Satisfactory"),
    (50, "Needs Improvement"),
)

_CSV_FIELDS = [
    "Type",
    "Language",
    "Severity",
    "Issue",
    "Description",
    "File",
    "Line",
    "Recommendation",
]

_REPORT_CSS = """
body { font-family: -apple-system, "Segoe UI", Helvetica, Arial, sans-serif;
       line-height: 1.5; max-width: 1100px; margin: 2rem auto; padding: 0 1rem;
       color: #24292e; }
h1, h2, h3 { border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }
table { border-collapse: collapse; width: 100%; margin: 1rem 0; }
th, td { border: 1px solid #dfe2e5; padding: 6px 13px; text-align: left; }
th { background-color: #f6f8fa; }
code, pre { background-color: #f6f8fa; border-radius: 3px; padding: 0.2em 0.4em; }
"""


def _severity_of(item):
    """Return the lower-cased severity of *item*, or "" when missing/None."""
    return str(item.get("severity") or "").lower()


def _severity_rank(item):
    """Return the sort rank of *item*'s severity (unknown severities last)."""
    return _SEVERITY_ORDER.get(_severity_of(item), _UNKNOWN_SEVERITY_RANK)


class ReportGenerator:
    """
    Service for generating code review reports in various formats.
    """

    def __init__(self, output_dir="reports"):
        """
        Initialize the ReportGenerator.

        Args:
            output_dir (str): Directory to save generated reports. A relative
                path is resolved against the directory two levels above this
                module; the directory is created if it does not exist.
        """
        if not os.path.isabs(output_dir):
            project_root = os.path.abspath(
                os.path.join(os.path.dirname(__file__), "..", "..")
            )
            output_dir = os.path.join(project_root, output_dir)
        self.output_dir = output_dir

        os.makedirs(self.output_dir, exist_ok=True)
        logger.info(
            "Initialized ReportGenerator with output directory: %s", self.output_dir
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def generate_report(self, repo_name, results, format_type="all"):
        """
        Generate a report based on the analysis results.

        Args:
            repo_name (str): Name of the repository.
            results (dict): Analysis results.
            format_type (str): Report format type (json, html, csv, or all).

        Returns:
            dict: Paths to the generated reports, keyed by format. Empty when
                ``format_type`` is not a supported format.

        Raises:
            ImportError: If an HTML report is requested but the ``markdown``
                package is not installed.
        """
        if format_type == "all":
            formats = _SUPPORTED_FORMATS
        elif format_type in _SUPPORTED_FORMATS:
            formats = (format_type,)
        else:
            logger.warning(
                "Unsupported report format %r; expected one of %s or 'all'",
                format_type,
                ", ".join(_SUPPORTED_FORMATS),
            )
            return {}

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        # The repository name may look like "owner/repo"; it must not be able
        # to introduce path components into the output file name.
        report_name = f"{self._safe_filename(repo_name)}_{timestamp}"
        report_content = self._create_report_content(repo_name, results)

        writers = {
            "json": self._generate_json_report,
            "html": self._generate_html_report,
            "csv": self._generate_csv_report,
        }
        report_paths = {
            fmt: writers[fmt](report_name, report_content) for fmt in formats
        }

        logger.info("Generated %d report(s) for %s", len(report_paths), repo_name)
        return report_paths

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _safe_filename(name):
        """Return *name* with every run of path-unsafe characters replaced by "_"."""
        return re.sub(r"[^\w.\-]+", "_", str(name)) or "report"

    @staticmethod
    def _security_results(results):
        """Return the per-language security results ("security_scan", else legacy "security")."""
        return results.get("security_scan") or results.get("security") or {}

    @staticmethod
    def _performance_results(results):
        """Return the performance results ("performance_analysis", else legacy "performance")."""
        return results.get("performance_analysis") or results.get("performance") or {}

    @staticmethod
    def _iter_language_results(by_language):
        """Yield ``(language, results)`` pairs, skipping entries that are not dicts."""
        for language, language_results in (by_language or {}).items():
            if isinstance(language_results, dict):
                yield language, language_results

    @staticmethod
    def _md_cell(value):
        """Render *value* as text that cannot break a Markdown table row."""
        text = str(value)
        return text.replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    @classmethod
    def _md_table(cls, headers, rows):
        """Return the Markdown lines of a table with *headers* and *rows*."""
        lines = [
            "| " + " | ".join(headers) + " |",
            "| " + " | ".join("-" * max(3, len(h)) for h in headers) + " |",
        ]
        lines.extend(
            "| " + " | ".join(cls._md_cell(value) for value in row) + " |"
            for row in rows
        )
        return lines

    # ------------------------------------------------------------------
    # Report content
    # ------------------------------------------------------------------

    def _create_report_content(self, repo_name, results):
        """
        Create the content for the report.

        Args:
            repo_name (str): Name of the repository.
            results (dict): Analysis results.

        Returns:
            dict: Structured report content with the sections ``metadata``,
                ``summary``, ``code_quality``, ``security``, ``performance``,
                ``ai_review`` and ``recommendations``.
        """
        repo_info = results.get("repository_info", {})
        code_analysis = results.get("code_analysis", {})
        # Same sources as the metrics and recommendations, so every section
        # of the report describes the same data.
        security_scan = self._security_results(results)
        performance_analysis = self._performance_results(results)
        ai_review = results.get("ai_review", {})
        ai_summary = ai_review.get("summary", "No AI review summary available.")

        language_breakdown = {}
        for language, lang_result in self._iter_language_results(code_analysis):
            if lang_result.get("status") == "error":
                continue
            issues = lang_result.get("issues", [])
            language_breakdown[language] = {
                # Distinct files that have at least one reported issue.
                "files": len({issue["file"] for issue in issues if "file" in issue}),
                "lines": lang_result.get("total_lines", 0),
                "percentage": lang_result.get("percentage", 0),
                "issues": len(issues),
            }

        return {
            "metadata": {
                "repository_name": repo_name,
                "report_date": datetime.datetime.now().isoformat(),
                "repository_info": repo_info,
            },
            "summary": {
                "metrics": self._calculate_summary_metrics(results),
                "language_breakdown": language_breakdown,
                "executive_summary": ai_summary,
            },
            "code_quality": {
                "issues_by_language": code_analysis,
                "top_issues": self._extract_top_issues(code_analysis),
            },
            "security": {
                "vulnerabilities_by_language": security_scan,
                "critical_vulnerabilities": self._extract_critical_vulnerabilities(
                    security_scan
                ),
            },
            "performance": {
                "issues_by_language": performance_analysis.get("language_results", {}),
                "hotspots": performance_analysis.get("hotspots", []),
            },
            "ai_review": {
                "file_reviews": ai_review.get("reviews", {}),
                "summary": ai_summary,
            },
            "recommendations": self._generate_recommendations(results),
        }

    def _calculate_summary_metrics(self, results):
        """
        Calculate summary metrics from the analysis results.

        Args:
            results (dict): Analysis results.

        Returns:
            dict: Summary metrics: file/size totals, issue and vulnerability
                counts, ``overall_score`` (0-100) and ``quality_rating``.
        """
        repo_info = results.get("repository_info", {})
        total_files = repo_info.get("file_count", 0)
        metrics = {
            "total_files": total_files,
            "repository_size": repo_info.get("size", repo_info.get("size_bytes", 0)),
        }

        # Code quality issues. Explicit counts win; otherwise count the list so
        # the total can never be smaller than the critical count.
        total_issues = 0
        critical_issues = 0
        for _, lang_result in self._iter_language_results(
            results.get("code_analysis", {})
        ):
            issues = lang_result.get("issues", [])
            total_issues += lang_result.get("issue_count", len(issues))
            critical_issues += sum(
                1 for issue in issues if _severity_of(issue) in _CRITICAL_SEVERITIES
            )
        metrics["total_code_issues"] = total_issues
        metrics["critical_code_issues"] = critical_issues

        # Security vulnerabilities.
        total_vulnerabilities = 0
        critical_vulnerabilities = 0
        for _, lang_result in self._iter_language_results(
            self._security_results(results)
        ):
            vulnerabilities = lang_result.get("vulnerabilities", [])
            total_vulnerabilities += lang_result.get(
                "vulnerability_count", len(vulnerabilities)
            )
            critical_vulnerabilities += sum(
                1 for vuln in vulnerabilities if _severity_of(vuln) in _CRITICAL_SEVERITIES
            )
        metrics["total_vulnerabilities"] = total_vulnerabilities
        metrics["critical_vulnerabilities"] = critical_vulnerabilities

        # Performance issues and hotspots.
        performance_analysis = self._performance_results(results)
        metrics["total_performance_issues"] = sum(
            lang_result.get("issue_count", len(lang_result.get("issues", [])))
            for _, lang_result in self._iter_language_results(
                performance_analysis.get("language_results", {})
            )
        )
        metrics["performance_hotspots"] = len(performance_analysis.get("hotspots", []))

        # Overall score: start at 100 and subtract capped, per-file-normalized
        # deductions. NOTE: with an unknown/zero file count nothing can be
        # normalized, so no deductions are applied and the score stays at 100.
        deductions = 0
        if total_files > 0:
            deductions = sum(
                min(cap, (metrics[name] / total_files) * weight)
                for name, cap, weight in _SCORE_DEDUCTIONS
            )
        score = round(max(0, min(100, 100 - deductions)), 2)
        metrics["overall_score"] = score
        metrics["quality_rating"] = next(
            (rating for threshold, rating in _QUALITY_RATINGS if score >= threshold),
            "Poor",
        )
        return metrics

    def _extract_top_issues(self, code_analysis, limit=10):
        """
        Extract the top code quality issues from the analysis results.

        Args:
            code_analysis (dict): Code analysis results keyed by language.
            limit (int): Maximum number of issues to extract.

        Returns:
            list: Copies of the most severe issues, each with a ``language``
                key added; ordered by severity, then by descending
                ``line_count``. The input is not modified.
        """
        all_issues = [
            {**issue, "language": language}
            for language, lang_result in self._iter_language_results(code_analysis)
            for issue in lang_result.get("issues", [])
        ]
        all_issues.sort(
            key=lambda issue: (_severity_rank(issue), -issue.get("line_count", 0))
        )
        return all_issues[:limit]

    def _extract_critical_vulnerabilities(self, security_scan, limit=10):
        """
        Extract critical security vulnerabilities from the scan results.

        Args:
            security_scan (dict): Security scan results keyed by language.
            limit (int): Maximum number of vulnerabilities to extract.

        Returns:
            list: Copies of the vulnerabilities whose severity is critical or
                high, each with a ``language`` key added; most severe first.
                The input is not modified.
        """
        critical = [
            {**vuln, "language": language}
            for language, lang_result in self._iter_language_results(security_scan)
            for vuln in lang_result.get("vulnerabilities", [])
            if _severity_of(vuln) in _CRITICAL_SEVERITIES
        ]
        critical.sort(key=_severity_rank)
        return critical[:limit]

    @staticmethod
    def _recommendation(rec_type, language, item, default_issue, default_advice):
        """Build one recommendation entry from an issue/vulnerability dict."""
        return {
            "type": rec_type,
            "language": language,
            "issue": item.get("issue", default_issue),
            "description": item.get("description", ""),
            "file": item.get("file", ""),
            "line": item.get("line", ""),
            "recommendation": item.get("recommendation", default_advice),
        }

    def _generate_recommendations(self, results):
        """
        Generate recommendations based on the analysis results.

        Args:
            results (dict): Analysis results.

        Returns:
            dict: Recommendations under ``high_priority``, ``medium_priority``
                and ``low_priority``; each list is capped at 15 entries.
        """
        high, medium, low = [], [], []

        # Critical/high security vulnerabilities -> high priority.
        for language, lang_result in self._iter_language_results(
            self._security_results(results)
        ):
            high.extend(
                self._recommendation(
                    "security",
                    language,
                    vuln,
                    "Unknown vulnerability",
                    "Fix this security vulnerability.",
                )
                for vuln in lang_result.get("vulnerabilities", [])
                if _severity_of(vuln) in _CRITICAL_SEVERITIES
            )

        # Critical/high code quality issues -> medium priority.
        for language, lang_result in self._iter_language_results(
            results.get("code_analysis", {})
        ):
            medium.extend(
                self._recommendation(
                    "code_quality",
                    language,
                    issue,
                    "Unknown issue",
                    "Address this code quality issue.",
                )
                for issue in lang_result.get("issues", [])
                if _severity_of(issue) in _CRITICAL_SEVERITIES
            )

        # Performance hotspots -> medium priority.
        performance_analysis = self._performance_results(results)
        hotspots = performance_analysis.get("hotspots", [])
        medium.extend(
            {
                "type": "performance",
                "language": hotspot.get("language", ""),
                "issue": "Performance Hotspot",
                "description": (
                    f"File contains {hotspot.get('issue_count', 0)} performance issues"
                ),
                "file": hotspot.get("file", ""),
                "recommendation": "Optimize this file to improve performance.",
            }
            for hotspot in hotspots
        )

        # Remaining performance issues -> low priority. Issues in a hotspot
        # file are skipped because the hotspot entry already covers them.
        hotspot_files = {hotspot.get("file", "") for hotspot in hotspots}
        for language, lang_result in self._iter_language_results(
            performance_analysis.get("language_results", {})
        ):
            low.extend(
                self._recommendation(
                    "performance",
                    language,
                    issue,
                    "Unknown issue",
                    "Consider optimizing this code.",
                )
                for issue in lang_result.get("issues", [])
                if issue.get("file", "") not in hotspot_files
            )

        # AI review suggestions: bucket by the section they were filed under.
        ai_review = results.get("ai_review", {})
        for file_path, review in ai_review.get("reviews", {}).items():
            for suggestion in review.get("suggestions", []):
                section = str(suggestion.get("section") or "").lower()
                if "security" in section:
                    bucket = high
                elif "performance" in section:
                    bucket = low
                else:
                    bucket = medium
                bucket.append(
                    {
                        "type": "ai_review",
                        "language": "",  # AI review doesn't specify language
                        "issue": suggestion.get("section", "AI Suggestion"),
                        "description": suggestion.get("description", ""),
                        "file": file_path,
                        "line": suggestion.get("line", ""),
                        "recommendation": suggestion.get("details", ""),
                    }
                )

        return {
            "high_priority": high[:_MAX_RECOMMENDATIONS],
            "medium_priority": medium[:_MAX_RECOMMENDATIONS],
            "low_priority": low[:_MAX_RECOMMENDATIONS],
        }

    # ------------------------------------------------------------------
    # Writers
    # ------------------------------------------------------------------

    def _generate_json_report(self, report_name, report_content):
        """
        Generate a JSON report.

        Args:
            report_name (str): Name of the report (file name without extension).
            report_content (dict): Report content.

        Returns:
            str: Path to the generated report.
        """
        report_path = os.path.join(self.output_dir, f"{report_name}.json")
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(report_content, f, indent=2, ensure_ascii=False)

        logger.info("Generated JSON report: %s", report_path)
        return report_path

    def _generate_html_report(self, report_name, report_content):
        """
        Generate an HTML report.

        The report is rendered to Markdown first and then converted to HTML
        with the ``markdown`` package.

        Args:
            report_name (str): Name of the report (file name without extension).
            report_content (dict): Report content.

        Returns:
            str: Path to the generated report.

        Raises:
            ImportError: If the ``markdown`` package is not installed.
        """
        if markdown is None:
            raise ImportError(
                "The 'markdown' package is required to generate HTML reports."
            )

        report_path = os.path.join(self.output_dir, f"{report_name}.html")
        md_content = self._convert_to_markdown(report_content)
        # NOTE(review): Python-Markdown passes raw HTML in the source through
        # unchanged, so HTML inside issue texts ends up in the page as-is.
        body_html = markdown.markdown(md_content, extensions=["tables", "fenced_code"])

        title = html.escape(str(report_content["metadata"]["repository_name"]))
        document = (
            "<!DOCTYPE html>\n"
            '<html lang="en">\n'
            "<head>\n"
            '<meta charset="utf-8">\n'
            f"<title>Code Review Report: {title}</title>\n"
            f"<style>{_REPORT_CSS}</style>\n"
            "</head>\n"
            "<body>\n"
            f"{body_html}\n"
            "</body>\n"
            "</html>\n"
        )

        with open(report_path, "w", encoding="utf-8") as f:
            f.write(document)

        logger.info("Generated HTML report: %s", report_path)
        return report_path

    @staticmethod
    def _csv_row(row_type, language, severity, item):
        """Build one CSV row from an issue/vulnerability dict."""
        return {
            "Type": row_type,
            "Language": language,
            "Severity": severity,
            "Issue": item.get("issue", ""),
            "Description": item.get("description", ""),
            "File": item.get("file", ""),
            "Line": item.get("line", ""),
            "Recommendation": item.get("recommendation", ""),
        }

    def _generate_csv_report(self, report_name, report_content):
        """
        Generate a CSV report with issues and recommendations.

        Rows are written in this order: code quality issues, security
        vulnerabilities, performance issues, AI review suggestions.

        Args:
            report_name (str): Name of the report (file name without extension).
            report_content (dict): Report content.

        Returns:
            str: Path to the generated report.
        """
        report_path = os.path.join(self.output_dir, f"{report_name}.csv")
        rows = []

        for language, lang_result in self._iter_language_results(
            report_content["code_quality"]["issues_by_language"]
        ):
            rows.extend(
                self._csv_row("Code Quality", language, issue.get("severity", ""), issue)
                for issue in lang_result.get("issues", [])
            )

        for language, lang_result in self._iter_language_results(
            report_content["security"]["vulnerabilities_by_language"]
        ):
            rows.extend(
                self._csv_row("Security", language, vuln.get("severity", ""), vuln)
                for vuln in lang_result.get("vulnerabilities", [])
            )

        for language, lang_result in self._iter_language_results(
            report_content["performance"]["issues_by_language"]
        ):
            rows.extend(
                # Performance issues without a severity default to "Medium".
                self._csv_row(
                    "Performance", language, issue.get("severity", "Medium"), issue
                )
                for issue in lang_result.get("issues", [])
            )

        for file_path, review in report_content["ai_review"]["file_reviews"].items():
            rows.extend(
                {
                    "Type": "AI Review",
                    "Language": "",
                    "Severity": "",
                    "Issue": suggestion.get("section", ""),
                    "Description": suggestion.get("description", ""),
                    "File": file_path,
                    "Line": suggestion.get("line", ""),
                    "Recommendation": suggestion.get("details", ""),
                }
                for suggestion in review.get("suggestions", [])
            )

        with open(report_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=_CSV_FIELDS)
            writer.writeheader()
            writer.writerows(rows)

        logger.info("Generated CSV report: %s", report_path)
        return report_path

    # ------------------------------------------------------------------
    # Markdown rendering
    # ------------------------------------------------------------------

    @staticmethod
    def _append_recommendations(md, heading, label, recommendations):
        """Append one "### <heading>" recommendation section to the line list *md*."""
        md.append(f"### {heading}")
        md.append("")
        if not recommendations:
            md.append(f"No {label} priority recommendations.")
            md.append("")
            return

        for index, rec in enumerate(recommendations, 1):
            md.append(f"**{index}. {rec.get('issue', 'Recommendation')}**")
            # Python-Markdown only starts a list after a blank line.
            md.append("")
            md.append(f"- **Type:** {rec.get('type', 'N/A')}")
            md.append(f"- **File:** {rec.get('file', 'N/A')}")
            if rec.get("line"):
                md.append(f"- **Line:** {rec.get('line')}")
            md.append(f"- **Description:** {rec.get('description', 'N/A')}")
            md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
            md.append("")

    def _convert_to_markdown(self, report_content):
        """
        Convert report content to markdown format.

        Args:
            report_content (dict): Report content as produced by
                ``_create_report_content``.

        Returns:
            str: Markdown formatted report.
        """
        metadata = report_content["metadata"]
        repo_info = metadata["repository_info"]
        summary = report_content["summary"]
        metrics = summary["metrics"]
        size = repo_info.get("size", repo_info.get("size_bytes", 0))

        # Title, metadata and repository info. Bullets keep each field on its
        # own line; adjacent plain lines would be merged into one paragraph.
        md = [
            f"# Code Review Report: {metadata['repository_name']}",
            "",
            f"**Report Date:** {metadata['report_date']}",
            "",
            "## Repository Information",
            "",
            f"- **Branch:** {repo_info.get('branch', 'N/A')}",
            f"- **Commit:** {repo_info.get('commit', 'N/A')}",
            f"- **Remote URL:** {repo_info.get('remote_url', 'N/A')}",
            f"- **Size:** {size} bytes",
            f"- **File Count:** {repo_info.get('file_count', 0)}",
            "",
            "## Executive Summary",
            "",
            f"- **Overall Score:** {metrics.get('overall_score', 0)}/100",
            f"- **Quality Rating:** {metrics.get('quality_rating', 'N/A')}",
            "",
            "### Key Metrics",
            "",
        ]
        md.extend(
            self._md_table(
                ["Metric", "Value"],
                [
                    ("Total Files", metrics.get("total_files", 0)),
                    ("Code Quality Issues", metrics.get("total_code_issues", 0)),
                    ("Critical Code Issues", metrics.get("critical_code_issues", 0)),
                    ("Security Vulnerabilities", metrics.get("total_vulnerabilities", 0)),
                    ("Critical Vulnerabilities", metrics.get("critical_vulnerabilities", 0)),
                    ("Performance Issues", metrics.get("total_performance_issues", 0)),
                    ("Performance Hotspots", metrics.get("performance_hotspots", 0)),
                ],
            )
        )
        md.append("")

        # Language breakdown.
        md.append("### Language Breakdown")
        md.append("")
        md.extend(
            self._md_table(
                ["Language", "Files", "Lines", "Percentage"],
                [
                    (
                        language,
                        stats.get("files", 0),
                        stats.get("lines", 0),
                        f"{stats.get('percentage', 0)}%",
                    )
                    for language, stats in summary["language_breakdown"].items()
                ],
            )
        )
        md.append("")

        # Executive summary from AI review.
        md.append("### Executive Summary")
        md.append("")
        md.append(summary["executive_summary"])
        md.append("")

        # Code quality issues.
        md.append("## Code Quality Analysis")
        md.append("")
        md.append("### Top Issues")
        md.append("")
        top_issues = report_content["code_quality"]["top_issues"]
        if top_issues:
            md.extend(
                self._md_table(
                    ["Severity", "Language", "Issue", "File", "Line"],
                    [
                        (
                            issue.get("severity", "N/A"),
                            issue.get("language", "N/A"),
                            issue.get("issue", "N/A"),
                            issue.get("file", "N/A"),
                            issue.get("line", "N/A"),
                        )
                        for issue in top_issues
                    ],
                )
            )
        else:
            md.append("No code quality issues found.")
        md.append("")

        # Security vulnerabilities.
        md.append("## Security Analysis")
        md.append("")
        md.append("### Critical Vulnerabilities")
        md.append("")
        critical_vulnerabilities = report_content["security"]["critical_vulnerabilities"]
        if critical_vulnerabilities:
            md.extend(
                self._md_table(
                    ["Severity", "Language", "Vulnerability", "File", "Line"],
                    [
                        (
                            vuln.get("severity", "N/A"),
                            vuln.get("language", "N/A"),
                            vuln.get("issue", "N/A"),
                            vuln.get("file", "N/A"),
                            vuln.get("line", "N/A"),
                        )
                        for vuln in critical_vulnerabilities
                    ],
                )
            )
        else:
            md.append("No critical security vulnerabilities found.")
        md.append("")

        # Performance analysis.
        md.append("## Performance Analysis")
        md.append("")
        md.append("### Performance Hotspots")
        md.append("")
        hotspots = report_content["performance"]["hotspots"]
        if hotspots:
            md.extend(
                self._md_table(
                    ["Language", "File", "Issue Count"],
                    [
                        (
                            hotspot.get("language", "N/A"),
                            hotspot.get("file", "N/A"),
                            hotspot.get("issue_count", 0),
                        )
                        for hotspot in hotspots
                    ],
                )
            )
        else:
            md.append("No performance hotspots found.")
        md.append("")

        # Recommendations, one section per priority.
        md.append("## Recommendations")
        md.append("")
        recommendations = report_content["recommendations"]
        for heading, label, key in (
            ("High Priority", "high", "high_priority"),
            ("Medium Priority", "medium", "medium_priority"),
            ("Low Priority", "low", "low_priority"),
        ):
            self._append_recommendations(md, heading, label, recommendations[key])

        return "\n".join(md)