#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Report Generator Service

This module provides functionality for generating comprehensive code review
reports in various formats based on the analysis results.
"""

import csv
import datetime
import json
import logging
import os
import re
from pathlib import Path

try:
    import markdown
except ImportError:
    # Only HTML output needs this package; JSON generation must stay usable
    # without it. generate_report() raises a clear error if HTML is requested.
    markdown = None

logger = logging.getLogger(__name__)

# Lower rank sorts first; unknown severities sort after "info".
_SEVERITY_RANK = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
_UNKNOWN_SEVERITY_RANK = 5
# Severities that are counted as "critical" in metrics and recommendations.
_URGENT_SEVERITIES = ("critical", "high")
_MAX_RECOMMENDATIONS_PER_PRIORITY = 15
_SUPPORTED_FORMATS = ("json", "html", "csv", "all")
# Anything outside this set (path separators, spaces, ...) is replaced in
# report file names so the report always lands inside the output directory.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]+")


class ReportGenerator:
    """
    Service for generating code review reports in various formats.
    """

    def __init__(self, output_dir="reports"):
        """
        Initialize the ReportGenerator.

        Args:
            output_dir (str): Directory to save generated reports. A relative
                path is resolved against the directory two levels above this
                file. The directory is created if it does not exist.
        """
        if not os.path.isabs(output_dir):
            project_root = os.path.abspath(
                os.path.join(os.path.dirname(__file__), "..", "..")
            )
            output_dir = os.path.join(project_root, output_dir)
        self.output_dir = output_dir

        os.makedirs(self.output_dir, exist_ok=True)
        logger.info(
            "Initialized ReportGenerator with output directory: %s",
            self.output_dir,
        )

    # ------------------------------------------------------------------
    # Input normalisation helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _first_section(results, *keys):
        """Return the first dict stored in ``results`` under any of ``keys``.

        The methods of this class historically read the same section under
        different names (``security`` / ``security_scan``, ``performance`` /
        ``performance_analysis``); accepting both keeps them in agreement.
        Returns an empty dict when none of the keys holds a dict.
        """
        for key in keys:
            section = results.get(key)
            if isinstance(section, dict):
                return section
        return {}

    @staticmethod
    def _security_by_language(security_scan):
        """Return ``{language: result_dict}`` from a security section.

        Accepts both the flat shape (``{language: {...}}``) and the wrapped
        shape (``{"vulnerabilities_by_language": {language: {...}}, ...}``).
        Non-dict entries such as a ``critical_vulnerabilities`` list are
        skipped instead of raising ``AttributeError``.
        """
        wrapped = security_scan.get("vulnerabilities_by_language")
        by_language = wrapped if isinstance(wrapped, dict) else security_scan
        return {
            language: language_results
            for language, language_results in by_language.items()
            if isinstance(language_results, dict)
        }

    @staticmethod
    def _performance_by_language(performance_analysis):
        """Return ``{language: result_dict}`` from a performance section.

        Looks under ``language_results`` first, then ``issues_by_language``.
        """
        for key in ("language_results", "issues_by_language"):
            by_language = performance_analysis.get(key)
            if isinstance(by_language, dict):
                return {
                    language: language_results
                    for language, language_results in by_language.items()
                    if isinstance(language_results, dict)
                }
        return {}

    @staticmethod
    def _severity(finding):
        """Return the lower-cased severity of a finding ('' when missing)."""
        return str(finding.get("severity") or "").lower()

    @staticmethod
    def _count(language_results, count_key, list_key):
        """Return the explicit count, or the list length when it is absent."""
        count = language_results.get(count_key)
        if count is None:
            return len(language_results.get(list_key, []))
        return count

    @staticmethod
    def _recommendation(kind, language, finding, default_issue, default_advice):
        """Build one recommendation entry from an issue/vulnerability dict."""
        return {
            "type": kind,
            "language": language,
            "issue": finding.get("issue", default_issue),
            "description": finding.get("description", ""),
            "file": finding.get("file", ""),
            "line": finding.get("line", ""),
            "recommendation": finding.get("recommendation", default_advice),
        }

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def generate_report(self, repo_name, results, format_type="all"):
        """
        Generate a report based on the analysis results.

        Args:
            repo_name (str): Name of the repository.
            results (dict): Analysis results.
            format_type (str): Report format type (json, html, csv, or all).

        Returns:
            dict: Paths to the generated reports, keyed by format. Empty when
                ``format_type`` is not a supported value.

        Raises:
            ImportError: If HTML output is requested but the ``markdown``
                package is not installed.
        """
        if format_type not in _SUPPORTED_FORMATS:
            logger.warning(
                "Unsupported report format %r; no report generated", format_type
            )
            return {}
        if format_type in ("html", "all") and markdown is None:
            raise ImportError(
                "The 'markdown' package is required to generate HTML reports."
            )

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        # Names such as "owner/repo" must not introduce path components.
        safe_name = _UNSAFE_FILENAME_CHARS.sub("_", str(repo_name))
        report_name = f"{safe_name}_{timestamp}"

        report_content = self._create_report_content(repo_name, results)

        report_paths = {}
        if format_type in ("json", "all"):
            report_paths["json"] = self._generate_json_report(
                report_name, report_content
            )
        if format_type in ("html", "all"):
            report_paths["html"] = self._generate_html_report(
                report_name, report_content
            )
        if format_type in ("csv", "all"):
            report_paths["csv"] = self._generate_csv_report(
                report_name, report_content
            )

        logger.info("Generated %d report(s) for %s", len(report_paths), repo_name)
        return report_paths

    # ------------------------------------------------------------------
    # Report assembly
    # ------------------------------------------------------------------

    def _create_report_content(self, repo_name, results):
        """
        Create the content for the report.

        Args:
            repo_name (str): Name of the repository.
            results (dict): Analysis results.

        Returns:
            dict: Structured report content with the sections ``metadata``,
                ``summary``, ``code_quality``, ``security``, ``performance``,
                ``ai_review`` and ``recommendations``.
        """
        repo_info = results.get("repository_info", {})
        code_analysis = results.get("code_analysis", {})
        security_scan = self._first_section(results, "security", "security_scan")
        performance_analysis = self._first_section(
            results, "performance", "performance_analysis"
        )
        ai_review = results.get("ai_review", {})
        executive_summary = ai_review.get(
            "summary", "No AI review summary available."
        )

        # Per-language overview; languages whose analysis failed are left out.
        language_breakdown = {}
        for language, language_results in code_analysis.items():
            if language_results.get("status") == "error":
                continue
            issues = language_results.get("issues", [])
            language_breakdown[language] = {
                # Distinct files that have at least one reported issue.
                "files": len(
                    {issue["file"] for issue in issues if "file" in issue}
                ),
                "lines": language_results.get("total_lines", 0),
                "percentage": language_results.get("percentage", 0),
                "issues": len(issues),
            }

        return {
            "metadata": {
                "repository_name": repo_name,
                "report_date": datetime.datetime.now().isoformat(),
                "repository_info": repo_info,
            },
            "summary": {
                "metrics": self._calculate_summary_metrics(results),
                "language_breakdown": language_breakdown,
                "executive_summary": executive_summary,
            },
            "code_quality": {
                "issues_by_language": code_analysis,
                "top_issues": self._extract_top_issues(code_analysis),
            },
            "security": {
                "vulnerabilities_by_language": self._security_by_language(
                    security_scan
                ),
                "critical_vulnerabilities": self._extract_critical_vulnerabilities(
                    security_scan
                ),
            },
            "performance": {
                "issues_by_language": self._performance_by_language(
                    performance_analysis
                ),
                "hotspots": performance_analysis.get("hotspots", []),
            },
            "ai_review": {
                "file_reviews": ai_review.get("reviews", {}),
                "summary": executive_summary,
            },
            "recommendations": self._generate_recommendations(results),
        }

    def _calculate_summary_metrics(self, results):
        """
        Calculate summary metrics from the analysis results.

        Args:
            results (dict): Analysis results.

        Returns:
            dict: Summary metrics: file count, repository size, issue and
                vulnerability totals, ``overall_score`` (0-100) and
                ``quality_rating``.
        """
        repo_info = results.get("repository_info", {})
        metrics = {
            "total_files": repo_info.get("file_count", 0),
            # "size" is the original key; "size_bytes" is what the report
            # assembly code reads, so fall back to it.
            "repository_size": repo_info.get("size", repo_info.get("size_bytes", 0)),
        }

        # Code quality issues.
        total_issues = 0
        critical_issues = 0
        for language_results in results.get("code_analysis", {}).values():
            total_issues += self._count(language_results, "issue_count", "issues")
            critical_issues += sum(
                1
                for issue in language_results.get("issues", [])
                if self._severity(issue) in _URGENT_SEVERITIES
            )
        metrics["total_code_issues"] = total_issues
        metrics["critical_code_issues"] = critical_issues

        # Security vulnerabilities.
        security_scan = self._first_section(results, "security_scan", "security")
        total_vulnerabilities = 0
        critical_vulnerabilities = 0
        for language_results in self._security_by_language(security_scan).values():
            total_vulnerabilities += self._count(
                language_results, "vulnerability_count", "vulnerabilities"
            )
            critical_vulnerabilities += sum(
                1
                for vuln in language_results.get("vulnerabilities", [])
                if self._severity(vuln) in _URGENT_SEVERITIES
            )
        metrics["total_vulnerabilities"] = total_vulnerabilities
        metrics["critical_vulnerabilities"] = critical_vulnerabilities

        # Performance issues.
        performance_analysis = self._first_section(
            results, "performance_analysis", "performance"
        )
        metrics["total_performance_issues"] = sum(
            self._count(language_results, "issue_count", "issues")
            for language_results in self._performance_by_language(
                performance_analysis
            ).values()
        )
        metrics["performance_hotspots"] = len(
            performance_analysis.get("hotspots", [])
        )

        # Overall score (0-100): every deduction is a per-file ratio scaled by
        # a weight and capped, so one category cannot consume the whole score.
        # Without a file count no ratio can be formed and nothing is deducted.
        deductions = 0
        total_files = metrics["total_files"]
        if total_files > 0:
            weighted_caps = (
                ("total_code_issues", 100, 30),
                ("critical_code_issues", 200, 20),
                ("total_vulnerabilities", 150, 30),
                ("critical_vulnerabilities", 300, 40),
                ("total_performance_issues", 80, 20),
                ("performance_hotspots", 100, 10),
            )
            for metric_name, weight, cap in weighted_caps:
                deductions += min(cap, (metrics[metric_name] / total_files) * weight)
        metrics["overall_score"] = max(0, min(100, 100 - deductions))

        score = metrics["overall_score"]
        if score >= 90:
            metrics["quality_rating"] = "Excellent"
        elif score >= 80:
            metrics["quality_rating"] = "Good"
        elif score >= 70:
            metrics["quality_rating"] = "Satisfactory"
        elif score >= 50:
            metrics["quality_rating"] = "Needs Improvement"
        else:
            metrics["quality_rating"] = "Poor"

        return metrics

    def _extract_top_issues(self, code_analysis, limit=10):
        """
        Extract the top code quality issues from the analysis results.

        Args:
            code_analysis (dict): Code analysis results.
            limit (int): Maximum number of issues to extract.

        Returns:
            list: Copies of the issues, each annotated with its ``language``,
                ordered by severity and then by descending ``line_count``.
                The input dicts are not modified.
        """
        annotated = []
        for language, language_results in code_analysis.items():
            for issue in language_results.get("issues", []):
                # Copy so the caller's analysis results are left untouched.
                annotated.append({**issue, "language": language})

        def sort_key(issue):
            rank = _SEVERITY_RANK.get(self._severity(issue), _UNKNOWN_SEVERITY_RANK)
            return (rank, -(issue.get("line_count") or 0))

        annotated.sort(key=sort_key)
        return annotated[:limit]

    def _extract_critical_vulnerabilities(self, security_scan, limit=10):
        """
        Extract the most severe security vulnerabilities from the scan results.

        No severity filter is applied: the result is the ``limit`` most severe
        vulnerabilities, whatever their severity.

        Args:
            security_scan (dict): Security scan results, either
                ``{language: {...}}`` or wrapped under
                ``vulnerabilities_by_language``.
            limit (int): Maximum number of vulnerabilities to extract.

        Returns:
            list: Copies of the vulnerabilities, each annotated with its
                ``language``, ordered by severity. The input dicts are not
                modified.
        """
        annotated = []
        for language, language_results in self._security_by_language(
            security_scan
        ).items():
            for vuln in language_results.get("vulnerabilities", []):
                annotated.append({**vuln, "language": language})

        annotated.sort(
            key=lambda vuln: _SEVERITY_RANK.get(
                self._severity(vuln), _UNKNOWN_SEVERITY_RANK
            )
        )
        return annotated[:limit]

    def _generate_recommendations(self, results):
        """
        Generate recommendations based on the analysis results.

        Args:
            results (dict): Analysis results.

        Returns:
            dict: Recommendations under ``high_priority``, ``medium_priority``
                and ``low_priority``, each list capped at 15 entries.
        """
        recommendations = {
            "high_priority": [],
            "medium_priority": [],
            "low_priority": [],
        }

        # Critical/high security vulnerabilities -> high priority.
        security_scan = self._first_section(results, "security_scan", "security")
        for language, language_results in self._security_by_language(
            security_scan
        ).items():
            for vuln in language_results.get("vulnerabilities", []):
                if self._severity(vuln) in _URGENT_SEVERITIES:
                    recommendations["high_priority"].append(
                        self._recommendation(
                            "security",
                            language,
                            vuln,
                            "Unknown vulnerability",
                            "Fix this security vulnerability.",
                        )
                    )

        # Critical/high code quality issues -> medium priority.
        for language, language_results in results.get("code_analysis", {}).items():
            for issue in language_results.get("issues", []):
                if self._severity(issue) in _URGENT_SEVERITIES:
                    recommendations["medium_priority"].append(
                        self._recommendation(
                            "code_quality",
                            language,
                            issue,
                            "Unknown issue",
                            "Address this code quality issue.",
                        )
                    )

        # Performance hotspots -> medium priority.
        performance_analysis = self._first_section(
            results, "performance_analysis", "performance"
        )
        hotspots = performance_analysis.get("hotspots", [])
        for hotspot in hotspots:
            recommendations["medium_priority"].append({
                "type": "performance",
                "language": hotspot.get("language", ""),
                "issue": "Performance Hotspot",
                "description": (
                    f"File contains {hotspot.get('issue_count', 0)} "
                    "performance issues"
                ),
                "file": hotspot.get("file", ""),
                # Present (empty) so every entry exposes the same keys.
                "line": "",
                "recommendation": "Optimize this file to improve performance.",
            })

        # Remaining performance issues -> low priority. Issues in a hotspot
        # file are already represented by the hotspot entry above.
        hotspot_files = {hotspot.get("file", "") for hotspot in hotspots}
        for language, language_results in self._performance_by_language(
            performance_analysis
        ).items():
            for issue in language_results.get("issues", []):
                if issue.get("file", "") in hotspot_files:
                    continue
                recommendations["low_priority"].append(
                    self._recommendation(
                        "performance",
                        language,
                        issue,
                        "Unknown issue",
                        "Consider optimizing this code.",
                    )
                )

        # AI review suggestions: priority is derived from the section title.
        ai_review = results.get("ai_review", {})
        for file_path, review in ai_review.get("reviews", {}).items():
            for suggestion in review.get("suggestions", []):
                section = suggestion.get("section", "").lower()
                if "security" in section:
                    priority = "high_priority"
                elif "performance" in section:
                    priority = "low_priority"
                else:
                    priority = "medium_priority"
                recommendations[priority].append({
                    "type": "ai_review",
                    "language": "",  # AI review doesn't specify language
                    "issue": suggestion.get("section", "AI Suggestion"),
                    "description": suggestion.get("description", ""),
                    "file": file_path,
                    "line": suggestion.get("line", ""),
                    "recommendation": suggestion.get("details", ""),
                })

        for priority, entries in recommendations.items():
            recommendations[priority] = entries[:_MAX_RECOMMENDATIONS_PER_PRIORITY]
        return recommendations

    # ------------------------------------------------------------------
    # Writers
    # ------------------------------------------------------------------

    def _generate_json_report(self, report_name, report_content):
        """
        Generate a JSON report.

        Args:
            report_name (str): Name of the report (file name without suffix).
            report_content (dict): Report content.

        Returns:
            str: Path to the generated report.
        """
        report_path = os.path.join(self.output_dir, f"{report_name}.json")

        with open(report_path, "w", encoding="utf-8") as report_file:
            json.dump(report_content, report_file, indent=2, ensure_ascii=False)

        logger.info("Generated JSON report: %s", report_path)
        return report_path

    # NOTE(review): `_generate_html_report` begins at this point in the
    # original file but is cut off in the reviewed excerpt; its original text
    # (and every method after it) is to be kept here unchanged.