Spaces:

Agents-MCP-Hackathon
/

CodeReviewAgent

Running

File size: 28,180 Bytes

88d205f

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Code Analyzer Service

This module provides functionality for analyzing code quality across different languages.
"""

import os
import subprocess
import logging
import json
import tempfile
import concurrent.futures
from collections import defaultdict

logger = logging.getLogger(__name__)

class CodeAnalyzer:
    """
    Service for analyzing code quality across different languages.
    """
    
    def __init__(self):
        """
        Initialize the CodeAnalyzer.
        """
        logger.info("Initialized CodeAnalyzer")
        self.analyzers = {
            'Python': self._analyze_python,
            'JavaScript': self._analyze_javascript,
            'TypeScript': self._analyze_typescript,
            'Java': self._analyze_java,
            'Go': self._analyze_go,
            'Rust': self._analyze_rust,
        }
    
    def analyze_repository(self, repo_path, languages):
        """
        Analyze code quality in a repository for the specified languages using parallel processing.
        
        Args:
            repo_path (str): The path to the repository.
            languages (list): A list of programming languages to analyze.
        
        Returns:
            dict: A dictionary containing analysis results for each language.
        """
        logger.info(f"Analyzing repository at {repo_path} for languages: {languages}")
        
        results = {}
        
        # Define a function to analyze a single language
        def analyze_language(language):
            if language in self.analyzers:
                try:
                    logger.info(f"Analyzing {language} code in {repo_path}")
                    return language, self.analyzers[language](repo_path)
                except Exception as e:
                    logger.error(f"Error analyzing {language} code: {e}")
                    return language, {
                        'status': 'error',
                        'error': str(e),
                        'issues': [],
                    }
            else:
                logger.warning(f"No analyzer available for {language}")
                return language, {
                    'status': 'not_supported',
                    'message': f"Analysis for {language} is not supported yet.",
                    'issues': [],
                }
        
        # Use ThreadPoolExecutor to analyze languages in parallel
        with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(languages), 5)) as executor:
            # Submit all language analysis tasks
            future_to_language = {executor.submit(analyze_language, language): language for language in languages}
            
            # Process results as they complete
            for future in concurrent.futures.as_completed(future_to_language):
                language = future_to_language[future]
                try:
                    lang, result = future.result()
                    results[lang] = result
                    logger.info(f"Completed analysis for {lang}")
                except Exception as e:
                    logger.error(f"Exception occurred during analysis of {language}: {e}")
                    results[language] = {
                        'status': 'error',
                        'error': str(e),
                        'issues': [],
                    }
        
        return results
    
    def _analyze_python(self, repo_path):
        """
        Analyze Python code using pylint.
        
        Args:
            repo_path (str): The path to the repository.
        
        Returns:
            dict: Analysis results.
        """
        logger.info(f"Analyzing Python code in {repo_path}")
        
        # Find Python files
        python_files = []
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith('.py'):
                    python_files.append(os.path.join(root, file))
        
        if not python_files:
            return {
                'status': 'no_files',
                'message': 'No Python files found in the repository.',
                'issues': [],
            }
        
        # Create a temporary file to store pylint output
        with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:
            temp_path = temp_file.name
        
        try:
            # Run pylint with JSON reporter
            cmd = [
                'python',
                '-m',
                'pylint',
                '--output-format=json',
                '--reports=n',
            ] + python_files
            
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
            
            # Parse pylint output
            if process.stdout.strip():
                try:
                    issues = json.loads(process.stdout)
                except json.JSONDecodeError:
                    logger.error(f"Error parsing pylint output: {process.stdout}")
                    issues = []
            else:
                issues = []
            
            # Group issues by type
            issues_by_type = defaultdict(list)
            for issue in issues:
                issue_type = issue.get('type', 'unknown')
                issues_by_type[issue_type].append(issue)
            
            return {
                'status': 'success',
                'issues': issues,
                'issues_by_type': dict(issues_by_type),
                'issue_count': len(issues),
                'files_analyzed': len(python_files),
            }
        
        except Exception as e:
            logger.error(f"Error running pylint: {e}")
            return {
                'status': 'error',
                'error': str(e),
                'issues': [],
            }
        
        finally:
            # Clean up the temporary file
            if os.path.exists(temp_path):
                os.unlink(temp_path)
    
    def _analyze_javascript(self, repo_path):
        """
        Analyze JavaScript code using ESLint.
        
        Args:
            repo_path (str): The path to the repository.
        
        Returns:
            dict: Analysis results.
        """
        logger.info(f"Analyzing JavaScript code in {repo_path}")
        
        # Find JavaScript files
        js_files = []
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith(('.js', '.jsx')) and not 'node_modules' in root:
                    js_files.append(os.path.join(root, file))
        
        if not js_files:
            return {
                'status': 'no_files',
                'message': 'No JavaScript files found in the repository.',
                'issues': [],
            }
        
        # Create a temporary ESLint configuration file
        eslint_config = {
            "env": {
                "browser": True,
                "es2021": True,
                "node": True
            },
            "extends": "eslint:recommended",
            "parserOptions": {
                "ecmaVersion": 12,
                "sourceType": "module",
                "ecmaFeatures": {
                    "jsx": True
                }
            },
            "rules": {}
        }
        
        with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_config:
            json.dump(eslint_config, temp_config)
            temp_config_path = temp_config.name
        
        try:
            # Run ESLint with JSON formatter
            cmd = [
                'npx',
                'eslint',
                '--config', temp_config_path,
                '--format', 'json',
            ] + js_files
            
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
            
            # Parse ESLint output
            if process.stdout.strip():
                try:
                    eslint_results = json.loads(process.stdout)
                    
                    # Extract issues from ESLint results
                    issues = []
                    for result in eslint_results:
                        file_path = result.get('filePath', '')
                        for message in result.get('messages', []):
                            issues.append({
                                'path': file_path,
                                'line': message.get('line', 0),
                                'column': message.get('column', 0),
                                'message': message.get('message', ''),
                                'severity': message.get('severity', 0),
                                'ruleId': message.get('ruleId', ''),
                            })
                except json.JSONDecodeError:
                    logger.error(f"Error parsing ESLint output: {process.stdout}")
                    issues = []
            else:
                issues = []
            
            # Group issues by severity
            issues_by_severity = defaultdict(list)
            for issue in issues:
                severity = issue.get('severity', 0)
                severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown')
                issues_by_severity[severity_name].append(issue)
            
            return {
                'status': 'success',
                'issues': issues,
                'issues_by_severity': dict(issues_by_severity),
                'issue_count': len(issues),
                'files_analyzed': len(js_files),
            }
        
        except Exception as e:
            logger.error(f"Error running ESLint: {e}")
            return {
                'status': 'error',
                'error': str(e),
                'issues': [],
            }
        
        finally:
            # Clean up the temporary configuration file
            if os.path.exists(temp_config_path):
                os.unlink(temp_config_path)
    
    def _analyze_typescript(self, repo_path):
        """
        Analyze TypeScript code using ESLint and TSC.
        
        Args:
            repo_path (str): The path to the repository.
        
        Returns:
            dict: Analysis results.
        """
        logger.info(f"Analyzing TypeScript code in {repo_path}")
        
        # Find TypeScript files
        ts_files = []
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith(('.ts', '.tsx')) and not 'node_modules' in root:
                    ts_files.append(os.path.join(root, file))
        
        if not ts_files:
            return {
                'status': 'no_files',
                'message': 'No TypeScript files found in the repository.',
                'issues': [],
            }
        
        # Create a temporary ESLint configuration file for TypeScript
        eslint_config = {
            "env": {
                "browser": True,
                "es2021": True,
                "node": True
            },
            "extends": [
                "eslint:recommended",
                "plugin:@typescript-eslint/recommended"
            ],
            "parser": "@typescript-eslint/parser",
            "parserOptions": {
                "ecmaVersion": 12,
                "sourceType": "module",
                "ecmaFeatures": {
                    "jsx": True
                }
            },
            "plugins": [
                "@typescript-eslint"
            ],
            "rules": {}
        }
        
        with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_config:
            json.dump(eslint_config, temp_config)
            temp_config_path = temp_config.name
        
        # Create a temporary tsconfig.json file
        tsconfig = {
            "compilerOptions": {
                "target": "es2020",
                "module": "commonjs",
                "strict": True,
                "esModuleInterop": True,
                "skipLibCheck": True,
                "forceConsistentCasingInFileNames": True,
                "noEmit": True
            },
            "include": ts_files
        }
        
        with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_tsconfig:
            json.dump(tsconfig, temp_tsconfig)
            temp_tsconfig_path = temp_tsconfig.name
        
        try:
            # Run ESLint with TypeScript support
            eslint_cmd = [
                'npx',
                'eslint',
                '--config', temp_config_path,
                '--format', 'json',
                '--ext', '.ts,.tsx',
            ] + ts_files
            
            eslint_process = subprocess.run(
                eslint_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
            
            # Parse ESLint output
            eslint_issues = []
            if eslint_process.stdout.strip():
                try:
                    eslint_results = json.loads(eslint_process.stdout)
                    
                    # Extract issues from ESLint results
                    for result in eslint_results:
                        file_path = result.get('filePath', '')
                        for message in result.get('messages', []):
                            eslint_issues.append({
                                'path': file_path,
                                'line': message.get('line', 0),
                                'column': message.get('column', 0),
                                'message': message.get('message', ''),
                                'severity': message.get('severity', 0),
                                'ruleId': message.get('ruleId', ''),
                                'source': 'eslint',
                            })
                except json.JSONDecodeError:
                    logger.error(f"Error parsing ESLint output: {eslint_process.stdout}")
            
            # Run TypeScript compiler for type checking
            tsc_cmd = [
                'npx',
                'tsc',
                '--project', temp_tsconfig_path,
                '--noEmit',
            ]
            
            tsc_process = subprocess.run(
                tsc_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
            
            # Parse TSC output
            tsc_issues = []
            if tsc_process.stderr.strip():
                # TSC error format: file.ts(line,col): error TS2551: message
                for line in tsc_process.stderr.splitlines():
                    if ': error ' in line or ': warning ' in line:
                        try:
                            file_info, error_info = line.split(':', 1)
                            file_path, line_col = file_info.rsplit('(', 1)
                            line_num, col_num = line_col.rstrip(')').split(',')
                            
                            error_type, error_message = error_info.split(':', 1)
                            error_type = error_type.strip()
                            error_message = error_message.strip()
                            
                            tsc_issues.append({
                                'path': file_path,
                                'line': int(line_num),
                                'column': int(col_num),
                                'message': error_message,
                                'severity': 2 if 'error' in error_type else 1,
                                'ruleId': error_type,
                                'source': 'tsc',
                            })
                        except Exception as e:
                            logger.warning(f"Error parsing TSC output line: {line}, error: {e}")
            
            # Combine issues from both tools
            all_issues = eslint_issues + tsc_issues
            
            # Group issues by source and severity
            issues_by_source = defaultdict(list)
            issues_by_severity = defaultdict(list)
            
            for issue in all_issues:
                source = issue.get('source', 'unknown')
                issues_by_source[source].append(issue)
                
                severity = issue.get('severity', 0)
                severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown')
                issues_by_severity[severity_name].append(issue)
            
            return {
                'status': 'success',
                'issues': all_issues,
                'issues_by_source': dict(issues_by_source),
                'issues_by_severity': dict(issues_by_severity),
                'issue_count': len(all_issues),
                'files_analyzed': len(ts_files),
            }
        
        except Exception as e:
            logger.error(f"Error analyzing TypeScript code: {e}")
            return {
                'status': 'error',
                'error': str(e),
                'issues': [],
            }
        
        finally:
            # Clean up temporary files
            for temp_file in [temp_config_path, temp_tsconfig_path]:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
    
    def _analyze_java(self, repo_path):
        """
        Analyze Java code using PMD.
        
        Args:
            repo_path (str): The path to the repository.
        
        Returns:
            dict: Analysis results.
        """
        logger.info(f"Analyzing Java code in {repo_path}")
        
        # Find Java files
        java_files = []
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith('.java'):
                    java_files.append(os.path.join(root, file))
        
        if not java_files:
            return {
                'status': 'no_files',
                'message': 'No Java files found in the repository.',
                'issues': [],
            }
        
        # Create a temporary file to store PMD output
        with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:
            temp_path = temp_file.name
        
        try:
            # Run PMD with JSON reporter
            cmd = [
                'pmd',
                'check',
                '--dir', repo_path,
                '--format', 'json',
                '--rulesets', 'category/java/bestpractices.xml,category/java/codestyle.xml,category/java/design.xml,category/java/errorprone.xml,category/java/multithreading.xml,category/java/performance.xml,category/java/security.xml',
            ]
            
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
            )
            
            # Parse PMD output
            if process.stdout.strip():
                try:
                    pmd_results = json.loads(process.stdout)
                    
                    # Extract issues from PMD results
                    issues = []
                    for file_result in pmd_results.get('files', []):
                        file_path = file_result.get('filename', '')
                        for violation in file_result.get('violations', []):
                            issues.append({
                                'path': file_path,
                                'line': violation.get('beginline', 0),
                                'endLine': violation.get('endline', 0),
                                'column': violation.get('begincolumn', 0),
                                'endColumn': violation.get('endcolumn', 0),
                                'message': violation.get('description', ''),
                                'rule': violation.get('rule', ''),
                                'ruleset': violation.get('ruleset', ''),
                                'priority': violation.get('priority', 0),
                            })
                except json.JSONDecodeError:
                    logger.error(f"Error parsing PMD output: {process.stdout}")
                    issues = []
            else:
                issues = []
            
            # Group issues by ruleset
            issues_by_ruleset = defaultdict(list)
            for issue in issues:
                ruleset = issue.get('ruleset', 'unknown')
                issues_by_ruleset[ruleset].append(issue)
            
            return {
                'status': 'success',
                'issues': issues,
                'issues_by_ruleset': dict(issues_by_ruleset),
                'issue_count': len(issues),
                'files_analyzed': len(java_files),
            }
        
        except Exception as e:
            logger.error(f"Error running PMD: {e}")
            return {
                'status': 'error',
                'error': str(e),
                'issues': [],
            }
        
        finally:
            # Clean up the temporary file
            if os.path.exists(temp_path):
                os.unlink(temp_path)
    
    def _analyze_go(self, repo_path):
        """
        Analyze Go code using golangci-lint.
        
        Args:
            repo_path (str): The path to the repository.
        
        Returns:
            dict: Analysis results.
        """
        logger.info(f"Analyzing Go code in {repo_path}")
        
        # Find Go files
        go_files = []
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith('.go'):
                    go_files.append(os.path.join(root, file))
        
        if not go_files:
            return {
                'status': 'no_files',
                'message': 'No Go files found in the repository.',
                'issues': [],
            }
        
        try:
            # Run golangci-lint with JSON output
            cmd = [
                'golangci-lint',
                'run',
                '--out-format=json',
                repo_path,
            ]
            
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
                cwd=repo_path,  # Run in the repository directory
            )
            
            # Parse golangci-lint output
            if process.stdout.strip():
                try:
                    lint_results = json.loads(process.stdout)
                    
                    # Extract issues from golangci-lint results
                    issues = []
                    for issue in lint_results.get('Issues', []):
                        issues.append({
                            'path': issue.get('Pos', {}).get('Filename', ''),
                            'line': issue.get('Pos', {}).get('Line', 0),
                            'column': issue.get('Pos', {}).get('Column', 0),
                            'message': issue.get('Text', ''),
                            'linter': issue.get('FromLinter', ''),
                            'severity': 'error' if issue.get('Severity', '') == 'error' else 'warning',
                        })
                except json.JSONDecodeError:
                    logger.error(f"Error parsing golangci-lint output: {process.stdout}")
                    issues = []
            else:
                issues = []
            
            # Group issues by linter
            issues_by_linter = defaultdict(list)
            for issue in issues:
                linter = issue.get('linter', 'unknown')
                issues_by_linter[linter].append(issue)
            
            return {
                'status': 'success',
                'issues': issues,
                'issues_by_linter': dict(issues_by_linter),
                'issue_count': len(issues),
                'files_analyzed': len(go_files),
            }
        
        except Exception as e:
            logger.error(f"Error running golangci-lint: {e}")
            return {
                'status': 'error',
                'error': str(e),
                'issues': [],
            }
    
    def _analyze_rust(self, repo_path):
        """
        Analyze Rust code using clippy.
        
        Args:
            repo_path (str): The path to the repository.
        
        Returns:
            dict: Analysis results.
        """
        logger.info(f"Analyzing Rust code in {repo_path}")
        
        # Find Rust files
        rust_files = []
        for root, _, files in os.walk(repo_path):
            for file in files:
                if file.endswith('.rs'):
                    rust_files.append(os.path.join(root, file))
        
        if not rust_files:
            return {
                'status': 'no_files',
                'message': 'No Rust files found in the repository.',
                'issues': [],
            }
        
        try:
            # Run clippy with JSON output
            cmd = [
                'cargo',
                'clippy',
                '--message-format=json',
            ]
            
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=False,
                cwd=repo_path,  # Run in the repository directory
            )
            
            # Parse clippy output
            issues = []
            if process.stdout.strip():
                for line in process.stdout.splitlines():
                    try:
                        message = json.loads(line)
                        if message.get('reason') == 'compiler-message':
                            msg = message.get('message', {})
                            spans = msg.get('spans', [])
                            
                            if spans:
                                primary_span = next((s for s in spans if s.get('is_primary')), spans[0])
                                file_path = primary_span.get('file_name', '')
                                line_num = primary_span.get('line_start', 0)
                                column = primary_span.get('column_start', 0)
                                
                                issues.append({
                                    'path': file_path,
                                    'line': line_num,
                                    'column': column,
                                    'message': msg.get('message', ''),
                                    'level': msg.get('level', ''),
                                    'code': msg.get('code', {}).get('code', ''),
                                })
                    except json.JSONDecodeError:
                        continue
            
            # Group issues by level
            issues_by_level = defaultdict(list)
            for issue in issues:
                level = issue.get('level', 'unknown')
                issues_by_level[level].append(issue)
            
            return {
                'status': 'success',
                'issues': issues,
                'issues_by_level': dict(issues_by_level),
                'issue_count': len(issues),
                'files_analyzed': len(rust_files),
            }
        
        except Exception as e:
            logger.error(f"Error running clippy: {e}")
            return {
                'status': 'error',
                'error': str(e),
                'issues': [],
            }