#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Code Analyzer Service This module provides functionality for analyzing code quality across different languages. """ import os import subprocess import logging import json import tempfile import concurrent.futures from collections import defaultdict logger = logging.getLogger(__name__) class CodeAnalyzer: """ Service for analyzing code quality across different languages. """ def __init__(self): """ Initialize the CodeAnalyzer. """ logger.info("Initialized CodeAnalyzer") self.analyzers = { 'Python': self._analyze_python, 'JavaScript': self._analyze_javascript, 'TypeScript': self._analyze_typescript, 'Java': self._analyze_java, 'Go': self._analyze_go, 'Rust': self._analyze_rust, } def analyze_repository(self, repo_path, languages): """ Analyze code quality in a repository for the specified languages using parallel processing. Args: repo_path (str): The path to the repository. languages (list): A list of programming languages to analyze. Returns: dict: A dictionary containing analysis results for each language. """ logger.info(f"Analyzing repository at {repo_path} for languages: {languages}") results = {} # Define a function to analyze a single language def analyze_language(language): if language in self.analyzers: try: logger.info(f"Analyzing {language} code in {repo_path}") return language, self.analyzers[language](repo_path) except Exception as e: logger.error(f"Error analyzing {language} code: {e}") return language, { 'status': 'error', 'error': str(e), 'issues': [], } else: logger.warning(f"No analyzer available for {language}") return language, { 'status': 'not_supported', 'message': f"Analysis for {language} is not supported yet.", 'issues': [], } # Use ThreadPoolExecutor to analyze languages in parallel with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(languages), 5)) as executor: # Submit all language analysis tasks future_to_language = {executor.submit(analyze_language, language): language for language in languages} # Process results as they complete for future in concurrent.futures.as_completed(future_to_language): language = future_to_language[future] try: lang, result = future.result() results[lang] = result logger.info(f"Completed analysis for {lang}") except Exception as e: logger.error(f"Exception occurred during analysis of {language}: {e}") results[language] = { 'status': 'error', 'error': str(e), 'issues': [], } return results def _analyze_python(self, repo_path): """ Analyze Python code using pylint. Args: repo_path (str): The path to the repository. Returns: dict: Analysis results. """ logger.info(f"Analyzing Python code in {repo_path}") # Find Python files python_files = [] for root, _, files in os.walk(repo_path): for file in files: if file.endswith('.py'): python_files.append(os.path.join(root, file)) if not python_files: return { 'status': 'no_files', 'message': 'No Python files found in the repository.', 'issues': [], } # Create a temporary file to store pylint output with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file: temp_path = temp_file.name try: # Run pylint with JSON reporter cmd = [ 'python', '-m', 'pylint', '--output-format=json', '--reports=n', ] + python_files process = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, ) # Parse pylint output if process.stdout.strip(): try: issues = json.loads(process.stdout) except json.JSONDecodeError: logger.error(f"Error parsing pylint output: {process.stdout}") issues = [] else: issues = [] # Group issues by type issues_by_type = defaultdict(list) for issue in issues: issue_type = issue.get('type', 'unknown') issues_by_type[issue_type].append(issue) return { 'status': 'success', 'issues': issues, 'issues_by_type': dict(issues_by_type), 'issue_count': len(issues), 'files_analyzed': len(python_files), } except Exception as e: logger.error(f"Error running pylint: {e}") return { 'status': 'error', 'error': str(e), 'issues': [], } finally: # Clean up the temporary file if os.path.exists(temp_path): os.unlink(temp_path) def _analyze_javascript(self, repo_path): """ Analyze JavaScript code using ESLint. Args: repo_path (str): The path to the repository. Returns: dict: Analysis results. """ logger.info(f"Analyzing JavaScript code in {repo_path}") # Find JavaScript files js_files = [] for root, _, files in os.walk(repo_path): for file in files: if file.endswith(('.js', '.jsx')) and not 'node_modules' in root: js_files.append(os.path.join(root, file)) if not js_files: return { 'status': 'no_files', 'message': 'No JavaScript files found in the repository.', 'issues': [], } # Create a temporary ESLint configuration file eslint_config = { "env": { "browser": True, "es2021": True, "node": True }, "extends": "eslint:recommended", "parserOptions": { "ecmaVersion": 12, "sourceType": "module", "ecmaFeatures": { "jsx": True } }, "rules": {} } with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_config: json.dump(eslint_config, temp_config) temp_config_path = temp_config.name try: # Run ESLint with JSON formatter cmd = [ 'npx', 'eslint', '--config', temp_config_path, '--format', 'json', ] + js_files process = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, ) # Parse ESLint output if process.stdout.strip(): try: eslint_results = json.loads(process.stdout) # Extract issues from ESLint results issues = [] for result in eslint_results: file_path = result.get('filePath', '') for message in result.get('messages', []): issues.append({ 'path': file_path, 'line': message.get('line', 0), 'column': message.get('column', 0), 'message': message.get('message', ''), 'severity': message.get('severity', 0), 'ruleId': message.get('ruleId', ''), }) except json.JSONDecodeError: logger.error(f"Error parsing ESLint output: {process.stdout}") issues = [] else: issues = [] # Group issues by severity issues_by_severity = defaultdict(list) for issue in issues: severity = issue.get('severity', 0) severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown') issues_by_severity[severity_name].append(issue) return { 'status': 'success', 'issues': issues, 'issues_by_severity': dict(issues_by_severity), 'issue_count': len(issues), 'files_analyzed': len(js_files), } except Exception as e: logger.error(f"Error running ESLint: {e}") return { 'status': 'error', 'error': str(e), 'issues': [], } finally: # Clean up the temporary configuration file if os.path.exists(temp_config_path): os.unlink(temp_config_path) def _analyze_typescript(self, repo_path): """ Analyze TypeScript code using ESLint and TSC. Args: repo_path (str): The path to the repository. Returns: dict: Analysis results. """ logger.info(f"Analyzing TypeScript code in {repo_path}") # Find TypeScript files ts_files = [] for root, _, files in os.walk(repo_path): for file in files: if file.endswith(('.ts', '.tsx')) and not 'node_modules' in root: ts_files.append(os.path.join(root, file)) if not ts_files: return { 'status': 'no_files', 'message': 'No TypeScript files found in the repository.', 'issues': [], } # Create a temporary ESLint configuration file for TypeScript eslint_config = { "env": { "browser": True, "es2021": True, "node": True }, "extends": [ "eslint:recommended", "plugin:@typescript-eslint/recommended" ], "parser": "@typescript-eslint/parser", "parserOptions": { "ecmaVersion": 12, "sourceType": "module", "ecmaFeatures": { "jsx": True } }, "plugins": [ "@typescript-eslint" ], "rules": {} } with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_config: json.dump(eslint_config, temp_config) temp_config_path = temp_config.name # Create a temporary tsconfig.json file tsconfig = { "compilerOptions": { "target": "es2020", "module": "commonjs", "strict": True, "esModuleInterop": True, "skipLibCheck": True, "forceConsistentCasingInFileNames": True, "noEmit": True }, "include": ts_files } with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_tsconfig: json.dump(tsconfig, temp_tsconfig) temp_tsconfig_path = temp_tsconfig.name try: # Run ESLint with TypeScript support eslint_cmd = [ 'npx', 'eslint', '--config', temp_config_path, '--format', 'json', '--ext', '.ts,.tsx', ] + ts_files eslint_process = subprocess.run( eslint_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, ) # Parse ESLint output eslint_issues = [] if eslint_process.stdout.strip(): try: eslint_results = json.loads(eslint_process.stdout) # Extract issues from ESLint results for result in eslint_results: file_path = result.get('filePath', '') for message in result.get('messages', []): eslint_issues.append({ 'path': file_path, 'line': message.get('line', 0), 'column': message.get('column', 0), 'message': message.get('message', ''), 'severity': message.get('severity', 0), 'ruleId': message.get('ruleId', ''), 'source': 'eslint', }) except json.JSONDecodeError: logger.error(f"Error parsing ESLint output: {eslint_process.stdout}") # Run TypeScript compiler for type checking tsc_cmd = [ 'npx', 'tsc', '--project', temp_tsconfig_path, '--noEmit', ] tsc_process = subprocess.run( tsc_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, ) # Parse TSC output tsc_issues = [] if tsc_process.stderr.strip(): # TSC error format: file.ts(line,col): error TS2551: message for line in tsc_process.stderr.splitlines(): if ': error ' in line or ': warning ' in line: try: file_info, error_info = line.split(':', 1) file_path, line_col = file_info.rsplit('(', 1) line_num, col_num = line_col.rstrip(')').split(',') error_type, error_message = error_info.split(':', 1) error_type = error_type.strip() error_message = error_message.strip() tsc_issues.append({ 'path': file_path, 'line': int(line_num), 'column': int(col_num), 'message': error_message, 'severity': 2 if 'error' in error_type else 1, 'ruleId': error_type, 'source': 'tsc', }) except Exception as e: logger.warning(f"Error parsing TSC output line: {line}, error: {e}") # Combine issues from both tools all_issues = eslint_issues + tsc_issues # Group issues by source and severity issues_by_source = defaultdict(list) issues_by_severity = defaultdict(list) for issue in all_issues: source = issue.get('source', 'unknown') issues_by_source[source].append(issue) severity = issue.get('severity', 0) severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown') issues_by_severity[severity_name].append(issue) return { 'status': 'success', 'issues': all_issues, 'issues_by_source': dict(issues_by_source), 'issues_by_severity': dict(issues_by_severity), 'issue_count': len(all_issues), 'files_analyzed': len(ts_files), } except Exception as e: logger.error(f"Error analyzing TypeScript code: {e}") return { 'status': 'error', 'error': str(e), 'issues': [], } finally: # Clean up temporary files for temp_file in [temp_config_path, temp_tsconfig_path]: if os.path.exists(temp_file): os.unlink(temp_file) def _analyze_java(self, repo_path): """ Analyze Java code using PMD. Args: repo_path (str): The path to the repository. Returns: dict: Analysis results. """ logger.info(f"Analyzing Java code in {repo_path}") # Find Java files java_files = [] for root, _, files in os.walk(repo_path): for file in files: if file.endswith('.java'): java_files.append(os.path.join(root, file)) if not java_files: return { 'status': 'no_files', 'message': 'No Java files found in the repository.', 'issues': [], } # Create a temporary file to store PMD output with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file: temp_path = temp_file.name try: # Run PMD with JSON reporter cmd = [ 'pmd', 'check', '--dir', repo_path, '--format', 'json', '--rulesets', 'category/java/bestpractices.xml,category/java/codestyle.xml,category/java/design.xml,category/java/errorprone.xml,category/java/multithreading.xml,category/java/performance.xml,category/java/security.xml', ] process = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, ) # Parse PMD output if process.stdout.strip(): try: pmd_results = json.loads(process.stdout) # Extract issues from PMD results issues = [] for file_result in pmd_results.get('files', []): file_path = file_result.get('filename', '') for violation in file_result.get('violations', []): issues.append({ 'path': file_path, 'line': violation.get('beginline', 0), 'endLine': violation.get('endline', 0), 'column': violation.get('begincolumn', 0), 'endColumn': violation.get('endcolumn', 0), 'message': violation.get('description', ''), 'rule': violation.get('rule', ''), 'ruleset': violation.get('ruleset', ''), 'priority': violation.get('priority', 0), }) except json.JSONDecodeError: logger.error(f"Error parsing PMD output: {process.stdout}") issues = [] else: issues = [] # Group issues by ruleset issues_by_ruleset = defaultdict(list) for issue in issues: ruleset = issue.get('ruleset', 'unknown') issues_by_ruleset[ruleset].append(issue) return { 'status': 'success', 'issues': issues, 'issues_by_ruleset': dict(issues_by_ruleset), 'issue_count': len(issues), 'files_analyzed': len(java_files), } except Exception as e: logger.error(f"Error running PMD: {e}") return { 'status': 'error', 'error': str(e), 'issues': [], } finally: # Clean up the temporary file if os.path.exists(temp_path): os.unlink(temp_path) def _analyze_go(self, repo_path): """ Analyze Go code using golangci-lint. Args: repo_path (str): The path to the repository. Returns: dict: Analysis results. """ logger.info(f"Analyzing Go code in {repo_path}") # Find Go files go_files = [] for root, _, files in os.walk(repo_path): for file in files: if file.endswith('.go'): go_files.append(os.path.join(root, file)) if not go_files: return { 'status': 'no_files', 'message': 'No Go files found in the repository.', 'issues': [], } try: # Run golangci-lint with JSON output cmd = [ 'golangci-lint', 'run', '--out-format=json', repo_path, ] process = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, cwd=repo_path, # Run in the repository directory ) # Parse golangci-lint output if process.stdout.strip(): try: lint_results = json.loads(process.stdout) # Extract issues from golangci-lint results issues = [] for issue in lint_results.get('Issues', []): issues.append({ 'path': issue.get('Pos', {}).get('Filename', ''), 'line': issue.get('Pos', {}).get('Line', 0), 'column': issue.get('Pos', {}).get('Column', 0), 'message': issue.get('Text', ''), 'linter': issue.get('FromLinter', ''), 'severity': 'error' if issue.get('Severity', '') == 'error' else 'warning', }) except json.JSONDecodeError: logger.error(f"Error parsing golangci-lint output: {process.stdout}") issues = [] else: issues = [] # Group issues by linter issues_by_linter = defaultdict(list) for issue in issues: linter = issue.get('linter', 'unknown') issues_by_linter[linter].append(issue) return { 'status': 'success', 'issues': issues, 'issues_by_linter': dict(issues_by_linter), 'issue_count': len(issues), 'files_analyzed': len(go_files), } except Exception as e: logger.error(f"Error running golangci-lint: {e}") return { 'status': 'error', 'error': str(e), 'issues': [], } def _analyze_rust(self, repo_path): """ Analyze Rust code using clippy. Args: repo_path (str): The path to the repository. Returns: dict: Analysis results. """ logger.info(f"Analyzing Rust code in {repo_path}") # Find Rust files rust_files = [] for root, _, files in os.walk(repo_path): for file in files: if file.endswith('.rs'): rust_files.append(os.path.join(root, file)) if not rust_files: return { 'status': 'no_files', 'message': 'No Rust files found in the repository.', 'issues': [], } try: # Run clippy with JSON output cmd = [ 'cargo', 'clippy', '--message-format=json', ] process = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False, cwd=repo_path, # Run in the repository directory ) # Parse clippy output issues = [] if process.stdout.strip(): for line in process.stdout.splitlines(): try: message = json.loads(line) if message.get('reason') == 'compiler-message': msg = message.get('message', {}) spans = msg.get('spans', []) if spans: primary_span = next((s for s in spans if s.get('is_primary')), spans[0]) file_path = primary_span.get('file_name', '') line_num = primary_span.get('line_start', 0) column = primary_span.get('column_start', 0) issues.append({ 'path': file_path, 'line': line_num, 'column': column, 'message': msg.get('message', ''), 'level': msg.get('level', ''), 'code': msg.get('code', {}).get('code', ''), }) except json.JSONDecodeError: continue # Group issues by level issues_by_level = defaultdict(list) for issue in issues: level = issue.get('level', 'unknown') issues_by_level[level].append(issue) return { 'status': 'success', 'issues': issues, 'issues_by_level': dict(issues_by_level), 'issue_count': len(issues), 'files_analyzed': len(rust_files), } except Exception as e: logger.error(f"Error running clippy: {e}") return { 'status': 'error', 'error': str(e), 'issues': [], }