c1r3x committed
Commit 88d205f · 1 Parent(s): 52327b3

Review Agent: first commit

.gitignore ADDED
@@ -0,0 +1,126 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ # IDE specific files
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Project specific
+ reports/
+ temp_repos/
+ logs/
+
+ # Environment variables
+ .env
+
+ # Temporary files
+ *.tmp
+
+ # OS specific
+ .DS_Store
+ Thumbs.db
+
+ # Custom files
+ DEPLOYMENT_GUIDE.md
+ setup.py
+ reports
+ run.py
Dockerfile ADDED
@@ -0,0 +1,39 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.10-slim
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1 \
+     PYTHONUNBUFFERED=1 \
+     PIP_NO_CACHE_DIR=off \
+     PIP_DISABLE_PIP_VERSION_CHECK=on
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     git \
+     wkhtmltopdf \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements file
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the project code
+ COPY . .
+
+ # Create necessary directories
+ RUN mkdir -p reports logs temp_repos
+
+ # Make the run script executable
+ RUN chmod +x run.py
+
+ # Expose the port the app runs on
+ EXPOSE 7860
+
+ # Command to run the application
+ CMD ["python", "run.py"]
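
A quick way to sanity-check this image outside of Compose (the image tag and env-file name below are illustrative, not part of this commit):

```bash
# Build the image from the repository root
docker build -t code-review-agent .

# Run it, forwarding the Gradio port and passing the env file the app reads
docker run --rm -p 7860:7860 --env-file .env code-review-agent
```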
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 Code Review Agent
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,172 @@
  ---
- title: CodeReviewAgent
+ title: Code Review Agent
  emoji: 🏆
  colorFrom: yellow
- colorTo: purple
+ colorTo: green
  sdk: gradio
- sdk_version: 5.33.1
+ sdk_version: 5.33.0
  app_file: app.py
- pinned: false
+ pinned: true
+ short_description: An AI-powered code review agent that analyzes GitHub repos
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Code Review Agent
+
+ [![agent-demo-track](https://img.shields.io/badge/Gradio-agent--demo--track-orange)](https://huggingface.co/Agents-MCP-Hackathon)
+ [![MCP-Server](https://img.shields.io/badge/MCP-Server-blue)](https://www.gradio.app/guides/building-mcp-server-with-gradio)
+
+ A professional-grade code review agent that leverages AI to provide comprehensive code analysis and actionable recommendations. Built for the Gradio MCP Hackathon, this tool helps developers improve code quality through automated reviews powered by advanced AI models.
+
+ ![Gradio MCP Hackathon](https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/4HXPmk3CW5zQmVLBxh2Nx.png)
+
+ ## 🚀 Features
+
+ - **Multi-Language Support**: Analyzes code in 15+ programming languages, including Python, JavaScript, TypeScript, Java, C++, and more
+ - **Enterprise-Grade UI**: Professional Gradio interface with a modern design and intuitive workflow
+ - **Advanced Analysis**: Comprehensive insights on security vulnerabilities, performance bottlenecks, and code quality issues
+ - **GitHub Integration**: Supports both public and private GitHub repositories with secure token authentication
+ - **MCP Server**: Functions as a Model Context Protocol (MCP) server, allowing LLMs to use this tool for code review operations
+ - **Professional Reporting**: Detailed reports with actionable recommendations organized by priority
+ - **Customizable Reviews**: Select specific languages to focus the review on the parts of your codebase that matter most
+
+ ## 🏗️ Architecture
+
+ The system follows a 4-layer architecture designed for extensibility and maintainability:
+
+ 1. **Presentation Layer**: Gradio-based UI with a professional dashboard and intuitive controls
+ 2. **Application Layer**: Core business logic and review engine with modular components
+ 3. **Service Layer**: External integrations and analysis services with clean interfaces
+ 4. **MCP Layer**: Implements the Model Context Protocol (MCP) server functionality, allowing LLMs to use this tool for code review operations through standardized communication
+
+ ## 📋 Requirements
+
+ - Python 3.8+
+ - Git
+ - Internet connection for AI model access
+ - GitHub token (optional, for private repositories)
+
+ ## 🔧 Installation
+
+ ```bash
+ # Clone the repository
+ git clone https://huggingface.co/spaces/c1r3x/code-review-agent
+ cd code-review-agent
+
+ # Install dependencies
+ pip install -r requirements.txt
+
+ # Set up environment variables (optional)
+ cp sample.env .env
+ # Edit .env with your configuration
+ ```
+
+ ## 🚀 Usage
+
+ ```bash
+ python src/main.py
+ ```
+
+ Then open your browser to http://localhost:7860 to access the Code Review Agent.
+
+ ### Analyzing a Repository
+
+ 1. Enter the GitHub repository URL in the input field
+ 2. Optionally enter your GitHub token for private repositories
+ 3. Select the programming languages to focus the review on
+ 4. Click "Submit" to start the analysis
+ 5. View the comprehensive report with actionable recommendations
+
+ ## 🐳 Docker Deployment
+
+ ```bash
+ # Build and run with Docker Compose
+ docker-compose up -d
+ ```
+
+ Access the application at http://localhost:7860
+
+ ## 🔌 MCP Server Functionality
+
+ This application functions as a Model Context Protocol (MCP) server, allowing Large Language Models (LLMs) to use it as a tool for code review operations:
+
+ - **MCP Integration**: Exposes code review capabilities through standardized MCP endpoints
+ - **LLM Tool Usage**: Can be called by LLMs like Claude, GPT-4, and others that support the MCP protocol
+ - **Tool Operations**: Provides repository analysis, code review, and report generation as MCP operations
+ - **Structured Responses**: Returns analysis results in structured formats compatible with LLM processing
+
+ To use this as an MCP server with your LLM:
+
+ 1. Deploy the application using the instructions above
+ 2. Configure your MCP client to connect to the server endpoint (see the example config after this README)
+ 3. The LLM can then use the code review capabilities through MCP tool calls
+
+ ## 🧪 Testing
+
+ ```bash
+ # Run the test suite
+ python -m pytest tests/
+ ```
+
+ ## 📁 Project Structure
+
+ ```
+ ├── .env                   # Environment variables configuration
+ ├── .gitignore             # Git ignore file
+ ├── DEPLOYMENT_GUIDE.md    # Guide for deploying to Hugging Face and Modal
+ ├── Dockerfile             # Docker configuration
+ ├── LICENSE                # MIT License file
+ ├── README.md              # Project documentation
+ ├── app.py                 # Entry point for Hugging Face Spaces
+ ├── docker-compose.yml     # Docker Compose configuration
+ ├── modal_deploy.py        # Modal deployment configuration
+ ├── prompt.txt             # AI prompt templates
+ ├── requirements.txt       # Python dependencies
+ ├── run.py                 # Alternative entry script
+ ├── sample.env             # Sample environment variables
+ ├── setup.py               # Package setup configuration
+ ├── src/                   # Source code directory
+ │   ├── __init__.py        # Package initialization
+ │   ├── core/              # Core application logic
+ │   │   ├── __init__.py
+ │   │   ├── agent_manager.py      # Agent orchestration
+ │   │   └── language_detector.py  # Language detection
+ │   ├── main.py            # Main application entry point
+ │   ├── mcp/               # MCP server implementation
+ │   │   ├── __init__.py
+ │   │   └── ai_review.py   # AI review integration
+ │   ├── services/          # Service layer components
+ │   │   ├── __init__.py
+ │   │   ├── code_analyzer.py         # Code analysis
+ │   │   ├── performance_analyzer.py  # Performance analysis
+ │   │   ├── report_generator.py      # Report generation
+ │   │   ├── repository_service.py    # Repository operations
+ │   │   └── security_scanner.py      # Security scanning
+ │   └── ui/                # User interface components
+ │       ├── __init__.py
+ │       ├── components/    # UI components
+ │       ├── gradio_app.py  # Gradio application
+ │       └── styles/        # UI styling
+ └── tests/                 # Test directory
+     ├── test_agent_manager.py
+     ├── test_ai_review.py
+     ├── test_code_analyzer.py
+     ├── test_language_detector.py
+     ├── test_performance_analyzer.py
+     ├── test_report_generator.py
+     ├── test_repository_service.py
+     └── test_security_scanner.py
+ ```
+
+ ## 🤝 Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
+
+ 1. Fork the repository
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
+ 5. Open a Pull Request
+
+ ## 📄 License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
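
The "server endpoint" in the MCP steps above is left unspecified by the README. As a hedged illustration only: Gradio-based MCP servers typically expose an SSE endpoint under `/gradio_api/mcp/sse`, so a client configuration might look like the sketch below (the server name and URL are assumptions, not values from this commit):

```json
{
  "mcpServers": {
    "code-review-agent": {
      "url": "https://<your-host>:7860/gradio_api/mcp/sse"
    }
  }
}
```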
app.py ADDED
@@ -0,0 +1,48 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Code Review Agent - Hugging Face Spaces Entry Point
+
+ This module serves as the entry point for the Code Review Agent application
+ when deployed to Hugging Face Spaces.
+ """
+
+ import os
+ import sys
+ import logging
+ from dotenv import load_dotenv
+
+ # Add the project root to the Python path
+ sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+ # Import application modules
+ from src.ui.gradio_app import create_gradio_app
+ from src.core.agent_manager import AgentManager
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.StreamHandler()
+     ]
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # Load environment variables
+ load_dotenv()
+
+ # Create logs directory if it doesn't exist
+ logs_dir = os.path.join(os.path.dirname(__file__), 'logs')
+ os.makedirs(logs_dir, exist_ok=True)
+
+ # Initialize the agent manager
+ agent_manager = AgentManager()
+
+ # Create the Gradio app
+ app = create_gradio_app(agent_manager)
+
+ # Launch the app with specific server configuration for Hugging Face Spaces
+ app.launch(server_name="0.0.0.0", server_port=7860, share=False)
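
One gap worth flagging: the README advertises MCP server functionality, but this launch call starts a plain Gradio app. In recent Gradio releases MCP exposure is a launch flag; a hedged sketch follows (the mcp_server argument is not in this commit and only exists in newer Gradio versions):

```python
# Hypothetical variant of the launch call above, assuming a Gradio
# release with built-in MCP support.
app.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    mcp_server=True,  # expose the app's functions as MCP tools
)
```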
docker-compose.yml ADDED
@@ -0,0 +1,17 @@
+ version: '3.8'
+
+ services:
+   code-review-agent:
+     build:
+       context: .
+       dockerfile: Dockerfile
+     container_name: code-review-agent
+     ports:
+       - "7860:7860"
+     volumes:
+       - ./reports:/app/reports
+       - ./logs:/app/logs
+       - ./.env:/app/.env
+     environment:
+       - LOG_LEVEL=INFO
+     restart: unless-stopped
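
Since src/mcp/ai_review.py reads NEBIUS_API_KEY from the environment and this file only mounts .env into the container, an alternative is to forward the key explicitly. A sketch of the addition (an assumption, not part of this commit):

```yaml
# Hypothetical extension of the service's environment block:
environment:
  - LOG_LEVEL=INFO
  - NEBIUS_API_KEY=${NEBIUS_API_KEY}  # taken from the host shell at compose time
```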
modal_deploy.py ADDED
@@ -0,0 +1,64 @@
+ # modal_deploy.py
+ from modal import Image, Stub, asgi_app
+ import sys
+
+ # Create a Modal image with the required dependencies
+ image = Image.debian_slim().pip_install_from_requirements("requirements.txt")
+
+ # Create a Modal Stub
+ stub = Stub("code-review-agent")
+
+ @stub.function(image=image, timeout=600)
+ @asgi_app()
+ def app():
+     """
+     Deploy the Code Review Agent as an ASGI app on Modal.
+
+     This function sets up the Gradio application and returns it as an ASGI app
+     that Modal can serve. The app will be accessible via a URL provided by Modal
+     after deployment.
+
+     Returns:
+         ASGI application: The Gradio app as an ASGI application
+     """
+     import os
+     import sys
+     import logging
+     from dotenv import load_dotenv
+
+     # Add the project root to the Python path
+     sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+     # Import application modules
+     from src.ui.gradio_app import create_gradio_app
+     from src.core.agent_manager import AgentManager
+
+     # Configure logging
+     logging.basicConfig(
+         level=logging.INFO,
+         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+     )
+
+     # Load environment variables
+     load_dotenv()
+
+     # Create logs directory if it doesn't exist
+     logs_dir = os.path.join(os.path.dirname(__file__), 'logs')
+     os.makedirs(logs_dir, exist_ok=True)
+
+     # Initialize the agent manager
+     agent_manager = AgentManager()
+
+     # Create the Gradio app
+     gradio_app = create_gradio_app(agent_manager)
+
+     # Return the Gradio app as an ASGI app
+     return gradio_app.app
+
+
+ if __name__ == "__main__":
+     # For local testing
+     stub.serve()
+
+     # For deployment
+     # Run: modal deploy modal_deploy.py
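
A hedged compatibility note: newer Modal releases renamed Stub to App, so this script may need updating depending on which modal version requirements.txt resolves to. An equivalent sketch against the newer API (names taken from later Modal releases, not from this commit):

```python
# Hypothetical port to the post-rename Modal API.
import modal

image = modal.Image.debian_slim().pip_install_from_requirements("requirements.txt")
modal_app = modal.App("code-review-agent")

@modal_app.function(image=image, timeout=600)
@modal.asgi_app()
def app():
    ...  # same body as the function above
```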
requirements.txt ADDED
@@ -0,0 +1,37 @@
+ # Code Review Agent Dependencies
+
+ # Core Dependencies
+ gradio
+ requests
+ gitpython
+ python-dotenv
+
+ # UI Dependencies
+ markdown
+ pydantic
+
+ # Code Analysis
+ astroid  # Python AST parsing
+ pylint  # Python linting
+ esprima  # JavaScript parsing
+ jsbeautifier  # JavaScript formatting
+ # Note: javascript-obfuscator was removed as it's not available on PyPI
+ pyright  # TypeScript type checking
+
+ # Security Analysis
+ bandit  # Python security scanning
+ safety  # Dependency vulnerability checking
+
+ # Performance Analysis
+ psutil  # System utilization
+ memory-profiler  # Memory profiling
+
+ # MCP Integration
+ openai  # OpenAI API client for Nebius integration
+
+ # Deployment
+ modal
+
+ # Reporting
+ pdfkit
+ wkhtmltopdf
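
Note that pdfkit shells out to the wkhtmltopdf binary, which pip cannot provide; the Dockerfile installs it via apt, but a local (non-Docker) setup needs the OS package too, e.g. on Debian/Ubuntu:

```bash
sudo apt-get install wkhtmltopdf
```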
src/__init__.py ADDED
@@ -0,0 +1 @@
+ # Code Review Agent Package
src/core/__init__.py ADDED
@@ -0,0 +1 @@
+ # Core Package for Code Review Agent
src/core/agent_manager.py ADDED
@@ -0,0 +1,415 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Agent Manager
+
+ This module provides the main orchestrator for the Code Review Agent.
+ It coordinates the review process and manages the state of the application.
+ """
+
+ import os
+ import time
+ import logging
+ import tempfile
+ import json
+ import threading
+ import concurrent.futures
+ from datetime import datetime
+ import gradio as gr
+
+ from src.core.language_detector import LanguageDetector
+ from src.services.code_analyzer import CodeAnalyzer
+ from src.services.report_generator import ReportGenerator
+ from src.services.repository_service import RepositoryService
+ from src.services.security_scanner import SecurityScanner
+ from src.services.performance_analyzer import PerformanceAnalyzer
+
+ logger = logging.getLogger(__name__)
+
+
+ class AgentManager:
+     """
+     Main orchestrator for the Code Review Agent.
+
+     This class coordinates the review process, manages the application state,
+     and provides the interface between the UI and the business logic.
+     """
+
+     def __init__(self):
+         """
+         Initialize the AgentManager.
+         """
+         # Initialize state management
+         self.state = {
+             'repo_url': None,
+             'progress': {},
+             'results': {},
+             'current_step': None
+         }
+
+         # Initialize services
+         self.language_detector = LanguageDetector()
+         self.code_analyzer = CodeAnalyzer()
+         self.report_generator = ReportGenerator()
+         self.repository_service = RepositoryService()
+         self.security_scanner = SecurityScanner()
+         self.performance_analyzer = PerformanceAnalyzer()
+         self.temp_dir = tempfile.mkdtemp(prefix="code_review_agent_")
+
+         logger.info(f"Initialized AgentManager with temp directory: {self.temp_dir}")
+
+     def start_review(self, repo_url, github_token=None, selected_languages=None, progress_components=None):
+         """
+         Start the code review process for a GitHub repository.
+
+         Args:
+             repo_url (str): The URL of the GitHub repository to review.
+             github_token (str, optional): GitHub authentication token for private repositories.
+             selected_languages (list, optional): List of languages to analyze. If None,
+                 languages will be auto-detected.
+             progress_components (tuple, optional): Tuple containing (progress_group,
+                 overall_progress, status_message, step_progress) from create_progress_tracker().
+
+         Returns:
+             tuple: (progress_group, overall_progress, status_message, results_dashboard) - Updated UI components.
+         """
+         # Initialize or use provided progress components
+         if progress_components:
+             progress_group, overall_progress, status_message, step_progress = progress_components
+         else:
+             progress_group = gr.Group(visible=True)
+             overall_progress = gr.Slider(value=0)
+             status_message = gr.Markdown("*Starting review...*")
+             step_progress = {}
+
+         try:
+             # Initialize state
+             self.state = {
+                 'repo_url': repo_url,
+                 'progress': {},
+                 'results': {},
+                 'current_step': None
+             }
+             # Store step progress components
+             self.step_progress = step_progress
+
+             # Clone repository
+             self._update_progress("Repository Cloning", 0, overall_progress, status_message)
+             repo_path = self._clone_repository(repo_url, github_token)
+             self._update_progress("Repository Cloning", 100, overall_progress, status_message)
+
+             # Detect languages
+             self._update_progress("Language Detection", 0, overall_progress, status_message)
+             if selected_languages and len(selected_languages) > 0:
+                 languages = selected_languages
+                 logger.info(f"Using selected languages: {languages}")
+             else:
+                 languages = self.language_detector.detect_languages(repo_path)
+                 logger.info(f"Auto-detected languages: {languages}")
+
+             self.state['languages'] = languages
+             self._update_progress("Language Detection", 100, overall_progress, status_message)
+
+             # Initialize progress for all steps
+             self._update_progress("Code Analysis", 0, overall_progress, status_message)
+             self._update_progress("Security Scanning", 0, overall_progress, status_message)
+             self._update_progress("Performance Analysis", 0, overall_progress, status_message)
+             self._update_progress("AI Review", 0, overall_progress, status_message)
+
+             # Create a thread lock for updating shared state
+             lock = threading.Lock()
+             results = {}
+
+             # Define worker functions for each analysis type
+             def run_code_analysis():
+                 try:
+                     code_results = self.code_analyzer.analyze_repository(repo_path, languages)
+                     with lock:
+                         results['code_analysis'] = code_results
+                     self._update_progress("Code Analysis", 100, overall_progress, status_message)
+                 except Exception as e:
+                     logger.error(f"Error in code analysis thread: {e}")
+                     with lock:
+                         results['code_analysis'] = {'status': 'error', 'error': str(e)}
+                     self._update_progress("Code Analysis", 100, overall_progress, status_message)
+
+             def run_security_scan():
+                 try:
+                     security_results = self.security_scanner.scan_repository(repo_path, languages)
+                     with lock:
+                         results['security'] = security_results
+                     self._update_progress("Security Scanning", 100, overall_progress, status_message)
+                 except Exception as e:
+                     logger.error(f"Error in security scanning thread: {e}")
+                     with lock:
+                         results['security'] = {'status': 'error', 'error': str(e)}
+                     self._update_progress("Security Scanning", 100, overall_progress, status_message)
+
+             def run_performance_analysis():
+                 try:
+                     perf_results = self.performance_analyzer.analyze_repository(repo_path, languages)
+                     with lock:
+                         results['performance'] = perf_results
+                     self._update_progress("Performance Analysis", 100, overall_progress, status_message)
+                 except Exception as e:
+                     logger.error(f"Error in performance analysis thread: {e}")
+                     with lock:
+                         results['performance'] = {'status': 'error', 'error': str(e)}
+                     self._update_progress("Performance Analysis", 100, overall_progress, status_message)
+
+             def run_ai_review():
+                 try:
+                     ai_results = self._perform_ai_review(repo_path, languages)
+                     with lock:
+                         results['ai_review'] = ai_results
+                     self._update_progress("AI Review", 100, overall_progress, status_message)
+                 except Exception as e:
+                     logger.error(f"Error in AI review thread: {e}")
+                     with lock:
+                         results['ai_review'] = {'status': 'error', 'error': str(e)}
+                     self._update_progress("AI Review", 100, overall_progress, status_message)
+
+             # Run all analysis tasks in parallel using ThreadPoolExecutor
+             with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+                 executor.submit(run_code_analysis)
+                 executor.submit(run_security_scan)
+                 executor.submit(run_performance_analysis)
+                 executor.submit(run_ai_review)
+
+                 # Wait for all tasks to complete
+                 executor.shutdown(wait=True)
+
+             # Update the state with all results
+             with lock:
+                 self.state['results'].update(results)
+
+             # Get repository info
+             repo_info = self.repository_service.get_repository_info(repo_path)
+             self.state['results']['repository_info'] = repo_info
+
+             # Generate report
+             self._update_progress("Report Generation", 0, overall_progress, status_message)
+             repo_name = repo_url.split('/')[-1].replace('.git', '')
+             report_paths = self.report_generator.generate_report(
+                 repo_name, self.state['results']
+             )
+             self.state['report_paths'] = report_paths
+             self._update_progress("Report Generation", 100, overall_progress, status_message)
+
+             # Update results dashboard
+             results_dashboard = self._create_results_dashboard(self.state['results'])
+             results_dashboard.visible = True
+
+             return progress_group, overall_progress, status_message, results_dashboard
+
+         except Exception as e:
+             logger.exception(f"Error during code review: {e}")
+             # Update progress components with error
+             status_message.value = f"*Error: {str(e)}*"
+             return progress_group, overall_progress, status_message, None
+
+     def export_report(self, results_dashboard, export_format):
+         """
+         Export the code review report in the specified format.
+
+         Args:
+             results_dashboard: The results dashboard component.
+             export_format (str): The format to export the report in ('pdf', 'json', 'html', 'csv').
+
+         Returns:
+             str: The path to the exported file.
+         """
+         try:
+             if not self.state.get('results'):
+                 logger.warning("No results available to export")
+                 return None
+
+             # Get the actual format value from the textbox component
+             format_value = export_format.value if hasattr(export_format, 'value') else export_format
+
+             # Create exports directory if it doesn't exist
+             exports_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'exports')
+             os.makedirs(exports_dir, exist_ok=True)
+
+             # Export the report in the specified format using report_generator
+             repo_name = self.state['repo_url'].split('/')[-1].replace('.git', '')
+             report_paths = self.report_generator.generate_report(
+                 repo_name, self.state['results'], format_value
+             )
+
+             if format_value in report_paths:
+                 filepath = report_paths[format_value]
+                 logger.info(f"Exported report to {filepath}")
+                 return filepath
+
+             logger.warning(f"Unsupported export format: {format_value}")
+             return None
+
+         except Exception as e:
+             logger.exception(f"Error exporting report: {e}")
+             return None
+
+     def _clone_repository(self, repo_url, github_token=None):
+         """
+         Clone the GitHub repository to a temporary directory.
+
+         Args:
+             repo_url (str): The URL of the GitHub repository to clone.
+             github_token (str, optional): GitHub authentication token for private repositories.
+
+         Returns:
+             str: The path to the cloned repository.
+         """
+         # Import the repository service here to avoid circular imports
+         from src.services.repository_service import RepositoryService
+
+         # Create a repository service instance
+         repo_service = RepositoryService(base_temp_dir=self.temp_dir)
+
+         # Clone the repository using the service
+         try:
+             # If a GitHub token is provided, use it for authentication
+             if github_token and github_token.strip():
+                 # Modify the URL to include the token for authentication
+                 auth_url = repo_url.replace('https://', f'https://{github_token}@')
+                 repo_path = repo_service.clone_repository(auth_url)
+                 logger.info("Cloned repository using GitHub token authentication")
+             else:
+                 # Clone without authentication (for public repositories)
+                 repo_path = repo_service.clone_repository(repo_url)
+                 logger.info("Cloned repository without authentication")
+
+             return repo_path
+         except Exception as e:
+             logger.error(f"Error cloning repository: {e}")
+             raise
+
+     def _perform_ai_review(self, repo_path, languages):
+         """
+         Perform AI-powered code review with parallel processing.
+
+         Args:
+             repo_path (str): The path to the repository.
+             languages (list): List of programming languages to analyze.
+
+         Returns:
+             dict: AI review results.
+         """
+         try:
+             # Import and use the AI review service
+             from src.mcp.ai_review import AIReviewService
+             import os
+
+             ai_reviewer = AIReviewService()
+
+             # Check if AI review is available
+             if not ai_reviewer.is_available():
+                 logger.warning("AI review service is not available. Please set NEBIUS_API_KEY in environment variables.")
+                 return {
+                     'error': 'AI review service is not available. Please set NEBIUS_API_KEY in environment variables.',
+                     'suggestions': [],
+                     'issues': []
+                 }
+
+             # Get all files in the repository
+             all_files = []
+             language_extensions = {
+                 'Python': ['.py'],
+                 'JavaScript': ['.js'],
+                 'TypeScript': ['.ts', '.tsx'],
+                 'Java': ['.java'],
+                 'Go': ['.go'],
+                 'Rust': ['.rs']
+             }
+
+             # Create a list of extensions to look for based on selected languages
+             extensions_to_check = []
+             for lang in languages:
+                 if lang in language_extensions:
+                     extensions_to_check.extend(language_extensions[lang])
+
+             # Find all files with the specified extensions
+             for root, _, files in os.walk(repo_path):
+                 for file in files:
+                     file_path = os.path.join(root, file)
+                     _, ext = os.path.splitext(file_path)
+                     if ext in extensions_to_check:
+                         all_files.append(file_path)
+
+             # Limit the number of files to review to avoid excessive processing
+             max_files = 20
+             if len(all_files) > max_files:
+                 logger.warning(f"Too many files to review ({len(all_files)}). Limiting to {max_files} files.")
+                 all_files = all_files[:max_files]
+
+             # Process files in parallel
+             # Pass None for the optional analysis_results parameter
+             results = ai_reviewer.review_repository(repo_path, all_files, languages, None)
+
+             logger.info(f"AI review completed for {len(all_files)} files across {len(languages)} languages")
+             return results
+         except Exception as e:
+             logger.error(f"Error during AI review: {e}")
+             return {
+                 'error': str(e),
+                 'suggestions': [],
+                 'issues': []
+             }
+
+     def _update_progress(self, step, value, overall_progress=None, status_message=None):
+         """Update progress for a specific step and overall progress."""
+         # Update state
+         self.state['current_step'] = step
+         self.state['progress'][step] = value
+
+         # Calculate overall progress
+         total_steps = len(self.state['progress'])
+         if total_steps > 0:
+             overall = sum(self.state['progress'].values()) / total_steps
+         else:
+             overall = 0
+
+         # Update UI components if provided
+         if overall_progress is not None:
+             overall_progress.value = overall
+         if status_message is not None:
+             status_message.value = f"*Progress update: {step} - {value}% (Overall: {overall:.1f}%)*"
+
+         # Update step progress if available
+         if hasattr(self, 'step_progress') and step in self.step_progress:
+             self.step_progress[step].value = value
+
+         # Log progress
+         logger.info(f"Progress update: {step} - {value}% (Overall: {overall:.1f}%)")
+
+     def _create_results_dashboard(self, report):
+         """
+         Create a results dashboard component for the UI.
+
+         Args:
+             report (dict): The code review report.
+
+         Returns:
+             gr.Tabs: A Gradio results dashboard component.
+         """
+         # Import the create_results_dashboard function from the UI components
+         from src.ui.components.results_dashboard import create_results_dashboard
+
+         # Create a new results dashboard component
+         results_dashboard = create_results_dashboard()
+
+         # Set the visibility to True
+         results_dashboard.visible = True
+
+         # In a full implementation, we would populate the dashboard with data from the report
+         # For now, we're just returning the empty dashboard component
+
+         return results_dashboard
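
For orientation, a minimal headless driver for this orchestrator might look like the sketch below (the repository URL is a placeholder, and the AI step additionally needs NEBIUS_API_KEY set):

```python
# Hypothetical smoke test for AgentManager; not part of this commit.
from src.core.agent_manager import AgentManager

manager = AgentManager()
progress_group, overall, status, dashboard = manager.start_review(
    "https://github.com/example/example-repo",  # placeholder URL
    github_token=None,              # public repo, no auth
    selected_languages=["Python"],  # skip auto-detection
)
print(status.value)  # final status message from the run
```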
src/core/language_detector.py ADDED
@@ -0,0 +1,182 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Language Detector
+
+ This module provides functionality for detecting programming languages in a repository.
+ """
+
+ import os
+ import logging
+ from collections import Counter
+
+ logger = logging.getLogger(__name__)
+
+ # File extension to language mapping
+ EXTENSION_TO_LANGUAGE = {
+     '.py': 'Python',
+     '.js': 'JavaScript',
+     '.jsx': 'JavaScript',
+     '.ts': 'TypeScript',
+     '.tsx': 'TypeScript',
+     '.java': 'Java',
+     '.go': 'Go',
+     '.rs': 'Rust',
+     '.cpp': 'C++',
+     '.cc': 'C++',
+     '.cxx': 'C++',
+     '.c': 'C',
+     '.h': 'C',
+     '.hpp': 'C++',
+     '.cs': 'C#',
+     '.php': 'PHP',
+     '.rb': 'Ruby',
+     '.swift': 'Swift',
+     '.kt': 'Kotlin',
+     '.scala': 'Scala',
+     '.r': 'R',
+     '.sh': 'Shell',
+     '.bash': 'Shell',
+     '.zsh': 'Shell',
+     '.html': 'HTML',
+     '.htm': 'HTML',
+     '.css': 'CSS',
+     '.scss': 'SCSS',
+     '.sass': 'SCSS',
+     '.less': 'Less',
+     '.md': 'Markdown',
+     '.json': 'JSON',
+     '.xml': 'XML',
+     '.yaml': 'YAML',
+     '.yml': 'YAML',
+     '.sql': 'SQL',
+     '.graphql': 'GraphQL',
+     '.gql': 'GraphQL',
+ }
+
+ # Special files to language mapping
+ SPECIAL_FILES_TO_LANGUAGE = {
+     'Dockerfile': 'Docker',
+     'docker-compose.yml': 'Docker',
+     'docker-compose.yaml': 'Docker',
+     'Makefile': 'Make',
+     'CMakeLists.txt': 'CMake',
+     'package.json': 'JavaScript',
+     'tsconfig.json': 'TypeScript',
+     'requirements.txt': 'Python',
+     'setup.py': 'Python',
+     'pom.xml': 'Java',
+     'build.gradle': 'Java',
+     'Cargo.toml': 'Rust',
+     'go.mod': 'Go',
+ }
+
+
+ class LanguageDetector:
+     """
+     Detects programming languages in a repository.
+     """
+
+     def __init__(self):
+         """
+         Initialize the LanguageDetector.
+         """
+         logger.info("Initialized LanguageDetector")
+
+     def detect_languages(self, repo_path):
+         """
+         Detect programming languages in a repository.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             list: A list of detected programming languages, sorted by prevalence.
+         """
+         logger.info(f"Detecting languages in repository: {repo_path}")
+
+         language_counter = Counter()
+
+         for root, dirs, files in os.walk(repo_path):
+             # Skip hidden directories and common non-code directories
+             dirs[:] = [d for d in dirs if not d.startswith('.') and
+                        d not in ['node_modules', 'venv', '.git', '__pycache__', 'dist', 'build']]
+
+             for file in files:
+                 # Check if it's a special file
+                 if file in SPECIAL_FILES_TO_LANGUAGE:
+                     language = SPECIAL_FILES_TO_LANGUAGE[file]
+                     language_counter[language] += 1
+                     continue
+
+                 # Check file extension
+                 _, ext = os.path.splitext(file)
+                 if ext in EXTENSION_TO_LANGUAGE:
+                     language = EXTENSION_TO_LANGUAGE[ext]
+                     language_counter[language] += 1
+
+         # Get the top languages (limit to supported languages)
+         supported_languages = [
+             "Python", "JavaScript", "TypeScript", "Java",
+             "Go", "Rust", "C++", "C#", "PHP", "Ruby",
+             "Swift", "Kotlin", "Scala", "R", "Shell"
+         ]
+
+         detected_languages = [lang for lang, _ in language_counter.most_common()
+                               if lang in supported_languages]
+
+         logger.info(f"Detected languages: {detected_languages}")
+         return detected_languages
+
+     def get_language_breakdown(self, repo_path):
+         """
+         Get a breakdown of programming languages in a repository by lines of code.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: A dictionary mapping languages to lines of code.
+         """
+         logger.info(f"Getting language breakdown for repository: {repo_path}")
+
+         language_loc = {}
+
+         for root, dirs, files in os.walk(repo_path):
+             # Skip hidden directories and common non-code directories
+             dirs[:] = [d for d in dirs if not d.startswith('.') and
+                        d not in ['node_modules', 'venv', '.git', '__pycache__', 'dist', 'build']]
+
+             for file in files:
+                 file_path = os.path.join(root, file)
+
+                 # Determine the language
+                 language = None
+
+                 # Check if it's a special file
+                 if file in SPECIAL_FILES_TO_LANGUAGE:
+                     language = SPECIAL_FILES_TO_LANGUAGE[file]
+                 else:
+                     # Check file extension
+                     _, ext = os.path.splitext(file)
+                     if ext in EXTENSION_TO_LANGUAGE:
+                         language = EXTENSION_TO_LANGUAGE[ext]
+
+                 if language:
+                     # Count lines of code
+                     try:
+                         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                             line_count = sum(1 for _ in f)
+
+                         if language in language_loc:
+                             language_loc[language] += line_count
+                         else:
+                             language_loc[language] = line_count
+                     except Exception as e:
+                         logger.warning(f"Error counting lines in {file_path}: {e}")
+
+         logger.info(f"Language breakdown: {language_loc}")
+         return language_loc
@@ -0,0 +1,65 @@
 
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Code Review Agent - Main Entry Point
+
+ This module serves as the entry point for the Code Review Agent application.
+ It initializes the Gradio interface and starts the web server.
+ """
+
+ import os
+ import sys
+ import logging
+ from dotenv import load_dotenv
+
+ # Add the project root to the Python path
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ # Import application modules
+ from src.ui.gradio_app import create_gradio_app
+ from src.core.agent_manager import AgentManager
+
+ # Configure logging
+ # Create the logs directory first, since the file handler below needs it
+ logs_dir = os.path.join(os.path.dirname(__file__), '..', 'logs')
+ os.makedirs(logs_dir, exist_ok=True)
+
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.StreamHandler(),
+         logging.FileHandler(os.path.join(logs_dir, 'app.log'), mode='a')
+     ]
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ def main():
+     """Main function to start the Code Review Agent application."""
+     # Load environment variables
+     load_dotenv()
+
+     # Initialize the agent manager
+     agent_manager = AgentManager()
+
+     # Create and launch the Gradio app
+     app = create_gradio_app(agent_manager)
+
+     # Start the Gradio server
+     app.launch(server_name="0.0.0.0", server_port=7860)
+
+
+ if __name__ == "__main__":
+     try:
+         logger.info("Starting Code Review Agent application")
+         main()
+     except Exception as e:
+         logger.exception(f"Error starting application: {e}")
+         sys.exit(1)
src/mcp/__init__.py ADDED
@@ -0,0 +1 @@
+ # MCP Integration Package for Code Review Agent
src/mcp/ai_review.py ADDED
@@ -0,0 +1,479 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ AI Review Service
6
+
7
+ This module provides functionality for AI-powered code review using Nebius Qwen2.5-72B-Instruct model.
8
+ """
9
+
10
+ import os
11
+ import logging
12
+ import json
13
+ import re
14
+ import concurrent.futures
15
+ from openai import OpenAI
16
+ from dotenv import load_dotenv
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Load environment variables
21
+ load_dotenv()
22
+
23
+ class AIReviewService:
24
+ """
25
+ Service for AI-powered code review using Nebius Qwen2.5-72B-Instruct model.
26
+ """
27
+
28
+ def __init__(self):
29
+ """
30
+ Initialize the AIReviewService.
31
+ """
32
+ self.api_key = os.getenv('NEBIUS_API_KEY')
33
+ if not self.api_key:
34
+ logger.warning("NEBIUS_API_KEY not found in environment variables. AI review will not be available.")
35
+ else:
36
+ self.client = OpenAI(
37
+ base_url="https://api.studio.nebius.com/v1/",
38
+ api_key=self.api_key
39
+ )
40
+
41
+ logger.info("Initialized AIReviewService")
42
+
43
+ def is_available(self):
44
+ """
45
+ Check if the AI review service is available.
46
+
47
+ Returns:
48
+ bool: True if the service is available, False otherwise.
49
+ """
50
+ return self.api_key is not None
51
+
52
+ def review_code(self, file_path, file_content, language, context=None):
53
+ """
54
+ Review code using Qwen.
55
+
56
+ Args:
57
+ file_path (str): The path to the file being reviewed.
58
+ file_content (str): The content of the file being reviewed.
59
+ language (str): The programming language of the file.
60
+ context (dict, optional): Additional context for the review.
61
+
62
+ Returns:
63
+ dict: The review results.
64
+ """
65
+ if not self.is_available():
66
+ return {
67
+ 'status': 'error',
68
+ 'error': 'AI review service is not available. Please set NEBIUS_API_KEY in environment variables.',
69
+ 'suggestions': [],
70
+ }
71
+
72
+ logger.info(f"Reviewing {language} code in {file_path}")
73
+
74
+ # Prepare the prompt for Qwen
75
+ prompt = self._prepare_prompt(file_path, file_content, language, context)
76
+
77
+ try:
78
+ # Call Nebius API with Qwen2.5-72B-Instruct model
79
+ response = self.client.chat.completions.create(
80
+ model="Qwen/Qwen2.5-72B-Instruct",
81
+ max_tokens=4000,
82
+ temperature=0,
83
+ messages=[
84
+ {"role": "system", "content": self._get_system_prompt(language)},
85
+ {"role": "user", "content": prompt}
86
+ ]
87
+ )
88
+
89
+ # Parse the response
90
+ review_text = response.choices[0].message.content
91
+ suggestions = self._parse_review(review_text)
92
+
93
+ return {
94
+ 'status': 'success',
95
+ 'review_text': review_text,
96
+ 'suggestions': suggestions,
97
+ }
98
+
99
+ except Exception as e:
100
+ logger.error(f"Error calling Qwen API: {e}")
101
+ return {
102
+ 'status': 'error',
103
+ 'error': str(e),
104
+ 'suggestions': [],
105
+ }
106
+
107
+ def review_repository(self, repo_path, files, languages, analysis_results=None):
108
+ """
109
+ Review a repository using Qwen with parallel processing.
110
+
111
+ Args:
112
+ repo_path (str): The path to the repository.
113
+ files (list): A list of files to review.
114
+ languages (list): A list of programming languages in the repository.
115
+ analysis_results (dict, optional): Results from other analysis tools.
116
+
117
+ Returns:
118
+ dict: The review results.
119
+ """
120
+ if not self.is_available():
121
+ return {
122
+ 'status': 'error',
123
+ 'error': 'AI review service is not available. Please set NEBIUS_API_KEY in environment variables.',
124
+ 'reviews': {},
125
+ 'summary': '',
126
+ }
127
+
128
+ logger.info(f"Reviewing repository at {repo_path} with {len(files)} files")
129
+
130
+ # Limit the number of files to review to avoid excessive API usage
131
+ max_files = 20
132
+ if len(files) > max_files:
133
+ logger.warning(f"Too many files to review ({len(files)}). Limiting to {max_files} files.")
134
+ files = files[:max_files]
135
+
136
+ # Function to review a single file
137
+ def review_file(file_path):
138
+ try:
139
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
140
+ file_content = f.read()
141
+
142
+ # Determine the language based on file extension
143
+ _, ext = os.path.splitext(file_path)
144
+ language = self._get_language_from_extension(ext)
145
+
146
+ if language:
147
+ # Provide context from analysis results if available
148
+ context = None
149
+ if analysis_results:
150
+ context = self._extract_context_for_file(file_path, analysis_results)
151
+
152
+ # Review the file
153
+ review_result = self.review_code(file_path, file_content, language, context)
154
+ return file_path, review_result
155
+ return file_path, {
156
+ 'status': 'error',
157
+ 'error': f'Unsupported language for file {file_path}',
158
+ 'suggestions': [],
159
+ }
160
+
161
+ except Exception as e:
162
+ logger.error(f"Error reviewing file {file_path}: {e}")
163
+ return file_path, {
164
+ 'status': 'error',
165
+ 'error': str(e),
166
+ 'suggestions': [],
167
+ }
168
+
169
+ # Review files in parallel using ThreadPoolExecutor
170
+ reviews = {}
171
+ with concurrent.futures.ThreadPoolExecutor(max_workers=min(5, len(files))) as executor:
172
+ # Submit all file review tasks
173
+ future_to_file = {executor.submit(review_file, file_path): file_path for file_path in files}
174
+
175
+ # Process results as they complete
176
+ for future in concurrent.futures.as_completed(future_to_file):
177
+ file_path = future_to_file[future]
178
+ try:
179
+ path, result = future.result()
180
+ reviews[path] = result
181
+ logger.info(f"Completed review for {path}")
182
+ except Exception as e:
183
+ logger.error(f"Exception occurred during review of {file_path}: {e}")
184
+ reviews[file_path] = {
185
+ 'status': 'error',
186
+ 'error': str(e),
187
+ 'suggestions': [],
188
+ }
189
+
190
+ # Generate a summary of the review
191
+ summary = self._generate_repository_summary(repo_path, reviews, languages, analysis_results)
192
+
193
+ return {
194
+ 'status': 'success',
195
+ 'reviews': reviews,
196
+ 'summary': summary,
197
+ }
198
+
199
+ def _prepare_prompt(self, file_path, file_content, language, context=None):
200
+ """
201
+ Prepare a prompt for Qwen.
202
+
203
+ Args:
204
+ file_path (str): The path to the file being reviewed.
205
+ file_content (str): The content of the file being reviewed.
206
+ language (str): The programming language of the file.
207
+ context (dict, optional): Additional context for the review.
208
+
209
+ Returns:
210
+ str: The prompt for Qwen.
211
+ """
212
+ prompt = f"""Please review the following {language} code and provide constructive feedback:
213
+
214
+ File: {file_path}
215
+
216
+ ```{language}
217
+ {file_content}
218
+ ```
219
+
220
+ """
221
+
222
+ if context:
223
+ prompt += "Additional context:\n"
224
+ if 'issues' in context:
225
+ prompt += "\nIssues detected by other tools:\n"
226
+ for issue in context['issues']:
227
+ prompt += f"- {issue.get('issue', 'Unknown issue')} at line {issue.get('line', 'unknown')}: {issue.get('description', '')}\n"
228
+
229
+ if 'vulnerabilities' in context:
230
+ prompt += "\nSecurity vulnerabilities detected:\n"
231
+ for vuln in context['vulnerabilities']:
232
+ prompt += f"- {vuln.get('issue', 'Unknown vulnerability')} at line {vuln.get('line', 'unknown')}: {vuln.get('description', '')}\n"
233
+
234
+ prompt += "\nPlease provide your review with the following sections:\n"
235
+ prompt += "1. Code Quality: Assess the overall quality, readability, and maintainability.\n"
236
+ prompt += "2. Potential Issues: Identify any bugs, edge cases, or potential problems.\n"
237
+ prompt += "3. Security Concerns: Highlight any security vulnerabilities or risks.\n"
238
+ prompt += "4. Performance Considerations: Note any performance bottlenecks or inefficiencies.\n"
239
+ prompt += "5. Specific Suggestions: Provide concrete, actionable suggestions for improvement.\n"
240
+
241
+ return prompt
242
+
243
+ def _get_system_prompt(self, language):
244
+ """
245
+ Get the system prompt for Qwen based on the programming language.
246
+
247
+ Args:
248
+ language (str): The programming language.
249
+
250
+ Returns:
251
+ str: The system prompt for Qwen.
252
+ """
253
+ base_prompt = """You are an expert code reviewer with deep knowledge of software development best practices, design patterns, and security.
254
+ Your task is to review code and provide constructive, actionable feedback.
255
+ Be thorough but prioritize the most important issues.
256
+ Format your response in markdown with clear sections.
257
+ For each suggestion, include the line number, the issue, and a recommended solution.
258
+ Focus on:
259
+ - Code quality and readability
260
+ - Potential bugs and edge cases
261
+ - Security vulnerabilities
262
+ - Performance optimizations
263
+ - Adherence to best practices
264
+
265
+ Your feedback should be specific, actionable, and educational. Explain why each suggestion matters.
266
+ Do not hallucinate vulnerabilities. Base claims on code patterns.
267
+ """
268
+
269
+ # Add language-specific guidance
270
+ if language == 'Python':
271
+ base_prompt += "\nFor Python code, pay special attention to PEP 8 compliance, proper exception handling, and Pythonic idioms."
272
+ elif language in ['JavaScript', 'TypeScript']:
273
+ base_prompt += "\nFor JavaScript/TypeScript code, focus on modern ES6+ practices, proper async handling, and potential type issues."
274
+ elif language == 'Java':
275
+ base_prompt += "\nFor Java code, examine object-oriented design, proper exception handling, and resource management."
276
+ elif language == 'Go':
277
+ base_prompt += "\nFor Go code, check for idiomatic Go patterns, proper error handling, and concurrency issues."
278
+ elif language == 'Rust':
279
+ base_prompt += "\nFor Rust code, verify memory safety, proper use of ownership/borrowing, and idiomatic Rust patterns."
280
+
281
+ return base_prompt
282
+
283
+ def _parse_review(self, review_text):
284
+ """
285
+ Parse the review text from Qwen to extract structured suggestions.
286
+
287
+ Args:
288
+ review_text (str): The review text from Qwen.
289
+
290
+ Returns:
291
+ list: A list of structured suggestions.
292
+ """
293
+ suggestions = []
294
+
295
+ # Split the review into sections
296
+ sections = review_text.split('##')
297
+
298
+ for section in sections:
299
+ if not section.strip():
300
+ continue
301
+ # Extract suggestions from the section
302
+ lines = section.strip().split('\n')
303
+ section_title = lines[0].strip()
304
+
305
+ current_suggestion = None
306
+ for line in lines[1:]:
307
+ line = line.strip()
308
+ if not line:
309
+ continue
310
+
311
+ # Check if this line starts a new suggestion (often contains line numbers)
312
+ line_number_match = re.search(r'line\s+(\d+)', line, re.IGNORECASE)
313
+ if line_number_match or line.startswith('- ') or line.startswith('* '):
314
+ # Save the previous suggestion if it exists
315
+ if current_suggestion:
316
+ suggestions.append(current_suggestion)
317
+
318
+ # Start a new suggestion
319
+ line_number = int(line_number_match.group(1)) if line_number_match else None
320
+ current_suggestion = {
321
+ 'section': section_title,
322
+ 'line': line_number,
323
+ 'description': line,
324
+ 'details': '',
325
+ }
326
+ elif current_suggestion:
327
+ # Add to the details of the current suggestion
328
+ current_suggestion['details'] += line + '\n'
329
+
330
+ # Add the last suggestion in the section
331
+ if current_suggestion:
332
+ suggestions.append(current_suggestion)
333
+
334
+ return suggestions
335
+
336
+ # The review_repository method is already implemented above
337
+
338
+ def _get_language_from_extension(self, extension):
339
+ """
340
+ Get the programming language from a file extension.
341
+
342
+ Args:
343
+ extension (str): The file extension.
344
+
345
+ Returns:
346
+ str: The programming language, or None if unknown.
347
+ """
348
+ extension_to_language = {
349
+ '.py': 'Python',
350
+ '.js': 'JavaScript',
351
+ '.jsx': 'JavaScript',
352
+ '.ts': 'TypeScript',
353
+ '.tsx': 'TypeScript',
354
+ '.java': 'Java',
355
+ '.go': 'Go',
356
+ '.rs': 'Rust',
357
+ '.cpp': 'C++',
358
+ '.cc': 'C++',
359
+ '.c': 'C',
360
+ '.h': 'C',
361
+ '.hpp': 'C++',
362
+ '.cs': 'C#',
363
+ '.php': 'PHP',
364
+ '.rb': 'Ruby',
365
+ }
366
+
367
+ return extension_to_language.get(extension.lower())
368
+
369
+ def _extract_context_for_file(self, file_path, analysis_results):
370
+ """
371
+ Extract relevant context for a file from analysis results.
372
+
373
+ Args:
374
+ file_path (str): The path to the file.
375
+ analysis_results (dict): Results from other analysis tools.
376
+
377
+ Returns:
378
+ dict: Context for the file.
379
+ """
380
+ context = {
381
+ 'issues': [],
382
+ 'vulnerabilities': [],
383
+ }
384
+
385
+ # Extract code quality issues
386
+ if 'code_analysis' in analysis_results:
387
+ for language, language_results in analysis_results['code_analysis'].items():
388
+ for issue in language_results.get('issues', []):
389
+ if issue.get('file', '') == file_path:
390
+ context['issues'].append(issue)
391
+
392
+ # Extract security vulnerabilities
393
+ if 'security_scan' in analysis_results:
394
+ for language, language_results in analysis_results['security_scan'].items():
395
+ for vuln in language_results.get('vulnerabilities', []):
396
+ if vuln.get('file', '') == file_path:
397
+ context['vulnerabilities'].append(vuln)
398
+
399
+ # Extract performance issues
400
+ if 'performance_analysis' in analysis_results:
401
+ for language, language_results in analysis_results['performance_analysis'].get('language_results', {}).items():
402
+ for issue in language_results.get('issues', []):
403
+ if issue.get('file', '') == file_path:
404
+ context['issues'].append(issue)
405
+
406
+ return context
407
+
408
+ def _generate_repository_summary(self, repo_path, reviews, languages, analysis_results=None):
409
+ """
410
+ Generate a summary of the repository review.
411
+
412
+ Args:
413
+ repo_path (str): The path to the repository.
414
+ reviews (dict): The review results for each file.
415
+ languages (list): A list of programming languages in the repository.
416
+ analysis_results (dict, optional): Results from other analysis tools.
417
+
418
+ Returns:
419
+ str: A summary of the repository review.
420
+ """
421
+ if not self.is_available():
422
+ return "AI review service is not available. Please set ANTHROPIC_API_KEY in environment variables."
423
+
424
+ # Prepare the prompt for Qwen
425
+ prompt = f"""Please provide a summary of the code review for the repository at {repo_path}.
426
+
427
+ Languages used in the repository: {', '.join(languages)}
428
+
429
+ """
430
+
431
+ # Add information about the files reviewed
432
+ prompt += "\nFiles reviewed:\n"
433
+ for file_path, review in reviews.items():
434
+ if review.get('status') == 'success':
435
+ suggestion_count = len(review.get('suggestions', []))
436
+ prompt += f"- {file_path}: {suggestion_count} suggestions\n"
437
+ else:
438
+ prompt += f"- {file_path}: Error - {review.get('error', 'Unknown error')}\n"
439
+
440
+ # Add summary of analysis results if available
441
+ if analysis_results:
442
+ prompt += "\nAnalysis results summary:\n"
443
+
444
+ if 'code_analysis' in analysis_results:
445
+ total_issues = sum(result.get('issue_count', 0) for result in analysis_results['code_analysis'].values())
446
+ prompt += f"- Code quality issues: {total_issues}\n"
447
+
448
+ if 'security_scan' in analysis_results:
449
+ total_vulns = sum(result.get('vulnerability_count', 0) for result in analysis_results['security_scan'].values())
450
+ prompt += f"- Security vulnerabilities: {total_vulns}\n"
451
+
452
+ if 'performance_analysis' in analysis_results:
453
+ total_perf_issues = sum(result.get('issue_count', 0) for result in analysis_results['performance_analysis'].get('language_results', {}).values())
454
+ prompt += f"- Performance issues: {total_perf_issues}\n"
455
+
456
+ prompt += "\nPlease provide a comprehensive summary of the code review, including:\n"
457
+ prompt += "1. Overall code quality assessment\n"
458
+ prompt += "2. Common patterns and issues found across the codebase\n"
459
+ prompt += "3. Strengths of the codebase\n"
460
+ prompt += "4. Areas for improvement\n"
461
+ prompt += "5. Prioritized recommendations\n"
462
+
463
+ try:
464
+ # Call Nebius API with Qwen2.5-72B-Instruct model
465
+ response = self.client.chat.completions.create(
466
+ model="Qwen/Qwen2.5-72B-Instruct",
467
+ max_tokens=4000,
468
+ temperature=0,
469
+ messages=[
470
+ {"role": "system", "content": "You are an expert code reviewer providing a summary of a repository review. Be concise, insightful, and actionable in your feedback. Format your response in markdown with clear sections."},
471
+ {"role": "user", "content": prompt}
472
+ ]
473
+ )
474
+
475
+ return response.choices[0].message.content
476
+
477
+ except Exception as e:
478
+ logger.error(f"Error generating repository summary: {e}")
479
+ return f"Error generating repository summary: {e}"
src/services/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Services Package for Code Review Agent
src/services/code_analyzer.py ADDED
@@ -0,0 +1,773 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Code Analyzer Service
6
+
7
+ This module provides functionality for analyzing code quality across different languages.
8
+ """
9
+
10
+ import os
11
+ import subprocess
12
+ import logging
13
+ import json
14
+ import tempfile
15
+ import concurrent.futures
16
+ from collections import defaultdict
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ class CodeAnalyzer:
21
+ """
22
+ Service for analyzing code quality across different languages.
23
+ """
24
+
25
+ def __init__(self):
26
+ """
27
+ Initialize the CodeAnalyzer.
28
+ """
29
+ logger.info("Initialized CodeAnalyzer")
30
+ self.analyzers = {
31
+ 'Python': self._analyze_python,
32
+ 'JavaScript': self._analyze_javascript,
33
+ 'TypeScript': self._analyze_typescript,
34
+ 'Java': self._analyze_java,
35
+ 'Go': self._analyze_go,
36
+ 'Rust': self._analyze_rust,
37
+ }
38
+
39
+ def analyze_repository(self, repo_path, languages):
40
+ """
41
+ Analyze code quality in a repository for the specified languages using parallel processing.
42
+
43
+ Args:
44
+ repo_path (str): The path to the repository.
45
+ languages (list): A list of programming languages to analyze.
46
+
47
+ Returns:
48
+ dict: A dictionary containing analysis results for each language.
49
+ """
50
+ logger.info(f"Analyzing repository at {repo_path} for languages: {languages}")
51
+
52
+ results = {}
53
+
54
+ # Define a function to analyze a single language
55
+ def analyze_language(language):
56
+ if language in self.analyzers:
57
+ try:
58
+ logger.info(f"Analyzing {language} code in {repo_path}")
59
+ return language, self.analyzers[language](repo_path)
60
+ except Exception as e:
61
+ logger.error(f"Error analyzing {language} code: {e}")
62
+ return language, {
63
+ 'status': 'error',
64
+ 'error': str(e),
65
+ 'issues': [],
66
+ }
67
+ else:
68
+ logger.warning(f"No analyzer available for {language}")
69
+ return language, {
70
+ 'status': 'not_supported',
71
+ 'message': f"Analysis for {language} is not supported yet.",
72
+ 'issues': [],
73
+ }
74
+
75
+ # Use ThreadPoolExecutor to analyze languages in parallel
76
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(len(languages), 5))) as executor:  # at least one worker even if languages is empty
77
+ # Submit all language analysis tasks
78
+ future_to_language = {executor.submit(analyze_language, language): language for language in languages}
79
+
80
+ # Process results as they complete
81
+ for future in concurrent.futures.as_completed(future_to_language):
82
+ language = future_to_language[future]
83
+ try:
84
+ lang, result = future.result()
85
+ results[lang] = result
86
+ logger.info(f"Completed analysis for {lang}")
87
+ except Exception as e:
88
+ logger.error(f"Exception occurred during analysis of {language}: {e}")
89
+ results[language] = {
90
+ 'status': 'error',
91
+ 'error': str(e),
92
+ 'issues': [],
93
+ }
94
+
95
+ return results
96
+
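Stripped of the analyzer specifics, the fan-out/fan-in pattern used by analyze_repository is plain concurrent.futures:

import concurrent.futures

def analyze(language):
    # Stand-in for self.analyzers[language](repo_path)
    return language, {'status': 'success', 'issues': []}

languages = ['Python', 'Go', 'Rust']
results = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(languages), 5)) as executor:
    futures = {executor.submit(analyze, lang): lang for lang in languages}
    for future in concurrent.futures.as_completed(futures):
        lang, result = future.result()
        results[lang] = result

assert set(results) == set(languages)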
97
+ def _analyze_python(self, repo_path):
98
+ """
99
+ Analyze Python code using pylint.
100
+
101
+ Args:
102
+ repo_path (str): The path to the repository.
103
+
104
+ Returns:
105
+ dict: Analysis results.
106
+ """
107
+ logger.info(f"Analyzing Python code in {repo_path}")
108
+
109
+ # Find Python files
110
+ python_files = []
111
+ for root, _, files in os.walk(repo_path):
112
+ for file in files:
113
+ if file.endswith('.py'):
114
+ python_files.append(os.path.join(root, file))
115
+
116
+ if not python_files:
117
+ return {
118
+ 'status': 'no_files',
119
+ 'message': 'No Python files found in the repository.',
120
+ 'issues': [],
121
+ }
122
+
123
+ # Create a temporary file to store pylint output
124
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:
125
+ temp_path = temp_file.name
126
+
127
+ try:
128
+ # Run pylint with JSON reporter
129
+ cmd = [
130
+ 'python',
131
+ '-m',
132
+ 'pylint',
133
+ '--output-format=json',
134
+ '--reports=n',
135
+ ] + python_files
136
+
137
+ process = subprocess.run(
138
+ cmd,
139
+ stdout=subprocess.PIPE,
140
+ stderr=subprocess.PIPE,
141
+ text=True,
142
+ check=False,
143
+ )
144
+
145
+ # Parse pylint output
146
+ if process.stdout.strip():
147
+ try:
148
+ issues = json.loads(process.stdout)
149
+ except json.JSONDecodeError:
150
+ logger.error(f"Error parsing pylint output: {process.stdout}")
151
+ issues = []
152
+ else:
153
+ issues = []
154
+
155
+ # Group issues by type
156
+ issues_by_type = defaultdict(list)
157
+ for issue in issues:
158
+ issue_type = issue.get('type', 'unknown')
159
+ issues_by_type[issue_type].append(issue)
160
+
161
+ return {
162
+ 'status': 'success',
163
+ 'issues': issues,
164
+ 'issues_by_type': dict(issues_by_type),
165
+ 'issue_count': len(issues),
166
+ 'files_analyzed': len(python_files),
167
+ }
168
+
169
+ except Exception as e:
170
+ logger.error(f"Error running pylint: {e}")
171
+ return {
172
+ 'status': 'error',
173
+ 'error': str(e),
174
+ 'issues': [],
175
+ }
176
+
177
+ finally:
178
+ # Clean up the temporary file
179
+ if os.path.exists(temp_path):
180
+ os.unlink(temp_path)
181
+
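Each element of the parsed issues list follows pylint's JSON-reporter schema; a representative record (values illustrative) looks like this, with the 'type' field feeding the issues_by_type grouping above:

# One pylint record, as produced by --output-format=json
issue = {
    "type": "convention",           # grouped on above
    "module": "app",
    "path": "src/app.py",
    "line": 12,
    "column": 0,
    "symbol": "missing-function-docstring",
    "message": "Missing function or method docstring",
    "message-id": "C0116",
}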
182
+ def _analyze_javascript(self, repo_path):
183
+ """
184
+ Analyze JavaScript code using ESLint.
185
+
186
+ Args:
187
+ repo_path (str): The path to the repository.
188
+
189
+ Returns:
190
+ dict: Analysis results.
191
+ """
192
+ logger.info(f"Analyzing JavaScript code in {repo_path}")
193
+
194
+ # Find JavaScript files
195
+ js_files = []
196
+ for root, _, files in os.walk(repo_path):
197
+ for file in files:
198
+ if file.endswith(('.js', '.jsx')) and 'node_modules' not in root:
199
+ js_files.append(os.path.join(root, file))
200
+
201
+ if not js_files:
202
+ return {
203
+ 'status': 'no_files',
204
+ 'message': 'No JavaScript files found in the repository.',
205
+ 'issues': [],
206
+ }
207
+
208
+ # Create a temporary ESLint configuration file
209
+ eslint_config = {
210
+ "env": {
211
+ "browser": True,
212
+ "es2021": True,
213
+ "node": True
214
+ },
215
+ "extends": "eslint:recommended",
216
+ "parserOptions": {
217
+ "ecmaVersion": 12,
218
+ "sourceType": "module",
219
+ "ecmaFeatures": {
220
+ "jsx": True
221
+ }
222
+ },
223
+ "rules": {}
224
+ }
225
+
226
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_config:  # text mode: json.dump writes str
227
+ json.dump(eslint_config, temp_config)
228
+ temp_config_path = temp_config.name
229
+
230
+ try:
231
+ # Run ESLint with JSON formatter
232
+ cmd = [
233
+ 'npx',
234
+ 'eslint',
235
+ '--config', temp_config_path,
236
+ '--format', 'json',
237
+ ] + js_files
238
+
239
+ process = subprocess.run(
240
+ cmd,
241
+ stdout=subprocess.PIPE,
242
+ stderr=subprocess.PIPE,
243
+ text=True,
244
+ check=False,
245
+ )
246
+
247
+ # Parse ESLint output
248
+ if process.stdout.strip():
249
+ try:
250
+ eslint_results = json.loads(process.stdout)
251
+
252
+ # Extract issues from ESLint results
253
+ issues = []
254
+ for result in eslint_results:
255
+ file_path = result.get('filePath', '')
256
+ for message in result.get('messages', []):
257
+ issues.append({
258
+ 'path': file_path,
259
+ 'line': message.get('line', 0),
260
+ 'column': message.get('column', 0),
261
+ 'message': message.get('message', ''),
262
+ 'severity': message.get('severity', 0),
263
+ 'ruleId': message.get('ruleId', ''),
264
+ })
265
+ except json.JSONDecodeError:
266
+ logger.error(f"Error parsing ESLint output: {process.stdout}")
267
+ issues = []
268
+ else:
269
+ issues = []
270
+
271
+ # Group issues by severity
272
+ issues_by_severity = defaultdict(list)
273
+ for issue in issues:
274
+ severity = issue.get('severity', 0)
275
+ severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown')
276
+ issues_by_severity[severity_name].append(issue)
277
+
278
+ return {
279
+ 'status': 'success',
280
+ 'issues': issues,
281
+ 'issues_by_severity': dict(issues_by_severity),
282
+ 'issue_count': len(issues),
283
+ 'files_analyzed': len(js_files),
284
+ }
285
+
286
+ except Exception as e:
287
+ logger.error(f"Error running ESLint: {e}")
288
+ return {
289
+ 'status': 'error',
290
+ 'error': str(e),
291
+ 'issues': [],
292
+ }
293
+
294
+ finally:
295
+ # Clean up the temporary configuration file
296
+ if os.path.exists(temp_config_path):
297
+ os.unlink(temp_config_path)
298
+
299
+ def _analyze_typescript(self, repo_path):
300
+ """
301
+ Analyze TypeScript code using ESLint and TSC.
302
+
303
+ Args:
304
+ repo_path (str): The path to the repository.
305
+
306
+ Returns:
307
+ dict: Analysis results.
308
+ """
309
+ logger.info(f"Analyzing TypeScript code in {repo_path}")
310
+
311
+ # Find TypeScript files
312
+ ts_files = []
313
+ for root, _, files in os.walk(repo_path):
314
+ for file in files:
315
+ if file.endswith(('.ts', '.tsx')) and 'node_modules' not in root:
316
+ ts_files.append(os.path.join(root, file))
317
+
318
+ if not ts_files:
319
+ return {
320
+ 'status': 'no_files',
321
+ 'message': 'No TypeScript files found in the repository.',
322
+ 'issues': [],
323
+ }
324
+
325
+ # Create a temporary ESLint configuration file for TypeScript
326
+ eslint_config = {
327
+ "env": {
328
+ "browser": True,
329
+ "es2021": True,
330
+ "node": True
331
+ },
332
+ "extends": [
333
+ "eslint:recommended",
334
+ "plugin:@typescript-eslint/recommended"
335
+ ],
336
+ "parser": "@typescript-eslint/parser",
337
+ "parserOptions": {
338
+ "ecmaVersion": 12,
339
+ "sourceType": "module",
340
+ "ecmaFeatures": {
341
+ "jsx": True
342
+ }
343
+ },
344
+ "plugins": [
345
+ "@typescript-eslint"
346
+ ],
347
+ "rules": {}
348
+ }
349
+
350
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_config:  # text mode: json.dump writes str
351
+ json.dump(eslint_config, temp_config)
352
+ temp_config_path = temp_config.name
353
+
354
+ # Create a temporary tsconfig.json file
355
+ tsconfig = {
356
+ "compilerOptions": {
357
+ "target": "es2020",
358
+ "module": "commonjs",
359
+ "strict": True,
360
+ "esModuleInterop": True,
361
+ "skipLibCheck": True,
362
+ "forceConsistentCasingInFileNames": True,
363
+ "noEmit": True
364
+ },
365
+ "include": ts_files
366
+ }
367
+
368
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_tsconfig:  # text mode: json.dump writes str
369
+ json.dump(tsconfig, temp_tsconfig)
370
+ temp_tsconfig_path = temp_tsconfig.name
371
+
372
+ try:
373
+ # Run ESLint with TypeScript support
374
+ eslint_cmd = [
375
+ 'npx',
376
+ 'eslint',
377
+ '--config', temp_config_path,
378
+ '--format', 'json',
379
+ '--ext', '.ts,.tsx',
380
+ ] + ts_files
381
+
382
+ eslint_process = subprocess.run(
383
+ eslint_cmd,
384
+ stdout=subprocess.PIPE,
385
+ stderr=subprocess.PIPE,
386
+ text=True,
387
+ check=False,
388
+ )
389
+
390
+ # Parse ESLint output
391
+ eslint_issues = []
392
+ if eslint_process.stdout.strip():
393
+ try:
394
+ eslint_results = json.loads(eslint_process.stdout)
395
+
396
+ # Extract issues from ESLint results
397
+ for result in eslint_results:
398
+ file_path = result.get('filePath', '')
399
+ for message in result.get('messages', []):
400
+ eslint_issues.append({
401
+ 'path': file_path,
402
+ 'line': message.get('line', 0),
403
+ 'column': message.get('column', 0),
404
+ 'message': message.get('message', ''),
405
+ 'severity': message.get('severity', 0),
406
+ 'ruleId': message.get('ruleId', ''),
407
+ 'source': 'eslint',
408
+ })
409
+ except json.JSONDecodeError:
410
+ logger.error(f"Error parsing ESLint output: {eslint_process.stdout}")
411
+
412
+ # Run TypeScript compiler for type checking
413
+ tsc_cmd = [
414
+ 'npx',
415
+ 'tsc',
416
+ '--project', temp_tsconfig_path,
417
+ '--noEmit',
418
+ ]
419
+
420
+ tsc_process = subprocess.run(
421
+ tsc_cmd,
422
+ stdout=subprocess.PIPE,
423
+ stderr=subprocess.PIPE,
424
+ text=True,
425
+ check=False,
426
+ )
427
+
428
+ # Parse TSC output
429
+ tsc_issues = []
430
+ if tsc_process.stdout.strip():  # tsc writes diagnostics to stdout, not stderr
431
+ # TSC error format: file.ts(line,col): error TS2551: message
432
+ for line in tsc_process.stdout.splitlines():
433
+ if ': error ' in line or ': warning ' in line:
434
+ try:
435
+ file_info, error_info = line.split(':', 1)
436
+ file_path, line_col = file_info.rsplit('(', 1)
437
+ line_num, col_num = line_col.rstrip(')').split(',')
438
+
439
+ error_type, error_message = error_info.split(':', 1)
440
+ error_type = error_type.strip()
441
+ error_message = error_message.strip()
442
+
443
+ tsc_issues.append({
444
+ 'path': file_path,
445
+ 'line': int(line_num),
446
+ 'column': int(col_num),
447
+ 'message': error_message,
448
+ 'severity': 2 if 'error' in error_type else 1,
449
+ 'ruleId': error_type,
450
+ 'source': 'tsc',
451
+ })
452
+ except Exception as e:
453
+ logger.warning(f"Error parsing TSC output line: {line}, error: {e}")
454
+
455
+ # Combine issues from both tools
456
+ all_issues = eslint_issues + tsc_issues
457
+
458
+ # Group issues by source and severity
459
+ issues_by_source = defaultdict(list)
460
+ issues_by_severity = defaultdict(list)
461
+
462
+ for issue in all_issues:
463
+ source = issue.get('source', 'unknown')
464
+ issues_by_source[source].append(issue)
465
+
466
+ severity = issue.get('severity', 0)
467
+ severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown')
468
+ issues_by_severity[severity_name].append(issue)
469
+
470
+ return {
471
+ 'status': 'success',
472
+ 'issues': all_issues,
473
+ 'issues_by_source': dict(issues_by_source),
474
+ 'issues_by_severity': dict(issues_by_severity),
475
+ 'issue_count': len(all_issues),
476
+ 'files_analyzed': len(ts_files),
477
+ }
478
+
479
+ except Exception as e:
480
+ logger.error(f"Error analyzing TypeScript code: {e}")
481
+ return {
482
+ 'status': 'error',
483
+ 'error': str(e),
484
+ 'issues': [],
485
+ }
486
+
487
+ finally:
488
+ # Clean up temporary files
489
+ for temp_file in [temp_config_path, temp_tsconfig_path]:
490
+ if os.path.exists(temp_file):
491
+ os.unlink(temp_file)
492
+
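The TSC branch above assumes the classic 'file(line,col): error TScode: message' diagnostic format; tracing one such line through the split logic:

line = "src/index.ts(10,5): error TS2551: Property 'nmae' does not exist."

file_info, error_info = line.split(':', 1)
file_path, line_col = file_info.rsplit('(', 1)
line_num, col_num = line_col.rstrip(')').split(',')
error_type, error_message = error_info.split(':', 1)

print(file_path, int(line_num), int(col_num))    # src/index.ts 10 5
print(error_type.strip(), '|', error_message.strip())
# error TS2551 | Property 'nmae' does not exist.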
493
+ def _analyze_java(self, repo_path):
494
+ """
495
+ Analyze Java code using PMD.
496
+
497
+ Args:
498
+ repo_path (str): The path to the repository.
499
+
500
+ Returns:
501
+ dict: Analysis results.
502
+ """
503
+ logger.info(f"Analyzing Java code in {repo_path}")
504
+
505
+ # Find Java files
506
+ java_files = []
507
+ for root, _, files in os.walk(repo_path):
508
+ for file in files:
509
+ if file.endswith('.java'):
510
+ java_files.append(os.path.join(root, file))
511
+
512
+ if not java_files:
513
+ return {
514
+ 'status': 'no_files',
515
+ 'message': 'No Java files found in the repository.',
516
+ 'issues': [],
517
+ }
518
+
519
+ # Create a temporary file to store PMD output
520
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:
521
+ temp_path = temp_file.name
522
+
523
+ try:
524
+ # Run PMD with JSON reporter
525
+ cmd = [
526
+ 'pmd',
527
+ 'check',
528
+ '--dir', repo_path,
529
+ '--format', 'json',
530
+ '--rulesets', 'category/java/bestpractices.xml,category/java/codestyle.xml,category/java/design.xml,category/java/errorprone.xml,category/java/multithreading.xml,category/java/performance.xml,category/java/security.xml',
531
+ ]
532
+
533
+ process = subprocess.run(
534
+ cmd,
535
+ stdout=subprocess.PIPE,
536
+ stderr=subprocess.PIPE,
537
+ text=True,
538
+ check=False,
539
+ )
540
+
541
+ # Parse PMD output
542
+ if process.stdout.strip():
543
+ try:
544
+ pmd_results = json.loads(process.stdout)
545
+
546
+ # Extract issues from PMD results
547
+ issues = []
548
+ for file_result in pmd_results.get('files', []):
549
+ file_path = file_result.get('filename', '')
550
+ for violation in file_result.get('violations', []):
551
+ issues.append({
552
+ 'path': file_path,
553
+ 'line': violation.get('beginline', 0),
554
+ 'endLine': violation.get('endline', 0),
555
+ 'column': violation.get('begincolumn', 0),
556
+ 'endColumn': violation.get('endcolumn', 0),
557
+ 'message': violation.get('description', ''),
558
+ 'rule': violation.get('rule', ''),
559
+ 'ruleset': violation.get('ruleset', ''),
560
+ 'priority': violation.get('priority', 0),
561
+ })
562
+ except json.JSONDecodeError:
563
+ logger.error(f"Error parsing PMD output: {process.stdout}")
564
+ issues = []
565
+ else:
566
+ issues = []
567
+
568
+ # Group issues by ruleset
569
+ issues_by_ruleset = defaultdict(list)
570
+ for issue in issues:
571
+ ruleset = issue.get('ruleset', 'unknown')
572
+ issues_by_ruleset[ruleset].append(issue)
573
+
574
+ return {
575
+ 'status': 'success',
576
+ 'issues': issues,
577
+ 'issues_by_ruleset': dict(issues_by_ruleset),
578
+ 'issue_count': len(issues),
579
+ 'files_analyzed': len(java_files),
580
+ }
581
+
582
+ except Exception as e:
583
+ logger.error(f"Error running PMD: {e}")
584
+ return {
585
+ 'status': 'error',
586
+ 'error': str(e),
587
+ 'issues': [],
588
+ }
589
+
590
+ finally:
591
+ # Clean up the temporary file
592
+ if os.path.exists(temp_path):
593
+ os.unlink(temp_path)
594
+
595
+ def _analyze_go(self, repo_path):
596
+ """
597
+ Analyze Go code using golangci-lint.
598
+
599
+ Args:
600
+ repo_path (str): The path to the repository.
601
+
602
+ Returns:
603
+ dict: Analysis results.
604
+ """
605
+ logger.info(f"Analyzing Go code in {repo_path}")
606
+
607
+ # Find Go files
608
+ go_files = []
609
+ for root, _, files in os.walk(repo_path):
610
+ for file in files:
611
+ if file.endswith('.go'):
612
+ go_files.append(os.path.join(root, file))
613
+
614
+ if not go_files:
615
+ return {
616
+ 'status': 'no_files',
617
+ 'message': 'No Go files found in the repository.',
618
+ 'issues': [],
619
+ }
620
+
621
+ try:
622
+ # Run golangci-lint with JSON output
623
+ cmd = [
624
+ 'golangci-lint',
625
+ 'run',
626
+ '--out-format=json',
627
+ repo_path,
628
+ ]
629
+
630
+ process = subprocess.run(
631
+ cmd,
632
+ stdout=subprocess.PIPE,
633
+ stderr=subprocess.PIPE,
634
+ text=True,
635
+ check=False,
636
+ cwd=repo_path, # Run in the repository directory
637
+ )
638
+
639
+ # Parse golangci-lint output
640
+ if process.stdout.strip():
641
+ try:
642
+ lint_results = json.loads(process.stdout)
643
+
644
+ # Extract issues from golangci-lint results
645
+ issues = []
646
+ for issue in lint_results.get('Issues', []):
647
+ issues.append({
648
+ 'path': issue.get('Pos', {}).get('Filename', ''),
649
+ 'line': issue.get('Pos', {}).get('Line', 0),
650
+ 'column': issue.get('Pos', {}).get('Column', 0),
651
+ 'message': issue.get('Text', ''),
652
+ 'linter': issue.get('FromLinter', ''),
653
+ 'severity': 'error' if issue.get('Severity', '') == 'error' else 'warning',
654
+ })
655
+ except json.JSONDecodeError:
656
+ logger.error(f"Error parsing golangci-lint output: {process.stdout}")
657
+ issues = []
658
+ else:
659
+ issues = []
660
+
661
+ # Group issues by linter
662
+ issues_by_linter = defaultdict(list)
663
+ for issue in issues:
664
+ linter = issue.get('linter', 'unknown')
665
+ issues_by_linter[linter].append(issue)
666
+
667
+ return {
668
+ 'status': 'success',
669
+ 'issues': issues,
670
+ 'issues_by_linter': dict(issues_by_linter),
671
+ 'issue_count': len(issues),
672
+ 'files_analyzed': len(go_files),
673
+ }
674
+
675
+ except Exception as e:
676
+ logger.error(f"Error running golangci-lint: {e}")
677
+ return {
678
+ 'status': 'error',
679
+ 'error': str(e),
680
+ 'issues': [],
681
+ }
682
+
683
+ def _analyze_rust(self, repo_path):
684
+ """
685
+ Analyze Rust code using clippy.
686
+
687
+ Args:
688
+ repo_path (str): The path to the repository.
689
+
690
+ Returns:
691
+ dict: Analysis results.
692
+ """
693
+ logger.info(f"Analyzing Rust code in {repo_path}")
694
+
695
+ # Find Rust files
696
+ rust_files = []
697
+ for root, _, files in os.walk(repo_path):
698
+ for file in files:
699
+ if file.endswith('.rs'):
700
+ rust_files.append(os.path.join(root, file))
701
+
702
+ if not rust_files:
703
+ return {
704
+ 'status': 'no_files',
705
+ 'message': 'No Rust files found in the repository.',
706
+ 'issues': [],
707
+ }
708
+
709
+ try:
710
+ # Run clippy with JSON output
711
+ cmd = [
712
+ 'cargo',
713
+ 'clippy',
714
+ '--message-format=json',
715
+ ]
716
+
717
+ process = subprocess.run(
718
+ cmd,
719
+ stdout=subprocess.PIPE,
720
+ stderr=subprocess.PIPE,
721
+ text=True,
722
+ check=False,
723
+ cwd=repo_path, # Run in the repository directory
724
+ )
725
+
726
+ # Parse clippy output
727
+ issues = []
728
+ if process.stdout.strip():
729
+ for line in process.stdout.splitlines():
730
+ try:
731
+ message = json.loads(line)
732
+ if message.get('reason') == 'compiler-message':
733
+ msg = message.get('message', {})
734
+ spans = msg.get('spans', [])
735
+
736
+ if spans:
737
+ primary_span = next((s for s in spans if s.get('is_primary')), spans[0])
738
+ file_path = primary_span.get('file_name', '')
739
+ line_num = primary_span.get('line_start', 0)
740
+ column = primary_span.get('column_start', 0)
741
+
742
+ issues.append({
743
+ 'path': file_path,
744
+ 'line': line_num,
745
+ 'column': column,
746
+ 'message': msg.get('message', ''),
747
+ 'level': msg.get('level', ''),
748
+ 'code': msg.get('code', {}).get('code', ''),
749
+ })
750
+ except json.JSONDecodeError:
751
+ continue
752
+
753
+ # Group issues by level
754
+ issues_by_level = defaultdict(list)
755
+ for issue in issues:
756
+ level = issue.get('level', 'unknown')
757
+ issues_by_level[level].append(issue)
758
+
759
+ return {
760
+ 'status': 'success',
761
+ 'issues': issues,
762
+ 'issues_by_level': dict(issues_by_level),
763
+ 'issue_count': len(issues),
764
+ 'files_analyzed': len(rust_files),
765
+ }
766
+
767
+ except Exception as e:
768
+ logger.error(f"Error running clippy: {e}")
769
+ return {
770
+ 'status': 'error',
771
+ 'error': str(e),
772
+ 'issues': [],
773
+ }
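Clippy's --message-format=json emits one JSON object per line; the fields read above come from 'compiler-message' records shaped roughly like this (abbreviated):

# Abbreviated cargo/clippy JSON-lines record with reason "compiler-message"
message = {
    "reason": "compiler-message",
    "message": {
        "level": "warning",
        "message": "this `.clone()` is redundant",
        "code": {"code": "clippy::redundant_clone"},
        "spans": [{
            "is_primary": True,
            "file_name": "src/main.rs",
            "line_start": 7,
            "column_start": 13,
        }],
    },
}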
src/services/performance_analyzer.py ADDED
@@ -0,0 +1,774 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Performance Analyzer Service
6
+
7
+ This module provides functionality for analyzing code performance across different languages.
8
+ """
9
+
10
+ import os
11
+ import re
12
+ import logging
13
+ import subprocess
14
+ import json
15
+ import concurrent.futures
16
+ from collections import defaultdict
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ class PerformanceAnalyzer:
21
+ """
22
+ Service for analyzing code performance across different languages.
23
+ """
24
+
25
+ def __init__(self):
26
+ """
27
+ Initialize the PerformanceAnalyzer.
28
+ """
29
+ logger.info("Initialized PerformanceAnalyzer")
30
+ self.analyzers = {
31
+ 'Python': self._analyze_python_performance,
32
+ 'JavaScript': self._analyze_javascript_performance,
33
+ 'TypeScript': self._analyze_typescript_performance,
34
+ 'Java': self._analyze_java_performance,
35
+ 'Go': self._analyze_go_performance,
36
+ 'Rust': self._analyze_rust_performance,
37
+ }
38
+
39
+ # Initialize performance patterns for different languages
40
+ self._init_performance_patterns()
41
+
42
+ def _init_performance_patterns(self):
43
+ """
44
+ Initialize performance patterns for different languages.
45
+ """
46
+ # Python performance patterns
47
+ self.python_patterns = [
48
+ {
49
+ 'name': 'Inefficient list comprehension',
50
+ 'pattern': r'\[.*?for.*?in.*?for.*?in.*?\]',
51
+ 'severity': 'medium',
52
+ 'description': 'Nested list comprehensions can be inefficient for large datasets.',
53
+ 'suggestion': 'Consider using itertools or breaking into separate operations.',
54
+ },
55
+ {
56
+ 'name': 'String concatenation in loop',
57
+ 'pattern': r'for.*?\+\=\s*[\'\"](.*?)[\'\"]',
58
+ 'severity': 'medium',
59
+ 'description': 'String concatenation in loops is inefficient in Python.',
60
+ 'suggestion': 'Use string join() or a list of strings with join() at the end.',
61
+ },
62
+ {
63
+ 'name': 'Global variable in loop',
64
+ 'pattern': r'global\s+\w+.*?for\s+\w+\s+in',
65
+ 'severity': 'medium',
66
+ 'description': 'Modifying global variables in loops can be inefficient.',
67
+ 'suggestion': 'Use local variables and return values instead.',
68
+ },
69
+ {
70
+ 'name': 'Inefficient dict/list access in loop',
71
+ 'pattern': r'for.*?in.*?:\s*.*?\[.*?\]\s*=',
72
+ 'severity': 'medium',
73
+ 'description': 'Repeatedly accessing dictionary or list elements in a loop can be inefficient.',
74
+ 'suggestion': 'Consider using a local variable to store the accessed element.',
75
+ },
76
+ {
77
+ 'name': 'Using range(len())',
78
+ 'pattern': r'for\s+\w+\s+in\s+range\(len\(',
79
+ 'severity': 'low',
80
+ 'description': 'Using range(len()) is less readable than using enumerate().',
81
+ 'suggestion': 'Use enumerate() instead of range(len()).',
82
+ },
83
+ {
84
+ 'name': 'Inefficient regular expression',
85
+ 'pattern': r're\.compile\([\'\"].*?[\+\*].*?[\'\"]\)',
86
+ 'severity': 'medium',
87
+ 'description': 'Complex regular expressions can be inefficient.',
88
+ 'suggestion': 'Simplify the regular expression or use more specific patterns.',
89
+ },
90
+ {
91
+ 'name': 'Large memory allocation',
92
+ 'pattern': r'\[.*?for.*?in\s+range\(\d{7,}\)\]',
93
+ 'severity': 'high',
94
+ 'description': 'Creating large lists in memory can cause performance issues.',
95
+ 'suggestion': 'Use generators or iterators instead of creating large lists.',
96
+ },
97
+ {
98
+ 'name': 'Inefficient database query in loop',
99
+ 'pattern': r'for.*?in.*?:\s*.*?\.execute\(',
100
+ 'severity': 'high',
101
+ 'description': 'Executing database queries in a loop can be very inefficient.',
102
+ 'suggestion': 'Use batch operations or join queries instead of querying in a loop.',
103
+ },
104
+ ]
105
+
106
+ # JavaScript performance patterns
107
+ self.javascript_patterns = [
108
+ {
109
+ 'name': 'DOM manipulation in loop',
110
+ 'pattern': r'for\s*\(.*?\)\s*\{.*?document\..*?\}',
111
+ 'severity': 'high',
112
+ 'description': 'Manipulating the DOM inside loops can cause performance issues.',
113
+ 'suggestion': 'Batch DOM updates or use DocumentFragment.',
114
+ },
115
+ {
116
+ 'name': 'Inefficient array manipulation',
117
+ 'pattern': r'for\s*\(.*?\)\s*\{.*?splice\(.*?\}',
118
+ 'severity': 'medium',
119
+ 'description': 'Using splice() in loops can be inefficient for large arrays.',
120
+ 'suggestion': 'Consider using filter() or other array methods.',
121
+ },
122
+ {
123
+ 'name': 'Creating functions in loops',
124
+ 'pattern': r'for\s*\(.*?\)\s*\{.*?function\s*\(.*?\)\s*\{.*?\}.*?\}',
125
+ 'severity': 'medium',
126
+ 'description': 'Creating functions inside loops can lead to performance issues.',
127
+ 'suggestion': 'Define the function outside the loop and reference it.',
128
+ },
129
+ {
130
+ 'name': 'Inefficient string concatenation',
131
+ 'pattern': r'for\s*\(.*?\)\s*\{.*?\+\=\s*[\'\"](.*?)[\'\"].*?\}',
132
+ 'severity': 'medium',
133
+ 'description': 'String concatenation in loops can be inefficient.',
134
+ 'suggestion': 'Use array join() or template literals.',
135
+ },
136
+ {
137
+ 'name': 'Using eval()',
138
+ 'pattern': r'eval\(',
139
+ 'severity': 'high',
140
+ 'description': 'Using eval() is slow and can introduce security vulnerabilities.',
141
+ 'suggestion': 'Avoid using eval() and use safer alternatives.',
142
+ },
143
+ {
144
+ 'name': 'Inefficient event handling',
145
+ 'pattern': r'addEventListener\([\'\"].*?[\'\"],\s*function',
146
+ 'severity': 'medium',
147
+ 'description': 'Anonymous functions in event listeners can lead to memory leaks.',
148
+ 'suggestion': 'Use named functions for event handlers to allow proper cleanup.',
149
+ },
150
+ ]
151
+
152
+ # TypeScript performance patterns (extends JavaScript patterns)
153
+ self.typescript_patterns = self.javascript_patterns + [
154
+ {
155
+ 'name': 'Inefficient type assertion',
156
+ 'pattern': r'<.*?>\s*\(.*?\)',
157
+ 'severity': 'low',
158
+ 'description': 'Excessive type assertions can impact runtime performance.',
159
+ 'suggestion': 'Use proper typing and interfaces instead of frequent type assertions.',
160
+ },
161
+ {
162
+ 'name': 'Complex type definitions',
163
+ 'pattern': r'type\s+\w+\s*=\s*\{[^\}]{500,}\}',
164
+ 'severity': 'medium',
165
+ 'description': 'Overly complex type definitions can slow down the TypeScript compiler.',
166
+ 'suggestion': 'Break complex types into smaller, reusable interfaces.',
167
+ },
168
+ ]
169
+
170
+ # Java performance patterns
171
+ self.java_patterns = [
172
+ {
173
+ 'name': 'Inefficient string concatenation',
174
+ 'pattern': r'for\s*\(.*?\)\s*\{.*?\+\=\s*[\'\"](.*?)[\'\"].*?\}',
175
+ 'severity': 'medium',
176
+ 'description': 'String concatenation in loops is inefficient in Java.',
177
+ 'suggestion': 'Use StringBuilder or StringBuffer instead.',
178
+ },
179
+ {
180
+ 'name': 'Creating objects in loops',
181
+ 'pattern': r'for\s*\(.*?\)\s*\{.*?new\s+\w+\(.*?\).*?\}',
182
+ 'severity': 'medium',
183
+ 'description': 'Creating objects inside loops can lead to excessive garbage collection.',
184
+ 'suggestion': 'Create objects outside the loop or use object pooling.',
185
+ },
186
+ {
187
+ 'name': 'Inefficient collection iteration',
188
+ 'pattern': r'for\s*\(int\s+i\s*=\s*0.*?i\s*<\s*\w+\.size\(\).*?\)',
189
+ 'severity': 'low',
190
+ 'description': 'Calling size() in each iteration can be inefficient for some collections.',
191
+ 'suggestion': 'Store the size in a variable before the loop.',
192
+ },
193
+ {
194
+ 'name': 'Using boxed primitives in performance-critical code',
195
+ 'pattern': r'(Integer|Boolean|Double|Float|Long)\s+\w+\s*=',
196
+ 'severity': 'low',
197
+ 'description': 'Using boxed primitives can be less efficient than primitive types.',
198
+ 'suggestion': 'Use primitive types (int, boolean, etc.) in performance-critical code.',
199
+ },
200
+ {
201
+ 'name': 'Inefficient exception handling',
202
+ 'pattern': r'try\s*\{.*?\}\s*catch\s*\(Exception\s+\w+\)\s*\{',
203
+ 'severity': 'medium',
204
+ 'description': 'Catching generic exceptions can hide issues and impact performance.',
205
+ 'suggestion': 'Catch specific exceptions and handle them appropriately.',
206
+ },
207
+ ]
208
+
209
+ # Go performance patterns
210
+ self.go_patterns = [
211
+ {
212
+ 'name': 'Inefficient string concatenation',
213
+ 'pattern': r'for\s+.*?\{.*?\+\=\s*[\'\"](.*?)[\'\"].*?\}',
214
+ 'severity': 'medium',
215
+ 'description': 'String concatenation in loops can be inefficient.',
216
+ 'suggestion': 'Use strings.Builder for string concatenation in loops.',
217
+ },
218
+ {
219
+ 'name': 'Inefficient slice operations',
220
+ 'pattern': r'for\s+.*?\{.*?append\(.*?\}',
221
+ 'severity': 'medium',
222
+ 'description': 'Repeatedly appending to a slice can cause multiple allocations.',
223
+ 'suggestion': 'Pre-allocate slices with make() when the size is known.',
224
+ },
225
+ {
226
+ 'name': 'Mutex in hot path',
227
+ 'pattern': r'func\s+\(.*?\)\s+\w+\(.*?\)\s+\{.*?Lock\(\).*?Unlock\(\)',
228
+ 'severity': 'medium',
229
+ 'description': 'Using mutexes in frequently called functions can impact performance.',
230
+ 'suggestion': 'Consider using atomic operations or redesigning for less contention.',
231
+ },
232
+ {
233
+ 'name': 'Inefficient map iteration',
234
+ 'pattern': r'for\s+\w+,\s*_\s*:=\s*range',
235
+ 'severity': 'low',
236
+ 'description': 'Iterating with "for k, _ := range" declares a value that is never used.',
237
+ 'suggestion': 'Omit the blank identifier and write "for k := range" instead.',
238
+ },
239
+ ]
240
+
241
+ # Rust performance patterns
242
+ self.rust_patterns = [
243
+ {
244
+ 'name': 'Inefficient string operations',
245
+ 'pattern': r'for\s+.*?\{.*?\.push_str\(.*?\}',
246
+ 'severity': 'medium',
247
+ 'description': 'Repeatedly pushing to strings can be inefficient.',
248
+ 'suggestion': 'Use string concatenation with the format! macro or String::with_capacity().',
249
+ },
250
+ {
251
+ 'name': 'Excessive cloning',
252
+ 'pattern': r'\.clone\(\)',
253
+ 'severity': 'medium',
254
+ 'description': 'Excessive cloning can impact performance.',
255
+ 'suggestion': 'Use references or ownership transfer where possible.',
256
+ },
257
+ {
258
+ 'name': 'Inefficient vector operations',
259
+ 'pattern': r'for\s+.*?\{.*?\.push\(.*?\}',
260
+ 'severity': 'medium',
261
+ 'description': 'Repeatedly pushing to vectors can cause multiple allocations.',
262
+ 'suggestion': 'Pre-allocate vectors with Vec::with_capacity() when the size is known.',
263
+ },
264
+ {
265
+ 'name': 'Box allocation in loops',
266
+ 'pattern': r'for\s+.*?\{.*?Box::new\(.*?\}',
267
+ 'severity': 'medium',
268
+ 'description': 'Allocating boxes in loops can be inefficient.',
269
+ 'suggestion': 'Allocate memory outside the loop when possible.',
270
+ },
271
+ ]
272
+
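Each pattern entry pairs a regex with metadata; applying one to a source string and converting the match offset to a line number works exactly as in the analyzers below:

import re

pattern = {
    'name': 'Using range(len())',
    'pattern': r'for\s+\w+\s+in\s+range\(len\(',
    'severity': 'low',
}
content = "items = [1, 2]\nfor i in range(len(items)):\n    print(items[i])\n"

for match in re.finditer(pattern['pattern'], content):
    line_number = content[:match.start()].count('\n') + 1
    print(pattern['name'], 'at line', line_number)  # Using range(len()) at line 2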
273
+ def analyze_repository(self, repo_path, languages):
274
+ """
275
+ Analyze code performance in a repository for the specified languages using parallel processing.
276
+
277
+ Args:
278
+ repo_path (str): The path to the repository.
279
+ languages (list): A list of programming languages to analyze.
280
+
281
+ Returns:
282
+ dict: A dictionary containing performance analysis results for each language.
283
+ """
284
+ logger.info(f"Analyzing performance in repository at {repo_path} for languages: {languages}")
285
+
286
+ results = {}
287
+
288
+ # Define a function to analyze a single language
289
+ def analyze_language(language):
290
+ if language in self.analyzers:
291
+ try:
292
+ logger.info(f"Analyzing {language} code performance in {repo_path}")
293
+ return language, self.analyzers[language](repo_path)
294
+ except Exception as e:
295
+ logger.error(f"Error analyzing {language} code performance: {e}")
296
+ return language, {
297
+ 'status': 'error',
298
+ 'error': str(e),
299
+ 'issues': [],
300
+ }
301
+ else:
302
+ logger.warning(f"No performance analyzer available for {language}")
303
+ return language, {
304
+ 'status': 'not_supported',
305
+ 'message': f"Performance analysis for {language} is not supported yet.",
306
+ 'issues': [],
307
+ }
308
+
309
+ # Use ThreadPoolExecutor to analyze languages in parallel
310
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(len(languages), 5))) as executor:  # at least one worker even if languages is empty
311
+ # Submit all language analysis tasks
312
+ future_to_language = {executor.submit(analyze_language, language): language for language in languages}
313
+
314
+ # Process results as they complete
315
+ for future in concurrent.futures.as_completed(future_to_language):
316
+ language = future_to_language[future]
317
+ try:
318
+ lang, result = future.result()
319
+ results[lang] = result
320
+ logger.info(f"Completed performance analysis for {lang}")
321
+ except Exception as e:
322
+ logger.error(f"Exception occurred during performance analysis of {language}: {e}")
323
+ results[language] = {
324
+ 'status': 'error',
325
+ 'error': str(e),
326
+ 'issues': [],
327
+ }
328
+
329
+ # Identify hotspots (files with multiple performance issues)
330
+ hotspots = self._identify_hotspots(results)
331
+
332
+ return {
333
+ 'language_results': results,
334
+ 'hotspots': hotspots,
335
+ }
336
+
337
+ def _identify_hotspots(self, results):
338
+ """
339
+ Identify performance hotspots across all languages.
340
+
341
+ Args:
342
+ results (dict): Performance analysis results for each language.
343
+
344
+ Returns:
345
+ list: A list of hotspot files with multiple performance issues.
346
+ """
347
+ # Count issues per file across all languages
348
+ file_issue_count = defaultdict(int)
349
+ file_issues = defaultdict(list)
350
+
351
+ for language, language_result in results.items():
352
+ for issue in language_result.get('issues', []):
353
+ file_path = issue.get('file', '')
354
+ if file_path:
355
+ file_issue_count[file_path] += 1
356
+ file_issues[file_path].append(issue)
357
+
358
+ # Identify hotspots (files with multiple issues)
359
+ hotspots = []
360
+ for file_path, count in sorted(file_issue_count.items(), key=lambda x: x[1], reverse=True):
361
+ if count >= 2: # Files with at least 2 issues are considered hotspots
362
+ hotspots.append({
363
+ 'file': file_path,
364
+ 'issue_count': count,
365
+ 'issues': file_issues[file_path],
366
+ })
367
+
368
+ return hotspots[:10] # Return top 10 hotspots
369
+
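_identify_hotspots reduces to counting issues per file across languages and keeping files with two or more; a compressed illustration with made-up results:

from collections import defaultdict

results = {
    'Python': {'issues': [{'file': 'src/app.py'}, {'file': 'src/app.py'}]},
    'JavaScript': {'issues': [{'file': 'web/ui.js'}]},
}

counts = defaultdict(int)
for lang_result in results.values():
    for issue in lang_result.get('issues', []):
        counts[issue.get('file', '')] += 1

hotspots = [f for f, n in sorted(counts.items(), key=lambda x: x[1], reverse=True) if n >= 2]
print(hotspots)  # ['src/app.py'] -- web/ui.js has only one issue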
370
+ def _analyze_python_performance(self, repo_path):
371
+ """
372
+ Analyze Python code for performance issues.
373
+
374
+ Args:
375
+ repo_path (str): The path to the repository.
376
+
377
+ Returns:
378
+ dict: Performance analysis results for Python code.
379
+ """
380
+ logger.info(f"Analyzing Python code performance in {repo_path}")
381
+
382
+ # Find Python files
383
+ python_files = []
384
+ for root, _, files in os.walk(repo_path):
385
+ for file in files:
386
+ if file.endswith('.py'):
387
+ python_files.append(os.path.join(root, file))
388
+
389
+ if not python_files:
390
+ return {
391
+ 'status': 'no_files',
392
+ 'message': 'No Python files found in the repository.',
393
+ 'issues': [],
394
+ }
395
+
396
+ # Analyze each Python file
397
+ issues = []
398
+ for file_path in python_files:
399
+ try:
400
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
401
+ content = f.read()
402
+
403
+ # Check for performance patterns
404
+ for pattern in self.python_patterns:
405
+ matches = re.finditer(pattern['pattern'], content, re.DOTALL)  # several patterns span multiple lines, so let '.' cross newlines
406
+ for match in matches:
407
+ line_number = content[:match.start()].count('\n') + 1
408
+ code_snippet = match.group(0)
409
+
410
+ issues.append({
411
+ 'file': file_path,
412
+ 'line': line_number,
413
+ 'code': code_snippet,
414
+ 'issue': pattern['name'],
415
+ 'description': pattern['description'],
416
+ 'suggestion': pattern['suggestion'],
417
+ 'severity': pattern['severity'],
418
+ 'language': 'Python',
419
+ })
420
+ except Exception as e:
421
+ logger.error(f"Error analyzing Python file {file_path}: {e}")
422
+
423
+ # Group issues by severity
424
+ issues_by_severity = defaultdict(list)
425
+ for issue in issues:
426
+ severity = issue.get('severity', 'unknown')
427
+ issues_by_severity[severity].append(issue)
428
+
429
+ return {
430
+ 'status': 'success',
431
+ 'issues': issues,
432
+ 'issues_by_severity': dict(issues_by_severity),
433
+ 'issue_count': len(issues),
434
+ 'files_analyzed': len(python_files),
435
+ }
436
+
437
+ def _analyze_javascript_performance(self, repo_path):
438
+ """
439
+ Analyze JavaScript code for performance issues.
440
+
441
+ Args:
442
+ repo_path (str): The path to the repository.
443
+
444
+ Returns:
445
+ dict: Performance analysis results for JavaScript code.
446
+ """
447
+ logger.info(f"Analyzing JavaScript code performance in {repo_path}")
448
+
449
+ # Find JavaScript files
450
+ js_files = []
451
+ for root, _, files in os.walk(repo_path):
452
+ if 'node_modules' in root:
453
+ continue
454
+ for file in files:
455
+ if file.endswith(('.js', '.jsx')):
456
+ js_files.append(os.path.join(root, file))
457
+
458
+ if not js_files:
459
+ return {
460
+ 'status': 'no_files',
461
+ 'message': 'No JavaScript files found in the repository.',
462
+ 'issues': [],
463
+ }
464
+
465
+ # Analyze each JavaScript file
466
+ issues = []
467
+ for file_path in js_files:
468
+ try:
469
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
470
+ content = f.read()
471
+
472
+ # Check for performance patterns
473
+ for pattern in self.javascript_patterns:
474
+ matches = re.finditer(pattern['pattern'], content, re.DOTALL)  # loop-body patterns span multiple lines
475
+ for match in matches:
476
+ line_number = content[:match.start()].count('\n') + 1
477
+ code_snippet = match.group(0)
478
+
479
+ issues.append({
480
+ 'file': file_path,
481
+ 'line': line_number,
482
+ 'code': code_snippet,
483
+ 'issue': pattern['name'],
484
+ 'description': pattern['description'],
485
+ 'suggestion': pattern['suggestion'],
486
+ 'severity': pattern['severity'],
487
+ 'language': 'JavaScript',
488
+ })
489
+ except Exception as e:
490
+ logger.error(f"Error analyzing JavaScript file {file_path}: {e}")
491
+
492
+ # Group issues by severity
493
+ issues_by_severity = defaultdict(list)
494
+ for issue in issues:
495
+ severity = issue.get('severity', 'unknown')
496
+ issues_by_severity[severity].append(issue)
497
+
498
+ return {
499
+ 'status': 'success',
500
+ 'issues': issues,
501
+ 'issues_by_severity': dict(issues_by_severity),
502
+ 'issue_count': len(issues),
503
+ 'files_analyzed': len(js_files),
504
+ }
505
+
506
+ def _analyze_typescript_performance(self, repo_path):
507
+ """
508
+ Analyze TypeScript code for performance issues.
509
+
510
+ Args:
511
+ repo_path (str): The path to the repository.
512
+
513
+ Returns:
514
+ dict: Performance analysis results for TypeScript code.
515
+ """
516
+ logger.info(f"Analyzing TypeScript code performance in {repo_path}")
517
+
518
+ # Find TypeScript files
519
+ ts_files = []
520
+ for root, _, files in os.walk(repo_path):
521
+ if 'node_modules' in root:
522
+ continue
523
+ for file in files:
524
+ if file.endswith(('.ts', '.tsx')):
525
+ ts_files.append(os.path.join(root, file))
526
+
527
+ if not ts_files:
528
+ return {
529
+ 'status': 'no_files',
530
+ 'message': 'No TypeScript files found in the repository.',
531
+ 'issues': [],
532
+ }
533
+
534
+ # Analyze each TypeScript file
535
+ issues = []
536
+ for file_path in ts_files:
537
+ try:
538
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
539
+ content = f.read()
540
+
541
+ # Check for performance patterns
542
+ for pattern in self.typescript_patterns:
543
+ matches = re.finditer(pattern['pattern'], content, re.DOTALL)  # loop-body patterns span multiple lines
544
+ for match in matches:
545
+ line_number = content[:match.start()].count('\n') + 1
546
+ code_snippet = match.group(0)
547
+
548
+ issues.append({
549
+ 'file': file_path,
550
+ 'line': line_number,
551
+ 'code': code_snippet,
552
+ 'issue': pattern['name'],
553
+ 'description': pattern['description'],
554
+ 'suggestion': pattern['suggestion'],
555
+ 'severity': pattern['severity'],
556
+ 'language': 'TypeScript',
557
+ })
558
+ except Exception as e:
559
+ logger.error(f"Error analyzing TypeScript file {file_path}: {e}")
560
+
561
+ # Group issues by severity
562
+ issues_by_severity = defaultdict(list)
563
+ for issue in issues:
564
+ severity = issue.get('severity', 'unknown')
565
+ issues_by_severity[severity].append(issue)
566
+
567
+ return {
568
+ 'status': 'success',
569
+ 'issues': issues,
570
+ 'issues_by_severity': dict(issues_by_severity),
571
+ 'issue_count': len(issues),
572
+ 'files_analyzed': len(ts_files),
573
+ }
574
+
575
+ def _analyze_java_performance(self, repo_path):
576
+ """
577
+ Analyze Java code for performance issues.
578
+
579
+ Args:
580
+ repo_path (str): The path to the repository.
581
+
582
+ Returns:
583
+ dict: Performance analysis results for Java code.
584
+ """
585
+ logger.info(f"Analyzing Java code performance in {repo_path}")
586
+
587
+ # Find Java files
588
+ java_files = []
589
+ for root, _, files in os.walk(repo_path):
590
+ for file in files:
591
+ if file.endswith('.java'):
592
+ java_files.append(os.path.join(root, file))
593
+
594
+ if not java_files:
595
+ return {
596
+ 'status': 'no_files',
597
+ 'message': 'No Java files found in the repository.',
598
+ 'issues': [],
599
+ }
600
+
601
+ # Analyze each Java file
602
+ issues = []
603
+ for file_path in java_files:
604
+ try:
605
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
606
+ content = f.read()
607
+
608
+ # Check for performance patterns
609
+ for pattern in self.java_patterns:
610
+ matches = re.finditer(pattern['pattern'], content, re.DOTALL)  # loop-body patterns span multiple lines
611
+ for match in matches:
612
+ line_number = content[:match.start()].count('\n') + 1
613
+ code_snippet = match.group(0)
614
+
615
+ issues.append({
616
+ 'file': file_path,
617
+ 'line': line_number,
618
+ 'code': code_snippet,
619
+ 'issue': pattern['name'],
620
+ 'description': pattern['description'],
621
+ 'suggestion': pattern['suggestion'],
622
+ 'severity': pattern['severity'],
623
+ 'language': 'Java',
624
+ })
625
+ except Exception as e:
626
+ logger.error(f"Error analyzing Java file {file_path}: {e}")
627
+
628
+ # Group issues by severity
629
+ issues_by_severity = defaultdict(list)
630
+ for issue in issues:
631
+ severity = issue.get('severity', 'unknown')
632
+ issues_by_severity[severity].append(issue)
633
+
634
+ return {
635
+ 'status': 'success',
636
+ 'issues': issues,
637
+ 'issues_by_severity': dict(issues_by_severity),
638
+ 'issue_count': len(issues),
639
+ 'files_analyzed': len(java_files),
640
+ }
641
+
642
+ def _analyze_go_performance(self, repo_path):
643
+ """
644
+ Analyze Go code for performance issues.
645
+
646
+ Args:
647
+ repo_path (str): The path to the repository.
648
+
649
+ Returns:
650
+ dict: Performance analysis results for Go code.
651
+ """
652
+ logger.info(f"Analyzing Go code performance in {repo_path}")
653
+
654
+ # Find Go files
655
+ go_files = []
656
+ for root, _, files in os.walk(repo_path):
657
+ for file in files:
658
+ if file.endswith('.go'):
659
+ go_files.append(os.path.join(root, file))
660
+
661
+ if not go_files:
662
+ return {
663
+ 'status': 'no_files',
664
+ 'message': 'No Go files found in the repository.',
665
+ 'issues': [],
666
+ }
667
+
668
+ # Analyze each Go file
669
+ issues = []
670
+ for file_path in go_files:
671
+ try:
672
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
673
+ content = f.read()
674
+
675
+ # Check for performance patterns
676
+ for pattern in self.go_patterns:
677
+ matches = re.finditer(pattern['pattern'], content, re.DOTALL)  # loop-body patterns span multiple lines
678
+ for match in matches:
679
+ line_number = content[:match.start()].count('\n') + 1
680
+ code_snippet = match.group(0)
681
+
682
+ issues.append({
683
+ 'file': file_path,
684
+ 'line': line_number,
685
+ 'code': code_snippet,
686
+ 'issue': pattern['name'],
687
+ 'description': pattern['description'],
688
+ 'suggestion': pattern['suggestion'],
689
+ 'severity': pattern['severity'],
690
+ 'language': 'Go',
691
+ })
692
+ except Exception as e:
693
+ logger.error(f"Error analyzing Go file {file_path}: {e}")
694
+
695
+ # Group issues by severity
696
+ issues_by_severity = defaultdict(list)
697
+ for issue in issues:
698
+ severity = issue.get('severity', 'unknown')
699
+ issues_by_severity[severity].append(issue)
700
+
701
+ return {
702
+ 'status': 'success',
703
+ 'issues': issues,
704
+ 'issues_by_severity': dict(issues_by_severity),
705
+ 'issue_count': len(issues),
706
+ 'files_analyzed': len(go_files),
707
+ }
708
+
709
+ def _analyze_rust_performance(self, repo_path):
710
+ """
711
+ Analyze Rust code for performance issues.
712
+
713
+ Args:
714
+ repo_path (str): The path to the repository.
715
+
716
+ Returns:
717
+ dict: Performance analysis results for Rust code.
718
+ """
719
+ logger.info(f"Analyzing Rust code performance in {repo_path}")
720
+
721
+ # Find Rust files
722
+ rust_files = []
723
+ for root, _, files in os.walk(repo_path):
724
+ for file in files:
725
+ if file.endswith('.rs'):
726
+ rust_files.append(os.path.join(root, file))
727
+
728
+ if not rust_files:
729
+ return {
730
+ 'status': 'no_files',
731
+ 'message': 'No Rust files found in the repository.',
732
+ 'issues': [],
733
+ }
734
+
735
+ # Analyze each Rust file
736
+ issues = []
737
+ for file_path in rust_files:
738
+ try:
739
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
740
+ content = f.read()
741
+
742
+ # Check for performance patterns
743
+ for pattern in self.rust_patterns:
744
+ matches = re.finditer(pattern['pattern'], content, re.DOTALL)  # loop-body patterns span multiple lines
745
+ for match in matches:
746
+ line_number = content[:match.start()].count('\n') + 1
747
+ code_snippet = match.group(0)
748
+
749
+ issues.append({
750
+ 'file': file_path,
751
+ 'line': line_number,
752
+ 'code': code_snippet,
753
+ 'issue': pattern['name'],
754
+ 'description': pattern['description'],
755
+ 'suggestion': pattern['suggestion'],
756
+ 'severity': pattern['severity'],
757
+ 'language': 'Rust',
758
+ })
759
+ except Exception as e:
760
+ logger.error(f"Error analyzing Rust file {file_path}: {e}")
761
+
762
+ # Group issues by severity
763
+ issues_by_severity = defaultdict(list)
764
+ for issue in issues:
765
+ severity = issue.get('severity', 'unknown')
766
+ issues_by_severity[severity].append(issue)
767
+
768
+ return {
769
+ 'status': 'success',
770
+ 'issues': issues,
771
+ 'issues_by_severity': dict(issues_by_severity),
772
+ 'issue_count': len(issues),
773
+ 'files_analyzed': len(rust_files),
774
+ }
src/services/report_generator.py ADDED
@@ -0,0 +1,782 @@
1
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Report Generator Service
+
+ This module provides functionality for generating comprehensive code review reports
+ in various formats based on the analysis results.
+ """
+
+ import os
+ import json
+ import logging
+ import datetime
+ from pathlib import Path
+ import markdown
+ import pdfkit
+ import csv
+
+ logger = logging.getLogger(__name__)
+
+ class ReportGenerator:
+     """
+     Service for generating code review reports in various formats.
+     """
+
+     def __init__(self, output_dir="reports"):
+         """
+         Initialize the ReportGenerator.
+
+         Args:
+             output_dir (str): Directory to save generated reports.
+         """
+         self.output_dir = output_dir
+         os.makedirs(output_dir, exist_ok=True)
+         logger.info(f"Initialized ReportGenerator with output directory: {output_dir}")
+
+     def generate_report(self, repo_name, results, format_type="all"):
+         """
+         Generate a report based on the analysis results.
+
+         Args:
+             repo_name (str): Name of the repository.
+             results (dict): Analysis results.
+             format_type (str): Report format type (json, html, pdf, csv, or all).
+
+         Returns:
+             dict: Paths to the generated reports.
+         """
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         report_name = f"{repo_name}_{timestamp}"
+         report_paths = {}
+
+         # Create report content
+         report_content = self._create_report_content(repo_name, results)
+
+         # Generate reports in requested formats
+         if format_type in ["json", "all"]:
+             json_path = self._generate_json_report(report_name, report_content)
+             report_paths["json"] = json_path
+
+         if format_type in ["html", "all"]:
+             html_path = self._generate_html_report(report_name, report_content)
+             report_paths["html"] = html_path
+
+         if format_type in ["pdf", "all"]:
+             pdf_path = self._generate_pdf_report(report_name, report_content)
+             report_paths["pdf"] = pdf_path
+
+         if format_type in ["csv", "all"]:
+             csv_path = self._generate_csv_report(report_name, report_content)
+             report_paths["csv"] = csv_path
+
+         logger.info(f"Generated {len(report_paths)} report(s) for {repo_name}")
+         return report_paths
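A hedged usage sketch of `generate_report`; the `results` dict below is a minimal stand-in in which only the keys this class reads are populated:

    generator = ReportGenerator(output_dir="reports")
    results = {
        "repository_info": {"file_count": 12, "size_bytes": 40960},
        "code_analysis": {}, "security": {}, "performance": {}, "ai_review": {},
    }
    paths = generator.generate_report("example-repo", results, format_type="json")
    print(paths)  # e.g. {'json': 'reports/example-repo_20240101_120000.json'}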
+
+     def _create_report_content(self, repo_name, results):
+         """
+         Create the content for the report.
+
+         Args:
+             repo_name (str): Name of the repository.
+             results (dict): Analysis results.
+
+         Returns:
+             dict: Structured report content.
+         """
+         # Extract repository info and metrics
+         repo_info = results.get("repository_info", {})
+         total_files = repo_info.get("file_count", 0)
+         repo_size = repo_info.get("size_bytes", 0)
+
+         # Extract code analysis results
+         code_analysis = results.get("code_analysis", {})
+         total_code_issues = sum(len(lang_result.get("issues", [])) for lang_result in code_analysis.values())
+         critical_code_issues = sum(1 for lang_result in code_analysis.values()
+                                    for issue in lang_result.get("issues", [])
+                                    if issue.get("severity", "").lower() == "critical")
+
+         # Extract security scan results
+         security_scan = results.get("security", {})
+         total_vulnerabilities = sum(len(lang_result.get("vulnerabilities", []))
+                                     for lang_result in security_scan.get("vulnerabilities_by_language", {}).values())
+         critical_vulnerabilities = len(security_scan.get("critical_vulnerabilities", []))
+
+         # Extract performance analysis results
+         performance_analysis = results.get("performance", {})
+         total_performance_issues = sum(len(lang_result.get("issues", []))
+                                        for lang_result in performance_analysis.get("issues_by_language", {}).values())
+         performance_hotspots = len(performance_analysis.get("hotspots", []))
+
+         # Calculate an inline overall score and rating (informational only; the
+         # figures actually reported in the summary come from _calculate_summary_metrics below)
+         max_score = 100
+         deductions = {
+             "code_issues": total_code_issues * 2,
+             "critical_code_issues": critical_code_issues * 5,
+             "vulnerabilities": total_vulnerabilities * 3,
+             "critical_vulnerabilities": critical_vulnerabilities * 10,
+             "performance_issues": total_performance_issues * 2,
+             "performance_hotspots": performance_hotspots * 3
+         }
+         overall_score = max(0, max_score - sum(deductions.values()))
+
+         quality_ratings = [
+             (95, "Excellent"),
+             (85, "Very Good"),
+             (75, "Good"),
+             (65, "Fair"),
+             (0, "Poor")
+         ]
+         quality_rating = next(rating for threshold, rating in quality_ratings if overall_score >= threshold)
+
+         # Extract language breakdown
+         language_breakdown = {}
+         for language in code_analysis.keys():
+             if code_analysis[language].get("status") != "error":
+                 language_breakdown[language] = {
+                     "files": len([f for f in code_analysis[language].get("issues", []) if "file" in f]),
+                     "lines": code_analysis[language].get("total_lines", 0),
+                     "percentage": code_analysis[language].get("percentage", 0),
+                     "issues": len(code_analysis[language].get("issues", []))
+                 }
+
+         # Extract AI review results
+         ai_review = results.get("ai_review", {})
+
+         # Calculate summary metrics
+         summary_metrics = self._calculate_summary_metrics(results)
+
+         # Create report structure
+         report = {
+             "metadata": {
+                 "repository_name": repo_name,
+                 "report_date": datetime.datetime.now().isoformat(),
+                 "repository_info": repo_info,
+             },
+             "summary": {
+                 "metrics": summary_metrics,
+                 "language_breakdown": language_breakdown,
+                 "executive_summary": ai_review.get("summary", "No AI review summary available."),
+             },
+             "code_quality": {
+                 "issues_by_language": code_analysis,
+                 "top_issues": self._extract_top_issues(code_analysis),
+             },
+             "security": {
+                 "vulnerabilities_by_language": security_scan,
+                 "critical_vulnerabilities": self._extract_critical_vulnerabilities(security_scan),
+             },
+             "performance": {
+                 "issues_by_language": performance_analysis.get("language_results", {}),
+                 "hotspots": performance_analysis.get("hotspots", []),
+             },
+             "ai_review": {
+                 "file_reviews": ai_review.get("reviews", {}),
+                 "summary": ai_review.get("summary", "No AI review summary available."),
+             },
+             "recommendations": self._generate_recommendations(results),
+         }
+
+         return report
+
+     def _calculate_summary_metrics(self, results):
+         """
+         Calculate summary metrics from the analysis results.
+
+         Args:
+             results (dict): Analysis results.
+
+         Returns:
+             dict: Summary metrics.
+         """
+         metrics = {
+             "total_files": results.get("repository_info", {}).get("file_count", 0),
+             # repository_service reports the size under 'size_bytes'
+             "repository_size": results.get("repository_info", {}).get("size_bytes", 0),
+         }
+
+         # Count code quality issues
+         code_analysis = results.get("code_analysis", {})
+         total_issues = 0
+         critical_issues = 0
+         for language, language_results in code_analysis.items():
+             total_issues += language_results.get("issue_count", 0)
+             for issue in language_results.get("issues", []):
+                 if issue.get("severity", "").lower() in ["critical", "high"]:
+                     critical_issues += 1
+
+         metrics["total_code_issues"] = total_issues
+         metrics["critical_code_issues"] = critical_issues
+
+         # Count security vulnerabilities
+         security_scan = results.get("security_scan", {})
+         total_vulnerabilities = 0
+         critical_vulnerabilities = 0
+         for language, language_results in security_scan.items():
+             total_vulnerabilities += language_results.get("vulnerability_count", 0)
+             for vuln in language_results.get("vulnerabilities", []):
+                 if vuln.get("severity", "").lower() in ["critical", "high"]:
+                     critical_vulnerabilities += 1
+
+         metrics["total_vulnerabilities"] = total_vulnerabilities
+         metrics["critical_vulnerabilities"] = critical_vulnerabilities
+
+         # Count performance issues
+         performance_analysis = results.get("performance_analysis", {})
+         total_performance_issues = 0
+         for language, language_results in performance_analysis.get("language_results", {}).items():
+             total_performance_issues += language_results.get("issue_count", 0)
+
+         metrics["total_performance_issues"] = total_performance_issues
+         metrics["performance_hotspots"] = len(performance_analysis.get("hotspots", []))
+
+         # Calculate overall score (0-100)
+         # This is a simple scoring algorithm that can be refined
+         base_score = 100
+         deductions = 0
+
+         # Deduct for code issues (more weight for critical issues)
+         if metrics["total_files"] > 0:
+             code_issue_ratio = metrics["total_code_issues"] / metrics["total_files"]
+             deductions += min(30, code_issue_ratio * 100)
+             deductions += min(20, (metrics["critical_code_issues"] / metrics["total_files"]) * 200)
+
+         # Deduct for security vulnerabilities (heavy weight for critical vulnerabilities)
+         if metrics["total_files"] > 0:
+             deductions += min(30, (metrics["total_vulnerabilities"] / metrics["total_files"]) * 150)
+             deductions += min(40, (metrics["critical_vulnerabilities"] / metrics["total_files"]) * 300)
+
+         # Deduct for performance issues
+         if metrics["total_files"] > 0:
+             deductions += min(20, (metrics["total_performance_issues"] / metrics["total_files"]) * 80)
+             deductions += min(10, (metrics["performance_hotspots"] / metrics["total_files"]) * 100)
+
+         metrics["overall_score"] = max(0, min(100, base_score - deductions))
+
+         # Determine quality rating based on score
+         if metrics["overall_score"] >= 90:
+             metrics["quality_rating"] = "Excellent"
+         elif metrics["overall_score"] >= 80:
+             metrics["quality_rating"] = "Good"
+         elif metrics["overall_score"] >= 70:
+             metrics["quality_rating"] = "Satisfactory"
+         elif metrics["overall_score"] >= 50:
+             metrics["quality_rating"] = "Needs Improvement"
+         else:
+             metrics["quality_rating"] = "Poor"
+
+         return metrics
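A worked pass through the deductions above, for a hypothetical repository with 100 files, 20 code issues (2 critical/high), 1 vulnerability (none critical), 5 performance issues, and 1 hotspot:

    # code issues:        min(30, (20 / 100) * 100) = 20.0
    # critical issues:    min(20, (2 / 100) * 200)  =  4.0
    # vulnerabilities:    min(30, (1 / 100) * 150)  =  1.5
    # critical vulns:     min(40, (0 / 100) * 300)  =  0.0
    # performance issues: min(20, (5 / 100) * 80)   =  4.0
    # hotspots:           min(10, (1 / 100) * 100)  =  1.0
    # overall_score = max(0, min(100, 100 - 30.5)) = 69.5 -> "Needs Improvement"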
+
+     def _extract_top_issues(self, code_analysis, limit=10):
+         """
+         Extract the top code quality issues from the analysis results.
+
+         Args:
+             code_analysis (dict): Code analysis results.
+             limit (int): Maximum number of issues to extract.
+
+         Returns:
+             list: Top code quality issues.
+         """
+         all_issues = []
+
+         for language, language_results in code_analysis.items():
+             for issue in language_results.get("issues", []):
+                 # Add language to the issue
+                 issue["language"] = language
+                 all_issues.append(issue)
+
+         # Sort issues by severity and then by line count if available
+         severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
+
+         def issue_sort_key(issue):
+             severity = issue.get("severity", "").lower()
+             severity_value = severity_order.get(severity, 5)
+             return (severity_value, -issue.get("line_count", 0))
+
+         sorted_issues = sorted(all_issues, key=issue_sort_key)
+
+         return sorted_issues[:limit]
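The tuple key sorts by severity rank first, then by descending line count within a rank. A self-contained check with invented issue dicts:

    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
    issues = [
        {"issue": "B", "severity": "high", "line_count": 3},
        {"issue": "A", "severity": "critical"},
        {"issue": "C", "severity": "high", "line_count": 9},
    ]
    ranked = sorted(issues, key=lambda i: (severity_order.get(i.get("severity", "").lower(), 5),
                                           -i.get("line_count", 0)))
    print([i["issue"] for i in ranked])  # -> ['A', 'C', 'B']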
+
+     def _extract_critical_vulnerabilities(self, security_scan, limit=10):
+         """
+         Extract critical security vulnerabilities from the scan results.
+
+         Args:
+             security_scan (dict): Security scan results.
+             limit (int): Maximum number of vulnerabilities to extract.
+
+         Returns:
+             list: Critical security vulnerabilities.
+         """
+         all_vulnerabilities = []
+
+         for language, language_results in security_scan.items():
+             for vuln in language_results.get("vulnerabilities", []):
+                 # Add language to the vulnerability
+                 vuln["language"] = language
+                 all_vulnerabilities.append(vuln)
+
+         # Sort vulnerabilities by severity
+         severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
+
+         def vuln_sort_key(vuln):
+             severity = vuln.get("severity", "").lower()
+             severity_value = severity_order.get(severity, 5)
+             return severity_value
+
+         sorted_vulnerabilities = sorted(all_vulnerabilities, key=vuln_sort_key)
+
+         return sorted_vulnerabilities[:limit]
+
+     def _generate_recommendations(self, results):
+         """
+         Generate recommendations based on the analysis results.
+
+         Args:
+             results (dict): Analysis results.
+
+         Returns:
+             dict: Recommendations categorized by priority.
+         """
+         recommendations = {
+             "high_priority": [],
+             "medium_priority": [],
+             "low_priority": [],
+         }
+
+         # Extract critical security vulnerabilities as high priority recommendations
+         security_scan = results.get("security_scan", {})
+         for language, language_results in security_scan.items():
+             for vuln in language_results.get("vulnerabilities", []):
+                 if vuln.get("severity", "").lower() in ["critical", "high"]:
+                     recommendations["high_priority"].append({
+                         "type": "security",
+                         "language": language,
+                         "issue": vuln.get("issue", "Unknown vulnerability"),
+                         "description": vuln.get("description", ""),
+                         "file": vuln.get("file", ""),
+                         "line": vuln.get("line", ""),
+                         "recommendation": vuln.get("recommendation", "Fix this security vulnerability."),
+                     })
+
+         # Extract critical code quality issues as medium priority recommendations
+         code_analysis = results.get("code_analysis", {})
+         for language, language_results in code_analysis.items():
+             for issue in language_results.get("issues", []):
+                 if issue.get("severity", "").lower() in ["critical", "high"]:
+                     recommendations["medium_priority"].append({
+                         "type": "code_quality",
+                         "language": language,
+                         "issue": issue.get("issue", "Unknown issue"),
+                         "description": issue.get("description", ""),
+                         "file": issue.get("file", ""),
+                         "line": issue.get("line", ""),
+                         "recommendation": issue.get("recommendation", "Address this code quality issue."),
+                     })
+
+         # Extract performance hotspots as medium priority recommendations
+         performance_analysis = results.get("performance_analysis", {})
+         for hotspot in performance_analysis.get("hotspots", []):
+             recommendations["medium_priority"].append({
+                 "type": "performance",
+                 "language": hotspot.get("language", ""),
+                 "issue": "Performance Hotspot",
+                 "description": f"File contains {hotspot.get('issue_count', 0)} performance issues",
+                 "file": hotspot.get("file", ""),
+                 "recommendation": "Optimize this file to improve performance.",
+             })
+
+         # Extract other performance issues as low priority recommendations
+         for language, language_results in performance_analysis.get("language_results", {}).items():
+             for issue in language_results.get("issues", []):
+                 # Skip issues that are already part of hotspots
+                 if any(hotspot.get("file", "") == issue.get("file", "") for hotspot in performance_analysis.get("hotspots", [])):
+                     continue
+
+                 recommendations["low_priority"].append({
+                     "type": "performance",
+                     "language": language,
+                     "issue": issue.get("issue", "Unknown issue"),
+                     "description": issue.get("description", ""),
+                     "file": issue.get("file", ""),
+                     "line": issue.get("line", ""),
+                     "recommendation": issue.get("recommendation", "Consider optimizing this code."),
+                 })
+
+         # Extract AI review suggestions as recommendations
+         ai_review = results.get("ai_review", {})
+         for file_path, review in ai_review.get("reviews", {}).items():
+             for suggestion in review.get("suggestions", []):
+                 priority = "medium_priority"
+                 if "security" in suggestion.get("section", "").lower():
+                     priority = "high_priority"
+                 elif "performance" in suggestion.get("section", "").lower():
+                     priority = "low_priority"
+
+                 recommendations[priority].append({
+                     "type": "ai_review",
+                     "language": "",  # AI review doesn't specify language
+                     "issue": suggestion.get("section", "AI Suggestion"),
+                     "description": suggestion.get("description", ""),
+                     "file": file_path,
+                     "line": suggestion.get("line", ""),
+                     "recommendation": suggestion.get("details", ""),
+                 })
+
+         # Limit the number of recommendations in each category
+         limit = 15
+         recommendations["high_priority"] = recommendations["high_priority"][:limit]
+         recommendations["medium_priority"] = recommendations["medium_priority"][:limit]
+         recommendations["low_priority"] = recommendations["low_priority"][:limit]
+
+         return recommendations
+
+     def _generate_json_report(self, report_name, report_content):
+         """
+         Generate a JSON report.
+
+         Args:
+             report_name (str): Name of the report.
+             report_content (dict): Report content.
+
+         Returns:
+             str: Path to the generated report.
+         """
+         report_path = os.path.join(self.output_dir, f"{report_name}.json")
+
+         with open(report_path, "w", encoding="utf-8") as f:
+             json.dump(report_content, f, indent=2, ensure_ascii=False)
+
+         logger.info(f"Generated JSON report: {report_path}")
+         return report_path
+
+     def _generate_html_report(self, report_name, report_content):
+         """
+         Generate an HTML report.
+
+         Args:
+             report_name (str): Name of the report.
+             report_content (dict): Report content.
+
+         Returns:
+             str: Path to the generated report.
+         """
+         report_path = os.path.join(self.output_dir, f"{report_name}.html")
+
+         # Convert report content to markdown
+         md_content = self._convert_to_markdown(report_content)
+
+         # Convert markdown to HTML
+         html_content = markdown.markdown(md_content, extensions=["tables", "fenced_code"])
+
+         # Add CSS styling
+         html_content = f"""
+         <!DOCTYPE html>
+         <html>
+         <head>
+             <meta charset="utf-8">
+             <meta name="viewport" content="width=device-width, initial-scale=1">
+             <title>Code Review Report: {report_content['metadata']['repository_name']}</title>
+             <style>
+                 body {{font-family: Arial, sans-serif; line-height: 1.6; max-width: 1200px; margin: 0 auto; padding: 20px;}}
+                 h1, h2, h3, h4 {{color: #333; margin-top: 30px;}}
+                 h1 {{border-bottom: 2px solid #333; padding-bottom: 10px;}}
+                 h2 {{border-bottom: 1px solid #ccc; padding-bottom: 5px;}}
+                 table {{border-collapse: collapse; width: 100%; margin: 20px 0;}}
+                 th, td {{text-align: left; padding: 12px; border-bottom: 1px solid #ddd;}}
+                 th {{background-color: #f2f2f2;}}
+                 tr:hover {{background-color: #f5f5f5;}}
+                 .metric-card {{background-color: #f9f9f9; border-radius: 5px; padding: 15px; margin: 10px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);}}
+                 .metric-value {{font-size: 24px; font-weight: bold; color: #333;}}
+                 .metric-label {{font-size: 14px; color: #666;}}
+                 .severity-critical {{color: #d9534f; font-weight: bold;}}
+                 .severity-high {{color: #f0ad4e; font-weight: bold;}}
+                 .severity-medium {{color: #5bc0de; font-weight: bold;}}
+                 .severity-low {{color: #5cb85c; font-weight: bold;}}
+                 .metrics-container {{display: flex; flex-wrap: wrap; gap: 20px; justify-content: space-between;}}
+                 .metric-card {{flex: 1; min-width: 200px;}}
+                 pre {{background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto;}}
+                 code {{font-family: Consolas, Monaco, 'Andale Mono', monospace; font-size: 14px;}}
+                 .recommendation {{background-color: #f9f9f9; border-left: 4px solid #5bc0de; padding: 10px; margin: 10px 0;}}
+                 .high-priority {{border-left-color: #d9534f;}}
+                 .medium-priority {{border-left-color: #f0ad4e;}}
+                 .low-priority {{border-left-color: #5cb85c;}}
+             </style>
+         </head>
+         <body>
+             {html_content}
+         </body>
+         </html>
+         """
+
+         with open(report_path, "w", encoding="utf-8") as f:
+             f.write(html_content)
+
+         logger.info(f"Generated HTML report: {report_path}")
+         return report_path
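The `tables` and `fenced_code` extensions are what let the pipe tables and fenced blocks produced by `_convert_to_markdown` survive the conversion; a minimal check:

    import markdown

    md = "| Metric | Value |\n| ------ | ----- |\n| Files | 3 |"
    html = markdown.markdown(md, extensions=["tables", "fenced_code"])
    print("<table>" in html)  # -> True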
+
+     def _generate_pdf_report(self, report_name, report_content):
+         """
+         Generate a PDF report.
+
+         Args:
+             report_name (str): Name of the report.
+             report_content (dict): Report content.
+
+         Returns:
+             str: Path to the generated report.
+         """
+         report_path = os.path.join(self.output_dir, f"{report_name}.pdf")
+
+         # First generate HTML report
+         html_path = self._generate_html_report(f"{report_name}_temp", report_content)
+
+         try:
+             # Convert HTML to PDF using pdfkit
+             pdfkit.from_file(html_path, report_path)
+
+             # Remove temporary HTML file
+             os.remove(html_path)
+
+             logger.info(f"Generated PDF report: {report_path}")
+             return report_path
+
+         except Exception as e:
+             logger.error(f"Error generating PDF report: {e}")
+             # Fall back to the intermediate HTML report if PDF conversion fails
+             return html_path
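pdfkit shells out to the wkhtmltopdf binary, so the exception branch above typically fires when that binary is missing. If it is installed somewhere off PATH, its location can be passed explicitly; the path below is an assumption for illustration, not a project setting:

    import pdfkit

    # Hypothetical binary location; adjust to the actual install path.
    config = pdfkit.configuration(wkhtmltopdf="/usr/bin/wkhtmltopdf")
    pdfkit.from_file("report.html", "report.pdf", configuration=config)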
+
+     def _generate_csv_report(self, report_name, report_content):
+         """
+         Generate a CSV report with issues and recommendations.
+
+         Args:
+             report_name (str): Name of the report.
+             report_content (dict): Report content.
+
+         Returns:
+             str: Path to the generated report.
+         """
+         report_path = os.path.join(self.output_dir, f"{report_name}.csv")
+
+         # Collect all issues and recommendations
+         rows = []
+
+         # Add code quality issues
+         for language, language_results in report_content["code_quality"]["issues_by_language"].items():
+             for issue in language_results.get("issues", []):
+                 rows.append({
+                     "Type": "Code Quality",
+                     "Language": language,
+                     "Severity": issue.get("severity", ""),
+                     "Issue": issue.get("issue", ""),
+                     "Description": issue.get("description", ""),
+                     "File": issue.get("file", ""),
+                     "Line": issue.get("line", ""),
+                     "Recommendation": issue.get("recommendation", ""),
+                 })
+
+         # Add security vulnerabilities
+         for language, language_results in report_content["security"]["vulnerabilities_by_language"].items():
+             for vuln in language_results.get("vulnerabilities", []):
+                 rows.append({
+                     "Type": "Security",
+                     "Language": language,
+                     "Severity": vuln.get("severity", ""),
+                     "Issue": vuln.get("issue", ""),
+                     "Description": vuln.get("description", ""),
+                     "File": vuln.get("file", ""),
+                     "Line": vuln.get("line", ""),
+                     "Recommendation": vuln.get("recommendation", ""),
+                 })
+
+         # Add performance issues
+         for language, language_results in report_content["performance"]["issues_by_language"].items():
+             for issue in language_results.get("issues", []):
+                 rows.append({
+                     "Type": "Performance",
+                     "Language": language,
+                     "Severity": issue.get("severity", "Medium"),
+                     "Issue": issue.get("issue", ""),
+                     "Description": issue.get("description", ""),
+                     "File": issue.get("file", ""),
+                     "Line": issue.get("line", ""),
+                     "Recommendation": issue.get("recommendation", ""),
+                 })
+
+         # Add AI review suggestions
+         for file_path, review in report_content["ai_review"]["file_reviews"].items():
+             for suggestion in review.get("suggestions", []):
+                 rows.append({
+                     "Type": "AI Review",
+                     "Language": "",
+                     "Severity": "",
+                     "Issue": suggestion.get("section", ""),
+                     "Description": suggestion.get("description", ""),
+                     "File": file_path,
+                     "Line": suggestion.get("line", ""),
+                     "Recommendation": suggestion.get("details", ""),
+                 })
+
+         # Write to CSV
+         with open(report_path, "w", newline="", encoding="utf-8") as f:
+             fieldnames = ["Type", "Language", "Severity", "Issue", "Description", "File", "Line", "Recommendation"]
+             writer = csv.DictWriter(f, fieldnames=fieldnames)
+             writer.writeheader()
+             writer.writerows(rows)
+
+         logger.info(f"Generated CSV report: {report_path}")
+         return report_path
+
+     def _convert_to_markdown(self, report_content):
+         """
+         Convert report content to markdown format.
+
+         Args:
+             report_content (dict): Report content.
+
+         Returns:
+             str: Markdown formatted report.
+         """
+         md = []
+
+         # Title and metadata
+         md.append(f"# Code Review Report: {report_content['metadata']['repository_name']}")
+         md.append(f"**Report Date:** {report_content['metadata']['report_date']}")
+         md.append("")
+
+         # Repository info
+         repo_info = report_content['metadata']['repository_info']
+         md.append("## Repository Information")
+         md.append(f"**Branch:** {repo_info.get('branch', 'N/A')}")
+         md.append(f"**Commit:** {repo_info.get('commit', 'N/A')}")
+         md.append(f"**Remote URL:** {repo_info.get('remote_url', 'N/A')}")
+         md.append(f"**Size:** {repo_info.get('size_bytes', 0)} bytes")
+         md.append(f"**File Count:** {repo_info.get('file_count', 0)}")
+         md.append("")
+
+         # Summary metrics
+         md.append("## Executive Summary")
+         metrics = report_content['summary']['metrics']
+         md.append(f"**Overall Score:** {metrics.get('overall_score', 0)}/100")
+         md.append(f"**Quality Rating:** {metrics.get('quality_rating', 'N/A')}")
+         md.append("")
+         md.append("### Key Metrics")
+         md.append("| Metric | Value |")
+         md.append("| ------ | ----- |")
+         md.append(f"| Total Files | {metrics.get('total_files', 0)} |")
+         md.append(f"| Code Quality Issues | {metrics.get('total_code_issues', 0)} |")
+         md.append(f"| Critical Code Issues | {metrics.get('critical_code_issues', 0)} |")
+         md.append(f"| Security Vulnerabilities | {metrics.get('total_vulnerabilities', 0)} |")
+         md.append(f"| Critical Vulnerabilities | {metrics.get('critical_vulnerabilities', 0)} |")
+         md.append(f"| Performance Issues | {metrics.get('total_performance_issues', 0)} |")
+         md.append(f"| Performance Hotspots | {metrics.get('performance_hotspots', 0)} |")
+         md.append("")
+
+         # Language breakdown
+         md.append("### Language Breakdown")
+         language_breakdown = report_content['summary']['language_breakdown']
+         md.append("| Language | Files | Lines | Percentage |")
+         md.append("| -------- | ----- | ----- | ---------- |")
+         for language, stats in language_breakdown.items():
+             md.append(f"| {language} | {stats.get('files', 0)} | {stats.get('lines', 0)} | {stats.get('percentage', 0)}% |")
+         md.append("")
+
+         # Executive summary from AI review (headed distinctly to avoid duplicating the section title above)
+         md.append("### AI Review Summary")
+         md.append(report_content['summary']['executive_summary'])
+         md.append("")
+
+         # Code quality issues
+         md.append("## Code Quality Analysis")
+         md.append("### Top Issues")
+         top_issues = report_content['code_quality']['top_issues']
+         if top_issues:
+             md.append("| Severity | Language | Issue | File | Line |")
+             md.append("| -------- | -------- | ----- | ---- | ---- |")
+             for issue in top_issues:
+                 md.append(f"| {issue.get('severity', 'N/A')} | {issue.get('language', 'N/A')} | {issue.get('issue', 'N/A')} | {issue.get('file', 'N/A')} | {issue.get('line', 'N/A')} |")
+         else:
+             md.append("No code quality issues found.")
+         md.append("")
+
+         # Security vulnerabilities
+         md.append("## Security Analysis")
+         md.append("### Critical Vulnerabilities")
+         critical_vulnerabilities = report_content['security']['critical_vulnerabilities']
+         if critical_vulnerabilities:
+             md.append("| Severity | Language | Vulnerability | File | Line |")
+             md.append("| -------- | -------- | ------------- | ---- | ---- |")
+             for vuln in critical_vulnerabilities:
+                 md.append(f"| {vuln.get('severity', 'N/A')} | {vuln.get('language', 'N/A')} | {vuln.get('issue', 'N/A')} | {vuln.get('file', 'N/A')} | {vuln.get('line', 'N/A')} |")
+         else:
+             md.append("No critical security vulnerabilities found.")
+         md.append("")
+
+         # Performance analysis
+         md.append("## Performance Analysis")
+         md.append("### Performance Hotspots")
+         hotspots = report_content['performance']['hotspots']
+         if hotspots:
+             md.append("| Language | File | Issue Count |")
+             md.append("| -------- | ---- | ----------- |")
+             for hotspot in hotspots:
+                 md.append(f"| {hotspot.get('language', 'N/A')} | {hotspot.get('file', 'N/A')} | {hotspot.get('issue_count', 0)} |")
+         else:
+             md.append("No performance hotspots found.")
+         md.append("")
+
+         # Recommendations
+         md.append("## Recommendations")
+
+         # High priority recommendations
+         md.append("### High Priority")
+         high_priority = report_content['recommendations']['high_priority']
+         if high_priority:
+             for i, rec in enumerate(high_priority, 1):
+                 md.append(f"**{i}. {rec.get('issue', 'Recommendation')}**")
+                 md.append(f"- **Type:** {rec.get('type', 'N/A')}")
+                 md.append(f"- **File:** {rec.get('file', 'N/A')}")
+                 if rec.get('line'):
+                     md.append(f"- **Line:** {rec.get('line')}")
+                 md.append(f"- **Description:** {rec.get('description', 'N/A')}")
+                 md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
+                 md.append("")
+         else:
+             md.append("No high priority recommendations.")
+             md.append("")
+
+         # Medium priority recommendations
+         md.append("### Medium Priority")
+         medium_priority = report_content['recommendations']['medium_priority']
+         if medium_priority:
+             for i, rec in enumerate(medium_priority, 1):
+                 md.append(f"**{i}. {rec.get('issue', 'Recommendation')}**")
+                 md.append(f"- **Type:** {rec.get('type', 'N/A')}")
+                 md.append(f"- **File:** {rec.get('file', 'N/A')}")
+                 if rec.get('line'):
+                     md.append(f"- **Line:** {rec.get('line')}")
+                 md.append(f"- **Description:** {rec.get('description', 'N/A')}")
+                 md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
+                 md.append("")
+         else:
+             md.append("No medium priority recommendations.")
+             md.append("")
+
+         # Low priority recommendations
+         md.append("### Low Priority")
+         low_priority = report_content['recommendations']['low_priority']
+         if low_priority:
+             for i, rec in enumerate(low_priority, 1):
+                 md.append(f"**{i}. {rec.get('issue', 'Recommendation')}**")
+                 md.append(f"- **Type:** {rec.get('type', 'N/A')}")
+                 md.append(f"- **File:** {rec.get('file', 'N/A')}")
+                 if rec.get('line'):
+                     md.append(f"- **Line:** {rec.get('line')}")
+                 md.append(f"- **Description:** {rec.get('description', 'N/A')}")
+                 md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
+                 md.append("")
+         else:
+             md.append("No low priority recommendations.")
+
+         return "\n".join(md)
src/services/repository_service.py ADDED
@@ -0,0 +1,244 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Repository Service
+
+ This module provides functionality for cloning and managing Git repositories.
+ """
+
+ import os
+ import shutil
+ import tempfile
+ import logging
+ import re
+ from git import Repo
+ from git.exc import GitCommandError
+
+ logger = logging.getLogger(__name__)
+
+ class RepositoryService:
+     """
+     Service for cloning and managing Git repositories.
+     """
+
+     def __init__(self, base_temp_dir=None):
+         """
+         Initialize the RepositoryService.
+
+         Args:
+             base_temp_dir (str, optional): Base directory for temporary repositories.
+                 If None, system temp directory will be used.
+         """
+         self.base_temp_dir = base_temp_dir or tempfile.gettempdir()
+         self.repos = {}
+         logger.info(f"Initialized RepositoryService with base temp dir: {self.base_temp_dir}")
+
+     def validate_github_url(self, url):
+         """
+         Validate if the provided URL is a valid GitHub repository URL.
+
+         Args:
+             url (str): The GitHub repository URL to validate.
+
+         Returns:
+             bool: True if the URL is valid, False otherwise.
+         """
+         # GitHub URL patterns
+         patterns = [
+             r'^https?://github\.com/[\w.-]+/[\w.-]+(\.git)?$',  # https://github.com/user/repo[.git]
+             r'^git@github\.com:[\w.-]+/[\w.-]+(\.git)?$',       # git@github.com:user/repo[.git]
+         ]
+
+         for pattern in patterns:
+             if re.match(pattern, url):
+                 return True
+
+         return False
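A few illustrative inputs against the two patterns (user and repo names are placeholders):

    service = RepositoryService()
    print(service.validate_github_url("https://github.com/user/repo"))   # True
    print(service.validate_github_url("git@github.com:user/repo.git"))   # True
    print(service.validate_github_url("https://gitlab.com/user/repo"))   # False: host is not github.com
    print(service.validate_github_url("https://github.com/user"))        # False: repo segment missing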
+
+     def normalize_github_url(self, url):
+         """
+         Normalize a GitHub URL to a consistent format.
+
+         Args:
+             url (str): The GitHub repository URL to normalize.
+
+         Returns:
+             str: The normalized URL.
+         """
+         # Convert SSH URL to HTTPS URL
+         if url.startswith('git@github.com:'):
+             user_repo = url[len('git@github.com:'):]
+             if user_repo.endswith('.git'):
+                 user_repo = user_repo[:-4]
+             return f"https://github.com/{user_repo}"
+
+         # Ensure HTTPS URL ends without .git
+         if url.startswith('http'):
+             if url.endswith('.git'):
+                 return url[:-4]
+
+         return url
+
+     def extract_repo_name(self, url):
+         """
+         Extract repository name from a GitHub URL.
+
+         Args:
+             url (str): The GitHub repository URL.
+
+         Returns:
+             str: The repository name.
+         """
+         normalized_url = self.normalize_github_url(url)
+         return normalized_url.split('/')[-1]
+
+     def clone_repository(self, url, branch=None):
+         """
+         Clone a Git repository from the provided URL.
+
+         Args:
+             url (str): The repository URL to clone.
+             branch (str, optional): The branch to checkout. If None, the default branch is used.
+
+         Returns:
+             str: The path to the cloned repository.
+
+         Raises:
+             ValueError: If the URL is not a valid GitHub repository URL.
+             GitCommandError: If there's an error during the Git operation.
+         """
+         if not self.validate_github_url(url):
+             raise ValueError(f"Invalid GitHub repository URL: {url}")
+
+         repo_name = self.extract_repo_name(url)
+         repo_dir = os.path.join(self.base_temp_dir, f"codereview_{repo_name}_{os.urandom(4).hex()}")
+
+         logger.info(f"Cloning repository {url} to {repo_dir}")
+
+         try:
+             # Clone the repository
+             if branch:
+                 repo = Repo.clone_from(url, repo_dir, branch=branch)
+                 logger.info(f"Cloned repository {url} (branch: {branch}) to {repo_dir}")
+             else:
+                 repo = Repo.clone_from(url, repo_dir)
+                 logger.info(f"Cloned repository {url} (default branch) to {repo_dir}")
+
+             # Store the repository instance
+             self.repos[repo_dir] = repo
+
+             return repo_dir
+
+         except GitCommandError as e:
+             logger.error(f"Error cloning repository {url}: {e}")
+             # Clean up the directory if it was created
+             if os.path.exists(repo_dir):
+                 shutil.rmtree(repo_dir, ignore_errors=True)
+             raise
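A typical clone, inspect, cleanup round trip, sketched with a placeholder URL (get_repository_info and cleanup_repository are defined below):

    service = RepositoryService(base_temp_dir="temp_repos")
    repo_dir = service.clone_repository("https://github.com/user/repo")  # placeholder URL
    try:
        info = service.get_repository_info(repo_dir)
        print(info.get("active_branch"), info.get("file_count"))
    finally:
        service.cleanup_repository(repo_dir)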
+
+     def get_repository_info(self, repo_path):
+         """
+         Get information about a repository.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: A dictionary containing repository information.
+         """
+         if repo_path not in self.repos:
+             try:
+                 self.repos[repo_path] = Repo(repo_path)
+             except Exception as e:
+                 logger.error(f"Error opening repository at {repo_path}: {e}")
+                 return {}
+
+         repo = self.repos[repo_path]
+
+         try:
+             # Get the active branch
+             try:
+                 active_branch = repo.active_branch.name
+             except TypeError:
+                 # Detached HEAD state
+                 active_branch = 'HEAD detached'
+
+             # Get the latest commit
+             latest_commit = repo.head.commit
+
+             # Get remote URL
+             try:
+                 remote_url = repo.remotes.origin.url
+             except AttributeError:
+                 remote_url = 'No remote URL found'
+
+             # Get repository size (approximate)
+             repo_size = sum(os.path.getsize(os.path.join(dirpath, filename))
+                             for dirpath, _, filenames in os.walk(repo_path)
+                             for filename in filenames)
+
+             # Count files
+             file_count = sum(len(files) for _, _, files in os.walk(repo_path))
+
+             return {
+                 'path': repo_path,
+                 'active_branch': active_branch,
+                 'latest_commit': {
+                     'hash': latest_commit.hexsha,
+                     'author': f"{latest_commit.author.name} <{latest_commit.author.email}>",
+                     'date': latest_commit.committed_datetime.isoformat(),
+                     'message': latest_commit.message.strip(),
+                 },
+                 'remote_url': remote_url,
+                 'size_bytes': repo_size,
+                 'file_count': file_count,
+             }
+
+         except Exception as e:
+             logger.error(f"Error getting repository info for {repo_path}: {e}")
+             return {
+                 'path': repo_path,
+                 'error': str(e),
+             }
+
+     def cleanup_repository(self, repo_path):
+         """
+         Clean up a cloned repository.
+
+         Args:
+             repo_path (str): The path to the repository to clean up.
+
+         Returns:
+             bool: True if the cleanup was successful, False otherwise.
+         """
+         logger.info(f"Cleaning up repository at {repo_path}")
+
+         # Remove the repository from the tracked repos
+         if repo_path in self.repos:
+             del self.repos[repo_path]
+
+         # Remove the directory
+         try:
+             if os.path.exists(repo_path):
+                 shutil.rmtree(repo_path, ignore_errors=True)
+             return True
+         except Exception as e:
+             logger.error(f"Error cleaning up repository at {repo_path}: {e}")
+             return False
+
+     def cleanup_all_repositories(self):
+         """
+         Clean up all cloned repositories.
+
+         Returns:
+             bool: True if all cleanups were successful, False otherwise.
+         """
+         logger.info("Cleaning up all repositories")
+
+         success = True
+         for repo_path in list(self.repos.keys()):
+             if not self.cleanup_repository(repo_path):
+                 success = False
+
+         return success
src/services/security_scanner.py ADDED
@@ -0,0 +1,831 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Security Scanner Service
+
+ This module provides functionality for scanning code for security vulnerabilities.
+ """
+
+ import os
+ import subprocess
+ import logging
+ import json
+ import tempfile
+ import concurrent.futures
+ from collections import defaultdict
+
+ logger = logging.getLogger(__name__)
+
+ class SecurityScanner:
+     """
+     Service for scanning code for security vulnerabilities.
+     """
+
+     def __init__(self):
+         """
+         Initialize the SecurityScanner.
+         """
+         logger.info("Initialized SecurityScanner")
+         self.scanners = {
+             'Python': self._scan_python,
+             'JavaScript': self._scan_javascript,
+             'TypeScript': self._scan_javascript,  # TypeScript uses the same scanner as JavaScript
+             'Java': self._scan_java,
+             'Go': self._scan_go,
+             'Rust': self._scan_rust,
+         }
+
+     def scan_repository(self, repo_path, languages):
+         """
+         Scan a repository for security vulnerabilities in the specified languages using parallel processing.
+
+         Args:
+             repo_path (str): The path to the repository.
+             languages (list): A list of programming languages to scan.
+
+         Returns:
+             dict: A dictionary containing scan results for each language.
+         """
+         logger.info(f"Scanning repository at {repo_path} for security vulnerabilities in languages: {languages}")
+
+         results = {}
+
+         # Scan dependencies first (language-agnostic)
+         results['dependencies'] = self._scan_dependencies(repo_path)
+
+         # Define a function to scan a single language
+         def scan_language(language):
+             if language in self.scanners:
+                 try:
+                     logger.info(f"Scanning {language} code in {repo_path} for security vulnerabilities")
+                     return language, self.scanners[language](repo_path)
+                 except Exception as e:
+                     logger.error(f"Error scanning {language} code for security vulnerabilities: {e}")
+                     return language, {
+                         'status': 'error',
+                         'error': str(e),
+                         'vulnerabilities': [],
+                     }
+             else:
+                 logger.warning(f"No security scanner available for {language}")
+                 return language, {
+                     'status': 'not_supported',
+                     'message': f"Security scanning for {language} is not supported yet.",
+                     'vulnerabilities': [],
+                 }
+
+         # Use ThreadPoolExecutor to scan languages in parallel.
+         # Cap the pool at five workers, but keep at least one so an empty language list does not raise.
+         with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(len(languages), 5))) as executor:
+             # Submit all language scanning tasks
+             future_to_language = {executor.submit(scan_language, language): language for language in languages}
+
+             # Process results as they complete
+             for future in concurrent.futures.as_completed(future_to_language):
+                 language = future_to_language[future]
+                 try:
+                     lang, result = future.result()
+                     results[lang] = result
+                     logger.info(f"Completed security scanning for {lang}")
+                 except Exception as e:
+                     logger.error(f"Exception occurred during security scanning of {language}: {e}")
+                     results[language] = {
+                         'status': 'error',
+                         'error': str(e),
+                         'vulnerabilities': [],
+                     }
+
+         return results
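Usage mirrors the other services: pass the checkout path and the detected languages, then read the per-language entries plus the language-agnostic 'dependencies' entry from the returned dict. The sketch assumes the underlying tools (safety, npm, govulncheck, cargo-audit, bandit) are installed:

    scanner = SecurityScanner()
    results = scanner.scan_repository("temp_repos/example", ["Python", "Go"])
    for name, result in results.items():
        print(name, result.get("status"), len(result.get("vulnerabilities", [])))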
+
+     def _scan_dependencies(self, repo_path):
+         """
+         Scan dependencies for known vulnerabilities.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Dependency scan results.
+         """
+         logger.info(f"Scanning dependencies in {repo_path}")
+
+         results = {
+             'python': self._scan_python_dependencies(repo_path),
+             'javascript': self._scan_javascript_dependencies(repo_path),
+             'java': self._scan_java_dependencies(repo_path),
+             'go': self._scan_go_dependencies(repo_path),
+             'rust': self._scan_rust_dependencies(repo_path),
+         }
+
+         # Aggregate vulnerabilities
+         all_vulnerabilities = []
+         for lang_result in results.values():
+             all_vulnerabilities.extend(lang_result.get('vulnerabilities', []))
+
+         return {
+             'status': 'success',
+             'vulnerabilities': all_vulnerabilities,
+             'vulnerability_count': len(all_vulnerabilities),
+             'language_results': results,
+         }
+
+     def _scan_python_dependencies(self, repo_path):
+         """
+         Scan Python dependencies for known vulnerabilities using safety.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Python dependencies.
+         """
+         logger.info(f"Scanning Python dependencies in {repo_path}")
+
+         # Find requirements files
+         requirements_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file in ('requirements.txt', 'Pipfile', 'Pipfile.lock', 'setup.py'):
+                     requirements_files.append(os.path.join(root, file))
+
+         if not requirements_files:
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Python dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         vulnerabilities = []
+
+         for req_file in requirements_files:
+             try:
+                 # Run safety check
+                 cmd = [
+                     'safety',
+                     'check',
+                     '--file', req_file,
+                     '--json',
+                 ]
+
+                 process = subprocess.run(
+                     cmd,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.PIPE,
+                     text=True,
+                     check=False,
+                 )
+
+                 # Parse safety output
+                 if process.stdout.strip():
+                     try:
+                         safety_results = json.loads(process.stdout)
+
+                         for vuln in safety_results.get('vulnerabilities', []):
+                             vulnerabilities.append({
+                                 'package': vuln.get('package_name', ''),
+                                 'installed_version': vuln.get('installed_version', ''),
+                                 'affected_versions': vuln.get('vulnerable_spec', ''),
+                                 'description': vuln.get('advisory', ''),
+                                 'severity': vuln.get('severity', ''),
+                                 'file': req_file,
+                                 'language': 'Python',
+                             })
+                     except json.JSONDecodeError:
+                         logger.error(f"Error parsing safety output: {process.stdout}")
+             except Exception as e:
+                 logger.error(f"Error running safety on {req_file}: {e}")
+
+         return {
+             'status': 'success',
+             'vulnerabilities': vulnerabilities,
+             'vulnerability_count': len(vulnerabilities),
+             'files_scanned': requirements_files,
+         }
+
+     def _scan_javascript_dependencies(self, repo_path):
+         """
+         Scan JavaScript/TypeScript dependencies for known vulnerabilities using npm audit.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for JavaScript dependencies.
+         """
+         logger.info(f"Scanning JavaScript dependencies in {repo_path}")
+
+         # Find package.json files
+         package_files = []
+         for root, _, files in os.walk(repo_path):
+             if 'package.json' in files:
+                 package_files.append(os.path.join(root, 'package.json'))
+
+         if not package_files:
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No JavaScript dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         vulnerabilities = []
+
+         for pkg_file in package_files:
+             pkg_dir = os.path.dirname(pkg_file)
+             try:
+                 # Run npm audit
+                 cmd = [
+                     'npm',
+                     'audit',
+                     '--json',
+                 ]
+
+                 process = subprocess.run(
+                     cmd,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.PIPE,
+                     text=True,
+                     check=False,
+                     cwd=pkg_dir,  # Run in the directory containing package.json
+                 )
+
+                 # Parse npm audit output
+                 if process.stdout.strip():
+                     try:
+                         audit_results = json.loads(process.stdout)
+
+                         # Extract vulnerabilities from npm audit results
+                         for vuln_id, vuln_info in audit_results.get('vulnerabilities', {}).items():
+                             vulnerabilities.append({
+                                 'package': vuln_info.get('name', ''),
+                                 'installed_version': vuln_info.get('version', ''),
+                                 'affected_versions': vuln_info.get('range', ''),
+                                 'description': vuln_info.get('overview', ''),
+                                 'severity': vuln_info.get('severity', ''),
+                                 'file': pkg_file,
+                                 'language': 'JavaScript',
+                                 'cwe': vuln_info.get('cwe', ''),
+                                 'recommendation': vuln_info.get('recommendation', ''),
+                             })
+                     except json.JSONDecodeError:
+                         logger.error(f"Error parsing npm audit output: {process.stdout}")
+             except Exception as e:
+                 logger.error(f"Error running npm audit on {pkg_file}: {e}")
+
+         return {
+             'status': 'success',
+             'vulnerabilities': vulnerabilities,
+             'vulnerability_count': len(vulnerabilities),
+             'files_scanned': package_files,
+         }
+
+     def _scan_java_dependencies(self, repo_path):
+         """
+         Scan Java dependencies for known vulnerabilities.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Java dependencies.
+         """
+         logger.info(f"Scanning Java dependencies in {repo_path}")
+
+         # Find pom.xml or build.gradle files
+         dependency_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file in ('pom.xml', 'build.gradle'):
+                     dependency_files.append(os.path.join(root, file))
+
+         if not dependency_files:
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Java dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         # For now, we'll just return a placeholder since we don't have a direct tool
+         # In a real implementation, you might use OWASP Dependency Check or similar
+         return {
+             'status': 'not_implemented',
+             'message': 'Java dependency scanning is not fully implemented yet.',
+             'vulnerabilities': [],
+             'files_scanned': dependency_files,
+         }
+
+     def _scan_go_dependencies(self, repo_path):
+         """
+         Scan Go dependencies for known vulnerabilities using govulncheck.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Go dependencies.
+         """
+         logger.info(f"Scanning Go dependencies in {repo_path}")
+
+         # Check if go.mod exists
+         go_mod_path = os.path.join(repo_path, 'go.mod')
+         if not os.path.exists(go_mod_path):
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Go dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run govulncheck
+             cmd = [
+                 'govulncheck',
+                 '-json',
+                 './...',
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse govulncheck output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 for line in process.stdout.splitlines():
+                     try:
+                         result = json.loads(line)
+                         if 'vulnerability' in result:
+                             vuln = result['vulnerability']
+                             vulnerabilities.append({
+                                 'package': vuln.get('package', ''),
+                                 'description': vuln.get('details', ''),
+                                 'severity': 'high',  # govulncheck doesn't provide severity
+                                 'file': go_mod_path,
+                                 'language': 'Go',
+                                 'cve': vuln.get('osv', {}).get('id', ''),
+                                 'affected_versions': vuln.get('osv', {}).get('affected', ''),
+                             })
+                     except json.JSONDecodeError:
+                         continue
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': [go_mod_path],
+             }
+
+         except Exception as e:
+             logger.error(f"Error running govulncheck: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_rust_dependencies(self, repo_path):
+         """
+         Scan Rust dependencies for known vulnerabilities using cargo-audit.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Rust dependencies.
+         """
+         logger.info(f"Scanning Rust dependencies in {repo_path}")
+
+         # Check if Cargo.toml exists
+         cargo_toml_path = os.path.join(repo_path, 'Cargo.toml')
+         if not os.path.exists(cargo_toml_path):
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Rust dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run cargo-audit
+             cmd = [
+                 'cargo',
+                 'audit',
+                 '--json',
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse cargo-audit output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 try:
+                     audit_results = json.loads(process.stdout)
+
+                     for vuln in audit_results.get('vulnerabilities', {}).get('list', []):
+                         vulnerabilities.append({
+                             'package': vuln.get('package', {}).get('name', ''),
+                             'installed_version': vuln.get('package', {}).get('version', ''),
+                             'description': vuln.get('advisory', {}).get('description', ''),
+                             'severity': vuln.get('advisory', {}).get('severity', ''),
+                             'file': cargo_toml_path,
+                             'language': 'Rust',
+                             'cve': vuln.get('advisory', {}).get('id', ''),
+                         })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing cargo-audit output: {process.stdout}")
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': [cargo_toml_path],
+             }
+
+         except Exception as e:
+             logger.error(f"Error running cargo-audit: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_python(self, repo_path):
+         """
+         Scan Python code for security vulnerabilities using bandit.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Python code.
+         """
+         logger.info(f"Scanning Python code in {repo_path} for security vulnerabilities")
+
+         # Find Python files
+         python_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.py'):
+                     python_files.append(os.path.join(root, file))
+
+         if not python_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Python files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run bandit
+             cmd = [
+                 'bandit',
+                 '-r',
+                 '-f', 'json',
+                 repo_path,
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+             )
+
+             # Parse bandit output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 try:
+                     bandit_results = json.loads(process.stdout)
+
+                     for result in bandit_results.get('results', []):
+                         vulnerabilities.append({
+                             'file': result.get('filename', ''),
+                             'line': result.get('line_number', 0),
+                             'code': result.get('code', ''),
+                             'issue': result.get('issue_text', ''),
+                             'severity': result.get('issue_severity', ''),
+                             'confidence': result.get('issue_confidence', ''),
+                             'cwe': result.get('cwe', ''),
+                             'test_id': result.get('test_id', ''),
+                             'test_name': result.get('test_name', ''),
+                             'language': 'Python',
+                         })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing bandit output: {process.stdout}")
+
+             # Group vulnerabilities by severity
+             vulns_by_severity = defaultdict(list)
+             for vuln in vulnerabilities:
+                 severity = vuln.get('severity', 'unknown')
+                 vulns_by_severity[severity].append(vuln)
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerabilities_by_severity': dict(vulns_by_severity),
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': len(python_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error running bandit: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_javascript(self, repo_path):
+         """
+         Scan JavaScript/TypeScript code for security vulnerabilities using ESLint's security plugin.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for JavaScript/TypeScript code.
+         """
+         logger.info(f"Scanning JavaScript/TypeScript code in {repo_path} for security vulnerabilities")
+
+         # Find JavaScript/TypeScript files
+         js_files = []
+         for root, _, files in os.walk(repo_path):
+             if 'node_modules' in root:
+                 continue
+             for file in files:
+                 if file.endswith(('.js', '.jsx', '.ts', '.tsx')):
+                     js_files.append(os.path.join(root, file))
+
+         if not js_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No JavaScript/TypeScript files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         # For now, we'll use a simplified approach since NodeJSScan might not be available
+         # In a real implementation, you might use NodeJSScan or similar
+
+         # Create a temporary ESLint configuration file with security rules
+         eslint_config = {
+             "env": {
+                 "browser": True,
+                 "es2021": True,
+                 "node": True
+             },
+             "extends": [
+                 "eslint:recommended",
+                 "plugin:security/recommended"
+             ],
+             "plugins": [
+                 "security"
+             ],
+             "parserOptions": {
+                 "ecmaVersion": 12,
+                 "sourceType": "module",
+                 "ecmaFeatures": {
+                     "jsx": True
+                 }
+             },
+             "rules": {}
+         }
+
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_config:  # text mode: json.dump writes str
604
+ json.dump(eslint_config, temp_config)
605
+ temp_config_path = temp_config.name
606
+
607
+ try:
608
+ # Run ESLint with security plugin
609
+ cmd = [
610
+ 'npx',
611
+ 'eslint',
612
+ '--config', temp_config_path,
613
+ '--format', 'json',
614
+ '--plugin', 'security',
615
+ ] + js_files
616
+
617
+ process = subprocess.run(
618
+ cmd,
619
+ stdout=subprocess.PIPE,
620
+ stderr=subprocess.PIPE,
621
+ text=True,
622
+ check=False,
623
+ )
624
+
625
+ # Parse ESLint output
626
+ vulnerabilities = []
627
+ if process.stdout.strip():
628
+ try:
629
+ eslint_results = json.loads(process.stdout)
630
+
631
+ for result in eslint_results:
632
+ file_path = result.get('filePath', '')
633
+ for message in result.get('messages', []):
634
+ # Only include security-related issues
635
+ rule_id = message.get('ruleId', '')
636
+ if rule_id and ('security' in rule_id or 'no-eval' in rule_id or 'no-implied-eval' in rule_id):
637
+ vulnerabilities.append({
638
+ 'file': file_path,
639
+ 'line': message.get('line', 0),
640
+ 'column': message.get('column', 0),
641
+ 'issue': message.get('message', ''),
642
+ 'severity': 'high' if message.get('severity', 0) == 2 else 'medium',
643
+ 'rule': rule_id,
644
+ 'language': 'JavaScript',
645
+ })
646
+ except json.JSONDecodeError:
647
+ logger.error(f"Error parsing ESLint output: {process.stdout}")
648
+
649
+ # Group vulnerabilities by severity
650
+ vulns_by_severity = defaultdict(list)
651
+ for vuln in vulnerabilities:
652
+ severity = vuln.get('severity', 'unknown')
653
+ vulns_by_severity[severity].append(vuln)
654
+
655
+ return {
656
+ 'status': 'success',
657
+ 'vulnerabilities': vulnerabilities,
658
+ 'vulnerabilities_by_severity': dict(vulns_by_severity),
659
+ 'vulnerability_count': len(vulnerabilities),
660
+ 'files_scanned': len(js_files),
661
+ }
662
+
663
+ except Exception as e:
664
+ logger.error(f"Error scanning JavaScript/TypeScript code: {e}")
665
+ return {
666
+ 'status': 'error',
667
+ 'error': str(e),
668
+ 'vulnerabilities': [],
669
+ }
670
+
671
+ finally:
672
+ # Clean up the temporary configuration file
673
+ if os.path.exists(temp_config_path):
674
+ os.unlink(temp_config_path)
675
+
676
+ def _scan_java(self, repo_path):
677
+ """
678
+ Scan Java code for security vulnerabilities.
679
+
680
+ Args:
681
+ repo_path (str): The path to the repository.
682
+
683
+ Returns:
684
+ dict: Scan results for Java code.
685
+ """
686
+ logger.info(f"Scanning Java code in {repo_path} for security vulnerabilities")
687
+
688
+ # Find Java files
689
+ java_files = []
690
+ for root, _, files in os.walk(repo_path):
691
+ for file in files:
692
+ if file.endswith('.java'):
693
+ java_files.append(os.path.join(root, file))
694
+
695
+ if not java_files:
696
+ return {
697
+ 'status': 'no_files',
698
+ 'message': 'No Java files found in the repository.',
699
+ 'vulnerabilities': [],
700
+ }
701
+
702
+ # For now, we'll just return a placeholder since we don't have a direct tool
703
+ # In a real implementation, you might use FindSecBugs or similar; see the sketch below.
704
+ return {
705
+ 'status': 'not_implemented',
706
+ 'message': 'Java security scanning is not fully implemented yet.',
707
+ 'vulnerabilities': [],
708
+ 'files_scanned': len(java_files),
709
+ }
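+
+ # One possible implementation, sketched here under the assumption that
+ # semgrep is installed and the registry ruleset 'p/java' is reachable —
+ # neither ships with this project:
+ #
+ # cmd = ['semgrep', '--config', 'p/java', '--json', repo_path]
+ # process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ #                          text=True, check=False)
+ # for finding in json.loads(process.stdout).get('results', []):
+ #     vulnerabilities.append({
+ #         'file': finding.get('path', ''),
+ #         'line': finding.get('start', {}).get('line', 0),
+ #         'issue': finding.get('extra', {}).get('message', ''),
+ #         'severity': finding.get('extra', {}).get('severity', ''),
+ #         'language': 'Java',
+ #     })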
710
+
711
+ def _scan_go(self, repo_path):
712
+ """
713
+ Scan Go code for security vulnerabilities using gosec.
714
+
715
+ Args:
716
+ repo_path (str): The path to the repository.
717
+
718
+ Returns:
719
+ dict: Scan results for Go code.
720
+ """
721
+ logger.info(f"Scanning Go code in {repo_path} for security vulnerabilities")
722
+
723
+ # Find Go files
724
+ go_files = []
725
+ for root, _, files in os.walk(repo_path):
726
+ for file in files:
727
+ if file.endswith('.go'):
728
+ go_files.append(os.path.join(root, file))
729
+
730
+ if not go_files:
731
+ return {
732
+ 'status': 'no_files',
733
+ 'message': 'No Go files found in the repository.',
734
+ 'vulnerabilities': [],
735
+ }
736
+
737
+ try:
738
+ # Run gosec
739
+ cmd = [
740
+ 'gosec',
741
+ '-fmt', 'json',
742
+ '-quiet',
743
+ './...',
744
+ ]
745
+
746
+ process = subprocess.run(
747
+ cmd,
748
+ stdout=subprocess.PIPE,
749
+ stderr=subprocess.PIPE,
750
+ text=True,
751
+ check=False,
752
+ cwd=repo_path, # Run in the repository directory
753
+ )
754
+
755
+ # Parse gosec output
756
+ vulnerabilities = []
757
+ if process.stdout.strip():
758
+ try:
759
+ gosec_results = json.loads(process.stdout)
760
+
761
+ for issue in gosec_results.get('Issues', []):
762
+ vulnerabilities.append({
763
+ 'file': issue.get('file', ''),
764
+ 'line': issue.get('line', ''),
765
+ 'code': issue.get('code', ''),
766
+ 'issue': issue.get('details', ''),
767
+ 'severity': issue.get('severity', ''),
768
+ 'confidence': issue.get('confidence', ''),
769
+ 'cwe': issue.get('cwe', {}).get('ID', ''),
770
+ 'rule_id': issue.get('rule_id', ''),
771
+ 'language': 'Go',
772
+ })
773
+ except json.JSONDecodeError:
774
+ logger.error(f"Error parsing gosec output: {process.stdout}")
775
+
776
+ # Group vulnerabilities by severity
777
+ vulns_by_severity = defaultdict(list)
778
+ for vuln in vulnerabilities:
779
+ severity = vuln.get('severity', 'unknown')
780
+ vulns_by_severity[severity].append(vuln)
781
+
782
+ return {
783
+ 'status': 'success',
784
+ 'vulnerabilities': vulnerabilities,
785
+ 'vulnerabilities_by_severity': dict(vulns_by_severity),
786
+ 'vulnerability_count': len(vulnerabilities),
787
+ 'files_scanned': len(go_files),
788
+ }
789
+
790
+ except Exception as e:
791
+ logger.error(f"Error running gosec: {e}")
792
+ return {
793
+ 'status': 'error',
794
+ 'error': str(e),
795
+ 'vulnerabilities': [],
796
+ }
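+
+ # Illustrative note — the parsing above assumes gosec's JSON layout, roughly:
+ #
+ #   {"Issues": [{"file": "main.go", "line": "42", "details": "...",
+ #                "severity": "MEDIUM", "confidence": "HIGH",
+ #                "rule_id": "G104", "cwe": {"ID": "703", "URL": "..."}}]}
+ #
+ # Field casing has varied between gosec releases, so treat this as an example
+ # rather than a contract.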
797
+
798
+ def _scan_rust(self, repo_path):
799
+ """
800
+ Scan Rust code for security vulnerabilities.
801
+
802
+ Args:
803
+ repo_path (str): The path to the repository.
804
+
805
+ Returns:
806
+ dict: Scan results for Rust code.
807
+ """
808
+ logger.info(f"Scanning Rust code in {repo_path} for security vulnerabilities")
809
+
810
+ # Find Rust files
811
+ rust_files = []
812
+ for root, _, files in os.walk(repo_path):
813
+ for file in files:
814
+ if file.endswith('.rs'):
815
+ rust_files.append(os.path.join(root, file))
816
+
817
+ if not rust_files:
818
+ return {
819
+ 'status': 'no_files',
820
+ 'message': 'No Rust files found in the repository.',
821
+ 'vulnerabilities': [],
822
+ }
823
+
824
+ # For now, we'll just return a placeholder since we don't have a direct tool
825
+ # In a real implementation, you might use cargo-audit or similar; see the sketch below.
826
+ return {
827
+ 'status': 'not_implemented',
828
+ 'message': 'Rust security scanning is not fully implemented yet.',
829
+ 'vulnerabilities': [],
830
+ 'files_scanned': len(rust_files),
831
+ }
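+
+ # A minimal sketch of a real Rust scan, assuming cargo-audit is installed and
+ # the repository contains a Cargo.lock (cargo-audit checks dependency
+ # advisories rather than linting the .rs sources themselves). The JSON field
+ # names below are an assumption about cargo-audit's report format:
+ #
+ # process = subprocess.run(['cargo', 'audit', '--json'],
+ #                          stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ #                          text=True, check=False, cwd=repo_path)
+ # report = json.loads(process.stdout)
+ # for vuln in report.get('vulnerabilities', {}).get('list', []):
+ #     advisory = vuln.get('advisory', {})
+ #     vulnerabilities.append({
+ #         'package': vuln.get('package', {}).get('name', ''),
+ #         'issue': advisory.get('title', ''),
+ #         'advisory': advisory.get('id', ''),  # RUSTSEC advisory id
+ #         'language': 'Rust',
+ #     })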
src/ui/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # UI Package for Code Review Agent
src/ui/components/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # UI Components Package for Code Review Agent
src/ui/components/export_manager.py ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Export Manager Component
6
+
7
+ This module provides the UI component for exporting the code review results in various formats.
8
+ """
9
+
10
+ import gradio as gr
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def create_export_manager():
17
+ """
18
+ Create the export manager component.
19
+
20
+ Returns:
21
+ list: A list of tuples containing (export_button, export_format).
22
+ """
23
+ export_buttons = []
24
+ export_formats = []
25
+
26
+ with gr.Group():
27
+ gr.Markdown("### 📤 Export Results")
28
+
29
+ with gr.Row():
30
+ # PDF Export
31
+ pdf_btn = gr.Button("Export as PDF", variant="secondary")
32
+ pdf_format = gr.Textbox(value="pdf", visible=False)
33
+ export_buttons.append((pdf_btn, pdf_format))
34
+ export_formats.append(pdf_format)
35
+
36
+ # JSON Export
37
+ json_btn = gr.Button("Export as JSON", variant="secondary")
38
+ json_format = gr.Textbox(value="json", visible=False)
39
+ export_buttons.append((json_btn, json_format))
40
+ export_formats.append(json_format)
41
+
42
+ # HTML Export
43
+ html_btn = gr.Button("Export as HTML", variant="secondary")
44
+ html_format = gr.Textbox(value="html", visible=False)
45
+ export_buttons.append((html_btn, html_format))
46
+ export_formats.append(html_format)
47
+
48
+ # CSV Export
49
+ csv_btn = gr.Button("Export as CSV", variant="secondary")
50
+ csv_format = gr.Textbox(value="csv", visible=False)
51
+ export_buttons.append((csv_btn, csv_format))
52
+ export_formats.append(csv_format)
53
+
54
+ return export_buttons
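+
+ # Usage sketch — this mirrors the wiring in src/ui/gradio_app.py:
+ #
+ # for export_btn, export_format in create_export_manager():
+ #     export_btn.click(fn=agent_manager.export_report,
+ #                      inputs=[results_dashboard, export_format],
+ #                      outputs=[])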
src/ui/components/language_selector.py ADDED
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Language Selector Component
6
+
7
+ This module provides the UI component for selecting programming languages to analyze.
8
+ """
9
+
10
+ import gradio as gr
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # List of supported programming languages
16
+ SUPPORTED_LANGUAGES = [
17
+ "Python", "JavaScript", "TypeScript", "Java",
18
+ "Go", "Rust", "C++", "C#", "PHP", "Ruby",
19
+ "Swift", "Kotlin", "Scala", "R", "Shell"
20
+ ]
21
+
22
+
23
+ def create_language_selector():
24
+ """
25
+ Create the language selector component.
26
+
27
+ Returns:
28
+ gr.CheckboxGroup: The language selector component.
29
+ """
30
+ with gr.Group():
31
+ gr.Markdown("### 🔤 Languages (Optional)")
32
+
33
+ language_selector = gr.CheckboxGroup(
34
+ choices=SUPPORTED_LANGUAGES,
35
+ label="Select languages to analyze",
36
+ info="Leave empty to auto-detect languages",
37
+ value=[],
38
+ )
39
+
40
+ gr.Markdown(
41
+ "*Note: If no languages are selected, the agent will automatically detect languages in the repository.*",
42
+ elem_classes=["small-text"]
43
+ )
44
+
45
+ return language_selector
src/ui/components/progress_tracker.py ADDED
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Progress Tracker Component
6
+
7
+ This module provides the UI component for tracking the progress of the code review process.
8
+ """
9
+
10
+ import gradio as gr
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def create_progress_tracker():
17
+ """
18
+ Create the progress tracker component.
19
+
20
+ Returns:
21
+ tuple: A tuple containing (overall_progress, status_message, step_progress_dict)
22
+ """
23
+ # Overall progress bar
24
+ overall_progress = gr.Slider(
25
+ minimum=0,
26
+ maximum=100,
27
+ value=0,
28
+ label="Overall Progress",
29
+ interactive=False,
30
+ )
31
+
32
+ # Status message
33
+ status_message = gr.Markdown(
34
+ "*Initializing...*"
35
+ )
36
+
37
+ # Detailed progress steps
38
+ steps = [
39
+ "Repository Cloning",
40
+ "Language Detection",
41
+ "Code Analysis",
42
+ "Security Scanning",
43
+ "Performance Analysis",
44
+ "AI Review",
45
+ "Report Generation"
46
+ ]
47
+
48
+ with gr.Accordion("Detailed Progress", open=False):
49
+ step_progress = {}
50
+ for step in steps:
51
+ with gr.Row(variant="panel"):
52
+ with gr.Column(scale=1, min_width=150):
53
+ gr.Markdown(f"**{step}**")
54
+ with gr.Column(scale=4):
55
+ step_progress[step] = gr.Slider(
56
+ minimum=0,
57
+ maximum=100,
58
+ value=0,
59
+ label="",
60
+ interactive=False,
61
+ scale=2
62
+ )
63
+
64
+ return overall_progress, status_message, step_progress
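+
+ # Usage sketch — src/ui/gradio_app.py unpacks the returned tuple directly:
+ #   overall_progress, status_message, step_progress = create_progress_tracker()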
src/ui/components/repo_input.py ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Repository Input Component
6
+
7
+ This module provides the UI component for entering a GitHub repository URL.
8
+ """
9
+
10
+ import gradio as gr
11
+ import re
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def validate_github_url(url):
18
+ """
19
+ Validate that the input is a proper GitHub repository URL.
20
+
21
+ Args:
22
+ url (str): The URL to validate.
23
+
24
+ Returns:
25
+ str or None: Error message if invalid, None if valid.
26
+ """
27
+ if not url:
28
+ return None
29
+
30
+ # Basic GitHub URL pattern
31
+ pattern = r'^https?://github\.com/[\w.-]+/[\w.-]+/?$'
32
+ if not re.match(pattern, url):
33
+ return "Please enter a valid GitHub repository URL"
34
+ return None
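+
+ # Illustrative behaviour of the validator above (example values only):
+ #   validate_github_url("https://github.com/user/repo")  -> None (valid)
+ #   validate_github_url("https://gitlab.com/user/repo")  -> error message
+ #   validate_github_url("")                              -> None (empty input
+ #                                                           is allowed so the
+ #                                                           box can start blank)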
35
+
36
+
37
+ def create_repo_input():
38
+ """
39
+ Create the repository input component.
40
+
41
+ Returns:
42
+ tuple: (repo_url, github_token, submit_btn) - The repository URL input, GitHub token input, and submit button.
43
+ """
44
+ with gr.Group():
45
+ gr.Markdown("### 📂 GitHub Repository")
46
+
47
+ repo_url = gr.Textbox(
48
+ label="Repository URL",
49
+ placeholder="https://github.com/username/repository",
50
+ info="Enter the URL of a GitHub repository",
51
+ )
52
+
53
+ github_token = gr.Textbox(
54
+ label="GitHub Token (Optional)",
55
+ placeholder="For private repositories only",
56
+ info="Required only for private repositories",
57
+ type="password",
58
+ visible=True
59
+ )
60
+
61
+ submit_btn = gr.Button(
62
+ value="Analyze Repository",
63
+ variant="primary",
64
+ scale=0,
65
+ )
66
+
67
+ # Add validation for GitHub URL format
68
+ error_box = gr.Textbox(
69
+ label="Error",
70
+ visible=True,
71
+ interactive=False,
72
+ container=False,
73
+ show_label=False
74
+ )
75
+
76
+ repo_url.change(
77
+ fn=validate_github_url,
78
+ inputs=[repo_url],
79
+ outputs=[error_box],
80
+ show_progress=False
81
+ )
82
+
83
+ return repo_url, github_token, submit_btn
src/ui/components/results_dashboard.py ADDED
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Results Dashboard Component
6
+
7
+ This module provides the UI component for displaying the code review results.
8
+ """
9
+
10
+ import gradio as gr
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def create_results_dashboard():
17
+ """
18
+ Create the results dashboard component.
19
+
20
+ Returns:
21
+ gr.Tabs: The results dashboard component tabs.
22
+ """
23
+ # Create a Tabs component directly instead of wrapping in a Group
24
+ results_tabs = gr.Tabs(visible=False)
25
+
26
+ # Executive Summary Tab
27
+ with results_tabs:
28
+ with gr.TabItem("Executive Summary"):
29
+ gr.Markdown("### 📊 Analysis Results")
30
+ with gr.Row():
31
+ with gr.Column(scale=2):
32
+ gr.Markdown("#### 📝 Overview")
33
+ summary_text = gr.Markdown("")
34
+
35
+ with gr.Column(scale=1):
36
+ gr.Markdown("#### 📈 Key Metrics")
37
+ with gr.Row():
38
+ gr.Label("Code Quality Score", value="N/A")
39
+ with gr.Row():
40
+ gr.Label("Security Score", value="N/A")
41
+ with gr.Row():
42
+ gr.Label("Performance Score", value="N/A")
43
+
44
+ # Technical Details Tab
45
+ with gr.TabItem("Technical Details"):
46
+ with gr.Accordion("Repository Structure", open=True):
47
+ repo_structure = gr.Markdown("")
48
+
49
+ with gr.Accordion("Language Breakdown", open=True):
50
+ language_breakdown = gr.BarPlot(
51
+ x="Language",
52
+ y="Lines of Code",
53
+ title="Language Distribution",
54
+ tooltip=["Language", "Lines of Code"],
55
+ height=300,
56
+ )
57
+
58
+ with gr.Accordion("Code Quality Issues", open=True):
59
+ quality_issues = gr.Dataframe(
60
+ headers=["File", "Line", "Issue", "Severity", "Description"],
61
+ datatype=["str", "number", "str", "str", "str"],
62
+ row_count=10,
63
+ )
64
+
65
+ # Security Analysis Tab
66
+ with gr.TabItem("Security Analysis"):
67
+ with gr.Accordion("Vulnerabilities", open=True):
68
+ vulnerabilities = gr.Dataframe(
69
+ headers=["File", "Line", "Vulnerability", "Severity", "Description", "Recommendation"],
70
+ datatype=["str", "number", "str", "str", "str", "str"],
71
+ row_count=10,
72
+ )
73
+
74
+ with gr.Accordion("Dependency Issues", open=True):
75
+ dependency_issues = gr.Dataframe(
76
+ headers=["Package", "Current Version", "Recommended Version", "Vulnerability", "Severity"],
77
+ datatype=["str", "str", "str", "str", "str"],
78
+ row_count=10,
79
+ )
80
+
81
+ # Performance Analysis Tab
82
+ with gr.TabItem("Performance Analysis"):
83
+ with gr.Accordion("Performance Hotspots", open=True):
84
+ performance_hotspots = gr.Dataframe(
85
+ headers=["File", "Function", "Issue", "Impact", "Recommendation"],
86
+ datatype=["str", "str", "str", "str", "str"],
87
+ row_count=10,
88
+ )
89
+
90
+ with gr.Accordion("Resource Usage", open=True):
91
+ resource_usage = gr.BarPlot(
92
+ x="Component",
93
+ y="Usage",
94
+ title="Resource Usage",
95
+ tooltip=["Component", "Usage"],
96
+ height=300,
97
+ )
98
+
99
+ # Recommendations Tab
100
+ with gr.TabItem("Recommendations"):
101
+ with gr.Accordion("High Priority", open=True):
102
+ high_priority_recs = gr.Markdown("")
103
+
104
+ with gr.Accordion("Medium Priority", open=True):
105
+ medium_priority_recs = gr.Markdown("")
106
+
107
+ with gr.Accordion("Low Priority", open=True):
108
+ low_priority_recs = gr.Markdown("")
109
+
110
+ return results_tabs
src/ui/gradio_app.py ADDED
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Gradio Application for Code Review Agent
6
+
7
+ This module defines the Gradio web interface for the Code Review Agent.
8
+ It creates a professional UI with components for repository input, language selection,
9
+ progress tracking, and results display.
10
+ """
11
+
12
+ import os
13
+ import gradio as gr
14
+ import logging
15
+
16
+ from src.ui.components.repo_input import create_repo_input
17
+ from src.ui.components.language_selector import create_language_selector
18
+ from src.ui.components.progress_tracker import create_progress_tracker
19
+ from src.ui.components.results_dashboard import create_results_dashboard
20
+ from src.ui.components.export_manager import create_export_manager
21
+ from src.ui.styles.themes import get_theme
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ def create_gradio_app(agent_manager):
27
+ """
28
+ Create and configure the Gradio application.
29
+
30
+ Args:
31
+ agent_manager: The AgentManager instance that handles the business logic.
32
+
33
+ Returns:
34
+ gr.Blocks: The configured Gradio application.
35
+ """
36
+ # Load custom CSS
37
+ css_path = os.path.join(os.path.dirname(__file__), 'styles', 'custom.css')
38
+ with open(css_path, 'r') as f:
39
+ custom_css = f.read()
40
+
41
+ # Create the Gradio app with custom theme
42
+ theme = get_theme()
43
+
44
+ with gr.Blocks(css=custom_css, theme=theme, title="Code Review Agent") as app:
45
+ gr.Markdown(
46
+ """
47
+ # 🔍 Professional Code Review Agent
48
+
49
+ Upload a GitHub repository URL and get comprehensive code analysis with actionable recommendations.
50
+ """
51
+ )
52
+
53
+ with gr.Row():
54
+ with gr.Column(scale=3):
55
+ # Repository input component
56
+ repo_url, github_token, submit_btn = create_repo_input()
57
+
58
+ # Language selector component
59
+ selected_languages = create_language_selector()
60
+
61
+ with gr.Column(scale=1):
62
+ # Information panel
63
+ gr.Markdown(
64
+ """
65
+ ### 📋 Features
66
+ - Multi-language support (15+ languages)
67
+ - Security vulnerability detection
68
+ - Performance analysis
69
+ - Code quality metrics
70
+ - Actionable recommendations
71
+ """
72
+ )
73
+
74
+ # Progress tracker component
75
+ with gr.Group(visible=False) as progress_group:
76
+ gr.Markdown("### ⏳ Analysis Progress")
77
+ overall_progress, status_message, step_progress = create_progress_tracker()
78
+
79
+ # Results dashboard component
80
+ results_dashboard = create_results_dashboard()
81
+
82
+ # Export options component
83
+ export_buttons = create_export_manager()
84
+
85
+ # Set up event handlers
86
+ def start_review_with_progress(repo_url, github_token, selected_languages):
87
+ # We can't use Group objects as outputs, so we'll handle visibility differently
88
+ # First, make progress group visible and results dashboard invisible
89
+ progress_group.visible = True
90
+ results_dashboard.visible = False
91
+
92
+ # Start review process
93
+ progress_components = (progress_group, overall_progress, status_message, step_progress)
94
+ try:
95
+ _, _, _, results = agent_manager.start_review(repo_url, github_token, selected_languages, progress_components)
96
+
97
+ # Hide progress group and show results dashboard when done
98
+ progress_group.visible = False
99
+ results_dashboard.visible = True
100
+
101
+ return [
102
+ 0, # reset overall_progress value
103
+ "*Analysis complete!*", # status_message value
104
+ results # results dashboard with data
105
+ ]
106
+ except Exception as e:
107
+ # Show error message but keep progress group visible
108
+ return [
109
+ 0, # reset overall_progress value
110
+ f"*Error: {str(e)}*", # error message
111
+ results_dashboard # keep current dashboard
112
+ ]
113
+
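+ # Note: mutating `.visible` inside the handler above will not update the
+ # browser. A sketch of the usual Gradio pattern, assuming the two groups are
+ # also listed in `outputs` (hypothetical wiring, not part of this commit):
+ #
+ # def toggle_for_run():
+ #     return gr.update(visible=True), gr.update(visible=False)
+ #
+ # submit_btn.click(fn=toggle_for_run, inputs=[],
+ #                  outputs=[progress_group, results_dashboard])
+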
114
+ submit_btn.click(
115
+ fn=start_review_with_progress,
116
+ inputs=[repo_url, github_token, selected_languages],
117
+ outputs=[overall_progress, status_message, results_dashboard]
118
+ )
119
+
120
+ for export_btn, export_format in export_buttons:
121
+ export_btn.click(
122
+ fn=agent_manager.export_report,
123
+ inputs=[results_dashboard, export_format],
124
+ outputs=[]
125
+ )
126
+
127
+ # Enable the request queue so progress updates can stream to the client
128
+ app.queue()
129
+
130
+ return app
src/ui/styles/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # UI Styles Package for Code Review Agent
src/ui/styles/custom.css ADDED
@@ -0,0 +1,116 @@
1
+ /* Custom CSS for Code Review Agent */
2
+
3
+ /* Global Styles */
4
+ body {
5
+ font-family: 'Inter', sans-serif;
6
+ }
7
+
8
+ /* Glass-morphism effect for containers */
9
+ .glass-container {
10
+ background: rgba(255, 255, 255, 0.7);
11
+ backdrop-filter: blur(10px);
12
+ border-radius: 10px;
13
+ border: 1px solid rgba(255, 255, 255, 0.2);
14
+ box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.1);
15
+ }
16
+
17
+ /* Animations */
18
+ .fade-in {
19
+ animation: fadeIn 0.5s ease-in-out;
20
+ }
21
+
22
+ @keyframes fadeIn {
23
+ from { opacity: 0; }
24
+ to { opacity: 1; }
25
+ }
26
+
27
+ /* Typography */
28
+ .small-text {
29
+ font-size: 0.8rem;
30
+ color: #6b7280;
31
+ }
32
+
33
+ /* Custom Scrollbar */
34
+ ::-webkit-scrollbar {
35
+ width: 8px;
36
+ height: 8px;
37
+ }
38
+
39
+ ::-webkit-scrollbar-track {
40
+ background: #f1f1f1;
41
+ border-radius: 4px;
42
+ }
43
+
44
+ ::-webkit-scrollbar-thumb {
45
+ background: #c1c1c1;
46
+ border-radius: 4px;
47
+ }
48
+
49
+ ::-webkit-scrollbar-thumb:hover {
50
+ background: #a1a1a1;
51
+ }
52
+
53
+ /* Progress Bar Styling */
54
+ .progress-step-complete {
55
+ color: #10b981;
56
+ font-weight: bold;
57
+ }
58
+
59
+ .progress-step-current {
60
+ color: #3b82f6;
61
+ font-weight: bold;
62
+ }
63
+
64
+ .progress-step-pending {
65
+ color: #6b7280;
66
+ }
67
+
68
+ /* Results Dashboard Styling */
69
+ .metric-card {
70
+ border-radius: 8px;
71
+ padding: 16px;
72
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
73
+ margin-bottom: 16px;
74
+ }
75
+
76
+ .metric-card-good {
77
+ background-color: rgba(16, 185, 129, 0.1);
78
+ border-left: 4px solid #10b981;
79
+ }
80
+
81
+ .metric-card-warning {
82
+ background-color: rgba(245, 158, 11, 0.1);
83
+ border-left: 4px solid #f59e0b;
84
+ }
85
+
86
+ .metric-card-critical {
87
+ background-color: rgba(239, 68, 68, 0.1);
88
+ border-left: 4px solid #ef4444;
89
+ }
90
+
91
+ /* Code Snippet Styling */
92
+ .code-snippet {
93
+ font-family: 'Fira Code', monospace;
94
+ background-color: #f8f9fa;
95
+ border-radius: 4px;
96
+ padding: 12px;
97
+ overflow-x: auto;
98
+ margin: 8px 0;
99
+ border-left: 3px solid #3b82f6;
100
+ }
101
+
102
+ .code-line-highlight {
103
+ background-color: rgba(59, 130, 246, 0.1);
104
+ display: block;
105
+ }
106
+
107
+ /* Responsive Design Adjustments */
108
+ @media (max-width: 768px) {
109
+ .hide-on-mobile {
110
+ display: none;
111
+ }
112
+
113
+ .mobile-full-width {
114
+ width: 100% !important;
115
+ }
116
+ }
src/ui/styles/themes.py ADDED
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ UI Themes
6
+
7
+ This module provides theme configuration for the Gradio interface.
8
+ """
9
+
10
+ import gradio as gr
11
+
12
+
13
+ def get_theme():
14
+ """
15
+ Create a custom theme for the Gradio interface.
16
+
17
+ Returns:
18
+ gr.themes.Base: A custom Gradio theme.
19
+ """
20
+ return gr.themes.Base(
21
+ primary_hue="blue",
22
+ secondary_hue="indigo",
23
+ neutral_hue="slate",
24
+ font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
25
+ )
tests/test_agent_manager.py ADDED
@@ -0,0 +1,341 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the Agent Manager
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ # Add the project root directory to the Python path
15
+ project_root = Path(__file__).resolve().parent.parent
16
+ sys.path.insert(0, str(project_root))
17
+
18
+ from src.core.agent_manager import AgentManager
19
+
20
+
21
+ class TestAgentManager(unittest.TestCase):
22
+ """Test cases for the AgentManager class"""
23
+
24
+ def setUp(self):
25
+ """Set up test fixtures"""
26
+ # Create mock components
27
+ self.mock_progress_tracker = MagicMock()
28
+ self.mock_results_dashboard = MagicMock()
29
+
30
+ # Create the agent manager with mocked components
31
+ with patch('src.core.agent_manager.LanguageDetector'), \
32
+ patch('src.services.repository_service'), \
33
+ patch('src.services.code_analyzer.CodeAnalyzer'), \
34
+ patch('src.services.security_scanner.SecurityScanner'), \
35
+ patch('src.services.performance_analyzer.PerformanceAnalyzer'), \
36
+ patch('src.mcp.ai_review.AIReviewService'), \
37
+ patch('src.services.report_generator.ReportGenerator'):
38
+
39
+ self.agent_manager = AgentManager()
40
+
41
+ # Replace the UI components with mocks
42
+ self.agent_manager._progress_tracker = self.mock_progress_tracker
43
+ self.agent_manager._results_dashboard = self.mock_results_dashboard
44
+
45
+ @patch('src.services.repository_service.validate_github_url')
46
+ @patch('src.services.repository_service.clone_repository')
47
+ @patch('src.services.repository_service.get_repository_info')
48
+ @patch('src.core.language_detector.LanguageDetector.detect_languages')
49
+ @patch('src.core.language_detector.LanguageDetector.get_language_breakdown')
50
+ def test_start_review(self, mock_get_breakdown, mock_detect_languages,
51
+ mock_get_repo_info, mock_clone_repo, mock_validate_url):
52
+ """Test start_review method"""
53
+ # Set up the mocks
54
+ mock_validate_url.return_value = True
55
+ mock_clone_repo.return_value = "/test/repo"
56
+ mock_get_repo_info.return_value = {"branch": "main", "commit": "abc123"}
57
+ mock_detect_languages.return_value = ["Python", "JavaScript"]
58
+ mock_get_breakdown.return_value = {
59
+ "Python": {"files": 5, "lines": 500, "percentage": 70},
60
+ "JavaScript": {"files": 3, "lines": 200, "percentage": 30}
61
+ }
62
+
63
+ # Mock the analysis methods
64
+ self.agent_manager._analyze_code = MagicMock()
65
+ self.agent_manager._scan_security = MagicMock()
66
+ self.agent_manager._analyze_performance = MagicMock()
67
+ self.agent_manager._perform_ai_review = MagicMock()
68
+ self.agent_manager._generate_report = MagicMock()
69
+
70
+ # Call the method
71
+ result = self.agent_manager.start_review(
72
+ repo_url="https://github.com/user/repo",
73
+ languages=["Python", "JavaScript"],
74
+ features=["code_analysis", "security_scan", "performance_analysis", "ai_review"]
75
+ )
76
+
77
+ # Verify the result
78
+ self.assertTrue(result["success"])
79
+ self.assertEqual(result["repo_path"], "/test/repo")
80
+
81
+ # Verify the method calls
82
+ mock_validate_url.assert_called_once_with("https://github.com/user/repo")
83
+ mock_clone_repo.assert_called_once()
84
+ mock_get_repo_info.assert_called_once_with("/test/repo")
85
+ mock_detect_languages.assert_called_once_with("/test/repo")
86
+ mock_get_breakdown.assert_called_once_with("/test/repo")
87
+
88
+ # Verify the analysis method calls
89
+ self.agent_manager._analyze_code.assert_called_once()
90
+ self.agent_manager._scan_security.assert_called_once()
91
+ self.agent_manager._analyze_performance.assert_called_once()
92
+ self.agent_manager._perform_ai_review.assert_called_once()
93
+ self.agent_manager._generate_report.assert_called_once()
94
+
95
+ # Verify the progress updates
96
+ self.assertEqual(self.mock_progress_tracker.update.call_count, 8) # Initial + 7 steps
97
+
98
+ @patch('src.services.repository_service.validate_github_url')
99
+ def test_start_review_invalid_url(self, mock_validate_url):
100
+ """Test start_review method with invalid URL"""
101
+ # Set up the mock
102
+ mock_validate_url.return_value = False
103
+
104
+ # Call the method
105
+ result = self.agent_manager.start_review(
106
+ repo_url="invalid_url",
107
+ languages=["Python"],
108
+ features=["code_analysis"]
109
+ )
110
+
111
+ # Verify the result
112
+ self.assertFalse(result["success"])
113
+ self.assertIn("Invalid GitHub URL", result["error"])
114
+
115
+ @patch('src.services.repository_service.validate_github_url')
116
+ @patch('src.services.repository_service.clone_repository')
117
+ def test_start_review_clone_error(self, mock_clone_repo, mock_validate_url):
118
+ """Test start_review method with clone error"""
119
+ # Set up the mocks
120
+ mock_validate_url.return_value = True
121
+ mock_clone_repo.side_effect = Exception("Clone error")
122
+
123
+ # Call the method
124
+ result = self.agent_manager.start_review(
125
+ repo_url="https://github.com/user/repo",
126
+ languages=["Python"],
127
+ features=["code_analysis"]
128
+ )
129
+
130
+ # Verify the result
131
+ self.assertFalse(result["success"])
132
+ self.assertIn("Failed to clone repository", result["error"])
133
+
134
+ @patch('src.services.code_analyzer.CodeAnalyzer.analyze_code')
135
+ def test_analyze_code(self, mock_analyze_code):
136
+ """Test _analyze_code method"""
137
+ # Set up the mock
138
+ mock_analyze_code.return_value = {"Python": {"issues": [], "issue_count": 0}}
139
+
140
+ # Call the method
141
+ self.agent_manager._repo_path = "/test/repo"
142
+ self.agent_manager._languages = ["Python"]
143
+ self.agent_manager._results = {}
144
+
145
+ self.agent_manager._analyze_code()
146
+
147
+ # Verify the result
148
+ self.assertIn("code_analysis", self.agent_manager._results)
149
+ mock_analyze_code.assert_called_once_with("/test/repo", ["Python"])
150
+
151
+ @patch('src.services.security_scanner.SecurityScanner.scan_repository')
152
+ def test_scan_security(self, mock_scan_repo):
153
+ """Test _scan_security method"""
154
+ # Set up the mock
155
+ mock_scan_repo.return_value = {"Python": {"vulnerabilities": [], "vulnerability_count": 0}}
156
+
157
+ # Call the method
158
+ self.agent_manager._repo_path = "/test/repo"
159
+ self.agent_manager._languages = ["Python"]
160
+ self.agent_manager._results = {}
161
+
162
+ self.agent_manager._scan_security()
163
+
164
+ # Verify the result
165
+ self.assertIn("security_scan", self.agent_manager._results)
166
+ mock_scan_repo.assert_called_once_with("/test/repo", ["Python"])
167
+
168
+ @patch('src.services.performance_analyzer.PerformanceAnalyzer.analyze_repository')
169
+ def test_analyze_performance(self, mock_analyze_repo):
170
+ """Test _analyze_performance method"""
171
+ # Set up the mock
172
+ mock_analyze_repo.return_value = {
173
+ "language_results": {"Python": {"issues": [], "issue_count": 0}},
174
+ "hotspots": []
175
+ }
176
+
177
+ # Call the method
178
+ self.agent_manager._repo_path = "/test/repo"
179
+ self.agent_manager._languages = ["Python"]
180
+ self.agent_manager._results = {}
181
+
182
+ self.agent_manager._analyze_performance()
183
+
184
+ # Verify the result
185
+ self.assertIn("performance_analysis", self.agent_manager._results)
186
+ mock_analyze_repo.assert_called_once_with("/test/repo", ["Python"])
187
+
188
+ @patch('src.mcp.ai_review.AIReviewService.is_available')
189
+ @patch('src.mcp.ai_review.AIReviewService.review_repository')
190
+ def test_perform_ai_review(self, mock_review_repo, mock_is_available):
191
+ """Test _perform_ai_review method"""
192
+ # Set up the mocks
193
+ mock_is_available.return_value = True
194
+ mock_review_repo.return_value = {
195
+ "status": "success",
196
+ "reviews": {},
197
+ "summary": "AI review summary"
198
+ }
199
+
200
+ # Call the method
201
+ self.agent_manager._repo_path = "/test/repo"
202
+ self.agent_manager._languages = ["Python"]
203
+ self.agent_manager._results = {}
204
+
205
+ self.agent_manager._perform_ai_review()
206
+
207
+ # Verify the result
208
+ self.assertIn("ai_review", self.agent_manager._results)
209
+ mock_review_repo.assert_called_once()
210
+
211
+ @patch('src.mcp.ai_review.AIReviewService.is_available')
212
+ def test_perform_ai_review_unavailable(self, mock_is_available):
213
+ """Test _perform_ai_review method when AI review is unavailable"""
214
+ # Set up the mock
215
+ mock_is_available.return_value = False
216
+
217
+ # Call the method
218
+ self.agent_manager._repo_path = "/test/repo"
219
+ self.agent_manager._languages = ["Python"]
220
+ self.agent_manager._results = {}
221
+
222
+ self.agent_manager._perform_ai_review()
223
+
224
+ # Verify the result
225
+ self.assertIn("ai_review", self.agent_manager._results)
226
+ self.assertEqual(self.agent_manager._results["ai_review"]["status"], "error")
227
+ self.assertIn("AI review service is not available", self.agent_manager._results["ai_review"]["error"])
228
+
229
+ @patch('src.services.report_generator.ReportGenerator.generate_report')
230
+ def test_generate_report(self, mock_generate_report):
231
+ """Test _generate_report method"""
232
+ # Set up the mock
233
+ mock_generate_report.return_value = {
234
+ "json": "/test/reports/report.json",
235
+ "html": "/test/reports/report.html"
236
+ }
237
+
238
+ # Call the method
239
+ self.agent_manager._repo_name = "repo"
240
+ self.agent_manager._results = {"test": "data"}
241
+
242
+ self.agent_manager._generate_report()
243
+
244
+ # Verify the result
245
+ self.assertIn("report_paths", self.agent_manager._results)
246
+ mock_generate_report.assert_called_once_with("repo", {"test": "data"}, "all")
247
+
248
+ @patch('src.services.report_generator.ReportGenerator.generate_report')
249
+ def test_export_report(self, mock_generate_report):
250
+ """Test export_report method"""
251
+ # Set up the mock
252
+ mock_generate_report.return_value = {
253
+ "json": "/test/reports/report.json"
254
+ }
255
+
256
+ # Call the method
257
+ self.agent_manager._repo_name = "repo"
258
+ self.agent_manager._results = {"test": "data"}
259
+
260
+ result = self.agent_manager.export_report("json")
261
+
262
+ # Verify the result
263
+ self.assertTrue(result["success"])
264
+ self.assertEqual(result["report_path"], "/test/reports/report.json")
265
+ mock_generate_report.assert_called_once_with("repo", {"test": "data"}, "json")
266
+
267
+ @patch('src.services.report_generator.ReportGenerator.generate_report')
268
+ def test_export_report_error(self, mock_generate_report):
269
+ """Test export_report method with error"""
270
+ # Set up the mock
271
+ mock_generate_report.side_effect = Exception("Export error")
272
+
273
+ # Call the method
274
+ self.agent_manager._repo_name = "repo"
275
+ self.agent_manager._results = {"test": "data"}
276
+
277
+ result = self.agent_manager.export_report("json")
278
+
279
+ # Verify the result
280
+ self.assertFalse(result["success"])
281
+ self.assertIn("Failed to export report", result["error"])
282
+
283
+ @patch('src.services.repository_service.clone_repository')
284
+ def test_clone_repository(self, mock_clone_repo):
285
+ """Test _clone_repository method"""
286
+ # Set up the mock
287
+ mock_clone_repo.return_value = "/test/repo"
288
+
289
+ # Call the method
290
+ repo_path = self.agent_manager._clone_repository("https://github.com/user/repo")
291
+
292
+ # Verify the result
293
+ self.assertEqual(repo_path, "/test/repo")
294
+ mock_clone_repo.assert_called_once()
295
+
296
+ def test_update_progress(self):
297
+ """Test _update_progress method"""
298
+ # Call the method
299
+ self.agent_manager._update_progress("Test step", 50, "Test message")
300
+
301
+ # Verify the result
302
+ self.mock_progress_tracker.update.assert_called_once_with(
303
+ "Test step", 50, "Test message"
304
+ )
305
+
306
+ def test_create_progress_tracker(self):
307
+ """Test _create_progress_tracker method"""
308
+ # Mock the gradio components
309
+ with patch('gradio.Markdown'), patch('gradio.Slider'), patch('gradio.Accordion'), patch('gradio.Group'):
310
+ # Call the method
311
+ progress_tracker = self.agent_manager._create_progress_tracker()
312
+
313
+ # Verify the result
314
+ self.assertIsNotNone(progress_tracker)
315
+
316
+ def test_create_results_dashboard(self):
317
+ """Test _create_results_dashboard method"""
318
+ # Mock the gradio components
319
+ with patch('gradio.Markdown'), patch('gradio.Dataframe'), patch('gradio.HighlightedText'), \
320
+ patch('gradio.Code'), patch('gradio.Accordion'), patch('gradio.Tab'), patch('gradio.Tabs'), \
321
+ patch('gradio.Group'):
322
+
323
+ # Call the method
324
+ results_dashboard = self.agent_manager._create_results_dashboard()
325
+
326
+ # Verify the result
327
+ self.assertIsNotNone(results_dashboard)
328
+
329
+ def test_create_error_progress_tracker(self):
330
+ """Test _create_error_progress_tracker method"""
331
+ # Mock the gradio components
332
+ with patch('gradio.Markdown'), patch('gradio.Group'):
333
+ # Call the method
334
+ error_tracker = self.agent_manager._create_error_progress_tracker("Test error")
335
+
336
+ # Verify the result
337
+ self.assertIsNotNone(error_tracker)
338
+
339
+
340
+ if __name__ == "__main__":
341
+ unittest.main()
tests/test_ai_review.py ADDED
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the AI Review Service
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ # Add the project root directory to the Python path
15
+ project_root = Path(__file__).resolve().parent.parent
16
+ sys.path.insert(0, str(project_root))
17
+
18
+ from src.mcp.ai_review import AIReviewService
19
+
20
+
21
+ class TestAIReviewService(unittest.TestCase):
22
+ """Test cases for the AIReviewService class"""
23
+
24
+ def setUp(self):
25
+ """Set up test fixtures"""
26
+ # Mock environment variables
27
+ self.env_patcher = patch.dict('os.environ', {'ANTHROPIC_API_KEY': 'test_api_key'})
28
+ self.env_patcher.start()
29
+
30
+ # Create the service
31
+ self.service = AIReviewService()
32
+
33
+ def tearDown(self):
34
+ """Tear down test fixtures"""
35
+ self.env_patcher.stop()
36
+
37
+ def test_init(self):
38
+ """Test initialization of the service"""
39
+ self.assertIsNotNone(self.service)
40
+ self.assertEqual(self.service.api_key, 'test_api_key')
41
+ self.assertTrue(self.service.is_available())
42
+
43
+ def test_is_available(self):
44
+ """Test is_available method"""
45
+ # With API key
46
+ self.assertTrue(self.service.is_available())
47
+
48
+ # Without API key
49
+ with patch.dict('os.environ', {}, clear=True):
50
+ service = AIReviewService()
51
+ self.assertFalse(service.is_available())
52
+
53
+ @patch('anthropic.Anthropic')
54
+ def test_review_code(self, mock_anthropic):
55
+ """Test review_code method"""
56
+ # Mock the Anthropic client
57
+ mock_client = MagicMock()
58
+ mock_anthropic.return_value = mock_client
59
+
60
+ # Mock the response
61
+ mock_response = MagicMock()
62
+ mock_content = MagicMock()
63
+ mock_content.text = "# Code Review\n\n## Code Quality\nThe code is well-structured.\n\n## Potential Issues\nLine 10: Variable 'x' is not used."
64
+ mock_response.content = [mock_content]
65
+ mock_client.messages.create.return_value = mock_response
66
+
67
+ # Test the method
68
+ result = self.service.review_code(
69
+ file_path="test.py",
70
+ file_content="def test():\n x = 1\n return 2",
71
+ language="Python"
72
+ )
73
+
74
+ # Verify the result
75
+ self.assertEqual(result['status'], 'success')
76
+ self.assertEqual(result['review_text'], mock_content.text)
77
+ self.assertIn('suggestions', result)
78
+
79
+ @patch('anthropic.Anthropic')
80
+ def test_review_code_error(self, mock_anthropic):
81
+ """Test review_code method with error"""
82
+ # Mock the Anthropic client
83
+ mock_client = MagicMock()
84
+ mock_anthropic.return_value = mock_client
85
+
86
+ # Mock an error
87
+ mock_client.messages.create.side_effect = Exception("API error")
88
+
89
+ # Test the method
90
+ result = self.service.review_code(
91
+ file_path="test.py",
92
+ file_content="def test():\n return 1",
93
+ language="Python"
94
+ )
95
+
96
+ # Verify the result
97
+ self.assertEqual(result['status'], 'error')
98
+ self.assertEqual(result['error'], 'API error')
99
+ self.assertEqual(result['suggestions'], [])
100
+
101
+ def test_review_code_unavailable(self):
102
+ """Test review_code method when service is unavailable"""
103
+ # Create a service without API key
104
+ with patch.dict('os.environ', {}, clear=True):
105
+ service = AIReviewService()
106
+
107
+ # Test the method
108
+ result = service.review_code(
109
+ file_path="test.py",
110
+ file_content="def test():\n return 1",
111
+ language="Python"
112
+ )
113
+
114
+ # Verify the result
115
+ self.assertEqual(result['status'], 'error')
116
+ self.assertIn('AI review service is not available', result['error'])
117
+ self.assertEqual(result['suggestions'], [])
118
+
119
+ @patch('anthropic.Anthropic')
120
+ @patch('builtins.open', new_callable=unittest.mock.mock_open, read_data="def test():\n return 1")
121
+ def test_review_repository(self, mock_open, mock_anthropic):
122
+ """Test review_repository method"""
123
+ # Mock the Anthropic client
124
+ mock_client = MagicMock()
125
+ mock_anthropic.return_value = mock_client
126
+
127
+ # Mock the response for file review
128
+ mock_file_response = MagicMock()
129
+ mock_file_content = MagicMock()
130
+ mock_file_content.text = "# Code Review\n\n## Code Quality\nThe code is well-structured."
131
+ mock_file_response.content = [mock_file_content]
132
+
133
+ # Mock the response for repository summary
134
+ mock_summary_response = MagicMock()
135
+ mock_summary_content = MagicMock()
136
+ mock_summary_content.text = "# Repository Review\n\nOverall, the code quality is good."
137
+ mock_summary_response.content = [mock_summary_content]
138
+
139
+ # Set up the mock to return different responses
140
+ mock_client.messages.create.side_effect = [mock_file_response, mock_summary_response]
141
+
142
+ # Test the method
143
+ result = self.service.review_repository(
144
+ repo_path="/test/repo",
145
+ files=["test.py"],
146
+ languages=["Python"]
147
+ )
148
+
149
+ # Verify the result
150
+ self.assertEqual(result['status'], 'success')
151
+ self.assertIn('reviews', result)
152
+ self.assertIn('test.py', result['reviews'])
153
+ self.assertEqual(result['summary'], mock_summary_content.text)
154
+
155
+ def test_get_language_from_extension(self):
156
+ """Test _get_language_from_extension method"""
157
+ self.assertEqual(self.service._get_language_from_extension(".py"), "Python")
158
+ self.assertEqual(self.service._get_language_from_extension(".js"), "JavaScript")
159
+ self.assertEqual(self.service._get_language_from_extension(".ts"), "TypeScript")
160
+ self.assertEqual(self.service._get_language_from_extension(".java"), "Java")
161
+ self.assertEqual(self.service._get_language_from_extension(".go"), "Go")
162
+ self.assertEqual(self.service._get_language_from_extension(".rs"), "Rust")
163
+ self.assertIsNone(self.service._get_language_from_extension(".unknown"))
164
+
165
+
166
+ if __name__ == "__main__":
167
+ unittest.main()
tests/test_code_analyzer.py ADDED
@@ -0,0 +1,383 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the Code Analyzer service
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock, mock_open
10
+ import os
11
+ import sys
12
+ import json
13
+ from pathlib import Path
14
+
15
+ # Add the project root directory to the Python path
16
+ project_root = Path(__file__).resolve().parent.parent
17
+ sys.path.insert(0, str(project_root))
18
+
19
+ from src.services.code_analyzer import CodeAnalyzer
20
+
21
+
22
+ class TestCodeAnalyzer(unittest.TestCase):
23
+ """Test cases for the CodeAnalyzer class"""
24
+
25
+ def setUp(self):
26
+ """Set up test fixtures"""
27
+ self.analyzer = CodeAnalyzer()
28
+ self.test_repo_path = "/test/repo"
29
+
30
+ @patch('os.path.exists')
31
+ @patch('subprocess.run')
32
+ def test_analyze_python_code(self, mock_run, mock_exists):
33
+ """Test analyze_python_code method"""
34
+ # Set up the mocks
35
+ mock_exists.return_value = True
36
+
37
+ # Mock the subprocess.run result
38
+ mock_process = MagicMock()
39
+ mock_process.returncode = 0
40
+ mock_process.stdout = json.dumps({
41
+ "messages": [
42
+ {
43
+ "type": "convention",
44
+ "module": "test_module",
45
+ "obj": "",
46
+ "line": 10,
47
+ "column": 0,
48
+ "path": "test.py",
49
+ "symbol": "missing-docstring",
50
+ "message": "Missing module docstring",
51
+ "message-id": "C0111"
52
+ }
53
+ ]
54
+ })
55
+ mock_run.return_value = mock_process
56
+
57
+ # Mock the file discovery
58
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.py']):
59
+ # Call the method
60
+ result = self.analyzer.analyze_python_code(self.test_repo_path)
61
+
62
+ # Verify the result
63
+ self.assertEqual(len(result['issues']), 1)
64
+ self.assertEqual(result['issue_count'], 1)
65
+ self.assertEqual(result['issues'][0]['type'], 'convention')
66
+ self.assertEqual(result['issues'][0]['file'], 'test.py')
67
+ self.assertEqual(result['issues'][0]['line'], 10)
68
+ self.assertEqual(result['issues'][0]['message'], 'Missing module docstring')
69
+
70
+ @patch('os.path.exists')
71
+ @patch('subprocess.run')
72
+ def test_analyze_javascript_code(self, mock_run, mock_exists):
73
+ """Test analyze_javascript_code method"""
74
+ # Set up the mocks
75
+ mock_exists.return_value = True
76
+
77
+ # Mock the subprocess.run result
78
+ mock_process = MagicMock()
79
+ mock_process.returncode = 0
80
+ mock_process.stdout = json.dumps([
81
+ {
82
+ "filePath": "/test/repo/test.js",
83
+ "messages": [
84
+ {
85
+ "ruleId": "semi",
86
+ "severity": 2,
87
+ "message": "Missing semicolon.",
88
+ "line": 5,
89
+ "column": 20,
90
+ "nodeType": "ExpressionStatement"
91
+ }
92
+ ],
93
+ "errorCount": 1,
94
+ "warningCount": 0,
95
+ "fixableErrorCount": 1,
96
+ "fixableWarningCount": 0
97
+ }
98
+ ])
99
+ mock_run.return_value = mock_process
100
+
101
+ # Mock the file discovery
102
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.js']):
103
+ # Call the method
104
+ result = self.analyzer.analyze_javascript_code(self.test_repo_path)
105
+
106
+ # Verify the result
107
+ self.assertEqual(len(result['issues']), 1)
108
+ self.assertEqual(result['issue_count'], 1)
109
+ self.assertEqual(result['issues'][0]['type'], 'error')
110
+ self.assertEqual(result['issues'][0]['file'], 'test.js')
111
+ self.assertEqual(result['issues'][0]['line'], 5)
112
+ self.assertEqual(result['issues'][0]['message'], 'Missing semicolon.')
113
+
114
+ @patch('os.path.exists')
115
+ @patch('subprocess.run')
116
+ def test_analyze_typescript_code(self, mock_run, mock_exists):
117
+ """Test analyze_typescript_code method"""
118
+ # Set up the mocks
119
+ mock_exists.return_value = True
120
+
121
+ # Mock the subprocess.run results
122
+ # First for ESLint
123
+ eslint_process = MagicMock()
124
+ eslint_process.returncode = 0
125
+ eslint_process.stdout = json.dumps([
126
+ {
127
+ "filePath": "/test/repo/test.ts",
128
+ "messages": [
129
+ {
130
+ "ruleId": "@typescript-eslint/no-unused-vars",
131
+ "severity": 1,
132
+ "message": "'x' is defined but never used.",
133
+ "line": 3,
134
+ "column": 7,
135
+ "nodeType": "Identifier"
136
+ }
137
+ ],
138
+ "errorCount": 0,
139
+ "warningCount": 1,
140
+ "fixableErrorCount": 0,
141
+ "fixableWarningCount": 0
142
+ }
143
+ ])
144
+
145
+ # Then for TSC
146
+ tsc_process = MagicMock()
147
+ tsc_process.returncode = 2 # Error code for TypeScript compiler
148
+ tsc_process.stderr = "test.ts(10,15): error TS2339: Property 'foo' does not exist on type 'Bar'."
149
+
150
+ # Set up the mock to return different values on consecutive calls
151
+ mock_run.side_effect = [eslint_process, tsc_process]
152
+
153
+ # Mock the file discovery
154
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.ts']):
155
+ # Call the method
156
+ result = self.analyzer.analyze_typescript_code(self.test_repo_path)
157
+
158
+ # Verify the result
159
+ self.assertEqual(len(result['issues']), 2) # One from ESLint, one from TSC
160
+ self.assertEqual(result['issue_count'], 2)
161
+
162
+ # Check the ESLint issue
163
+ eslint_issue = next(issue for issue in result['issues'] if issue['source'] == 'eslint')
164
+ self.assertEqual(eslint_issue['type'], 'warning')
165
+ self.assertEqual(eslint_issue['file'], 'test.ts')
166
+ self.assertEqual(eslint_issue['line'], 3)
167
+ self.assertEqual(eslint_issue['message'], "'x' is defined but never used.")
168
+
169
+ # Check the TSC issue
170
+ tsc_issue = next(issue for issue in result['issues'] if issue['source'] == 'tsc')
171
+ self.assertEqual(tsc_issue['type'], 'error')
172
+ self.assertEqual(tsc_issue['file'], 'test.ts')
173
+ self.assertEqual(tsc_issue['line'], 10)
174
+ self.assertEqual(tsc_issue['message'], "Property 'foo' does not exist on type 'Bar'.")
175
+
176
+ @patch('os.path.exists')
177
+ @patch('subprocess.run')
178
+ def test_analyze_java_code(self, mock_run, mock_exists):
179
+ """Test analyze_java_code method"""
180
+ # Set up the mocks
181
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         # The XML declaration must be the first characters of the payload,
+         # so the mock output starts immediately after the opening quotes
+         mock_process.stdout = """<?xml version="1.0" encoding="UTF-8"?>
+         <pmd version="6.55.0" timestamp="2023-06-01T12:00:00.000">
+             <file name="/test/repo/Test.java">
+                 <violation beginline="10" endline="10" begincolumn="5" endcolumn="20" rule="UnusedLocalVariable" ruleset="Best Practices" class="Test" method="main" variable="unusedVar" externalInfoUrl="https://pmd.github.io/pmd-6.55.0/pmd_rules_java_bestpractices.html#unusedlocalvariable" priority="3">
+                     Avoid unused local variables such as 'unusedVar'.
+                 </violation>
+             </file>
+         </pmd>
+         """
+         mock_run.return_value = mock_process
+
+         # Mock the file discovery
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/Test.java']):
+             # Call the method
+             result = self.analyzer.analyze_java_code(self.test_repo_path)
+
+             # Verify the result
+             self.assertEqual(len(result['issues']), 1)
+             self.assertEqual(result['issue_count'], 1)
+             self.assertEqual(result['issues'][0]['type'], 'warning')  # Priority 3 maps to warning
+             self.assertEqual(result['issues'][0]['file'], 'Test.java')
+             self.assertEqual(result['issues'][0]['line'], 10)
+             self.assertEqual(result['issues'][0]['message'], "Avoid unused local variables such as 'unusedVar'.")
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_analyze_go_code(self, mock_run, mock_exists):
+         """Test analyze_go_code method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = json.dumps({
+             "Issues": [
+                 {
+                     "FromLinter": "gosimple",
+                     "Text": "S1000: should use a simple channel send/receive instead of select with a single case",
+                     "Pos": {
+                         "Filename": "test.go",
+                         "Line": 15,
+                         "Column": 2
+                     },
+                     "Severity": "warning"
+                 }
+             ]
+         })
+         mock_run.return_value = mock_process
+
+         # Call the method
+         result = self.analyzer.analyze_go_code(self.test_repo_path)
+
+         # Verify the result
+         self.assertEqual(len(result['issues']), 1)
+         self.assertEqual(result['issue_count'], 1)
+         self.assertEqual(result['issues'][0]['type'], 'warning')
+         self.assertEqual(result['issues'][0]['file'], 'test.go')
+         self.assertEqual(result['issues'][0]['line'], 15)
+         self.assertEqual(result['issues'][0]['message'], 'S1000: should use a simple channel send/receive instead of select with a single case')
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_analyze_rust_code(self, mock_run, mock_exists):
+         """Test analyze_rust_code method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         # cargo emits JSON true/null here; as Python literals these must be True/None
+         mock_process.stdout = json.dumps({
+             "reason": "compiler-message",
+             "message": {
+                 "rendered": "warning: unused variable: `x`\n --> src/main.rs:2:9\n |\n2 | let x = 5;\n | ^ help: if this is intentional, prefix it with an underscore: `_x`\n |\n = note: `#[warn(unused_variables)]` on by default\n\n",
+                 "children": [],
+                 "code": {
+                     "code": "unused_variables",
+                     "explanation": None
+                 },
+                 "level": "warning",
+                 "message": "unused variable: `x`",
+                 "spans": [
+                     {
+                         "byte_end": 26,
+                         "byte_start": 25,
+                         "column_end": 10,
+                         "column_start": 9,
+                         "expansion": None,
+                         "file_name": "src/main.rs",
+                         "is_primary": True,
+                         "label": "help: if this is intentional, prefix it with an underscore: `_x`",
+                         "line_end": 2,
+                         "line_start": 2,
+                         "suggested_replacement": "_x",
+                         "suggestion_applicability": "MachineApplicable",
+                         "text": [
+                             {
+                                 "highlight_end": 10,
+                                 "highlight_start": 9,
+                                 "text": " let x = 5;"
+                             }
+                         ]
+                     }
+                 ]
+             }
+         })
+         mock_run.return_value = mock_process
+
+         # Call the method
+         result = self.analyzer.analyze_rust_code(self.test_repo_path)
+
+         # Verify the result
+         self.assertEqual(len(result['issues']), 1)
+         self.assertEqual(result['issue_count'], 1)
+         self.assertEqual(result['issues'][0]['type'], 'warning')
+         self.assertEqual(result['issues'][0]['file'], 'src/main.rs')
+         self.assertEqual(result['issues'][0]['line'], 2)
+         self.assertEqual(result['issues'][0]['message'], 'unused variable: `x`')
+
+     def test_analyze_code(self):
+         """Test analyze_code method"""
+         # Mock the language-specific analysis methods
+         self.analyzer.analyze_python_code = MagicMock(return_value={
+             'issues': [{'type': 'convention', 'file': 'test.py', 'line': 10, 'message': 'Test issue'}],
+             'issue_count': 1
+         })
+         self.analyzer.analyze_javascript_code = MagicMock(return_value={
+             'issues': [{'type': 'error', 'file': 'test.js', 'line': 5, 'message': 'Test issue'}],
+             'issue_count': 1
+         })
+
+         # Call the method
+         result = self.analyzer.analyze_code(self.test_repo_path, ['Python', 'JavaScript'])
+
+         # Verify the result
+         self.assertEqual(len(result), 2)  # Two languages
+         self.assertIn('Python', result)
+         self.assertIn('JavaScript', result)
+         self.assertEqual(result['Python']['issue_count'], 1)
+         self.assertEqual(result['JavaScript']['issue_count'], 1)
+
+         # Verify the method calls
+         self.analyzer.analyze_python_code.assert_called_once_with(self.test_repo_path)
+         self.analyzer.analyze_javascript_code.assert_called_once_with(self.test_repo_path)
+
+     @patch('os.walk')
+     def test_find_files(self, mock_walk):
+         """Test _find_files method"""
+         # Set up the mock
+         mock_walk.return_value = [
+             ('/test/repo', ['dir1'], ['file1.py', 'file2.js']),
+             ('/test/repo/dir1', [], ['file3.py'])
+         ]
+
+         # Call the method
+         python_files = self.analyzer._find_files(self.test_repo_path, '.py')
+
+         # Verify the result
+         self.assertEqual(len(python_files), 2)
+         self.assertIn('/test/repo/file1.py', python_files)
+         self.assertIn('/test/repo/dir1/file3.py', python_files)
+
+     @patch('os.path.exists')
+     def test_check_tool_availability(self, mock_exists):
+         """Test _check_tool_availability method"""
+         # Set up the mock
+         mock_exists.side_effect = [True, False]  # First tool exists, second doesn't
+
+         # Call the method
+         result1 = self.analyzer._check_tool_availability('tool1')
+         result2 = self.analyzer._check_tool_availability('tool2')
+
+         # Verify the result
+         self.assertTrue(result1)
+         self.assertFalse(result2)
+
+     @patch('subprocess.run')
+     def test_run_command(self, mock_run):
+         """Test _run_command method"""
+         # Set up the mock
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = "Test output"
+         mock_run.return_value = mock_process
+
+         # Call the method
+         returncode, output = self.analyzer._run_command(['test', 'command'])
+
+         # Verify the result
+         self.assertEqual(returncode, 0)
+         self.assertEqual(output, "Test output")
+         mock_run.assert_called_once()
+
+
+ if __name__ == "__main__":
+     unittest.main()
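
Review note: test_analyze_code above pins down the dispatch contract — analyze_code fans out to one analyze_<language>_code method per requested language and keys the result dict by language, while test_find_files fixes the file-discovery helper. A minimal sketch of that contract, assuming the real CodeAnalyzer in src/ keeps per-language methods; the class name and the stub bodies here are illustrative, not the project's code:

import os
from typing import Dict, List


class CodeAnalyzerSketch:
    def analyze_code(self, repo_path: str, languages: List[str]) -> Dict[str, dict]:
        # Map each detected language to its analyzer; unknown languages are skipped
        dispatch = {
            'Python': self.analyze_python_code,
            'JavaScript': self.analyze_javascript_code,
        }
        return {
            lang: dispatch[lang](repo_path)
            for lang in languages
            if lang in dispatch
        }

    def _find_files(self, repo_path: str, extension: str) -> List[str]:
        # Walk the tree and collect files with the requested extension,
        # matching the expectations in test_find_files above
        matches = []
        for root, _dirs, files in os.walk(repo_path):
            matches.extend(
                os.path.join(root, name)
                for name in files
                if name.endswith(extension)
            )
        return matches

    def analyze_python_code(self, repo_path: str) -> dict:
        return {'issues': [], 'issue_count': 0}  # stub, illustrative only

    def analyze_javascript_code(self, repo_path: str) -> dict:
        return {'issues': [], 'issue_count': 0}  # stub, illustrative only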
tests/test_language_detector.py ADDED
@@ -0,0 +1,192 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Language Detector
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock, mock_open
+ import os
+ import sys
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.core.language_detector import LanguageDetector
+
+
+ class TestLanguageDetector(unittest.TestCase):
+     """Test cases for the LanguageDetector class"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         self.detector = LanguageDetector()
+
+         # Create a mock repository structure
+         self.repo_path = "/test/repo"
+         self.mock_files = [
+             "/test/repo/main.py",
+             "/test/repo/utils.py",
+             "/test/repo/static/script.js",
+             "/test/repo/static/style.css",
+             "/test/repo/src/app.js",
+             "/test/repo/src/components/Button.jsx",
+             "/test/repo/src/components/Form.tsx",
+             "/test/repo/docs/index.html",
+             "/test/repo/README.md",
+             "/test/repo/package.json",
+             "/test/repo/Dockerfile",
+             "/test/repo/.gitignore"
+         ]
+
+     def test_get_language_from_extension(self):
+         """Test _get_language_from_extension method"""
+         # Test common extensions
+         self.assertEqual(self.detector._get_language_from_extension(".py"), "Python")
+         self.assertEqual(self.detector._get_language_from_extension(".js"), "JavaScript")
+         self.assertEqual(self.detector._get_language_from_extension(".jsx"), "JavaScript")
+         self.assertEqual(self.detector._get_language_from_extension(".ts"), "TypeScript")
+         self.assertEqual(self.detector._get_language_from_extension(".tsx"), "TypeScript")
+         self.assertEqual(self.detector._get_language_from_extension(".java"), "Java")
+         self.assertEqual(self.detector._get_language_from_extension(".go"), "Go")
+         self.assertEqual(self.detector._get_language_from_extension(".rs"), "Rust")
+         self.assertEqual(self.detector._get_language_from_extension(".html"), "HTML")
+         self.assertEqual(self.detector._get_language_from_extension(".css"), "CSS")
+         self.assertEqual(self.detector._get_language_from_extension(".md"), "Markdown")
+
+         # Test unknown extension
+         self.assertEqual(self.detector._get_language_from_extension(".unknown"), "Other")
+
+     def test_get_language_from_filename(self):
+         """Test _get_language_from_filename method"""
+         # Test common filenames
+         self.assertEqual(self.detector._get_language_from_filename("Dockerfile"), "Dockerfile")
+         self.assertEqual(self.detector._get_language_from_filename(".gitignore"), "Git")
+         self.assertEqual(self.detector._get_language_from_filename("package.json"), "JSON")
+         self.assertEqual(self.detector._get_language_from_filename("README.md"), "Markdown")
+
+         # Test unknown filename
+         self.assertIsNone(self.detector._get_language_from_filename("unknown"))
+
+     @patch('os.walk')
+     def test_detect_languages(self, mock_walk):
+         """Test detect_languages method"""
+         # Mock os.walk to return our mock files
+         mock_walk.return_value = [
+             ("/test/repo", ["static", "src", "docs"], ["main.py", "utils.py", "README.md", "package.json", ".gitignore"]),
+             ("/test/repo/static", [], ["script.js", "style.css"]),
+             ("/test/repo/src", ["components"], ["app.js"]),
+             ("/test/repo/src/components", [], ["Button.jsx", "Form.tsx"]),
+             ("/test/repo/docs", [], ["index.html"]),
+         ]
+
+         # Test the method
+         languages = self.detector.detect_languages(self.repo_path)
+
+         # Verify the result
+         self.assertIn("Python", languages)
+         self.assertIn("JavaScript", languages)
+         self.assertIn("TypeScript", languages)
+         self.assertIn("HTML", languages)
+         self.assertIn("CSS", languages)
+         self.assertIn("Markdown", languages)
+         self.assertIn("JSON", languages)
+         self.assertIn("Git", languages)
+
+     @patch('os.walk')
+     @patch('builtins.open', new_callable=mock_open, read_data="line1\nline2\nline3\n")
+     def test_get_language_breakdown(self, mock_file, mock_walk):
+         """Test get_language_breakdown method"""
+         # Mock os.walk to return our mock files
+         mock_walk.return_value = [
+             ("/test/repo", ["static", "src"], ["main.py", "utils.py", "README.md"]),
+             ("/test/repo/static", [], ["script.js"]),
+             ("/test/repo/src", [], ["app.js"]),
+         ]
+
+         # Test the method
+         breakdown = self.detector.get_language_breakdown(self.repo_path)
+
+         # Verify the result
+         self.assertIn("Python", breakdown)
+         self.assertIn("JavaScript", breakdown)
+         self.assertIn("Markdown", breakdown)
+
+         # Each mocked file counts as 4 lines: three lines of text plus the
+         # empty string after the trailing newline
+         self.assertEqual(breakdown["Python"]["files"], 2)
+         self.assertEqual(breakdown["Python"]["lines"], 8)  # 2 files * 4 lines
+         self.assertEqual(breakdown["JavaScript"]["files"], 2)
+         self.assertEqual(breakdown["JavaScript"]["lines"], 8)  # 2 files * 4 lines
+         self.assertEqual(breakdown["Markdown"]["files"], 1)
+         self.assertEqual(breakdown["Markdown"]["lines"], 4)  # 1 file * 4 lines
+
+         # Check percentages against the 20-line total (5 files * 4 lines)
+         self.assertEqual(breakdown["Python"]["percentage"], 40)  # 8/20 * 100
+         self.assertEqual(breakdown["JavaScript"]["percentage"], 40)  # 8/20 * 100
+         self.assertEqual(breakdown["Markdown"]["percentage"], 20)  # 4/20 * 100
+
+     @patch('os.path.isfile')
+     def test_is_binary_file(self, mock_isfile):
+         """Test _is_binary_file method"""
+         # Mock isfile to always return True
+         mock_isfile.return_value = True
+
+         # Test with text file extensions
+         self.assertFalse(self.detector._is_binary_file("test.py"))
+         self.assertFalse(self.detector._is_binary_file("test.js"))
+         self.assertFalse(self.detector._is_binary_file("test.html"))
+         self.assertFalse(self.detector._is_binary_file("test.css"))
+         self.assertFalse(self.detector._is_binary_file("test.md"))
+
+         # Test with binary file extensions
+         self.assertTrue(self.detector._is_binary_file("test.png"))
+         self.assertTrue(self.detector._is_binary_file("test.jpg"))
+         self.assertTrue(self.detector._is_binary_file("test.gif"))
+         self.assertTrue(self.detector._is_binary_file("test.pdf"))
+         self.assertTrue(self.detector._is_binary_file("test.zip"))
+
+         # Test with non-existent file
+         mock_isfile.return_value = False
+         self.assertFalse(self.detector._is_binary_file("nonexistent.py"))
+
+     @patch('os.path.isdir')
+     def test_should_ignore_directory(self, mock_isdir):
+         """Test _should_ignore_directory method"""
+         # Mock isdir to always return True
+         mock_isdir.return_value = True
+
+         # Test with common directories to ignore
+         self.assertTrue(self.detector._should_ignore_directory("/test/repo/node_modules"))
+         self.assertTrue(self.detector._should_ignore_directory("/test/repo/.git"))
+         self.assertTrue(self.detector._should_ignore_directory("/test/repo/__pycache__"))
+         self.assertTrue(self.detector._should_ignore_directory("/test/repo/venv"))
+         self.assertTrue(self.detector._should_ignore_directory("/test/repo/.vscode"))
+
+         # Test with directories not to ignore
+         self.assertFalse(self.detector._should_ignore_directory("/test/repo/src"))
+         self.assertFalse(self.detector._should_ignore_directory("/test/repo/app"))
+         self.assertFalse(self.detector._should_ignore_directory("/test/repo/docs"))
+
+         # Test with non-existent directory
+         mock_isdir.return_value = False
+         self.assertFalse(self.detector._should_ignore_directory("/test/repo/nonexistent"))
+
+     def test_should_ignore_file(self):
+         """Test _should_ignore_file method"""
+         # Test with common files to ignore
+         self.assertTrue(self.detector._should_ignore_file("/test/repo/.DS_Store"))
+         self.assertTrue(self.detector._should_ignore_file("/test/repo/Thumbs.db"))
+         self.assertTrue(self.detector._should_ignore_file("/test/repo/.env"))
+
+         # Test with files not to ignore
+         self.assertFalse(self.detector._should_ignore_file("/test/repo/main.py"))
+         self.assertFalse(self.detector._should_ignore_file("/test/repo/app.js"))
+         self.assertFalse(self.detector._should_ignore_file("/test/repo/README.md"))
+
+
+ if __name__ == "__main__":
+     unittest.main()
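
Review note: the 4-lines-per-file arithmetic in test_get_language_breakdown is consistent with counting lines via content.split('\n'), which includes the empty string after a trailing newline. A rough sketch of the counting and percentage logic these assertions imply; EXTENSION_MAP is a stand-in for the detector's real table and the standalone function name is illustrative:

import os
from collections import defaultdict

EXTENSION_MAP = {'.py': 'Python', '.js': 'JavaScript', '.md': 'Markdown'}


def language_breakdown(repo_path):
    stats = defaultdict(lambda: {'files': 0, 'lines': 0})
    for root, _dirs, files in os.walk(repo_path):
        for name in files:
            language = EXTENSION_MAP.get(os.path.splitext(name)[1])
            if language is None:
                continue
            with open(os.path.join(root, name), encoding='utf-8') as handle:
                # split('\n') yields 4 entries for "line1\nline2\nline3\n"
                line_count = len(handle.read().split('\n'))
            stats[language]['files'] += 1
            stats[language]['lines'] += line_count
    total = sum(entry['lines'] for entry in stats.values()) or 1
    for entry in stats.values():
        entry['percentage'] = round(entry['lines'] * 100 / total)
    return dict(stats)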
tests/test_performance_analyzer.py ADDED
@@ -0,0 +1,442 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Performance Analyzer service
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock, mock_open
+ import os
+ import sys
+ import re
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.services.performance_analyzer import PerformanceAnalyzer
+
+
+ class TestPerformanceAnalyzer(unittest.TestCase):
+     """Test cases for the PerformanceAnalyzer class"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         self.analyzer = PerformanceAnalyzer()
+         self.test_repo_path = "/test/repo"
+
+     def test_analyze_python_performance(self):
+         """Test analyze_python_performance method"""
+         # Create a sample Python file content with performance issues
+         python_code = """
+ def slow_function():
+     # This is a slow list comprehension with nested loops
+     result = [x * y for x in range(1000) for y in range(1000)]
+
+     # Inefficient string concatenation in a loop
+     s = ""
+     for i in range(1000):
+         s += str(i)
+
+     # Using a list where a set would be more efficient
+     items = [1, 2, 3, 4, 5]
+     if 3 in items:  # O(n) operation
+         print("Found")
+ """
+
+         # Mock the file discovery and reading
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.py']), \
+              patch('builtins.open', mock_open(read_data=python_code)):
+
+             # Call the method
+             result = self.analyzer.analyze_python_performance(self.test_repo_path)
+
+             # Verify the result
+             self.assertGreater(len(result['issues']), 0)
+             self.assertGreater(result['issue_count'], 0)
+
+             # Check for specific issues
+             nested_loop_issue = next((issue for issue in result['issues']
+                                       if 'nested loop' in issue['message'].lower()), None)
+             string_concat_issue = next((issue for issue in result['issues']
+                                         if 'string concatenation' in issue['message'].lower()), None)
+             list_vs_set_issue = next((issue for issue in result['issues']
+                                       if 'list' in issue['message'].lower() and 'set' in issue['message'].lower()), None)
+
+             self.assertIsNotNone(nested_loop_issue)
+             self.assertIsNotNone(string_concat_issue)
+             self.assertIsNotNone(list_vs_set_issue)
+
+     def test_analyze_javascript_performance(self):
+         """Test analyze_javascript_performance method"""
+         # Create a sample JavaScript file content with performance issues
+         js_code = """
+ function slowFunction() {
+     // Inefficient DOM manipulation in a loop
+     for (let i = 0; i < 1000; i++) {
+         document.getElementById('myElement').innerHTML += 'item ' + i;
+     }
+
+     // Memory leak with event listeners
+     document.getElementById('button').addEventListener('click', function() {
+         console.log('clicked');
+     });
+
+     // Blocking the main thread
+     let start = Date.now();
+     while (Date.now() - start < 1000) {
+         // Busy wait for 1 second
+     }
+ }
+ """
+
+         # Mock the file discovery and reading
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.js']), \
+              patch('builtins.open', mock_open(read_data=js_code)):
+
+             # Call the method
+             result = self.analyzer.analyze_javascript_performance(self.test_repo_path)
+
+             # Verify the result
+             self.assertGreater(len(result['issues']), 0)
+             self.assertGreater(result['issue_count'], 0)
+
+             # Check for specific issues
+             dom_issue = next((issue for issue in result['issues']
+                               if 'dom' in issue['message'].lower()), None)
+             memory_leak_issue = next((issue for issue in result['issues']
+                                       if 'memory leak' in issue['message'].lower() or 'event listener' in issue['message'].lower()), None)
+             blocking_issue = next((issue for issue in result['issues']
+                                    if 'blocking' in issue['message'].lower() or 'main thread' in issue['message'].lower()), None)
+
+             self.assertIsNotNone(dom_issue)
+             self.assertIsNotNone(memory_leak_issue)
+             self.assertIsNotNone(blocking_issue)
+
+     def test_analyze_typescript_performance(self):
+         """Test analyze_typescript_performance method"""
+         # Create a sample TypeScript file content with performance issues
+         ts_code = """
+ function slowFunction(): void {
+     // Inefficient array operations
+     const array: number[] = [];
+     for (let i = 0; i < 1000; i++) {
+         array.unshift(i); // O(n) operation
+     }
+
+     // Excessive type casting
+     let value: any = "123";
+     let num: number = <number><any>value;
+
+     // Inefficient async/await usage
+     async function fetchData(): Promise<void> {
+         const promises = [];
+         for (let i = 0; i < 10; i++) {
+             const result = await fetch(`https://api.example.com/data/${i}`); // Sequential fetches
+             promises.push(result);
+         }
+     }
+ }
+ """
+
+         # Mock the file discovery and reading
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.ts']), \
+              patch('builtins.open', mock_open(read_data=ts_code)):
+
+             # Call the method
+             result = self.analyzer.analyze_typescript_performance(self.test_repo_path)
+
+             # Verify the result
+             self.assertGreater(len(result['issues']), 0)
+             self.assertGreater(result['issue_count'], 0)
+
+             # Check for specific issues
+             array_issue = next((issue for issue in result['issues']
+                                 if 'array' in issue['message'].lower() and 'unshift' in issue['message'].lower()), None)
+             type_casting_issue = next((issue for issue in result['issues']
+                                        if 'type casting' in issue['message'].lower()), None)
+             async_issue = next((issue for issue in result['issues']
+                                 if 'async' in issue['message'].lower() or 'await' in issue['message'].lower()), None)
+
+             self.assertIsNotNone(array_issue)
+             self.assertIsNotNone(type_casting_issue)
+             self.assertIsNotNone(async_issue)
+
+     def test_analyze_java_performance(self):
+         """Test analyze_java_performance method"""
+         # Create a sample Java file content with performance issues
+         java_code = """
+ public class SlowClass {
+     public void slowMethod() {
+         // Inefficient string concatenation
+         String result = "";
+         for (int i = 0; i < 1000; i++) {
+             result += i; // Creates a new string each time
+         }
+
+         // Using ArrayList where HashSet would be more efficient for lookups
+         ArrayList<Integer> list = new ArrayList<>();
+         for (int i = 0; i < 1000; i++) {
+             list.add(i);
+         }
+         boolean contains = list.contains(500); // O(n) operation
+
+         // Excessive object creation
+         for (int i = 0; i < 1000; i++) {
+             Integer obj = new Integer(i); // Creates 1000 objects
+         }
+     }
+ }
+ """
+
+         # Mock the file discovery and reading
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/SlowClass.java']), \
+              patch('builtins.open', mock_open(read_data=java_code)):
+
+             # Call the method
+             result = self.analyzer.analyze_java_performance(self.test_repo_path)
+
+             # Verify the result
+             self.assertGreater(len(result['issues']), 0)
+             self.assertGreater(result['issue_count'], 0)
+
+             # Check for specific issues
+             string_concat_issue = next((issue for issue in result['issues']
+                                         if 'string concatenation' in issue['message'].lower()), None)
+             collection_issue = next((issue for issue in result['issues']
+                                      if 'arraylist' in issue['message'].lower() and 'hashset' in issue['message'].lower()), None)
+             object_creation_issue = next((issue for issue in result['issues']
+                                           if 'object creation' in issue['message'].lower()), None)
+
+             self.assertIsNotNone(string_concat_issue)
+             self.assertIsNotNone(collection_issue)
+             self.assertIsNotNone(object_creation_issue)
+
+     def test_analyze_go_performance(self):
+         """Test analyze_go_performance method"""
+         # Create a sample Go file content with performance issues
+         go_code = """
+ package main
+
+ import (
+     "fmt"
+     "sync"
+ )
+
+ func slowFunction() {
+     // Inefficient slice operations
+     slice := []int{}
+     for i := 0; i < 1000; i++ {
+         slice = append(slice, i) // May cause reallocation
+     }
+
+     // Mutex instead of atomic operations
+     var mu sync.Mutex
+     counter := 0
+     for i := 0; i < 1000; i++ {
+         mu.Lock()
+         counter++
+         mu.Unlock()
+     }
+
+     // Inefficient string concatenation
+     result := ""
+     for i := 0; i < 1000; i++ {
+         result += fmt.Sprintf("%d", i) // Creates a new string each time
+     }
+ }
+ """
+
+         # Mock the file discovery and reading
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/main.go']), \
+              patch('builtins.open', mock_open(read_data=go_code)):
+
+             # Call the method
+             result = self.analyzer.analyze_go_performance(self.test_repo_path)
+
+             # Verify the result
+             self.assertGreater(len(result['issues']), 0)
+             self.assertGreater(result['issue_count'], 0)
+
+             # Check for specific issues
+             slice_issue = next((issue for issue in result['issues']
+                                 if 'slice' in issue['message'].lower() and 'append' in issue['message'].lower()), None)
+             mutex_issue = next((issue for issue in result['issues']
+                                 if 'mutex' in issue['message'].lower() or 'atomic' in issue['message'].lower()), None)
+             string_concat_issue = next((issue for issue in result['issues']
+                                         if 'string concatenation' in issue['message'].lower()), None)
+
+             self.assertIsNotNone(slice_issue)
+             self.assertIsNotNone(mutex_issue)
+             self.assertIsNotNone(string_concat_issue)
+
+     def test_analyze_rust_performance(self):
+         """Test analyze_rust_performance method"""
+         # Create a sample Rust file content with performance issues
+         rust_code = """
+ fn slow_function() {
+     // Inefficient string operations
+     let mut result = String::new();
+     for i in 0..1000 {
+         result.push_str(&i.to_string()); // Allocates a new string each time
+     }
+
+     // Excessive cloning
+     let data = vec![1, 2, 3, 4, 5];
+     let copied = data.clone(); // Clones the entire vector
+
+     // Inefficient iteration
+     let mut sum = 0;
+     for i in 0..data.len() {
+         sum += data[i]; // Bounds checking on each access
+     }
+ }
+ """
+
+         # Mock the file discovery and reading
+         with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/main.rs']), \
+              patch('builtins.open', mock_open(read_data=rust_code)):
+
+             # Call the method
+             result = self.analyzer.analyze_rust_performance(self.test_repo_path)
+
+             # Verify the result
+             self.assertGreater(len(result['issues']), 0)
+             self.assertGreater(result['issue_count'], 0)
+
+             # Check for specific issues
+             string_issue = next((issue for issue in result['issues']
+                                  if 'string' in issue['message'].lower()), None)
+             clone_issue = next((issue for issue in result['issues']
+                                 if 'clone' in issue['message'].lower()), None)
+             iteration_issue = next((issue for issue in result['issues']
+                                     if 'iteration' in issue['message'].lower() or 'bounds checking' in issue['message'].lower()), None)
+
+             self.assertIsNotNone(string_issue)
+             self.assertIsNotNone(clone_issue)
+             self.assertIsNotNone(iteration_issue)
+
+     def test_analyze_repository(self):
+         """Test analyze_repository method"""
+         # Mock the language-specific analysis methods
+         self.analyzer.analyze_python_performance = MagicMock(return_value={
+             'issues': [
+                 {'file': 'file1.py', 'line': 10, 'message': 'Inefficient list comprehension'},
+                 {'file': 'file1.py', 'line': 20, 'message': 'Inefficient string concatenation'}
+             ],
+             'issue_count': 2
+         })
+         self.analyzer.analyze_javascript_performance = MagicMock(return_value={
+             'issues': [
+                 {'file': 'file1.js', 'line': 15, 'message': 'DOM manipulation in loop'}
+             ],
+             'issue_count': 1
+         })
+
+         # Call the method
+         result = self.analyzer.analyze_repository(self.test_repo_path, ['Python', 'JavaScript'])
+
+         # Verify the result
+         self.assertEqual(len(result['language_results']), 2)  # Two languages
+         self.assertIn('Python', result['language_results'])
+         self.assertIn('JavaScript', result['language_results'])
+         self.assertEqual(result['language_results']['Python']['issue_count'], 2)
+         self.assertEqual(result['language_results']['JavaScript']['issue_count'], 1)
+
+         # Check hotspots
+         self.assertEqual(len(result['hotspots']), 1)  # One file with multiple issues
+         self.assertEqual(result['hotspots'][0]['file'], 'file1.py')
+         self.assertEqual(result['hotspots'][0]['issue_count'], 2)
+
+         # Verify the method calls
+         self.analyzer.analyze_python_performance.assert_called_once_with(self.test_repo_path)
+         self.analyzer.analyze_javascript_performance.assert_called_once_with(self.test_repo_path)
+
+     def test_identify_hotspots(self):
+         """Test _identify_hotspots method"""
+         # Create sample language results
+         language_results = {
+             'Python': {
+                 'issues': [
+                     {'file': 'file1.py', 'line': 10, 'message': 'Issue 1'},
+                     {'file': 'file1.py', 'line': 20, 'message': 'Issue 2'},
+                     {'file': 'file2.py', 'line': 5, 'message': 'Issue 3'}
+                 ],
+                 'issue_count': 3
+             },
+             'JavaScript': {
+                 'issues': [
+                     {'file': 'file1.js', 'line': 15, 'message': 'Issue 4'},
+                     {'file': 'file3.js', 'line': 25, 'message': 'Issue 5'},
+                     {'file': 'file3.js', 'line': 30, 'message': 'Issue 6'}
+                 ],
+                 'issue_count': 3
+             }
+         }
+
+         # Call the method
+         hotspots = self.analyzer._identify_hotspots(language_results)
+
+         # Verify the result
+         self.assertEqual(len(hotspots), 2)  # Two files with multiple issues
+
+         # Find the hotspots by file
+         file1_py_hotspot = next((h for h in hotspots if h['file'] == 'file1.py'), None)
+         file3_js_hotspot = next((h for h in hotspots if h['file'] == 'file3.js'), None)
+
+         self.assertIsNotNone(file1_py_hotspot)
+         self.assertIsNotNone(file3_js_hotspot)
+         self.assertEqual(file1_py_hotspot['issue_count'], 2)
+         self.assertEqual(file3_js_hotspot['issue_count'], 2)
+
+     @patch('os.walk')
+     def test_find_files(self, mock_walk):
+         """Test _find_files method"""
+         # Set up the mock
+         mock_walk.return_value = [
+             ('/test/repo', ['dir1'], ['file1.py', 'file2.js']),
+             ('/test/repo/dir1', [], ['file3.py'])
+         ]
+
+         # Call the method
+         python_files = self.analyzer._find_files(self.test_repo_path, '.py')
+
+         # Verify the result
+         self.assertEqual(len(python_files), 2)
+         self.assertIn('/test/repo/file1.py', python_files)
+         self.assertIn('/test/repo/dir1/file3.py', python_files)
+
+     def test_analyze_file_with_patterns(self):
+         """Test _analyze_file_with_patterns method"""
+         # Create sample file content and patterns
+         file_content = """
+ def slow_function():
+     # This is a slow list comprehension
+     result = [x * y for x in range(1000) for y in range(1000)]
+
+     # Inefficient string concatenation
+     s = ""
+     for i in range(1000):
+         s += str(i)
+ """
+
+         patterns = [
+             (re.compile(r'\[.*for.*for.*\]', re.MULTILINE), "Nested list comprehension can be inefficient"),
+             (re.compile(r'\s+s\s\+=\s', re.MULTILINE), "String concatenation in a loop is inefficient")
+         ]
+
+         # Call the method
+         issues = self.analyzer._analyze_file_with_patterns('/test/repo/test.py', file_content, patterns)
+
+         # Verify the result
+         self.assertEqual(len(issues), 2)  # Two patterns matched
+         self.assertEqual(issues[0]['file'], 'test.py')  # Should be relative path
+         self.assertEqual(issues[1]['file'], 'test.py')
+         self.assertIn('Nested list comprehension', issues[0]['message'])
+         self.assertIn('String concatenation', issues[1]['message'])
+
+
+ if __name__ == "__main__":
+     unittest.main()
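
Review note: test_analyze_file_with_patterns fixes the shape of the pattern-scanning helper — compiled (pattern, message) pairs, per-line matching, and paths reported relative to the repository root. A sketch under those assumptions; the standalone function and the explicit repo_path parameter are illustrative, since the real method hangs off the analyzer instance:

import os
import re


def analyze_file_with_patterns(file_path, content, patterns, repo_path='/test/repo'):
    """Scan content line by line and report each (pattern, message) hit."""
    issues = []
    for line_number, line in enumerate(content.split('\n'), start=1):
        for pattern, message in patterns:
            if pattern.search(line):
                # Report paths relative to the repo root, as the test asserts
                issues.append({
                    'file': os.path.relpath(file_path, repo_path),
                    'line': line_number,
                    'message': message,
                })
    return issues


# Example: one hit, mirroring the string-concatenation fixture above
demo = analyze_file_with_patterns(
    '/test/repo/test.py',
    '    s = ""\n    for i in range(1000):\n        s += str(i)\n',
    [(re.compile(r'\s+s\s\+=\s'), "String concatenation in a loop is inefficient")],
)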
tests/test_report_generator.py ADDED
@@ -0,0 +1,274 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Report Generator Service
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock, mock_open
+ import os
+ import sys
+ import json
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.services.report_generator import ReportGenerator
+
+
+ class TestReportGenerator(unittest.TestCase):
+     """Test cases for the ReportGenerator class"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         # Create a temporary output directory for testing
+         self.test_output_dir = "test_reports"
+         self.generator = ReportGenerator(output_dir=self.test_output_dir)
+
+         # Sample test data
+         self.repo_name = "test-repo"
+         self.test_results = {
+             "repository_info": {
+                 "branch": "main",
+                 "commit": "abc123",
+                 "remote_url": "https://github.com/test/test-repo",
+                 "size": 1024,
+                 "file_count": 10
+             },
+             "language_breakdown": {
+                 "Python": {"files": 5, "lines": 500, "percentage": 70},
+                 "JavaScript": {"files": 3, "lines": 200, "percentage": 30}
+             },
+             "code_analysis": {
+                 "Python": {
+                     "issue_count": 3,
+                     "issues": [
+                         {"severity": "high", "issue": "Unused variable", "file": "test.py", "line": 10, "description": "Variable 'x' is not used"},
+                         {"severity": "medium", "issue": "Missing docstring", "file": "test.py", "line": 5, "description": "Function missing docstring"}
+                     ]
+                 },
+                 "JavaScript": {
+                     "issue_count": 2,
+                     "issues": [
+                         {"severity": "medium", "issue": "Unused variable", "file": "test.js", "line": 15, "description": "Variable 'y' is not used"}
+                     ]
+                 }
+             },
+             "security_scan": {
+                 "Python": {
+                     "vulnerability_count": 1,
+                     "vulnerabilities": [
+                         {"severity": "critical", "issue": "SQL Injection", "file": "db.py", "line": 25, "description": "Unsanitized SQL query"}
+                     ]
+                 },
+                 "JavaScript": {
+                     "vulnerability_count": 0,
+                     "vulnerabilities": []
+                 }
+             },
+             "performance_analysis": {
+                 "language_results": {
+                     "Python": {
+                         "issue_count": 2,
+                         "issues": [
+                             {"issue": "Inefficient loop", "file": "test.py", "line": 20, "description": "Use list comprehension instead"}
+                         ]
+                     }
+                 },
+                 "hotspots": [
+                     {"file": "test.py", "language": "Python", "issue_count": 2}
+                 ]
+             },
+             "ai_review": {
+                 "reviews": {
+                     "test.py": {
+                         "status": "success",
+                         "review_text": "Code review for test.py",
+                         "suggestions": [
+                             {"section": "Code Quality", "line": 10, "description": "Variable 'x' is not used", "details": "Remove unused variable"}
+                         ]
+                     }
+                 },
+                 "summary": "Overall, the code quality is good but there are some issues to address."
+             }
+         }
+
+     def tearDown(self):
+         """Tear down test fixtures"""
+         # Clean up the test output directory
+         if os.path.exists(self.test_output_dir):
+             for file in os.listdir(self.test_output_dir):
+                 os.remove(os.path.join(self.test_output_dir, file))
+             os.rmdir(self.test_output_dir)
+
+     def test_init(self):
+         """Test initialization of the generator"""
+         self.assertIsNotNone(self.generator)
+         self.assertEqual(self.generator.output_dir, self.test_output_dir)
+         self.assertTrue(os.path.exists(self.test_output_dir))
+
+     @patch('builtins.open', new_callable=mock_open)
+     @patch('json.dump')
+     def test_generate_json_report(self, mock_json_dump, mock_file_open):
+         """Test _generate_json_report method"""
+         # Call the method
+         report_content = {"test": "content"}
+         report_path = self.generator._generate_json_report("test_report", report_content)
+
+         # Verify the result
+         expected_path = os.path.join(self.test_output_dir, "test_report.json")
+         self.assertEqual(report_path, expected_path)
+         mock_file_open.assert_called_once_with(expected_path, "w", encoding="utf-8")
+         mock_json_dump.assert_called_once()
+
+     @patch('builtins.open', new_callable=mock_open)
+     @patch('markdown.markdown')
+     def test_generate_html_report(self, mock_markdown, mock_file_open):
+         """Test _generate_html_report method"""
+         # Mock markdown conversion
+         mock_markdown.return_value = "<h1>Test</h1>"
+
+         # Call the method
+         report_content = {"metadata": {"repository_name": "test-repo"}}
+         report_path = self.generator._generate_html_report("test_report", report_content)
+
+         # Verify the result
+         expected_path = os.path.join(self.test_output_dir, "test_report.html")
+         self.assertEqual(report_path, expected_path)
+         mock_file_open.assert_called_once_with(expected_path, "w", encoding="utf-8")
+         mock_markdown.assert_called_once()
+
+     @patch('pdfkit.from_file')
+     @patch('os.remove')
+     def test_generate_pdf_report(self, mock_remove, mock_pdfkit):
+         """Test _generate_pdf_report method"""
+         # Mock the HTML report generation
+         with patch.object(self.generator, '_generate_html_report') as mock_html_report:
+             mock_html_report.return_value = os.path.join(self.test_output_dir, "test_report_temp.html")
+
+             # Call the method
+             report_content = {"test": "content"}
+             report_path = self.generator._generate_pdf_report("test_report", report_content)
+
+             # Verify the result
+             expected_path = os.path.join(self.test_output_dir, "test_report.pdf")
+             self.assertEqual(report_path, expected_path)
+             mock_html_report.assert_called_once_with("test_report_temp", report_content)
+             mock_pdfkit.assert_called_once_with(
+                 os.path.join(self.test_output_dir, "test_report_temp.html"),
+                 expected_path
+             )
+             mock_remove.assert_called_once_with(os.path.join(self.test_output_dir, "test_report_temp.html"))
+
+     @patch('builtins.open', new_callable=mock_open)
+     @patch('csv.DictWriter')
+     def test_generate_csv_report(self, mock_csv_writer, mock_file_open):
+         """Test _generate_csv_report method"""
+         # Mock CSV writer
+         mock_writer = MagicMock()
+         mock_csv_writer.return_value = mock_writer
+
+         # Call the method
+         report_content = {
+             "code_quality": {"issues_by_language": {}},
+             "security": {"vulnerabilities_by_language": {}},
+             "performance": {"issues_by_language": {}},
+             "ai_review": {"file_reviews": {}}
+         }
+         report_path = self.generator._generate_csv_report("test_report", report_content)
+
+         # Verify the result
+         expected_path = os.path.join(self.test_output_dir, "test_report.csv")
+         self.assertEqual(report_path, expected_path)
+         mock_file_open.assert_called_once_with(expected_path, "w", newline="", encoding="utf-8")
+         mock_writer.writeheader.assert_called_once()
+         mock_writer.writerows.assert_called_once()
+
+     def test_calculate_summary_metrics(self):
+         """Test _calculate_summary_metrics method"""
+         # Call the method
+         metrics = self.generator._calculate_summary_metrics(self.test_results)
+
+         # Verify the result
+         self.assertEqual(metrics["total_files"], 10)
+         self.assertEqual(metrics["repository_size"], 1024)
+         self.assertEqual(metrics["total_code_issues"], 5)  # 3 Python + 2 JavaScript
+         self.assertEqual(metrics["critical_code_issues"], 1)  # 1 high severity issue
+         self.assertEqual(metrics["total_vulnerabilities"], 1)  # 1 Python vulnerability
+         self.assertEqual(metrics["critical_vulnerabilities"], 1)  # 1 critical vulnerability
+         self.assertEqual(metrics["total_performance_issues"], 2)  # 2 Python performance issues
+         self.assertEqual(metrics["performance_hotspots"], 1)  # 1 hotspot
+         self.assertIn("overall_score", metrics)
+         self.assertIn("quality_rating", metrics)
+
+     def test_extract_top_issues(self):
+         """Test _extract_top_issues method"""
+         # Call the method
+         top_issues = self.generator._extract_top_issues(self.test_results["code_analysis"])
+
+         # Verify the result
+         self.assertEqual(len(top_issues), 3)  # Total issues in the test data
+         self.assertEqual(top_issues[0]["severity"], "high")  # First issue should be high severity
+
+     def test_extract_critical_vulnerabilities(self):
+         """Test _extract_critical_vulnerabilities method"""
+         # Call the method
+         critical_vulns = self.generator._extract_critical_vulnerabilities(self.test_results["security_scan"])
+
+         # Verify the result
+         self.assertEqual(len(critical_vulns), 1)  # Only one vulnerability in the test data
+         self.assertEqual(critical_vulns[0]["severity"], "critical")
+
+     def test_generate_recommendations(self):
+         """Test _generate_recommendations method"""
+         # Call the method
+         recommendations = self.generator._generate_recommendations(self.test_results)
+
+         # Verify the result
+         self.assertIn("high_priority", recommendations)
+         self.assertIn("medium_priority", recommendations)
+         self.assertIn("low_priority", recommendations)
+         self.assertEqual(len(recommendations["high_priority"]), 1)  # One critical security vulnerability
+         self.assertGreaterEqual(len(recommendations["medium_priority"]), 1)  # At least one high code issue
+
+     @patch('os.path.exists')
+     @patch('os.listdir')
+     def test_generate_report(self, mock_listdir, mock_exists):
+         """Test generate_report method"""
+         # Mock the report generation methods
+         with patch.object(self.generator, '_create_report_content') as mock_create_content, \
+              patch.object(self.generator, '_generate_json_report') as mock_json_report, \
+              patch.object(self.generator, '_generate_html_report') as mock_html_report, \
+              patch.object(self.generator, '_generate_pdf_report') as mock_pdf_report, \
+              patch.object(self.generator, '_generate_csv_report') as mock_csv_report:
+
+             # Set up the mocks
+             mock_create_content.return_value = {"test": "content"}
+             mock_json_report.return_value = "json_path"
+             mock_html_report.return_value = "html_path"
+             mock_pdf_report.return_value = "pdf_path"
+             mock_csv_report.return_value = "csv_path"
+
+             # Call the method with all formats
+             report_paths = self.generator.generate_report(self.repo_name, self.test_results, "all")
+
+             # Verify the result
+             self.assertEqual(report_paths["json"], "json_path")
+             self.assertEqual(report_paths["html"], "html_path")
+             self.assertEqual(report_paths["pdf"], "pdf_path")
+             self.assertEqual(report_paths["csv"], "csv_path")
+             mock_create_content.assert_called_once_with(self.repo_name, self.test_results)
+
+             # Call the method with a specific format
+             report_paths = self.generator.generate_report(self.repo_name, self.test_results, "json")
+
+             # Verify the result
+             self.assertEqual(len(report_paths), 1)
+             self.assertEqual(report_paths["json"], "json_path")
+
+
+ if __name__ == "__main__":
+     unittest.main()
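
Review note: the mocked open/json.dump calls above constrain _generate_json_report to a single write of <output_dir>/<name>.json that returns the path. A minimal standalone sketch of that behavior; the function name and the default=str fallback are assumptions, since the real method lives on ReportGenerator:

import json
import os


def generate_json_report(output_dir, report_name, report_content):
    # One file per report: <output_dir>/<report_name>.json
    report_path = os.path.join(output_dir, f"{report_name}.json")
    with open(report_path, "w", encoding="utf-8") as handle:
        # default=str keeps non-serializable values (e.g. datetimes) from crashing the dump
        json.dump(report_content, handle, indent=2, default=str)
    return report_path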
tests/test_repository_service.py ADDED
@@ -0,0 +1,226 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Repository Service
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock
+ import os
+ import sys
+ import shutil
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.services.repository_service import (
+     validate_github_url,
+     normalize_github_url,
+     extract_repo_name,
+     clone_repository,
+     get_repository_info,
+     cleanup_repository,
+     cleanup_all_repositories
+ )
+
+
+ class TestRepositoryService(unittest.TestCase):
+     """Test cases for the repository service functions"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         self.test_repo_dir = "test_repos"
+         os.makedirs(self.test_repo_dir, exist_ok=True)
+
+     def tearDown(self):
+         """Tear down test fixtures"""
+         if os.path.exists(self.test_repo_dir):
+             shutil.rmtree(self.test_repo_dir)
+
+     def test_validate_github_url(self):
+         """Test validate_github_url function"""
+         # Valid URLs
+         self.assertTrue(validate_github_url("https://github.com/user/repo"))
+         self.assertTrue(validate_github_url("https://github.com/user/repo.git"))
+         self.assertTrue(validate_github_url("git@github.com:user/repo.git"))
+         self.assertTrue(validate_github_url("https://github.com/user/repo-with-dash"))
+         self.assertTrue(validate_github_url("https://github.com/user/repo_with_underscore"))
+
+         # Invalid URLs
+         self.assertFalse(validate_github_url("https://gitlab.com/user/repo"))
+         self.assertFalse(validate_github_url("https://github.com"))
+         self.assertFalse(validate_github_url("https://github.com/user"))
+         self.assertFalse(validate_github_url("not a url"))
+
+     def test_normalize_github_url(self):
+         """Test normalize_github_url function"""
+         # HTTPS URLs
+         self.assertEqual(
+             normalize_github_url("https://github.com/user/repo"),
+             "https://github.com/user/repo.git"
+         )
+         self.assertEqual(
+             normalize_github_url("https://github.com/user/repo.git"),
+             "https://github.com/user/repo.git"
+         )
+
+         # SSH URLs
+         self.assertEqual(
+             normalize_github_url("git@github.com:user/repo.git"),
+             "https://github.com/user/repo.git"
+         )
+         self.assertEqual(
+             normalize_github_url("git@github.com:user/repo"),
+             "https://github.com/user/repo.git"
+         )
+
+         # URLs with trailing slashes
+         self.assertEqual(
+             normalize_github_url("https://github.com/user/repo/"),
+             "https://github.com/user/repo.git"
+         )
+
+         # Invalid URLs should return None
+         self.assertIsNone(normalize_github_url("https://gitlab.com/user/repo"))
+         self.assertIsNone(normalize_github_url("not a url"))
+
+     def test_extract_repo_name(self):
+         """Test extract_repo_name function"""
+         self.assertEqual(extract_repo_name("https://github.com/user/repo"), "repo")
+         self.assertEqual(extract_repo_name("https://github.com/user/repo.git"), "repo")
+         self.assertEqual(extract_repo_name("git@github.com:user/repo.git"), "repo")
+         self.assertEqual(extract_repo_name("https://github.com/user/repo-with-dash"), "repo-with-dash")
+
+         # Invalid URLs should return None
+         self.assertIsNone(extract_repo_name("https://github.com"))
+         self.assertIsNone(extract_repo_name("not a url"))
+
+     @patch('git.Repo.clone_from')
+     def test_clone_repository(self, mock_clone_from):
+         """Test clone_repository function"""
+         # Mock the Git clone operation
+         mock_repo = MagicMock()
+         mock_clone_from.return_value = mock_repo
+
+         # Test with default branch
+         repo_path = clone_repository(
+             "https://github.com/user/repo",
+             output_dir=self.test_repo_dir
+         )
+
+         # Verify the result
+         expected_path = os.path.join(self.test_repo_dir, "repo")
+         self.assertEqual(repo_path, expected_path)
+         mock_clone_from.assert_called_once()
+
+         # Test with specific branch
+         mock_clone_from.reset_mock()
+         repo_path = clone_repository(
+             "https://github.com/user/repo",
+             branch="dev",
+             output_dir=self.test_repo_dir
+         )
+
+         # Verify the result
+         self.assertEqual(repo_path, expected_path)
+         mock_clone_from.assert_called_once()
+
+         # Test with invalid URL
+         with self.assertRaises(ValueError):
+             clone_repository(
+                 "not a url",
+                 output_dir=self.test_repo_dir
+             )
+
+     @patch('git.Repo')
+     @patch('os.path.getsize')
+     @patch('os.walk')
+     def test_get_repository_info(self, mock_walk, mock_getsize, mock_repo):
+         """Test get_repository_info function"""
+         # Mock the Git repository
+         mock_repo_instance = MagicMock()
+         mock_repo.return_value = mock_repo_instance
+
+         # Mock the active branch
+         mock_branch = MagicMock()
+         mock_branch.name = "main"
+         mock_repo_instance.active_branch = mock_branch
+
+         # Mock the head commit
+         mock_commit = MagicMock()
+         mock_commit.hexsha = "abc123"
+         mock_repo_instance.head.commit = mock_commit
+
+         # Mock the remote URL
+         mock_remote = MagicMock()
+         mock_remote.url = "https://github.com/user/repo.git"
+         mock_repo_instance.remotes.origin = mock_remote
+
+         # Mock the repository size
+         mock_getsize.return_value = 1024
+
+         # Mock the file count
+         mock_walk.return_value = [
+             ("/test/repo", ["dir1"], ["file1.py", "file2.py"]),
+             ("/test/repo/dir1", [], ["file3.py"])
+         ]
+
+         # Test the function
+         repo_info = get_repository_info("/test/repo")
+
+         # Verify the result
+         self.assertEqual(repo_info["branch"], "main")
+         self.assertEqual(repo_info["commit"], "abc123")
+         self.assertEqual(repo_info["remote_url"], "https://github.com/user/repo.git")
+         self.assertEqual(repo_info["size"], 1024)
+         self.assertEqual(repo_info["file_count"], 3)
+
+     @patch('shutil.rmtree')
+     @patch('os.path.exists')
+     def test_cleanup_repository(self, mock_exists, mock_rmtree):
+         """Test cleanup_repository function"""
+         # Mock the path exists check
+         mock_exists.return_value = True
+
+         # Test the function
+         cleanup_repository("/test/repo")
+
+         # Verify the result
+         mock_exists.assert_called_once_with("/test/repo")
+         mock_rmtree.assert_called_once_with("/test/repo")
+
+         # Test with non-existent path
+         mock_exists.reset_mock()
+         mock_rmtree.reset_mock()
+         mock_exists.return_value = False
+
+         cleanup_repository("/test/repo")
+
+         mock_exists.assert_called_once_with("/test/repo")
+         mock_rmtree.assert_not_called()
+
+     @patch('os.listdir')
+     @patch('os.path.isdir')
+     @patch('shutil.rmtree')
+     def test_cleanup_all_repositories(self, mock_rmtree, mock_isdir, mock_listdir):
+         """Test cleanup_all_repositories function"""
+         # Mock the directory listing
+         mock_listdir.return_value = ["repo1", "repo2", "file.txt"]
+
+         # Mock the isdir check
+         mock_isdir.side_effect = lambda path: path.endswith("repo1") or path.endswith("repo2")
+
+         # Test the function
+         cleanup_all_repositories(self.test_repo_dir)
+
+         # Verify the result
+         mock_listdir.assert_called_once_with(self.test_repo_dir)
+         self.assertEqual(mock_isdir.call_count, 3)  # Called for each item in the directory
+         self.assertEqual(mock_rmtree.call_count, 2)  # Called for each directory
+
+
+ if __name__ == "__main__":
+     unittest.main()
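
Review note: the URL fixtures above (HTTPS with or without .git, SSH, trailing slash, non-GitHub hosts) nail down the normalization contract. A regex sketch that satisfies every assertion in these three tests; the exact patterns in src/services/repository_service.py may well differ:

import re

# Owner and repo segments allow word characters, dots, and dashes
GITHUB_HTTPS = re.compile(r'^https://github\.com/([\w.-]+)/([\w.-]+?)(?:\.git)?/?$')
GITHUB_SSH = re.compile(r'^git@github\.com:([\w.-]+)/([\w.-]+?)(?:\.git)?$')


def normalize_github_url(url):
    # Canonical form: https://github.com/<owner>/<repo>.git, or None if invalid
    match = GITHUB_HTTPS.match(url) or GITHUB_SSH.match(url)
    if match is None:
        return None
    owner, repo = match.groups()
    return f"https://github.com/{owner}/{repo}.git"


def validate_github_url(url):
    return normalize_github_url(url) is not None


def extract_repo_name(url):
    match = GITHUB_HTTPS.match(url) or GITHUB_SSH.match(url)
    return match.group(2) if match else None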
tests/test_security_scanner.py ADDED
@@ -0,0 +1,420 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the Security Scanner service
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock, mock_open
10
+ import os
11
+ import sys
12
+ import json
13
+ from pathlib import Path
14
+
15
+ # Add the project root directory to the Python path
16
+ project_root = Path(__file__).resolve().parent.parent
17
+ sys.path.insert(0, str(project_root))
18
+
19
+ from src.services.security_scanner import SecurityScanner
20
+
21
+
22
+ class TestSecurityScanner(unittest.TestCase):
23
+ """Test cases for the SecurityScanner class"""
24
+
25
+ def setUp(self):
26
+ """Set up test fixtures"""
27
+ self.scanner = SecurityScanner()
28
+ self.test_repo_path = "/test/repo"
29
+
30
+ @patch('os.path.exists')
31
+ @patch('subprocess.run')
32
+ def test_scan_python_dependencies(self, mock_run, mock_exists):
33
+ """Test scan_python_dependencies method"""
34
+ # Set up the mocks
35
+ mock_exists.return_value = True
36
+
37
+ # Mock the requirements.txt file
38
+ with patch('builtins.open', mock_open(read_data="requests==2.25.1\ndjango==2.2.0\n")):
39
+ # Mock the subprocess.run result
40
+ mock_process = MagicMock()
41
+ mock_process.returncode = 0
42
+ mock_process.stdout = json.dumps({
43
+ "vulnerabilities": [
44
+ {
45
+ "package_name": "django",
46
+ "vulnerable_spec": "<2.2.28",
47
+ "installed_version": "2.2.0",
48
+ "description": "Django before 2.2.28 has a potential directory traversal via ../ in the file name.",
49
+ "id": "CVE-2022-34265",
50
+ "cvss_v3_score": "7.5"
51
+ }
52
+ ]
53
+ })
54
+ mock_run.return_value = mock_process
55
+
56
+ # Call the method
57
+ result = self.scanner.scan_python_dependencies(self.test_repo_path)
58
+
59
+ # Verify the result
60
+ self.assertEqual(len(result['vulnerabilities']), 1)
61
+ self.assertEqual(result['vulnerability_count'], 1)
62
+ self.assertEqual(result['vulnerabilities'][0]['package'], 'django')
63
+ self.assertEqual(result['vulnerabilities'][0]['installed_version'], '2.2.0')
64
+ self.assertEqual(result['vulnerabilities'][0]['vulnerability_id'], 'CVE-2022-34265')
65
+ self.assertEqual(result['vulnerabilities'][0]['severity'], 'high') # 7.5 maps to high
66
+
67
+ @patch('os.path.exists')
68
+ @patch('subprocess.run')
69
+ def test_scan_javascript_dependencies(self, mock_run, mock_exists):
70
+ """Test scan_javascript_dependencies method"""
71
+ # Set up the mocks
72
+ mock_exists.return_value = True
73
+
74
+ # Mock the subprocess.run result
75
+ mock_process = MagicMock()
76
+ mock_process.returncode = 0
77
+ mock_process.stdout = json.dumps({
78
+ "vulnerabilities": {
79
+ "lodash": [
80
+ {
81
+ "name": "lodash",
82
+ "severity": "high",
83
+ "via": [
84
+ {
85
+ "source": 1065,
86
+ "name": "lodash",
87
+ "dependency": "lodash",
88
+ "title": "Prototype Pollution",
89
+ "url": "https://npmjs.com/advisories/1065",
90
+ "severity": "high",
91
+ "range": "<4.17.12"
92
+ }
93
+ ],
94
+ "effects": [],
95
+ "range": "<4.17.12",
96
+ "nodes": ["node_modules/lodash"],
97
+ "fixAvailable": true
98
+ }
99
+ ]
100
+ }
101
+ })
102
+ mock_run.return_value = mock_process
103
+
104
+ # Call the method
105
+ result = self.scanner.scan_javascript_dependencies(self.test_repo_path)
106
+
107
+ # Verify the result
108
+ self.assertEqual(len(result['vulnerabilities']), 1)
109
+ self.assertEqual(result['vulnerability_count'], 1)
110
+ self.assertEqual(result['vulnerabilities'][0]['package'], 'lodash')
111
+ self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')
112
+ self.assertEqual(result['vulnerabilities'][0]['title'], 'Prototype Pollution')
113
+
114
+ @patch('os.path.exists')
115
+ @patch('subprocess.run')
116
+ def test_scan_go_dependencies(self, mock_run, mock_exists):
117
+ """Test scan_go_dependencies method"""
118
+ # Set up the mocks
119
+ mock_exists.return_value = True
120
+
121
+ # Mock the subprocess.run result
122
+ mock_process = MagicMock()
123
+ mock_process.returncode = 0
124
+ mock_process.stdout = json.dumps({
125
+ "Vulns": [
126
+ {
127
+ "ID": "GO-2020-0015",
128
+ "Details": "Improper certificate validation in crypto/x509",
129
+ "Affected": [
130
+ {
131
+ "Module": {
132
+ "Path": "golang.org/x/crypto",
133
+ "Versions": [
134
+ {
135
+ "Fixed": "v0.0.0-20200221170555-0f29369cfe45"
136
+ }
137
+ ]
138
+ },
139
+ "Packages": [
140
+ {
141
+ "Path": "golang.org/x/crypto/cryptobyte",
142
+ "Symbols": ["String.ReadASN1"]
143
+ }
144
+ ]
145
+ }
146
+ ],
147
+ "References": [
148
+ {
149
+ "Type": "FIX",
150
+ "URL": "https://go.dev/cl/219877"
151
+ },
152
+ {
153
+ "Type": "REPORT",
154
+ "URL": "https://go.dev/issue/36837"
155
+ },
156
+ {
157
+ "Type": "WEB",
158
+ "URL": "https://nvd.nist.gov/vuln/detail/CVE-2020-7919"
159
+ }
160
+ ],
161
+ "Description": "Due to improper bounds checking, maliciously crafted X.509 certificates can cause a panic in certificate verification.",
162
+ "CVEs": ["CVE-2020-7919"],
163
+ "Severity": "MODERATE"
164
+ }
165
+ ]
166
+ })
167
+ mock_run.return_value = mock_process
168
+
169
+ # Call the method
170
+ result = self.scanner.scan_go_dependencies(self.test_repo_path)
171
+
172
+ # Verify the result
173
+ self.assertEqual(len(result['vulnerabilities']), 1)
174
+ self.assertEqual(result['vulnerability_count'], 1)
175
+ self.assertEqual(result['vulnerabilities'][0]['package'], 'golang.org/x/crypto')
176
+ self.assertEqual(result['vulnerabilities'][0]['vulnerability_id'], 'GO-2020-0015')
177
+ self.assertEqual(result['vulnerabilities'][0]['severity'], 'medium') # MODERATE maps to medium
178
+
+    @patch('os.path.exists')
+    @patch('subprocess.run')
+    def test_scan_rust_dependencies(self, mock_run, mock_exists):
+        """Test scan_rust_dependencies method"""
+        # Set up the mocks
+        mock_exists.return_value = True
+
+        # Mock the subprocess.run result
+        mock_process = MagicMock()
+        mock_process.returncode = 0
+        mock_process.stdout = json.dumps({
+            "vulnerabilities": {
+                "RUSTSEC-2020-0071": {
+                    "advisory": {
+                        "id": "RUSTSEC-2020-0071",
+                        "package": "smallvec",
+                        "title": "Buffer overflow in SmallVec::insert_many",
+                        "description": "Affected versions of smallvec did not properly calculate capacity when inserting multiple elements, which could result in a buffer overflow.",
+                        "date": "2020-12-02",
+                        "aliases": ["CVE-2021-25900"],
+                        "categories": ["memory-corruption"],
+                        "keywords": ["buffer-overflow", "heap-overflow"],
+                        "cvss": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
+                        "related": []
+                    },
+                    "versions": {
+                        "patched": [">=1.6.1"],
+                        "unaffected": ["<1.0.0"]
+                    },
+                    "affected": {
+                        "arch": [],
+                        "os": [],
+                        "functions": ["smallvec::SmallVec::insert_many"]
+                    }
+                }
+            },
+            "warnings": []
+        })
+        mock_run.return_value = mock_process
+
+        # Call the method
+        result = self.scanner.scan_rust_dependencies(self.test_repo_path)
+
+        # Verify the result
+        self.assertEqual(len(result['vulnerabilities']), 1)
+        self.assertEqual(result['vulnerability_count'], 1)
+        self.assertEqual(result['vulnerabilities'][0]['package'], 'smallvec')
+        self.assertEqual(result['vulnerabilities'][0]['vulnerability_id'], 'RUSTSEC-2020-0071')
+        self.assertEqual(result['vulnerabilities'][0]['title'], 'Buffer overflow in SmallVec::insert_many')
+        self.assertEqual(result['vulnerabilities'][0]['severity'], 'critical')  # CVSS 9.8 maps to critical
+
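+    # NOTE: the advisory payload above is modelled on a `cargo audit --json`
+    # report (RUSTSEC advisories). The severity assertion relies on the
+    # embedded CVSS v3.1 vector, which scores 9.8 and so maps to 'critical'.
+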
+    @patch('os.path.exists')
+    @patch('subprocess.run')
+    def test_scan_python_code(self, mock_run, mock_exists):
+        """Test scan_python_code method"""
+        # Set up the mocks
+        mock_exists.return_value = True
+
+        # Mock the subprocess.run result
+        mock_process = MagicMock()
+        mock_process.returncode = 0
+        mock_process.stdout = json.dumps({
+            "results": [
+                {
+                    "filename": "test.py",
+                    "line_number": 42,
+                    "issue_severity": "HIGH",
+                    "issue_confidence": "HIGH",
+                    "issue_text": "Possible hardcoded password: 'super_secret'",
+                    "test_id": "B105",
+                    "test_name": "hardcoded_password_string"
+                }
+            ]
+        })
+        mock_run.return_value = mock_process
+
+        # Mock the file discovery
+        with patch.object(self.scanner, '_find_files', return_value=['/test/repo/test.py']):
+            # Call the method
+            result = self.scanner.scan_python_code(self.test_repo_path)
+
+            # Verify the result
+            self.assertEqual(len(result['vulnerabilities']), 1)
+            self.assertEqual(result['vulnerability_count'], 1)
+            self.assertEqual(result['vulnerabilities'][0]['file'], 'test.py')
+            self.assertEqual(result['vulnerabilities'][0]['line'], 42)
+            self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')
+            self.assertEqual(result['vulnerabilities'][0]['message'], "Possible hardcoded password: 'super_secret'")
+
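+    # NOTE: the mocked stdout above matches Bandit's `-f json` report format
+    # (`results` entries carrying filename, line_number, issue_severity,
+    # issue_confidence, issue_text, and test_id), which the scanner is
+    # presumably invoking under the hood.
+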
+    @patch('os.path.exists')
+    @patch('subprocess.run')
+    def test_scan_javascript_code(self, mock_run, mock_exists):
+        """Test scan_javascript_code method"""
+        # Set up the mocks
+        mock_exists.return_value = True
+
+        # Mock the subprocess.run result
+        mock_process = MagicMock()
+        mock_process.returncode = 0
+        mock_process.stdout = json.dumps([
+            {
+                "filePath": "/test/repo/test.js",
+                "messages": [
+                    {
+                        "ruleId": "security/detect-eval-with-expression",
+                        "severity": 2,
+                        "message": "eval() with variable content can allow an attacker to run arbitrary code.",
+                        "line": 10,
+                        "column": 1,
+                        "nodeType": "CallExpression"
+                    }
+                ],
+                "errorCount": 1,
+                "warningCount": 0,
+                "fixableErrorCount": 0,
+                "fixableWarningCount": 0
+            }
+        ])
+        mock_run.return_value = mock_process
+
+        # Mock the file discovery
+        with patch.object(self.scanner, '_find_files', return_value=['/test/repo/test.js']):
+            # Call the method
+            result = self.scanner.scan_javascript_code(self.test_repo_path)
+
+            # Verify the result
+            self.assertEqual(len(result['vulnerabilities']), 1)
+            self.assertEqual(result['vulnerability_count'], 1)
+            self.assertEqual(result['vulnerabilities'][0]['file'], 'test.js')
+            self.assertEqual(result['vulnerabilities'][0]['line'], 10)
+            self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')  # Severity 2 maps to high
+            self.assertEqual(result['vulnerabilities'][0]['message'], "eval() with variable content can allow an attacker to run arbitrary code.")
+
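+    # NOTE: the mocked stdout above follows ESLint's JSON formatter (an array
+    # of per-file results with `messages`); the ruleId comes from
+    # eslint-plugin-security. ESLint severity 2 means "error", which this
+    # scanner evidently normalizes to 'high'.
+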
+    def test_scan_repository(self):
+        """Test scan_repository method"""
+        # Mock the language-specific scanning methods
+        self.scanner.scan_python_dependencies = MagicMock(return_value={
+            'vulnerabilities': [{'package': 'django', 'vulnerability_id': 'CVE-2022-34265', 'severity': 'high'}],
+            'vulnerability_count': 1
+        })
+        self.scanner.scan_python_code = MagicMock(return_value={
+            'vulnerabilities': [{'file': 'test.py', 'line': 42, 'severity': 'high'}],
+            'vulnerability_count': 1
+        })
+        self.scanner.scan_javascript_dependencies = MagicMock(return_value={
+            'vulnerabilities': [{'package': 'lodash', 'severity': 'high'}],
+            'vulnerability_count': 1
+        })
+        self.scanner.scan_javascript_code = MagicMock(return_value={
+            'vulnerabilities': [{'file': 'test.js', 'line': 10, 'severity': 'high'}],
+            'vulnerability_count': 1
+        })
+
+        # Call the method
+        result = self.scanner.scan_repository(self.test_repo_path, ['Python', 'JavaScript'])
+
+        # Verify the result
+        self.assertEqual(len(result), 2)  # Two languages
+        self.assertIn('Python', result)
+        self.assertIn('JavaScript', result)
+
+        # Check Python results
+        self.assertEqual(result['Python']['dependency_vulnerabilities']['vulnerability_count'], 1)
+        self.assertEqual(result['Python']['code_vulnerabilities']['vulnerability_count'], 1)
+        self.assertEqual(result['Python']['total_vulnerabilities'], 2)
+
+        # Check JavaScript results
+        self.assertEqual(result['JavaScript']['dependency_vulnerabilities']['vulnerability_count'], 1)
+        self.assertEqual(result['JavaScript']['code_vulnerabilities']['vulnerability_count'], 1)
+        self.assertEqual(result['JavaScript']['total_vulnerabilities'], 2)
+
+        # Verify the method calls
+        self.scanner.scan_python_dependencies.assert_called_once_with(self.test_repo_path)
+        self.scanner.scan_python_code.assert_called_once_with(self.test_repo_path)
+        self.scanner.scan_javascript_dependencies.assert_called_once_with(self.test_repo_path)
+        self.scanner.scan_javascript_code.assert_called_once_with(self.test_repo_path)
+
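+    # For reference, the per-language result shape these assertions imply:
+    #
+    #     {'Python': {'dependency_vulnerabilities': {...},
+    #                 'code_vulnerabilities': {...},
+    #                 'total_vulnerabilities': 2},
+    #      'JavaScript': {...}}
+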
+    @patch('os.walk')
+    def test_find_files(self, mock_walk):
+        """Test _find_files method"""
+        # Set up the mock
+        mock_walk.return_value = [
+            ('/test/repo', ['dir1'], ['file1.py', 'file2.js']),
+            ('/test/repo/dir1', [], ['file3.py'])
+        ]
+
+        # Call the method
+        python_files = self.scanner._find_files(self.test_repo_path, '.py')
+
+        # Verify the result
+        self.assertEqual(len(python_files), 2)
+        self.assertIn('/test/repo/file1.py', python_files)
+        self.assertIn('/test/repo/dir1/file3.py', python_files)
+
+    @patch('os.path.exists')
+    def test_check_tool_availability(self, mock_exists):
+        """Test _check_tool_availability method"""
+        # Set up the mock
+        mock_exists.side_effect = [True, False]  # First tool exists, second doesn't
+
+        # Call the method
+        result1 = self.scanner._check_tool_availability('tool1')
+        result2 = self.scanner._check_tool_availability('tool2')
+
+        # Verify the result
+        self.assertTrue(result1)
+        self.assertFalse(result2)
+
+    @patch('subprocess.run')
+    def test_run_command(self, mock_run):
+        """Test _run_command method"""
+        # Set up the mock
+        mock_process = MagicMock()
+        mock_process.returncode = 0
+        mock_process.stdout = "Test output"
+        mock_run.return_value = mock_process
+
+        # Call the method
+        returncode, output = self.scanner._run_command(['test', 'command'])
+
+        # Verify the result
+        self.assertEqual(returncode, 0)
+        self.assertEqual(output, "Test output")
+        mock_run.assert_called_once()
+
+    def test_map_cvss_to_severity(self):
+        """Test _map_cvss_to_severity method"""
+        # Call the method with different CVSS scores
+        low = self.scanner._map_cvss_to_severity(3.5)
+        medium = self.scanner._map_cvss_to_severity(5.5)
+        high = self.scanner._map_cvss_to_severity(8.0)
+        critical = self.scanner._map_cvss_to_severity(9.5)
+
+        # Verify the results
+        self.assertEqual(low, 'low')
+        self.assertEqual(medium, 'medium')
+        self.assertEqual(high, 'high')
+        self.assertEqual(critical, 'critical')
+
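+    # A minimal sketch of the mapping these assertions imply, using the
+    # standard CVSS v3 bands (hypothetical; the real implementation lives
+    # in the scanner under test):
+    #
+    #     def _map_cvss_to_severity(self, score):
+    #         if score >= 9.0:
+    #             return 'critical'
+    #         if score >= 7.0:
+    #             return 'high'
+    #         if score >= 4.0:
+    #             return 'medium'
+    #         return 'low'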
+
+if __name__ == "__main__":
+    unittest.main()