#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Security Scanner Service
This module provides functionality for scanning code for security vulnerabilities.
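
Example (illustrative; assumes the external tools invoked below, such as bandit,
safety, npm, govulncheck, gosec and cargo-audit, are installed and on PATH, and
that the import path matches your project layout):

    from services.security_scanner import SecurityScanner

    scanner = SecurityScanner()
    results = scanner.scan_repository('/path/to/repo', ['Python', 'Go'])
    print(results['dependencies']['vulnerability_count'])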
"""
import os
import subprocess
import logging
import json
import tempfile
import concurrent.futures
from collections import defaultdict
logger = logging.getLogger(__name__)
class SecurityScanner:
"""
Service for scanning code for security vulnerabilities.
"""
def __init__(self):
"""
Initialize the SecurityScanner.
"""
logger.info("Initialized SecurityScanner")
self.scanners = {
'Python': self._scan_python,
'JavaScript': self._scan_javascript,
'TypeScript': self._scan_javascript, # TypeScript uses the same scanner as JavaScript
'Java': self._scan_java,
'Go': self._scan_go,
'Rust': self._scan_rust,
}
def scan_repository(self, repo_path, languages):
"""
Scan a repository for security vulnerabilities in the specified languages using parallel processing.
Args:
repo_path (str): The path to the repository.
languages (list): A list of programming languages to scan.
Returns:
dict: A dictionary containing scan results for each language.
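
        Example of the returned shape (illustrative; keys depend on the requested
        languages and on which scanners are available):

            {
                'dependencies': {'status': 'success', 'vulnerabilities': [...], ...},
                'Python': {'status': 'success', 'vulnerabilities': [...],
                           'vulnerability_count': 2, ...},
                'Ruby': {'status': 'not_supported', 'message': '...', 'vulnerabilities': []},
            }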
"""
logger.info(f"Scanning repository at {repo_path} for security vulnerabilities in languages: {languages}")
results = {}
# Scan dependencies first (language-agnostic)
results['dependencies'] = self._scan_dependencies(repo_path)
# Define a function to scan a single language
def scan_language(language):
if language in self.scanners:
try:
logger.info(f"Scanning {language} code in {repo_path} for security vulnerabilities")
return language, self.scanners[language](repo_path)
except Exception as e:
logger.error(f"Error scanning {language} code for security vulnerabilities: {e}")
return language, {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
else:
logger.warning(f"No security scanner available for {language}")
return language, {
'status': 'not_supported',
'message': f"Security scanning for {language} is not supported yet.",
'vulnerabilities': [],
}
# Use ThreadPoolExecutor to scan languages in parallel
        # Guard against an empty language list; ThreadPoolExecutor requires max_workers >= 1
        with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(len(languages), 5))) as executor:
# Submit all language scanning tasks
future_to_language = {executor.submit(scan_language, language): language for language in languages}
# Process results as they complete
for future in concurrent.futures.as_completed(future_to_language):
language = future_to_language[future]
try:
lang, result = future.result()
results[lang] = result
logger.info(f"Completed security scanning for {lang}")
except Exception as e:
logger.error(f"Exception occurred during security scanning of {language}: {e}")
results[language] = {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
return results
def _scan_dependencies(self, repo_path):
"""
Scan dependencies for known vulnerabilities.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Dependency scan results.
"""
logger.info(f"Scanning dependencies in {repo_path}")
results = {
'python': self._scan_python_dependencies(repo_path),
'javascript': self._scan_javascript_dependencies(repo_path),
'java': self._scan_java_dependencies(repo_path),
'go': self._scan_go_dependencies(repo_path),
'rust': self._scan_rust_dependencies(repo_path),
}
# Aggregate vulnerabilities
all_vulnerabilities = []
for lang_result in results.values():
all_vulnerabilities.extend(lang_result.get('vulnerabilities', []))
return {
'status': 'success',
'vulnerabilities': all_vulnerabilities,
'vulnerability_count': len(all_vulnerabilities),
'language_results': results,
}
def _scan_python_dependencies(self, repo_path):
"""
Scan Python dependencies for known vulnerabilities using safety.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Python dependencies.
"""
logger.info(f"Scanning Python dependencies in {repo_path}")
# Find requirements files
requirements_files = []
for root, _, files in os.walk(repo_path):
for file in files:
                if file in ('requirements.txt', 'Pipfile', 'Pipfile.lock', 'setup.py'):
requirements_files.append(os.path.join(root, file))
if not requirements_files:
return {
'status': 'no_dependencies',
'message': 'No Python dependency files found.',
'vulnerabilities': [],
}
vulnerabilities = []
for req_file in requirements_files:
try:
# Run safety check
cmd = [
'safety',
'check',
'--file', req_file,
'--json',
]
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
# Parse safety output
if process.stdout.strip():
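                    # Note: assumes the newer safety JSON report with a top-level
                    # 'vulnerabilities' list; other report formats are not parsed here.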
try:
safety_results = json.loads(process.stdout)
for vuln in safety_results.get('vulnerabilities', []):
vulnerabilities.append({
'package': vuln.get('package_name', ''),
'installed_version': vuln.get('installed_version', ''),
'affected_versions': vuln.get('vulnerable_spec', ''),
'description': vuln.get('advisory', ''),
'severity': vuln.get('severity', ''),
'file': req_file,
'language': 'Python',
})
except json.JSONDecodeError:
logger.error(f"Error parsing safety output: {process.stdout}")
except Exception as e:
logger.error(f"Error running safety on {req_file}: {e}")
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerability_count': len(vulnerabilities),
'files_scanned': requirements_files,
}
def _scan_javascript_dependencies(self, repo_path):
"""
Scan JavaScript/TypeScript dependencies for known vulnerabilities using npm audit.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for JavaScript dependencies.
"""
logger.info(f"Scanning JavaScript dependencies in {repo_path}")
# Find package.json files
package_files = []
for root, _, files in os.walk(repo_path):
if 'package.json' in files:
package_files.append(os.path.join(root, 'package.json'))
if not package_files:
return {
'status': 'no_dependencies',
'message': 'No JavaScript dependency files found.',
'vulnerabilities': [],
}
vulnerabilities = []
for pkg_file in package_files:
pkg_dir = os.path.dirname(pkg_file)
try:
# Run npm audit
cmd = [
'npm',
'audit',
'--json',
]
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
cwd=pkg_dir, # Run in the directory containing package.json
)
# Parse npm audit output
if process.stdout.strip():
try:
audit_results = json.loads(process.stdout)
# Extract vulnerabilities from npm audit results
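                        # Assumes the name-keyed 'vulnerabilities' map emitted by npm 7+;
                        # some of the detail fields read below come from the older audit
                        # schema and may be empty on newer npm versions.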
for vuln_id, vuln_info in audit_results.get('vulnerabilities', {}).items():
vulnerabilities.append({
'package': vuln_info.get('name', ''),
'installed_version': vuln_info.get('version', ''),
'affected_versions': vuln_info.get('range', ''),
'description': vuln_info.get('overview', ''),
'severity': vuln_info.get('severity', ''),
'file': pkg_file,
'language': 'JavaScript',
'cwe': vuln_info.get('cwe', ''),
'recommendation': vuln_info.get('recommendation', ''),
})
except json.JSONDecodeError:
logger.error(f"Error parsing npm audit output: {process.stdout}")
except Exception as e:
logger.error(f"Error running npm audit on {pkg_file}: {e}")
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerability_count': len(vulnerabilities),
'files_scanned': package_files,
}
def _scan_java_dependencies(self, repo_path):
"""
Scan Java dependencies for known vulnerabilities.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Java dependencies.
"""
logger.info(f"Scanning Java dependencies in {repo_path}")
# Find pom.xml or build.gradle files
dependency_files = []
for root, _, files in os.walk(repo_path):
for file in files:
                if file in ('pom.xml', 'build.gradle'):
dependency_files.append(os.path.join(root, file))
if not dependency_files:
return {
'status': 'no_dependencies',
'message': 'No Java dependency files found.',
'vulnerabilities': [],
}
# For now, we'll just return a placeholder since we don't have a direct tool
# In a real implementation, you might use OWASP Dependency Check or similar
return {
'status': 'not_implemented',
'message': 'Java dependency scanning is not fully implemented yet.',
'vulnerabilities': [],
'files_scanned': dependency_files,
}
def _scan_go_dependencies(self, repo_path):
"""
Scan Go dependencies for known vulnerabilities using govulncheck.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Go dependencies.
"""
logger.info(f"Scanning Go dependencies in {repo_path}")
# Check if go.mod exists
go_mod_path = os.path.join(repo_path, 'go.mod')
if not os.path.exists(go_mod_path):
return {
'status': 'no_dependencies',
'message': 'No Go dependency files found.',
'vulnerabilities': [],
}
try:
# Run govulncheck
cmd = [
'govulncheck',
'-json',
'./...',
]
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
cwd=repo_path, # Run in the repository directory
)
# Parse govulncheck output
vulnerabilities = []
if process.stdout.strip():
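                # govulncheck's -json stream format differs between releases; any line
                # that does not parse as a standalone JSON object is skipped below.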
for line in process.stdout.splitlines():
try:
result = json.loads(line)
if 'vulnerability' in result:
vuln = result['vulnerability']
vulnerabilities.append({
'package': vuln.get('package', ''),
'description': vuln.get('details', ''),
'severity': 'high', # govulncheck doesn't provide severity
'file': go_mod_path,
'language': 'Go',
'cve': vuln.get('osv', {}).get('id', ''),
'affected_versions': vuln.get('osv', {}).get('affected', ''),
})
except json.JSONDecodeError:
continue
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerability_count': len(vulnerabilities),
'files_scanned': [go_mod_path],
}
except Exception as e:
logger.error(f"Error running govulncheck: {e}")
return {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
def _scan_rust_dependencies(self, repo_path):
"""
Scan Rust dependencies for known vulnerabilities using cargo-audit.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Rust dependencies.
"""
logger.info(f"Scanning Rust dependencies in {repo_path}")
# Check if Cargo.toml exists
cargo_toml_path = os.path.join(repo_path, 'Cargo.toml')
if not os.path.exists(cargo_toml_path):
return {
'status': 'no_dependencies',
'message': 'No Rust dependency files found.',
'vulnerabilities': [],
}
try:
# Run cargo-audit
cmd = [
'cargo',
'audit',
'--json',
]
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
cwd=repo_path, # Run in the repository directory
)
# Parse cargo-audit output
vulnerabilities = []
if process.stdout.strip():
try:
audit_results = json.loads(process.stdout)
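                    # Assumes the rustsec report layout used by cargo-audit, where
                    # findings are nested under vulnerabilities.list.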
for vuln in audit_results.get('vulnerabilities', {}).get('list', []):
vulnerabilities.append({
'package': vuln.get('package', {}).get('name', ''),
'installed_version': vuln.get('package', {}).get('version', ''),
'description': vuln.get('advisory', {}).get('description', ''),
'severity': vuln.get('advisory', {}).get('severity', ''),
'file': cargo_toml_path,
'language': 'Rust',
'cve': vuln.get('advisory', {}).get('id', ''),
})
except json.JSONDecodeError:
logger.error(f"Error parsing cargo-audit output: {process.stdout}")
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerability_count': len(vulnerabilities),
'files_scanned': [cargo_toml_path],
}
except Exception as e:
logger.error(f"Error running cargo-audit: {e}")
return {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
def _scan_python(self, repo_path):
"""
Scan Python code for security vulnerabilities using bandit.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Python code.
"""
logger.info(f"Scanning Python code in {repo_path} for security vulnerabilities")
# Find Python files
python_files = []
for root, _, files in os.walk(repo_path):
for file in files:
if file.endswith('.py'):
python_files.append(os.path.join(root, file))
if not python_files:
return {
'status': 'no_files',
'message': 'No Python files found in the repository.',
'vulnerabilities': [],
}
try:
# Run bandit
cmd = [
'bandit',
'-r',
'-f', 'json',
repo_path,
]
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
# Parse bandit output
vulnerabilities = []
if process.stdout.strip():
try:
bandit_results = json.loads(process.stdout)
for result in bandit_results.get('results', []):
vulnerabilities.append({
'file': result.get('filename', ''),
'line': result.get('line_number', 0),
'code': result.get('code', ''),
'issue': result.get('issue_text', ''),
'severity': result.get('issue_severity', ''),
'confidence': result.get('issue_confidence', ''),
'cwe': result.get('cwe', ''),
'test_id': result.get('test_id', ''),
'test_name': result.get('test_name', ''),
'language': 'Python',
})
except json.JSONDecodeError:
logger.error(f"Error parsing bandit output: {process.stdout}")
# Group vulnerabilities by severity
vulns_by_severity = defaultdict(list)
for vuln in vulnerabilities:
severity = vuln.get('severity', 'unknown')
vulns_by_severity[severity].append(vuln)
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerabilities_by_severity': dict(vulns_by_severity),
'vulnerability_count': len(vulnerabilities),
'files_scanned': len(python_files),
}
except Exception as e:
logger.error(f"Error running bandit: {e}")
return {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
def _scan_javascript(self, repo_path):
"""
        Scan JavaScript/TypeScript code for security vulnerabilities using ESLint with the security plugin.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for JavaScript/TypeScript code.
"""
logger.info(f"Scanning JavaScript/TypeScript code in {repo_path} for security vulnerabilities")
# Find JavaScript/TypeScript files
js_files = []
for root, _, files in os.walk(repo_path):
if 'node_modules' in root:
continue
for file in files:
if file.endswith(('.js', '.jsx', '.ts', '.tsx')):
js_files.append(os.path.join(root, file))
if not js_files:
return {
'status': 'no_files',
'message': 'No JavaScript/TypeScript files found in the repository.',
'vulnerabilities': [],
}
# For now, we'll use a simplified approach since NodeJSScan might not be available
# In a real implementation, you might use NodeJSScan or similar
# Create a temporary ESLint configuration file with security rules
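        # Note: eslint and eslint-plugin-security must be resolvable via npx for this
        # scan to produce findings; if they are missing, the run typically yields no
        # parseable output and the scan reports zero vulnerabilities.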
eslint_config = {
"env": {
"browser": True,
"es2021": True,
"node": True
},
"extends": [
"eslint:recommended",
"plugin:security/recommended"
],
"plugins": [
"security"
],
"parserOptions": {
"ecmaVersion": 12,
"sourceType": "module",
"ecmaFeatures": {
"jsx": True
}
},
"rules": {}
}
        # json.dump needs a text-mode handle; delete=False keeps the file for ESLint to read
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_config:
            json.dump(eslint_config, temp_config)
            temp_config_path = temp_config.name
try:
# Run ESLint with security plugin
cmd = [
'npx',
'eslint',
'--config', temp_config_path,
'--format', 'json',
'--plugin', 'security',
] + js_files
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
# Parse ESLint output
vulnerabilities = []
if process.stdout.strip():
try:
eslint_results = json.loads(process.stdout)
for result in eslint_results:
file_path = result.get('filePath', '')
for message in result.get('messages', []):
# Only include security-related issues
rule_id = message.get('ruleId', '')
if rule_id and ('security' in rule_id or 'no-eval' in rule_id or 'no-implied-eval' in rule_id):
vulnerabilities.append({
'file': file_path,
'line': message.get('line', 0),
'column': message.get('column', 0),
'issue': message.get('message', ''),
'severity': 'high' if message.get('severity', 0) == 2 else 'medium',
'rule': rule_id,
'language': 'JavaScript',
})
except json.JSONDecodeError:
logger.error(f"Error parsing ESLint output: {process.stdout}")
# Group vulnerabilities by severity
vulns_by_severity = defaultdict(list)
for vuln in vulnerabilities:
severity = vuln.get('severity', 'unknown')
vulns_by_severity[severity].append(vuln)
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerabilities_by_severity': dict(vulns_by_severity),
'vulnerability_count': len(vulnerabilities),
'files_scanned': len(js_files),
}
except Exception as e:
logger.error(f"Error scanning JavaScript/TypeScript code: {e}")
return {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
finally:
# Clean up the temporary configuration file
if os.path.exists(temp_config_path):
os.unlink(temp_config_path)
def _scan_java(self, repo_path):
"""
Scan Java code for security vulnerabilities.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Java code.
"""
logger.info(f"Scanning Java code in {repo_path} for security vulnerabilities")
# Find Java files
java_files = []
for root, _, files in os.walk(repo_path):
for file in files:
if file.endswith('.java'):
java_files.append(os.path.join(root, file))
if not java_files:
return {
'status': 'no_files',
'message': 'No Java files found in the repository.',
'vulnerabilities': [],
}
# For now, we'll just return a placeholder since we don't have a direct tool
# In a real implementation, you might use FindSecBugs or similar
return {
'status': 'not_implemented',
'message': 'Java security scanning is not fully implemented yet.',
'vulnerabilities': [],
'files_scanned': java_files,
}
def _scan_go(self, repo_path):
"""
Scan Go code for security vulnerabilities using gosec.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Go code.
"""
logger.info(f"Scanning Go code in {repo_path} for security vulnerabilities")
# Find Go files
go_files = []
for root, _, files in os.walk(repo_path):
for file in files:
if file.endswith('.go'):
go_files.append(os.path.join(root, file))
if not go_files:
return {
'status': 'no_files',
'message': 'No Go files found in the repository.',
'vulnerabilities': [],
}
try:
# Run gosec
cmd = [
'gosec',
'-fmt', 'json',
'-quiet',
'./...',
]
process = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
cwd=repo_path, # Run in the repository directory
)
# Parse gosec output
vulnerabilities = []
if process.stdout.strip():
try:
gosec_results = json.loads(process.stdout)
for issue in gosec_results.get('Issues', []):
vulnerabilities.append({
'file': issue.get('file', ''),
'line': issue.get('line', ''),
'code': issue.get('code', ''),
'issue': issue.get('details', ''),
'severity': issue.get('severity', ''),
'confidence': issue.get('confidence', ''),
'cwe': issue.get('cwe', {}).get('ID', ''),
'rule_id': issue.get('rule_id', ''),
'language': 'Go',
})
except json.JSONDecodeError:
logger.error(f"Error parsing gosec output: {process.stdout}")
# Group vulnerabilities by severity
vulns_by_severity = defaultdict(list)
for vuln in vulnerabilities:
severity = vuln.get('severity', 'unknown')
vulns_by_severity[severity].append(vuln)
return {
'status': 'success',
'vulnerabilities': vulnerabilities,
'vulnerabilities_by_severity': dict(vulns_by_severity),
'vulnerability_count': len(vulnerabilities),
'files_scanned': len(go_files),
}
except Exception as e:
logger.error(f"Error running gosec: {e}")
return {
'status': 'error',
'error': str(e),
'vulnerabilities': [],
}
def _scan_rust(self, repo_path):
"""
Scan Rust code for security vulnerabilities.
Args:
repo_path (str): The path to the repository.
Returns:
dict: Scan results for Rust code.
"""
logger.info(f"Scanning Rust code in {repo_path} for security vulnerabilities")
# Find Rust files
rust_files = []
for root, _, files in os.walk(repo_path):
for file in files:
if file.endswith('.rs'):
rust_files.append(os.path.join(root, file))
if not rust_files:
return {
'status': 'no_files',
'message': 'No Rust files found in the repository.',
'vulnerabilities': [],
}
# For now, we'll just return a placeholder since we don't have a direct tool
        # In a real implementation, you might use Clippy lints or cargo-geiger for code-level checks
        # (cargo-audit already covers dependency vulnerabilities above)
return {
'status': 'not_implemented',
'message': 'Rust security scanning is not fully implemented yet.',
'vulnerabilities': [],
'files_scanned': rust_files,
}
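

# Minimal manual smoke test (illustrative sketch, not part of the service API).
# It assumes the relevant external scanners are installed and prints the aggregated
# results for the current working directory.
if __name__ == '__main__':
    import pprint

    logging.basicConfig(level=logging.INFO)
    scanner = SecurityScanner()
    pprint.pprint(scanner.scan_repository(os.getcwd(), ['Python', 'JavaScript']))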