Updated model, added checklist
- app.py +33 -36
- lib/code_reviewer.py +110 -0
- requirements.txt +2 -0
- standards/ansible_ode_standards.json +94 -0
app.py
CHANGED
@@ -1,37 +1,34 @@
+# app.py
+
+# Import necessary modules from lib
+from lib.code_reviewer import CodeReviewer, ReviewManager
 import os
-#
-with open("review_results.json", "w") as f:
-    f.write(json_result)
-
-# Display results and download link
-st.json(result)
-st.download_button("Download Review Results", json_result, file_name="review_results.json")
+
+def main():
+    # Directory structure setup
+    # Directory for storing input Ansible files
+    input_directory = "input_files"
+    # Directory for storing output JSON reviews
+    output_directory = "output_reviews"
+
+    # Ensure the directories exist
+    os.makedirs(input_directory, exist_ok=True)
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Paths for testing
+    sample_files = [os.path.join(input_directory, "example1.yml"), os.path.join(input_directory, "example2.yml")]
+    output_json_path = os.path.join(output_directory, "code_review_results.json")
+
+    # Initialize the code reviewer and review manager
+    code_reviewer = CodeReviewer()
+    review_manager = ReviewManager(reviewer=code_reviewer)
+
+    # Process files and generate reviews
+    reviews = review_manager.process_files(sample_files)
+
+    # Save reviews to JSON
+    review_manager.save_reviews_to_json(reviews, output_json_path)
+    print(f"Reviews saved to {output_json_path}")
+
+if __name__ == "__main__":
+    main()
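The sample playbooks referenced in app.py are not part of this commit, so running the script as-is would fail on the missing files. A minimal sketch for creating stand-ins for both expected inputs (the playbook content below is invented purely for illustration):

# make_sample_input.py -- hypothetical helper, not part of this commit
import os

os.makedirs("input_files", exist_ok=True)

# A tiny, made-up playbook so app.py has something to review.
playbook = """---
- hosts: all
  tasks:
    - name: Install nginx
      apt:
        name: nginx
        state: present
"""

# app.py expects both example1.yml and example2.yml under input_files/.
for name in ("example1.yml", "example2.yml"):
    with open(os.path.join("input_files", name), "w") as f:
        f.write(playbook)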
lib/code_reviewer.py
ADDED
@@ -0,0 +1,110 @@
+# lib/code_reviewer.py
+
+# Import necessary libraries
+import os
+import json
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Custom Imports
+from typing import List, Dict
+
+class CodeReviewer:
+    def __init__(self, model_name: str = "facebook/incoder-6B"):
+        """
+        Initializes the code reviewer with the specified language model.
+
+        Args:
+            model_name (str): The name of the pre-trained model to use.
+        """
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
+        # Load code standards checklist
+        self.code_standards = self.load_code_standards()
+
+    def load_code_standards(self) -> Dict:
+        """
+        Loads the code standards checklist from a JSON file.
+
+        Returns:
+            Dict: The code standards in dictionary form.
+        """
+        standards_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "standards", "ansible_ode_standards.json")
+        with open(standards_path, 'r') as f:
+            return json.load(f)
+
+    def generate_prompt(self, code: str) -> str:
+        """
+        Generates a review prompt for the input code based on the loaded standards.
+
+        Args:
+            code (str): The code to be reviewed.
+
+        Returns:
+            str: The prompt used for reviewing the code.
+        """
+        # Build prompt from code standards
+        prompt = "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
+        for category in self.code_standards["code_standards"]:
+            prompt += f"{category['category']}:\n"
+            for standard in category['standards']:
+                prompt += f"- {standard['description']}\n"
+        prompt += "\nHere is the code:\n"
+        return prompt + code
+
+    def review_code(self, code: str) -> str:
+        """
+        Uses the model to generate a review for the provided code.
+
+        Args:
+            code (str): The code to be reviewed.
+
+        Returns:
+            str: The review generated by the model.
+        """
+        prompt = self.generate_prompt(code)
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+        output = self.model.generate(**inputs, max_length=512)
+        review_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
+        return review_text
+
+class ReviewManager:
+    def __init__(self, reviewer: CodeReviewer):
+        """
+        Initializes the review manager with a given reviewer.
+
+        Args:
+            reviewer (CodeReviewer): An instance of the CodeReviewer class.
+        """
+        self.reviewer = reviewer
+
+    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
+        """
+        Processes multiple files for review.
+
+        Args:
+            file_paths (List[str]): List of file paths to be reviewed.
+
+        Returns:
+            List[Dict[str, str]]: A list containing review data for each file.
+        """
+        reviews = []
+        for file_path in file_paths:
+            with open(file_path, 'r') as file:
+                code = file.read()
+            review = self.reviewer.review_code(code)
+            reviews.append({"filename": os.path.basename(file_path), "review": review})
+        return reviews
+
+    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
+        """
+        Saves the review data to a JSON file.
+
+        Args:
+            reviews (List[Dict[str, str]]): The list of reviews to save.
+            output_path (str): The path to save the JSON output.
+        """
+        with open(output_path, 'w') as json_file:
+            json.dump(reviews, json_file, indent=4)
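For a lighter-weight check, CodeReviewer can also be driven directly on an in-memory snippet without going through ReviewManager. A minimal sketch, assuming the standards JSON is in place and the facebook/incoder-6B weights can be loaded on the available device; the playbook string itself is hypothetical:

# review_snippet.py -- illustrative sketch only, not part of this commit
from lib.code_reviewer import CodeReviewer

reviewer = CodeReviewer()  # loads the tokenizer, the model, and the standards checklist

# Hypothetical playbook text; any Ansible YAML string works here.
snippet = """---
- hosts: web
  tasks:
    - name: Create deploy user
      user:
        name: deploy
        state: present
"""

print(reviewer.review_code(snippet))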
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
 torch
 streamlit
 transformers
+datasets
+PyYAML
 ansible
 pandas
standards/ansible_ode_standards.json
ADDED
@@ -0,0 +1,94 @@
+{
+  "code_standards": [
+    {
+      "category": "Security",
+      "standards": [
+        {
+          "rule_id": "SEC001",
+          "description": "Avoid hard-coded secrets such as passwords and API keys. Use secure storage solutions."
+        },
+        {
+          "rule_id": "SEC002",
+          "description": "Ensure all shell commands have proper sanitization to avoid command injection."
+        },
+        {
+          "rule_id": "SEC003",
+          "description": "Apply appropriate role-based access control to sensitive tasks to prevent unauthorized access."
+        },
+        {
+          "rule_id": "SEC004",
+          "description": "Ensure secrets are encrypted during storage and transit, and are accessed only when necessary."
+        },
+        {
+          "rule_id": "SEC005",
+          "description": "Check Secure Business Logic, ensuring critical automation tasks do not expose or misuse sensitive information."
+        }
+      ]
+    },
+    {
+      "category": "Idempotency",
+      "standards": [
+        {
+          "rule_id": "IDP001",
+          "description": "Ensure tasks are idempotent, such as using 'state: present' for package installation to avoid repeated actions."
+        },
+        {
+          "rule_id": "IDP002",
+          "description": "All configuration files should use templates to ensure consistent and predictable outputs across multiple runs."
+        }
+      ]
+    },
+    {
+      "category": "Best Practices",
+      "standards": [
+        {
+          "rule_id": "BP001",
+          "description": "Use standard modules for common tasks, such as 'apt' for package management and 'user' for managing users."
+        },
+        {
+          "rule_id": "BP002",
+          "description": "Use variables for values that are subject to change, ensuring maintainability and readability of the code."
+        },
+        {
+          "rule_id": "BP003",
+          "description": "Include meaningful task names for all Ansible tasks, making playbooks self-documenting."
+        }
+      ]
+    },
+    {
+      "category": "Readability",
+      "standards": [
+        {
+          "rule_id": "RD001",
+          "description": "All significant tasks must have clear and descriptive comments to explain their purpose."
+        },
+        {
+          "rule_id": "RD002",
+          "description": "Use meaningful variable names that convey the purpose clearly to ensure easy understanding for future developers."
+        },
+        {
+          "rule_id": "RD003",
+          "description": "Ensure proper formatting and indentation for better readability and adherence to Ansible's YAML syntax rules."
+        }
+      ]
+    },
+    {
+      "category": "Business Logic Security",
+      "standards": [
+        {
+          "rule_id": "BLS001",
+          "description": "Verify that business logic automation does not inadvertently alter data integrity or bypass critical business validations."
+        },
+        {
+          "rule_id": "BLS002",
+          "description": "Ensure all critical paths in business processes have appropriate error handling and logging for audit purposes."
+        },
+        {
+          "rule_id": "BLS003",
+          "description": "Critical business operations must include validations to prevent data loss or unauthorized modification."
+        }
+      ]
+    }
+  ]
+}
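To see how generate_prompt() in lib/code_reviewer.py flattens this checklist into prompt text, here is a small sketch that prints the lines produced for the first category; it assumes the file above is saved at standards/ansible_ode_standards.json relative to the working directory:

# prompt_preview.py -- illustrative sketch only, not part of this commit
import json

with open("standards/ansible_ode_standards.json") as f:
    standards = json.load(f)

# Mirror the loop in CodeReviewer.generate_prompt for a single category.
category = standards["code_standards"][0]
print(f"{category['category']}:")
for standard in category["standards"]:
    print(f"- {standard['description']}")

# Expected output:
# Security:
# - Avoid hard-coded secrets such as passwords and API keys. Use secure storage solutions.
# - Ensure all shell commands have proper sanitization to avoid command injection.
# ... (one line per rule in the category)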