Updated model, added checklist

- app.py +33 -36
- lib/code_reviewer.py +110 -0
- requirements.txt +2 -0
- standards/ansible_ode_standards.json +94 -0
app.py
CHANGED
@@ -1,37 +1,34 @@
-… (old line 1, not recoverable from the page extraction)
 import os
-… (old lines 3–31, not recoverable from the page extraction)
-with open("review_results.json", "w") as f:
-    f.write(json_result)
-
-# Display results and download link
-st.json(result)
-st.download_button("Download Review Results", json_result, file_name="review_results.json")
+# app.py
+
+# Import necessary modules from lib
+from lib.code_reviewer import CodeReviewer, ReviewManager
 import os
+
+def main():
+    # Directory structure setup
+    # Directory for storing input Ansible files
+    input_directory = "input_files"
+    # Directory for storing output JSON reviews
+    output_directory = "output_reviews"
+
+    # Ensure the directories exist
+    os.makedirs(input_directory, exist_ok=True)
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Paths for testing
+    sample_files = [os.path.join(input_directory, "example1.yml"), os.path.join(input_directory, "example2.yml")]
+    output_json_path = os.path.join(output_directory, "code_review_results.json")
+
+    # Initialize the code reviewer and review manager
+    code_reviewer = CodeReviewer()
+    review_manager = ReviewManager(reviewer=code_reviewer)
+
+    # Process files and generate reviews
+    reviews = review_manager.process_files(sample_files)
+
+    # Save reviews to JSON
+    review_manager.save_reviews_to_json(reviews, output_json_path)
+    print(f"Reviews saved to {output_json_path}")
+
+if __name__ == "__main__":
+    main()
lib/code_reviewer.py
ADDED
@@ -0,0 +1,110 @@
# lib/code_reviewer.py

# Import necessary libraries
import os
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Custom Imports
from typing import List, Dict

class CodeReviewer:
    def __init__(self, model_name: str = "facebook/incoder-6B"):
        """
        Initializes the code reviewer with the specified language model.

        Args:
            model_name (str): The name of the pre-trained model to use.
        """
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
        # Load code standards checklist
        self.code_standards = self.load_code_standards()

    def load_code_standards(self) -> Dict:
        """
        Loads the code standards checklist from a JSON file.

        Returns:
            Dict: The code standards in dictionary form.
        """
        standards_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "standards", "ansible_ode_standards.json")
        with open(standards_path, 'r') as f:
            return json.load(f)

    def generate_prompt(self, code: str) -> str:
        """
        Generates a review prompt for the input code based on the loaded standards.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The prompt used for reviewing the code.
        """
        # Build prompt from code standards
        prompt = "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
        for category in self.code_standards["code_standards"]:
            prompt += f"{category['category']}:\n"
            for standard in category['standards']:
                prompt += f"- {standard['description']}\n"
        prompt += "\nHere is the code:\n"
        return prompt + code

    def review_code(self, code: str) -> str:
        """
        Uses the model to generate a review for the provided code.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The review generated by the model.
        """
        prompt = self.generate_prompt(code)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Bound only the newly generated tokens; max_length also counts the prompt
        # and can reject long inputs built from the full standards checklist
        output = self.model.generate(**inputs, max_new_tokens=512)
        review_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return review_text

class ReviewManager:
    def __init__(self, reviewer: CodeReviewer):
        """
        Initializes the review manager with a given reviewer.

        Args:
            reviewer (CodeReviewer): An instance of the CodeReviewer class.
        """
        self.reviewer = reviewer

    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
        """
        Processes multiple files for review.

        Args:
            file_paths (List[str]): List of file paths to be reviewed.

        Returns:
            List[Dict[str, str]]: A list containing review data for each file.
        """
        reviews = []
        for file_path in file_paths:
            with open(file_path, 'r') as file:
                code = file.read()
            review = self.reviewer.review_code(code)
            reviews.append({"filename": os.path.basename(file_path), "review": review})
        return reviews

    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
        """
        Saves the review data to a JSON file.

        Args:
            reviews (List[Dict[str, str]]): The list of reviews to save.
            output_path (str): The path to save the JSON output.
        """
        with open(output_path, 'w') as json_file:
            json.dump(reviews, json_file, indent=4)
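Because CodeReviewer loads both the tokenizer and the model in its constructor, a quick way to inspect the prompt that review_code will send, without downloading the facebook/incoder-6B weights, is to point the constructor at a small placeholder checkpoint. A minimal sketch, assuming it is run from the repository root so the standards file resolves; the distilgpt2 name and the playbook snippet are stand-ins, not part of the commit:

# prompt_smoke_test.py (hypothetical script): prints the review prompt built for a
# tiny playbook. "distilgpt2" is only a lightweight stand-in so the constructor does
# not pull the 6B incoder checkpoint; generation quality is irrelevant here.
from lib.code_reviewer import CodeReviewer

playbook = """\
- name: Create deploy user
  hosts: all
  tasks:
    - name: Add user
      user:
        name: deploy
        state: present
"""

reviewer = CodeReviewer(model_name="distilgpt2")
print(reviewer.generate_prompt(playbook))

The printed prompt starts with the reviewer instruction, lists every description from standards/ansible_ode_standards.json grouped by category, and ends with the playbook text.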
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
 torch
 streamlit
 transformers
+datasets
+yaml
 ansible
 pandas
standards/ansible_ode_standards.json
ADDED
@@ -0,0 +1,94 @@
{
    "code_standards": [
        {
            "category": "Security",
            "standards": [
                {
                    "rule_id": "SEC001",
                    "description": "Avoid hard-coded secrets such as passwords and API keys. Use secure storage solutions."
                },
                {
                    "rule_id": "SEC002",
                    "description": "Ensure all shell commands have proper sanitization to avoid command injection."
                },
                {
                    "rule_id": "SEC003",
                    "description": "Apply appropriate role-based access control to sensitive tasks to prevent unauthorized access."
                },
                {
                    "rule_id": "SEC004",
                    "description": "Ensure secrets are encrypted during storage and transit, and are accessed only when necessary."
                },
                {
                    "rule_id": "SEC005",
                    "description": "Check Secure Business Logic, ensuring critical automation tasks do not expose or misuse sensitive information."
                }
            ]
        },
        {
            "category": "Idempotency",
            "standards": [
                {
                    "rule_id": "IDP001",
                    "description": "Ensure tasks are idempotent, such as using 'state: present' for package installation to avoid repeated actions."
                },
                {
                    "rule_id": "IDP002",
                    "description": "All configuration files should use templates to ensure consistent and predictable outputs across multiple runs."
                }
            ]
        },
        {
            "category": "Best Practices",
            "standards": [
                {
                    "rule_id": "BP001",
                    "description": "Use standard modules for common tasks, such as 'apt' for package management and 'user' for managing users."
                },
                {
                    "rule_id": "BP002",
                    "description": "Use variables for values that are subject to change, ensuring maintainability and readability of the code."
                },
                {
                    "rule_id": "BP003",
                    "description": "Include meaningful task names for all Ansible tasks, making playbooks self-documenting."
                }
            ]
        },
        {
            "category": "Readability",
            "standards": [
                {
                    "rule_id": "RD001",
                    "description": "All significant tasks must have clear and descriptive comments to explain their purpose."
                },
                {
                    "rule_id": "RD002",
                    "description": "Use meaningful variable names that convey the purpose clearly to ensure easy understanding for future developers."
                },
                {
                    "rule_id": "RD003",
                    "description": "Ensure proper formatting and indentation for better readability and adherence to Ansible's YAML syntax rules."
                }
            ]
        },
        {
            "category": "Business Logic Security",
            "standards": [
                {
                    "rule_id": "BLS001",
                    "description": "Verify that business logic automation does not inadvertently alter data integrity or bypass critical business validations."
                },
                {
                    "rule_id": "BLS002",
                    "description": "Ensure all critical paths in business processes have appropriate error handling and logging for audit purposes."
                },
                {
                    "rule_id": "BLS003",
                    "description": "Critical business operations must include validations to prevent data loss or unauthorized modification."
                }
            ]
        }
    ]
}
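Because CodeReviewer.generate_prompt simply walks "code_standards", every category name becomes a heading and every description becomes a bullet in the review prompt, so extending the checklist is a data change rather than a code change. A small sketch, assuming it is run from the repository root; the IDP003 rule is a hypothetical addition, not part of this commit:

# extend_checklist.py (hypothetical sketch): appends a new idempotency rule and
# prints the checklist the same way CodeReviewer.generate_prompt renders it.
import json

with open("standards/ansible_ode_standards.json") as f:
    standards = json.load(f)

# Hypothetical new rule, shown only to illustrate the expected shape of an entry;
# index 1 is the "Idempotency" category in the committed file.
standards["code_standards"][1]["standards"].append({
    "rule_id": "IDP003",
    "description": "Guard command and shell tasks with 'creates' or 'changed_when' so reruns stay idempotent."
})

# Mirror the prompt construction: category heading, then one bullet per description.
for category in standards["code_standards"]:
    print(f"{category['category']}:")
    for standard in category["standards"]:
        print(f"- {standard['description']}")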