vsagar100 committed on
Commit
e49d8aa
·
1 Parent(s): 88588af

Updated model, added checklist

Browse files
app.py CHANGED
@@ -1,37 +1,34 @@
1
- import streamlit as st
 
 
 
2
  import os
3
- import json
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
-
6
- # UI Components for Streamlit
7
- st.title("Ansible Code Reviewer")
8
- uploaded_files = st.file_uploader("Upload your Ansible code files", type=['yml', 'yaml'], accept_multiple_files=True)
9
-
10
- if uploaded_files:
11
- result = []
12
- model_name = "facebook/incoder-1B" # Example model name
13
- tokenizer = AutoTokenizer.from_pretrained(model_name)
14
- model = AutoModelForCausalLM.from_pretrained(model_name)
15
-
16
- # Process each uploaded file
17
- for uploaded_file in uploaded_files:
18
- content = uploaded_file.read().decode("utf-8")
19
- # Here you could use the model to evaluate the content
20
- tokens = tokenizer(content, return_tensors="pt")
21
- review_output = model.generate(**tokens)
22
- review_text = tokenizer.decode(review_output[0], skip_special_tokens=True)
23
-
24
- # Store results
25
- result.append({
26
- "filename": uploaded_file.name,
27
- "review": review_text
28
- })
29
-
30
- # Save the results to a JSON file
31
- json_result = json.dumps(result, indent=4)
32
- with open("review_results.json", "w") as f:
33
- f.write(json_result)
34
-
35
- # Display results and download link
36
- st.json(result)
37
- st.download_button("Download Review Results", json_result, file_name="review_results.json")
 
1
+ # app.py
2
+
3
+ # Import necessary modules from lib
4
+ from lib.code_reviewer import CodeReviewer, ReviewManager
5
  import os
6
+
7
+ def main():
8
+ # Directory structure setup
9
+ # Directory for storing input Ansible files
10
+ input_directory = "input_files"
11
+ # Directory for storing output JSON reviews
12
+ output_directory = "output_reviews"
13
+
14
+ # Ensure the directories exist
15
+ os.makedirs(input_directory, exist_ok=True)
16
+ os.makedirs(output_directory, exist_ok=True)
17
+
18
+ # Paths for testing
19
+ sample_files = [os.path.join(input_directory, "example1.yml"), os.path.join(input_directory, "example2.yml")]
20
+ output_json_path = os.path.join(output_directory, "code_review_results.json")
21
+
22
+ # Initialize the code reviewer and review manager
23
+ code_reviewer = CodeReviewer()
24
+ review_manager = ReviewManager(reviewer=code_reviewer)
25
+
26
+ # Process files and generate reviews
27
+ reviews = review_manager.process_files(sample_files)
28
+
29
+ # Save reviews to JSON
30
+ review_manager.save_reviews_to_json(reviews, output_json_path)
31
+ print(f"Reviews saved to {output_json_path}")
32
+
33
+ if __name__ == "__main__":
34
+ main()
 
 
 
 
 
 
lib/code_reviewer.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lib/code_reviewer.py
2
+
3
+ # Import necessary libraries
4
+ import os
5
+ import json
6
+ import torch
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer
8
+
9
+ # Custom Imports
10
+ from typing import List, Dict
11
+
12
class CodeReviewer:
    """Reviews Ansible code against a standards checklist using a causal LM."""

    def __init__(self, model_name: str = "facebook/incoder-6B"):
        """
        Initializes the code reviewer with the specified language model.

        Args:
            model_name (str): The name of the pre-trained model to use.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
        # Load the code standards checklist once so every review reuses it.
        self.code_standards = self.load_code_standards()

    def load_code_standards(self) -> Dict:
        """
        Loads the code standards checklist from a JSON file.

        Returns:
            Dict: The code standards in dictionary form.
        """
        # NOTE(review): "ansible_ode_standards.json" looks like a typo for
        # "ansible_code_standards.json", but it matches the file name checked
        # into standards/ — rename both together if this is ever fixed.
        standards_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)),
            "standards",
            "ansible_ode_standards.json",
        )
        with open(standards_path, "r") as f:
            return json.load(f)

    def generate_prompt(self, code: str) -> str:
        """
        Generates a review prompt for the input code based on the loaded standards.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The prompt used for reviewing the code.
        """
        # Build prompt from code standards: one heading per category followed
        # by a bulleted list of that category's standard descriptions.
        prompt = "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
        for category in self.code_standards["code_standards"]:
            prompt += f"{category['category']}:\n"
            for standard in category['standards']:
                prompt += f"- {standard['description']}\n"
        prompt += "\nHere is the code:\n"
        return prompt + code

    def review_code(self, code: str) -> str:
        """
        Uses the model to generate a review for the provided code.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The review generated by the model.
        """
        prompt = self.generate_prompt(code)
        # truncation=True caps over-long playbooks at the model's maximum
        # context length instead of erroring out inside generate().
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(self.device)
        # max_new_tokens budgets the *generated* text only; the previous
        # max_length=512 counted the prompt tokens too, so any prompt near
        # 512 tokens produced a truncated or empty review.
        # no_grad() avoids tracking gradients during pure inference.
        with torch.no_grad():
            output = self.model.generate(**inputs, max_new_tokens=512)
        review_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return review_text
71
+
72
class ReviewManager:
    """Coordinates reading source files, generating reviews, and saving JSON output."""

    def __init__(self, reviewer: "CodeReviewer"):
        """
        Initializes the review manager with a given reviewer.

        Args:
            reviewer (CodeReviewer): An instance of the CodeReviewer class.
        """
        self.reviewer = reviewer

    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
        """
        Processes multiple files for review.

        Args:
            file_paths (List[str]): List of file paths to be reviewed.

        Returns:
            List[Dict[str, str]]: A list containing review data for each file.
        """
        reviews = []
        for file_path in file_paths:
            # Read explicitly as UTF-8 so results don't depend on the host locale.
            with open(file_path, 'r', encoding="utf-8") as file:
                code = file.read()
            review = self.reviewer.review_code(code)
            reviews.append({"filename": os.path.basename(file_path), "review": review})
        return reviews

    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
        """
        Saves the review data to a JSON file.

        Args:
            reviews (List[Dict[str, str]]): The list of reviews to save.
            output_path (str): The path to save the JSON output.
        """
        # Create the destination directory if the caller hasn't already;
        # guard against an empty dirname for bare filenames like "out.json".
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(output_path, 'w', encoding="utf-8") as json_file:
            json.dump(reviews, json_file, indent=4)
110
+
requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
  torch
2
  streamlit
3
  transformers
 
 
4
  ansible
5
  pandas
 
1
  torch
2
  streamlit
3
  transformers
4
+ datasets
5
+ PyYAML
6
  ansible
7
  pandas
standards/ansible_ode_standards.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "code_standards": [
3
+ {
4
+ "category": "Security",
5
+ "standards": [
6
+ {
7
+ "rule_id": "SEC001",
8
+ "description": "Avoid hard-coded secrets such as passwords and API keys. Use secure storage solutions."
9
+ },
10
+ {
11
+ "rule_id": "SEC002",
12
+ "description": "Ensure all shell commands have proper sanitization to avoid command injection."
13
+ },
14
+ {
15
+ "rule_id": "SEC003",
16
+ "description": "Apply appropriate role-based access control to sensitive tasks to prevent unauthorized access."
17
+ },
18
+ {
19
+ "rule_id": "SEC004",
20
+ "description": "Ensure secrets are encrypted during storage and transit, and are accessed only when necessary."
21
+ },
22
+ {
23
+ "rule_id": "SEC005",
24
+ "description": "Check Secure Business Logic, ensuring critical automation tasks do not expose or misuse sensitive information."
25
+ }
26
+ ]
27
+ },
28
+ {
29
+ "category": "Idempotency",
30
+ "standards": [
31
+ {
32
+ "rule_id": "IDP001",
33
+ "description": "Ensure tasks are idempotent, such as using 'state: present' for package installation to avoid repeated actions."
34
+ },
35
+ {
36
+ "rule_id": "IDP002",
37
+ "description": "All configuration files should use templates to ensure consistent and predictable outputs across multiple runs."
38
+ }
39
+ ]
40
+ },
41
+ {
42
+ "category": "Best Practices",
43
+ "standards": [
44
+ {
45
+ "rule_id": "BP001",
46
+ "description": "Use standard modules for common tasks, such as 'apt' for package management and 'user' for managing users."
47
+ },
48
+ {
49
+ "rule_id": "BP002",
50
+ "description": "Use variables for values that are subject to change, ensuring maintainability and readability of the code."
51
+ },
52
+ {
53
+ "rule_id": "BP003",
54
+ "description": "Include meaningful task names for all Ansible tasks, making playbooks self-documenting."
55
+ }
56
+ ]
57
+ },
58
+ {
59
+ "category": "Readability",
60
+ "standards": [
61
+ {
62
+ "rule_id": "RD001",
63
+ "description": "All significant tasks must have clear and descriptive comments to explain their purpose."
64
+ },
65
+ {
66
+ "rule_id": "RD002",
67
+ "description": "Use meaningful variable names that convey the purpose clearly to ensure easy understanding for future developers."
68
+ },
69
+ {
70
+ "rule_id": "RD003",
71
+ "description": "Ensure proper formatting and indentation for better readability and adherence to Ansible's YAML syntax rules."
72
+ }
73
+ ]
74
+ },
75
+ {
76
+ "category": "Business Logic Security",
77
+ "standards": [
78
+ {
79
+ "rule_id": "BLS001",
80
+ "description": "Verify that business logic automation does not inadvertently alter data integrity or bypass critical business validations."
81
+ },
82
+ {
83
+ "rule_id": "BLS002",
84
+ "description": "Ensure all critical paths in business processes have appropriate error handling and logging for audit purposes."
85
+ },
86
+ {
87
+ "rule_id": "BLS003",
88
+ "description": "Critical business operations must include validations to prevent data loss or unauthorized modification."
89
+ }
90
+ ]
91
+ }
92
+ ]
93
+ }
94
+