Updated model, added checklist
- app.py +33 -36
- lib/code_reviewer.py +110 -0
- requirements.txt +2 -0
- standards/ansible_ode_standards.json +94 -0
app.py
CHANGED
@@ -1,37 +1,34 @@
+# app.py
+
+# Import necessary modules from lib
+from lib.code_reviewer import CodeReviewer, ReviewManager
 import os
-#
-with open("review_results.json", "w") as f:
-    f.write(json_result)
-
-# Display results and download link
-st.json(result)
-st.download_button("Download Review Results", json_result, file_name="review_results.json")
+
+def main():
+    # Directory structure setup
+    # Directory for storing input Ansible files
+    input_directory = "input_files"
+    # Directory for storing output JSON reviews
+    output_directory = "output_reviews"
+
+    # Ensure the directories exist
+    os.makedirs(input_directory, exist_ok=True)
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Paths for testing
+    sample_files = [os.path.join(input_directory, "example1.yml"), os.path.join(input_directory, "example2.yml")]
+    output_json_path = os.path.join(output_directory, "code_review_results.json")
+
+    # Initialize the code reviewer and review manager
+    code_reviewer = CodeReviewer()
+    review_manager = ReviewManager(reviewer=code_reviewer)
+
+    # Process files and generate reviews
+    reviews = review_manager.process_files(sample_files)
+
+    # Save reviews to JSON
+    review_manager.save_reviews_to_json(reviews, output_json_path)
+    print(f"Reviews saved to {output_json_path}")
+
+if __name__ == "__main__":
+    main()
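The sample playbooks referenced in app.py are not part of this commit, so running the script as-is would fail on the missing files. A minimal sketch for creating stand-ins for both expected inputs (the playbook content below is invented purely for illustration):

# make_sample_input.py -- hypothetical helper, not part of this commit
import os

os.makedirs("input_files", exist_ok=True)

# A tiny, made-up playbook so app.py has something to review.
playbook = """---
- hosts: all
  tasks:
    - name: Install nginx
      apt:
        name: nginx
        state: present
"""

# app.py expects both example1.yml and example2.yml under input_files/.
for name in ("example1.yml", "example2.yml"):
    with open(os.path.join("input_files", name), "w") as f:
        f.write(playbook)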
lib/code_reviewer.py
ADDED
@@ -0,0 +1,110 @@
+# lib/code_reviewer.py
+
+# Import necessary libraries
+import os
+import json
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Custom Imports
+from typing import List, Dict
+
+class CodeReviewer:
+    def __init__(self, model_name: str = "facebook/incoder-6B"):
+        """
+        Initializes the code reviewer with the specified language model.
+
+        Args:
+            model_name (str): The name of the pre-trained model to use.
+        """
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
+        # Load code standards checklist
+        self.code_standards = self.load_code_standards()
+
+    def load_code_standards(self) -> Dict:
+        """
+        Loads the code standards checklist from a JSON file.
+
+        Returns:
+            Dict: The code standards in dictionary form.
+        """
+        standards_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "standards", "ansible_ode_standards.json")
+        with open(standards_path, 'r') as f:
+            return json.load(f)
+
+    def generate_prompt(self, code: str) -> str:
+        """
+        Generates a review prompt for the input code based on the loaded standards.
+
+        Args:
+            code (str): The code to be reviewed.
+
+        Returns:
+            str: The prompt used for reviewing the code.
+        """
+        # Build prompt from code standards
+        prompt = "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
+        for category in self.code_standards["code_standards"]:
+            prompt += f"{category['category']}:\n"
+            for standard in category['standards']:
+                prompt += f"- {standard['description']}\n"
+        prompt += "\nHere is the code:\n"
+        return prompt + code
+
+    def review_code(self, code: str) -> str:
+        """
+        Uses the model to generate a review for the provided code.
+
+        Args:
+            code (str): The code to be reviewed.
+
+        Returns:
+            str: The review generated by the model.
+        """
+        prompt = self.generate_prompt(code)
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+        output = self.model.generate(**inputs, max_length=512)
+        review_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
+        return review_text
+
+class ReviewManager:
+    def __init__(self, reviewer: CodeReviewer):
+        """
+        Initializes the review manager with a given reviewer.
+
+        Args:
+            reviewer (CodeReviewer): An instance of the CodeReviewer class.
+        """
+        self.reviewer = reviewer
+
+    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
+        """
+        Processes multiple files for review.
+
+        Args:
+            file_paths (List[str]): List of file paths to be reviewed.
+
+        Returns:
+            List[Dict[str, str]]: A list containing review data for each file.
+        """
+        reviews = []
+        for file_path in file_paths:
+            with open(file_path, 'r') as file:
+                code = file.read()
+            review = self.reviewer.review_code(code)
+            reviews.append({"filename": os.path.basename(file_path), "review": review})
+        return reviews
+
+    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
+        """
+        Saves the review data to a JSON file.
+
+        Args:
+            reviews (List[Dict[str, str]]): The list of reviews to save.
+            output_path (str): The path to save the JSON output.
+        """
+        with open(output_path, 'w') as json_file:
+            json.dump(reviews, json_file, indent=4)
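For a lighter-weight check, CodeReviewer can also be driven directly on an in-memory snippet without going through ReviewManager. A minimal sketch, assuming the standards JSON is in place and the facebook/incoder-6B weights can be loaded on the available device; the playbook string itself is hypothetical:

# review_snippet.py -- illustrative sketch only, not part of this commit
from lib.code_reviewer import CodeReviewer

reviewer = CodeReviewer()  # loads the tokenizer, the model, and the standards checklist

# Hypothetical playbook text; any Ansible YAML string works here.
snippet = """---
- hosts: web
  tasks:
    - name: Create deploy user
      user:
        name: deploy
        state: present
"""

print(reviewer.review_code(snippet))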
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
 torch
 streamlit
 transformers
+datasets
+PyYAML
 ansible
 pandas
standards/ansible_ode_standards.json
ADDED
@@ -0,0 +1,94 @@
+{
+  "code_standards": [
+    {
+      "category": "Security",
+      "standards": [
+        {
+          "rule_id": "SEC001",
+          "description": "Avoid hard-coded secrets such as passwords and API keys. Use secure storage solutions."
+        },
+        {
+          "rule_id": "SEC002",
+          "description": "Ensure all shell commands have proper sanitization to avoid command injection."
+        },
+        {
+          "rule_id": "SEC003",
+          "description": "Apply appropriate role-based access control to sensitive tasks to prevent unauthorized access."
+        },
+        {
+          "rule_id": "SEC004",
+          "description": "Ensure secrets are encrypted during storage and transit, and are accessed only when necessary."
+        },
+        {
+          "rule_id": "SEC005",
+          "description": "Check Secure Business Logic, ensuring critical automation tasks do not expose or misuse sensitive information."
+        }
+      ]
+    },
+    {
+      "category": "Idempotency",
+      "standards": [
+        {
+          "rule_id": "IDP001",
+          "description": "Ensure tasks are idempotent, such as using 'state: present' for package installation to avoid repeated actions."
+        },
+        {
+          "rule_id": "IDP002",
+          "description": "All configuration files should use templates to ensure consistent and predictable outputs across multiple runs."
+        }
+      ]
+    },
+    {
+      "category": "Best Practices",
+      "standards": [
+        {
+          "rule_id": "BP001",
+          "description": "Use standard modules for common tasks, such as 'apt' for package management and 'user' for managing users."
+        },
+        {
+          "rule_id": "BP002",
+          "description": "Use variables for values that are subject to change, ensuring maintainability and readability of the code."
+        },
+        {
+          "rule_id": "BP003",
+          "description": "Include meaningful task names for all Ansible tasks, making playbooks self-documenting."
+        }
+      ]
+    },
+    {
+      "category": "Readability",
+      "standards": [
+        {
+          "rule_id": "RD001",
+          "description": "All significant tasks must have clear and descriptive comments to explain their purpose."
+        },
+        {
+          "rule_id": "RD002",
+          "description": "Use meaningful variable names that convey the purpose clearly to ensure easy understanding for future developers."
+        },
+        {
+          "rule_id": "RD003",
+          "description": "Ensure proper formatting and indentation for better readability and adherence to Ansible's YAML syntax rules."
+        }
+      ]
+    },
+    {
+      "category": "Business Logic Security",
+      "standards": [
+        {
+          "rule_id": "BLS001",
+          "description": "Verify that business logic automation does not inadvertently alter data integrity or bypass critical business validations."
+        },
+        {
+          "rule_id": "BLS002",
+          "description": "Ensure all critical paths in business processes have appropriate error handling and logging for audit purposes."
+        },
+        {
+          "rule_id": "BLS003",
+          "description": "Critical business operations must include validations to prevent data loss or unauthorized modification."
+        }
+      ]
+    }
+  ]
+}
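To see how generate_prompt() in lib/code_reviewer.py flattens this checklist into prompt text, here is a small sketch that prints the lines produced for the first category; it assumes the file above is saved at standards/ansible_ode_standards.json relative to the working directory:

# prompt_preview.py -- illustrative sketch only, not part of this commit
import json

with open("standards/ansible_ode_standards.json") as f:
    standards = json.load(f)

# Mirror the loop in CodeReviewer.generate_prompt for a single category.
category = standards["code_standards"][0]
print(f"{category['category']}:")
for standard in category["standards"]:
    print(f"- {standard['description']}")

# Expected output:
# Security:
# - Avoid hard-coded secrets such as passwords and API keys. Use secure storage solutions.
# - Ensure all shell commands have proper sanitization to avoid command injection.
# ... (one line per rule in the category)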