|
|
|
|
|
|
|
import inspect
import io
import json
import os
import zipfile
from typing import List, Dict

import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
class CodeReviewer:
    """Generates code reviews with a causal language model and a standards checklist."""

    def __init__(self, model_name: str = "facebook/incoder-1B"):
        """
        Initializes the code reviewer with the specified language model.

        Args:
            model_name (str): The name of the pre-trained model to use.
        """
        # Load the (cheap) checklist first so a missing or malformed standards
        # file fails before the expensive model load starts.
        self.code_standards = self.load_code_standards()

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)

    def load_code_standards(self) -> Dict:
        """
        Loads the code standards checklist from a JSON file.

        The file is looked up at ``<parent of this file's directory>/standards/
        ansible_code_standards.json``.

        Returns:
            Dict: The code standards in dictionary form.

        Raises:
            OSError: If the standards file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        package_root = os.path.dirname(os.path.dirname(__file__))
        standards_path = os.path.join(package_root, "standards", "ansible_code_standards.json")
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(standards_path, 'r', encoding="utf-8") as f:
            return json.load(f)

    def generate_prompt(self, code: str) -> str:
        """
        Generates a review prompt for the input code based on the loaded standards.

        Reads ``self.code_standards["code_standards"]``, a list of categories
        each holding a ``"category"`` name and a list of ``"standards"`` with a
        ``"description"``.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The prompt used for reviewing the code.
        """
        parts = [
            "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
        ]
        for category in self.code_standards["code_standards"]:
            parts.append(f"{category['category']}:\n")
            parts.extend(f"- {standard['description']}\n" for standard in category['standards'])
        parts.append("\nHere is the code:\n")
        parts.append(code)
        # Single join instead of repeated string concatenation in the loop.
        return "".join(parts)

    def review_code(self, code: str, max_new_tokens: int = 512) -> str:
        """
        Uses the model to generate a review for the provided code.

        Args:
            code (str): The code to be reviewed.
            max_new_tokens (int): Upper bound on the number of tokens generated
                for the review, not counting the prompt.

        Returns:
            str: The review generated by the model (the prompt is not echoed back).
        """
        prompt = self.generate_prompt(code)

        tokenizer_kwargs = {"return_tensors": "pt", "truncation": True}
        # When the model config exposes its context window, truncate the prompt
        # so that room remains for the generated review. Not every config has
        # this attribute, hence the defensive lookup.
        context_window = getattr(getattr(self.model, "config", None), "max_position_embeddings", None)
        if isinstance(context_window, int) and context_window > max_new_tokens:
            tokenizer_kwargs["max_length"] = context_window - max_new_tokens

        encoded = self.tokenizer(prompt, **tokenizer_kwargs).to(self.device)

        # Keep only the tensors the model's forward() names as parameters (the
        # tokenizer may emit extras such as token_type_ids). inspect.signature
        # follows decorator wrappers, unlike reading __code__.co_varnames, which
        # also mixes in local variable names.
        forward_params = inspect.signature(self.model.forward).parameters
        inputs = {k: v for k, v in encoded.items() if k in forward_params}

        # max_new_tokens bounds only the continuation; a total max_length would
        # leave no budget for the review once the prompt itself is long.
        output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)

        # The generated sequence starts with the prompt tokens; decode only
        # what follows them.
        prompt_length = inputs["input_ids"].shape[-1]
        review_text = self.tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
        return review_text
|
|
|
class ReviewManager:
    """Coordinates downloading a repository, reviewing files and saving the results."""

    def __init__(self, reviewer: "CodeReviewer"):
        """
        Initializes the review manager with a given reviewer.

        Args:
            reviewer (CodeReviewer): An instance of the CodeReviewer class; only
                its ``review_code(code)`` method is used here.
        """
        self.reviewer = reviewer

    def download_repo(self, repo_url: str, token: str, download_path: str, timeout: float = 60.0):
        """
        Downloads a GitHub repository as a ZIP file and extracts it.

        Args:
            repo_url (str): The GitHub repository URL (must respond with a ZIP archive).
            token (str): The GitHub personal access token for authentication.
                If empty or None, the request is sent without an Authorization header.
            download_path (str): The path to extract the downloaded repository.
            timeout (float): Seconds to wait for the server before giving up.

        Raises:
            RuntimeError: If the server responds with a status other than 200.
        """
        # Avoid sending "Bearer None" / "Bearer " when no token was supplied.
        headers = {"Authorization": f"Bearer {token}"} if token else {}

        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.get(repo_url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to download repository. Status code: {response.status_code}, Message: {response.text}"
            )

        # The archive is held in memory and unpacked straight from the response bytes.
        with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
            zip_ref.extractall(download_path)

    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
        """
        Processes multiple files for review.

        Args:
            file_paths (List[str]): List of file paths to be reviewed.

        Returns:
            List[Dict[str, str]]: One ``{"filename", "review"}`` entry per input
            path, in input order. ``filename`` is the base name only, so files
            with the same name in different directories are not distinguished.
        """
        reviews = []
        for file_path in file_paths:
            # Explicit encoding so results do not depend on the platform locale.
            with open(file_path, 'r', encoding="utf-8") as file:
                code = file.read()
            review = self.reviewer.review_code(code)
            reviews.append({"filename": os.path.basename(file_path), "review": review})
        return reviews

    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
        """
        Saves the review data to a JSON file.

        Args:
            reviews (List[Dict[str, str]]): The list of reviews to save.
            output_path (str): The path to save the JSON output (overwritten if it exists).
        """
        with open(output_path, 'w', encoding="utf-8") as json_file:
            json.dump(reviews, json_file, indent=4)
|
|
|
|