# lib/code_reviewer.py
"""LLM-backed code review helpers.

``CodeReviewer`` wraps a causal language model and prompts it with a JSON
checklist of code standards; ``ReviewManager`` downloads repositories, runs
the reviewer over files and persists the resulting reviews.
"""

# Import necessary libraries
import inspect
import io
import json
import os
import zipfile
from typing import Dict, List, Optional

import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments


class CodeReviewer:
    """Generates code reviews with a pre-trained causal language model."""

    def __init__(self, model_name: str = "facebook/incoder-1B"):
        """
        Initializes the code reviewer with the specified language model.

        Args:
            model_name (str): The name of the pre-trained model to use.
        """
        # Load the checklist first: it is cheap, so a missing or malformed
        # standards file fails before the (slow) model download/load.
        self.code_standards = self.load_code_standards()

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)

    def load_code_standards(self) -> Dict:
        """
        Loads the code standards checklist from a JSON file.

        The file is read from ``<parent of this file's directory>/standards/
        code_standards.json``.

        Returns:
            Dict: The code standards in dictionary form.

        Raises:
            OSError: If the standards file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # abspath() keeps the lookup independent of the current working
        # directory even when __file__ happens to be a relative path.
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        standards_path = os.path.join(project_root, "standards", "code_standards.json")
        with open(standards_path, 'r', encoding="utf-8") as f:
            return json.load(f)

    def generate_prompt(self, code: str) -> str:
        """
        Generates a review prompt for the input code based on the loaded standards.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The prompt used for reviewing the code: an instruction line,
            one section per standards category, then the code itself.
        """
        # Collect the pieces and join once instead of repeated `+=`.
        parts = ["You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"]
        for category in self.code_standards["code_standards"]:
            parts.append(f"{category['category']}:\n")
            parts.extend(f"- {standard['description']}\n" for standard in category['standards'])
        parts.append("\nHere is the code:\n")
        parts.append(code)
        return "".join(parts)

    def _prompt_token_budget(self, max_new_tokens: int) -> Optional[int]:
        """
        Computes how many prompt tokens fit alongside the requested generation.

        Args:
            max_new_tokens (int): Number of tokens reserved for the review.

        Returns:
            Optional[int]: The maximum prompt length in tokens, or None when
            the model config does not expose a usable context size (the
            tokenizer then falls back to its own default limit).
        """
        context_window = getattr(self.model.config, "max_position_embeddings", None)
        if not isinstance(context_window, int) or context_window <= max_new_tokens:
            return None

        budget = context_window - max_new_tokens
        tokenizer_limit = getattr(self.tokenizer, "model_max_length", None)
        if isinstance(tokenizer_limit, int):
            budget = min(budget, tokenizer_limit)
        return budget

    def review_code(self, code: str, max_new_tokens: int = 512) -> str:
        """
        Uses the model to generate a review for the provided code.

        Args:
            code (str): The code to be reviewed.
            max_new_tokens (int): Upper bound on the number of tokens generated
                for the review (the prompt is not counted against it).

        Returns:
            str: The review generated by the model (without the echoed prompt).
        """
        prompt = self.generate_prompt(code)
        encoded = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            # Leave room for the generated tokens inside the context window.
            max_length=self._prompt_token_budget(max_new_tokens),
        ).to(self.device)

        # Drop tokenizer outputs the model's forward() does not accept (e.g.
        # token_type_ids). inspect.signature sees through decorators and,
        # unlike __code__.co_varnames, does not include local variables.
        forward_params = inspect.signature(self.model.forward).parameters
        model_inputs = {k: v for k, v in encoded.items() if k in forward_params}

        prompt_length = encoded["input_ids"].shape[-1]

        # Training (see fine_tune_model) leaves the model in train mode; make
        # sure dropout is disabled for inference.
        self.model.eval()

        # max_new_tokens bounds only the continuation; the previous
        # max_length=512 also counted the prompt, which left no room to
        # generate for any non-trivial input.
        output = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)

        # A causal LM returns prompt + continuation; keep only the continuation.
        generated_tokens = output[0][prompt_length:]
        return self.tokenizer.decode(generated_tokens, skip_special_tokens=True)

    def fine_tune_model(self, dataset, output_dir="./fine_tuned_incoder"):
        """
        Fine-tunes the model with a custom dataset.

        Args:
            dataset: The dataset used for fine-tuning. It is indexed with the
                keys "train" and "validation".
            output_dir (str): Directory where the fine-tuned model will be saved.
        """
        # NOTE(review): newer transformers releases rename `evaluation_strategy`
        # to `eval_strategy` -- confirm against the pinned transformers version.
        training_args = TrainingArguments(
            output_dir=output_dir,
            per_device_train_batch_size=4,
            num_train_epochs=3,
            logging_dir="./logs",
            save_steps=10_000,
            logging_steps=500,
            evaluation_strategy="steps",
            save_total_limit=2
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"]
        )

        # Start fine-tuning
        trainer.train()

        # Save the fine-tuned model together with its tokenizer so the
        # directory can be loaded again with from_pretrained().
        self.model.save_pretrained(output_dir)
        self.tokenizer.save_pretrained(output_dir)
        print(f"Fine-tuned model saved at {output_dir}")


class ReviewManager:
    """Coordinates repository download, per-file review and result storage."""

    def __init__(self, reviewer: CodeReviewer):
        """
        Initializes the review manager with a given reviewer.

        Args:
            reviewer (CodeReviewer): An instance of the CodeReviewer class.
        """
        self.reviewer = reviewer

    def download_repo(self, repo_url: str, branch: str, token: str, download_path: str,
                      timeout: float = 60.0):
        """
        Downloads a GitHub repository as a ZIP file and extracts it.

        Args:
            repo_url (str): The GitHub repository URL.
            branch (str): The branch or tag to download.
            token (str): The GitHub personal access token for authentication.
                An empty token sends the request unauthenticated.
            download_path (str): The path to extract the downloaded repository.
            timeout (float): Seconds to wait for the server before giving up.

        Raises:
            RuntimeError: If the archive could not be downloaded (non-200 status).
            requests.RequestException: On connection errors or timeouts.
            zipfile.BadZipFile: If the response body is not a valid ZIP archive.
        """
        base_url = repo_url.rstrip("/")
        headers = {"Authorization": f"Bearer {token}"} if token else {}

        # Try the name as a branch first, then as a tag, so that both kinds of
        # ref promised by the docstring actually work.
        response = None
        for ref_kind in ("heads", "tags"):
            zip_url = f"{base_url}/archive/refs/{ref_kind}/{branch}.zip"
            # Without a timeout a stalled connection would block forever.
            response = requests.get(zip_url, headers=headers, timeout=timeout)
            if response.status_code != 404:
                break

        if response.status_code != 200:
            raise RuntimeError(f"Failed to download repository. Status code: {response.status_code}, Message: {response.text}")

        with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
            zip_ref.extractall(download_path)

    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
        """
        Processes multiple files for review.

        Args:
            file_paths (List[str]): List of file paths to be reviewed.

        Returns:
            List[Dict[str, str]]: A list containing review data for each file,
            each entry holding the file's base name ("filename") and the
            generated text ("review").
        """
        reviews = []
        for file_path in file_paths:
            # Explicit encoding: do not depend on the platform's default codec.
            with open(file_path, 'r', encoding="utf-8") as file:
                code = file.read()
            review = self.reviewer.review_code(code)
            reviews.append({"filename": os.path.basename(file_path), "review": review})
        return reviews

    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
        """
        Saves the review data to a JSON file.

        Args:
            reviews (List[Dict[str, str]]): The list of reviews to save.
            output_path (str): The path to save the JSON output.
        """
        with open(output_path, 'w', encoding="utf-8") as json_file:
            json.dump(reviews, json_file, indent=4)