# Repository: codevista / lib/code_reviewer.py
# Author: vsagar100
# Last commit 02bdc55: "Added provision to accept the git repo url from UI"
# File size at that commit: 4.79 kB
# lib/code_reviewer.py
# Import necessary libraries
import os
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import requests
import zipfile
import io
# Typing helpers used in the annotations below
from typing import List, Dict
class CodeReviewer:
    """Reviews code with a causal language model, guided by a JSON standards checklist."""

    def __init__(self, model_name: str = "facebook/incoder-6B"):
        """
        Initializes the code reviewer with the specified language model.

        Args:
            model_name (str): The name of the pre-trained model to use.
        """
        # Use the GPU when torch reports one, otherwise fall back to the CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
        # Load code standards checklist
        self.code_standards = self.load_code_standards()

    def load_code_standards(self) -> Dict:
        """
        Loads the code standards checklist from a JSON file.

        The file is expected at ``<parent of this module's directory>/standards/code_standards.json``.

        Returns:
            Dict: The code standards in dictionary form.

        Raises:
            OSError: If the standards file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        standards_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)),
            "standards",
            "code_standards.json",
        )
        # Read as UTF-8 explicitly so the result does not depend on the platform's locale encoding.
        with open(standards_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def generate_prompt(self, code: str) -> str:
        """
        Generates a review prompt for the input code based on the loaded standards.

        The prompt is: a fixed instruction line, then one ``<category>:`` heading per
        entry of ``self.code_standards["code_standards"]`` followed by a ``- <description>``
        bullet per standard, then ``Here is the code:`` and the code itself.

        Args:
            code (str): The code to be reviewed.

        Returns:
            str: The prompt used for reviewing the code.
        """
        # Collect the pieces and join once instead of growing a string with "+=".
        parts = [
            "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
        ]
        for category in self.code_standards["code_standards"]:
            parts.append(f"{category['category']}:\n")
            parts.extend(f"- {standard['description']}\n" for standard in category['standards'])
        parts.append("\nHere is the code:\n")
        parts.append(code)
        return "".join(parts)

    def review_code(self, code: str, max_new_tokens: int = 512) -> str:
        """
        Uses the model to generate a review for the provided code.

        Args:
            code (str): The code to be reviewed.
            max_new_tokens (int): Upper bound on the number of tokens generated
                after the prompt. Defaults to 512.

        Returns:
            str: The decoded model output. For a decoder-only model the generated
            sequence begins with the prompt tokens, so the returned text contains
            the prompt followed by the model's continuation.
        """
        prompt = self.generate_prompt(code)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Bound only the *generated* tokens. A total-length cap (max_length) also counts
        # the prompt, and a checklist plus a full script can use up the whole budget,
        # leaving no room for the review itself.
        output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        return self.tokenizer.decode(output[0], skip_special_tokens=True)
class ReviewManager:
    """Coordinates downloading a repository, reviewing its files and saving the results."""

    def __init__(self, reviewer: "CodeReviewer"):
        """
        Initializes the review manager with a given reviewer.

        Args:
            reviewer (CodeReviewer): An instance of the CodeReviewer class. Only its
                ``review_code(code)`` method is used by this class.
        """
        self.reviewer = reviewer

    def download_repo(self, repo_url: str, token: str, download_path: str, timeout: float = 60.0):
        """
        Downloads a GitHub repository as a ZIP file and extracts it.

        Args:
            repo_url (str): The URL that returns the repository ZIP archive.
            token (str): The GitHub personal access token for authentication.
                When empty or None, the request is sent without an Authorization header.
            download_path (str): The path to extract the downloaded repository.
            timeout (float): Seconds to wait for the server before giving up.
                Defaults to 60.

        Raises:
            RuntimeError: If the server answers with a status code other than 200.
            zipfile.BadZipFile: If the response body is not a ZIP archive.
        """
        # Only send credentials when we have some: a "Bearer None" / "Bearer " header
        # is treated as bad credentials instead of falling back to anonymous access.
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        # Without a timeout a stalled connection would block the caller indefinitely.
        response = requests.get(repo_url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to download repository. Status code: {response.status_code}, Message: {response.text}"
            )
        # The archive is unpacked straight from memory; nothing is written besides the extracted files.
        with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
            zip_ref.extractall(download_path)

    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
        """
        Processes multiple files for review.

        Args:
            file_paths (List[str]): List of file paths to be reviewed.

        Returns:
            List[Dict[str, str]]: One ``{"filename": <base name>, "review": <review text>}``
            entry per input path, in input order.

        Raises:
            OSError: If a file cannot be opened.
            UnicodeDecodeError: If a file is not valid UTF-8.
        """
        reviews = []
        for file_path in file_paths:
            # Read as UTF-8 explicitly so results do not depend on the platform locale.
            with open(file_path, 'r', encoding='utf-8') as file:
                code = file.read()
            # The file is already closed here, so no handle is held during the model call.
            review = self.reviewer.review_code(code)
            reviews.append({"filename": os.path.basename(file_path), "review": review})
        return reviews

    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
        """
        Saves the review data to a JSON file.

        Args:
            reviews (List[Dict[str, str]]): The list of reviews to save.
            output_path (str): The path to save the JSON output. An existing file is overwritten.
        """
        with open(output_path, 'w', encoding='utf-8') as json_file:
            json.dump(reviews, json_file, indent=4)