Spaces:

vsagar100
/

codevista

Sleeping

App Files Files Community

codevista / lib /code_reviewer.py

vsagar100

Changed model to 1B

3344c31 verified 9 months ago

raw

history blame

4.97 kB

	# lib/code_reviewer.py

	# Standard library
	import inspect
	import io
	import json
	import os
	import zipfile
	from typing import List, Dict

	# Third-party
	import requests
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	class CodeReviewer:
	    """Reviews code with a causal language model against a JSON standards checklist.

	    The checklist is loaded once when the instance is created and is embedded
	    in every prompt sent to the model.
	    """

	    def __init__(self, model_name: str = "facebook/incoder-1B"):
	        """
	        Initializes the code reviewer with the specified language model.

	        Args:
	            model_name (str): The name of the pre-trained model to use.
	        """
	        # Prefer the GPU when one is available; the model is moved there once.
	        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
	        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
	        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
	        # Load code standards checklist
	        self.code_standards = self.load_code_standards()

	    def load_code_standards(self) -> Dict:
	        """
	        Loads the code standards checklist from a JSON file.

	        The file is looked up at ``standards/ansible_code_standards.json``
	        under the parent of the directory that contains this module.

	        Returns:
	            Dict: The code standards in dictionary form.

	        Raises:
	            OSError: If the checklist file cannot be opened.
	            json.JSONDecodeError: If the file does not contain valid JSON.
	        """
	        project_root = os.path.dirname(os.path.dirname(__file__))
	        standards_path = os.path.join(project_root, "standards", "ansible_code_standards.json")
	        # Read as UTF-8 explicitly so the result does not depend on the
	        # platform's default locale encoding.
	        with open(standards_path, 'r', encoding='utf-8') as f:
	            return json.load(f)

	    def generate_prompt(self, code: str) -> str:
	        """
	        Generates a review prompt for the input code based on the loaded standards.

	        The loaded standards are read as
	        ``{"code_standards": [{"category": ..., "standards": [{"description": ...}]}]}``.

	        Args:
	            code (str): The code to be reviewed.

	        Returns:
	            str: The prompt used for reviewing the code.
	        """
	        # Collect the pieces and join once instead of repeated string +=.
	        parts = [
	            "You are an expert Ansible code reviewer. Review the following script thoroughly for the specified standards:\n\n"
	        ]
	        for category in self.code_standards["code_standards"]:
	            parts.append(f"{category['category']}:\n")
	            parts.extend(f"- {standard['description']}\n" for standard in category['standards'])
	        parts.append("\nHere is the code:\n")
	        parts.append(code)
	        return "".join(parts)

	    def review_code(self, code: str, max_new_tokens: int = 512) -> str:
	        """
	        Uses the model to generate a review for the provided code.

	        Args:
	            code (str): The code to be reviewed.
	            max_new_tokens (int): Upper bound on the number of tokens the model
	                may generate in addition to the prompt.

	        Returns:
	            str: The review generated by the model (the prompt is not echoed).
	        """
	        prompt = self.generate_prompt(code)
	        encoded = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(self.device)

	        # Some tokenizers emit keys (e.g. token_type_ids) that the model's
	        # forward() does not accept. inspect.signature lists only real
	        # parameters and follows functools.wraps wrappers, unlike
	        # __code__.co_varnames, which also contains locals and describes the
	        # wrapper when forward() is decorated.
	        accepted = inspect.signature(self.model.forward).parameters
	        model_inputs = {key: value for key, value in encoded.items() if key in accepted}

	        # max_length would cap prompt + generation together, leaving no room
	        # for a review once the prompt alone reaches the cap; max_new_tokens
	        # budgets only the generated part.
	        # NOTE(review): prompt truncation relies on the tokenizer's default
	        # limit - confirm prompt + max_new_tokens fits the model's context.
	        output = self.model.generate(**model_inputs, max_new_tokens=max_new_tokens)

	        # A causal LM returns the prompt tokens followed by the continuation;
	        # decode only the continuation so the result is the review itself.
	        prompt_length = len(encoded["input_ids"][0])
	        generated = output[0][prompt_length:]
	        return self.tokenizer.decode(generated, skip_special_tokens=True)

	class ReviewManager:
	    """Downloads repositories, runs a reviewer over files and stores the results."""

	    def __init__(self, reviewer: "CodeReviewer"):
	        """
	        Initializes the review manager with a given reviewer.

	        Args:
	            reviewer (CodeReviewer): An instance of the CodeReviewer class. Only
	                its ``review_code(code)`` method is used by this class.
	        """
	        self.reviewer = reviewer

	    def download_repo(self, repo_url: str, token: str, download_path: str, timeout: float = 60.0):
	        """
	        Downloads a GitHub repository as a ZIP file and extracts it.

	        Args:
	            repo_url (str): The GitHub repository URL. The response body is
	                opened as a ZIP archive, so the URL must serve one.
	            token (str): The GitHub personal access token for authentication.
	                When empty or None, no Authorization header is sent.
	            download_path (str): The path to extract the downloaded repository.
	            timeout (float): Seconds to wait for the server before giving up.

	        Raises:
	            Exception: If the server answers with a status code other than 200.
	        """
	        # Sending "Bearer None" / "Bearer " would be rejected as bad
	        # credentials, so only authenticate when a token was supplied.
	        headers = {"Authorization": f"Bearer {token}"} if token else {}
	        # Without a timeout a stalled connection would block forever.
	        response = requests.get(repo_url, headers=headers, timeout=timeout)
	        if response.status_code != 200:
	            raise Exception(f"Failed to download repository. Status code: {response.status_code}, Message: {response.text}")
	        # The archive is unpacked straight from memory; nothing is written
	        # to disk except the extracted members.
	        with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
	            zip_ref.extractall(download_path)

	    def process_files(self, file_paths: List[str]) -> List[Dict[str, str]]:
	        """
	        Processes multiple files for review.

	        Args:
	            file_paths (List[str]): List of file paths to be reviewed.

	        Returns:
	            List[Dict[str, str]]: One ``{"filename": ..., "review": ...}`` entry
	            per input path, in input order. ``filename`` is the base name only.
	        """
	        reviews = []
	        for file_path in file_paths:
	            # Decode as UTF-8 explicitly rather than with the platform
	            # default, which differs between operating systems.
	            with open(file_path, 'r', encoding='utf-8') as file:
	                code = file.read()
	            review = self.reviewer.review_code(code)
	            reviews.append({"filename": os.path.basename(file_path), "review": review})
	        return reviews

	    def save_reviews_to_json(self, reviews: List[Dict[str, str]], output_path: str):
	        """
	        Saves the review data to a JSON file.

	        Args:
	            reviews (List[Dict[str, str]]): The list of reviews to save.
	            output_path (str): The path to save the JSON output. An existing
	                file at this path is overwritten.
	        """
	        with open(output_path, 'w', encoding='utf-8') as json_file:
	            json.dump(reviews, json_file, indent=4)