Spaces:

vsagar100
/

codevista

Sleeping

App Files Files Community

vsagar100 committed on Oct 20, 2024

Commit

1c7a008

verified ·

1 Parent(s): 3344c31

Update lib/code_reviewer.py

Browse files

Files changed (1) hide show

lib/code_reviewer.py +40 -5

lib/code_reviewer.py CHANGED Viewed

@@ -4,7 +4,7 @@
 import os
 import json
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import requests
 import zipfile
 import io
@@ -33,7 +33,7 @@ class CodeReviewer:
         Returns:
             Dict: The code standards in dictionary form.
         """
-        standards_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "standards", "ansible_code_standards.json")
         with open(standards_path, 'r') as f:
             return json.load(f)
@@ -74,6 +74,40 @@ class CodeReviewer:
         review_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
         return review_text
 class ReviewManager:
     def __init__(self, reviewer: CodeReviewer):
         """
@@ -84,17 +118,19 @@ class ReviewManager:
         """
         self.reviewer = reviewer
-    def download_repo(self, repo_url: str, token: str, download_path: str):
         """
         Downloads a GitHub repository as a ZIP file and extracts it.
         Args:
             repo_url (str): The GitHub repository URL.
             token (str): The GitHub personal access token for authentication.
             download_path (str): The path to extract the downloaded repository.
         """
         headers = {"Authorization": f"Bearer {token}"}
-        response = requests.get(repo_url, headers=headers)
         if response.status_code == 200:
             with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
                 zip_ref.extractall(download_path)
@@ -129,4 +165,3 @@ class ReviewManager:
         """
         with open(output_path, 'w') as json_file:
             json.dump(reviews, json_file, indent=4)

 import os
 import json
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
 import requests
 import zipfile
 import io
         Returns:
             Dict: The code standards in dictionary form.
         """
+        standards_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "standards", "code_standards.json")
         with open(standards_path, 'r') as f:
             return json.load(f)
         review_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
         return review_text
+    def fine_tune_model(self, dataset, output_dir="./fine_tuned_incoder"):
+        """
+        Fine-tunes self.model on a custom dataset and saves the result.
+        Builds a TrainingArguments/Trainer pair, runs training, then writes
+        both the model and the tokenizer to output_dir and prints a
+        confirmation message. Nothing is returned.
+        Args:
+            dataset: Object indexed with the keys "train" and "validation";
+                both entries are handed to the Trainer.
+                NOTE(review): presumably an already-tokenized
+                datasets.DatasetDict - confirm against the caller.
+            output_dir (str): Directory passed to TrainingArguments and used
+                as the save location for the fine-tuned model and tokenizer.
+        """
+        # Training configuration: every value is hard-coded except output_dir.
+        training_args = TrainingArguments(
+            output_dir=output_dir,
+            per_device_train_batch_size=4,
+            num_train_epochs=3,
+            logging_dir="./logs",
+            save_steps=10_000,
+            logging_steps=500,
+            evaluation_strategy="steps",
+            save_total_limit=2
+        )
+        # Only the model, the arguments and the two dataset splits are given
+        # to the Trainer; no tokenizer or data collator is passed here.
+        trainer = Trainer(
+            model=self.model,
+            args=training_args,
+            train_dataset=dataset["train"],
+            eval_dataset=dataset["validation"]
+        )
+        # Start fine-tuning
+        trainer.train()
+        # Save the fine-tuned model and the tokenizer side by side in
+        # output_dir
+        self.model.save_pretrained(output_dir)
+        self.tokenizer.save_pretrained(output_dir)
+        print(f"Fine-tuned model saved at {output_dir}")
 class ReviewManager:
     def __init__(self, reviewer: CodeReviewer):
         """
         """
         self.reviewer = reviewer
+    def download_repo(self, repo_url: str, branch: str, token: str, download_path: str):
         """
         Downloads a GitHub repository as a ZIP file and extracts it.
         Args:
             repo_url (str): The GitHub repository URL.
+            branch (str): The branch or tag to download.
             token (str): The GitHub personal access token for authentication.
             download_path (str): The path to extract the downloaded repository.
         """
+        zip_url = f"{repo_url}/archive/refs/heads/{branch}.zip"
         headers = {"Authorization": f"Bearer {token}"}
+        response = requests.get(zip_url, headers=headers)
         if response.status_code == 200:
             with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
                 zip_ref.extractall(download_path)
         """
         with open(output_path, 'w') as json_file:
             json.dump(reviews, json_file, indent=4)