File size: 4,849 Bytes
782d1a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e7d63
83b4b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# app.py

# Standard library
import glob
import json
import os

# Third-party
import streamlit as st
from datasets import Dataset, DatasetDict

# Import necessary modules from lib
from lib.code_reviewer import CodeReviewer, ReviewManager

# Sample Training Data for Ansible Code Standards
# Each record pairs a review request ("prompt", which ends with the playbook
# under review) with the reference feedback ("response"). The long strings are
# assembled from adjacent literals, one per line of embedded text, so the
# playbook layout is readable directly in the source.
sample_data = [
    {
        "prompt": (
            "Review the following Ansible script thoroughly according to best practices:\n\n"
            "- Avoid using hard-coded values\n"
            "- Ensure idempotency\n"
            "- Proper error handling\n\n"
            "Code:\n"
            "---\n"
            "- name: Install nginx\n"
            "  hosts: web\n"
            "  tasks:\n"
            "    - name: Install nginx package\n"
            "      apt:\n"
            "        name: nginx\n"
            "        state: present"
        ),
        "response": (
            "Ensure that the playbook includes proper error handling for the package installation. "
            "Consider using a retry mechanism and notify handlers if required. "
            "Avoid hard-coding package names where possible."
        ),
    },
    {
        "prompt": (
            "Review the following Ansible script thoroughly according to best practices:\n\n"
            "- Avoid using hard-coded values\n"
            "- Ensure idempotency\n"
            "- Use variables\n\n"
            "Code:\n"
            "---\n"
            "- name: Create a directory\n"
            "  hosts: all\n"
            "  tasks:\n"
            "    - name: Create /var/www directory\n"
            "      file:\n"
            "        path: /var/www\n"
            "        state: directory"
        ),
        "response": (
            "It is recommended to use variables for paths such as '/var/www' to ensure flexibility. "
            "The task is idempotent, which is good, but consider adding more context "
            "to describe the usage of the directory."
        ),
    },
]

# Create a DatasetDict for Training.
# DatasetDict maps split names to datasets.Dataset objects, so the list of
# example dicts is converted with Dataset.from_list first; storing the plain
# lists would make DatasetDict methods (map, filter, ...) fail their
# value-type check.
# NOTE(review): the validation split reuses the training examples, so it
# cannot measure generalization - supply held-out examples when available.
training_dataset = DatasetDict({
    "train": Dataset.from_list(sample_data),
    "validation": Dataset.from_list(sample_data),
})

# Streamlit UI for inputting GitHub repository details
st.title("GitHub Repository Code Reviewer")

# Inputs for the repository URL and the branch/tag to download
GITHUB_REPO_URL = st.text_input("Enter the GitHub repository URL:")
GITHUB_BRANCH = st.text_input("Enter the branch or tag to download (default: main):", "main")


def _load_github_token():
    """Return the GitHub token from Streamlit secrets or the environment.

    ``GITHUB_TOKEN`` is looked up in ``st.secrets`` first and then in the
    process environment. An empty string is returned when neither source
    provides it, so the caller can report the missing configuration.
    """
    try:
        token = st.secrets["GITHUB_TOKEN"]
    except Exception:
        # Deliberately broad: a missing secrets file or a missing key surfaces
        # as different exception types depending on the Streamlit version.
        token = ""
    return token or os.environ.get("GITHUB_TOKEN", "")


def _find_yaml_files(root):
    """Return every ``*.yml`` and ``*.yaml`` path found recursively under *root*."""
    found = []
    for pattern in ("*.yml", "*.yaml"):
        found += glob.glob(os.path.join(root, "**", pattern), recursive=True)
    return found


# Check if the GitHub token is provided as a secret in Streamlit.
# NOTE(review): a personal access token used to be hard-coded at this point and
# the check was a constant `1 != 0`, which always reported the token as
# missing. The literal has been removed from the source; treat that token as
# leaked and revoke it.
GITHUB_TOKEN = _load_github_token()

if not GITHUB_TOKEN:
    st.error("GitHub token is not available. Please set it in the secrets.")
else:
    # Add a button for fine-tuning the model
    if st.button("Fine-Tune Model"):
        with st.spinner("Fine-tuning the model with provided dataset..."):
            code_reviewer = CodeReviewer()
            code_reviewer.fine_tune_model(training_dataset)
            st.success("Model fine-tuned successfully.")

    # Render the button unconditionally, then validate the inputs it needs.
    review_requested = st.button("Review Code")
    repo_url = GITHUB_REPO_URL.strip()
    # An emptied branch field falls back to the default advertised in its label.
    branch = GITHUB_BRANCH.strip() or "main"

    if review_requested and not repo_url:
        # Previously this case did nothing at all, leaving the user guessing.
        st.warning("Please enter a GitHub repository URL before starting a review.")
    elif review_requested:
        # Directory structure setup
        download_directory = "downloaded_repo"
        output_directory = "output_reviews"

        # Ensure the directories exist
        os.makedirs(download_directory, exist_ok=True)
        os.makedirs(output_directory, exist_ok=True)

        # Initialize the code reviewer and review manager
        with st.spinner("Initializing CodeReviewer and ReviewManager..."):
            code_reviewer = CodeReviewer()
            review_manager = ReviewManager(reviewer=code_reviewer)

        try:
            # Download GitHub repository
            with st.spinner("Downloading GitHub repository..."):
                review_manager.download_repo(repo_url, branch, GITHUB_TOKEN, download_directory)
                st.success("Repository downloaded successfully.")

            # Find all YAML files in the downloaded repository
            with st.spinner("Searching for YAML files in the downloaded repository..."):
                yaml_files = _find_yaml_files(download_directory)
                st.info(f"Found {len(yaml_files)} YAML files for review.")

            # Process files and generate reviews
            with st.spinner("Processing files for review..."):
                reviews = review_manager.process_files(yaml_files)
                st.success("Files processed successfully.")

            # Save reviews to JSON
            output_json_path = os.path.join(output_directory, "code_review_results.json")
            review_manager.save_reviews_to_json(reviews, output_json_path)

            # Display review results
            st.success(f"Reviews saved to {output_json_path}")
            for review in reviews:
                st.subheader(f"Review for {review['filename']}")
                st.text(review['review'])

            # Display JSON output on the UI
            st.subheader("Full JSON Review Output")
            st.json(reviews)

            # Provide download link for JSON results
            with open(output_json_path, "r") as json_file:
                st.download_button(
                    "Download JSON Results",
                    json_file,
                    file_name="code_review_results.json",
                    mime="application/json",
                )

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead of
            # crashing the Streamlit script run.
            st.error(f"An error occurred: {str(e)}")