# Spaces: Sleeping -- hosting-page status banner captured together with the file; not part of the program.
import io
import json
import os
import time  # Added for simulating mock delay
import zipfile
from pathlib import Path

import google.generativeai as genai
import streamlit as st
# --- Configuration ---

# Name of the Gemini model handed to genai.GenerativeModel and shown in the UI.
GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"

# Ceiling for the estimated prompt size; prompt construction stops adding
# files once the running estimate would exceed it.
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Adjust as needed

# Analysis key -> human-readable label (used for sidebar checkboxes and
# report section titles). Insertion order is the display order.
AVAILABLE_ANALYSES = dict(
    generate_docs="Generate Missing Docstrings/Comments",
    find_bugs="Identify Potential Bugs & Anti-patterns",
    check_style="Check Style Guide Compliance (General)",
    summarize_modules="Summarize Complex Modules/Files",
    suggest_refactoring="Suggest Refactoring Opportunities",
)

# Lower-cased file suffixes that are treated as source code when scanning
# an uploaded archive.
CODE_EXTENSIONS = {
    '.py',
    '.js',
    '.java',
    '.c',
    '.cpp',
    '.h',
    '.cs',
    '.go',
    '.rb',
    '.php',
    '.swift',
    '.kt',
    '.ts',
    '.html',
    '.css',
    '.scss',
    '.sql',
}
# --- Session State Initialization ---
# Seed the mock-mode flag only when the session does not already carry one.
if "mock_api_call" not in st.session_state:
    st.session_state["mock_api_call"] = False  # Default to using the real API

# --- Gemini API Setup ---
# Module-level handle for the Gemini client; initialize_gemini_model() fills it in.
model = None
def initialize_gemini_model():
    """Ensure the module-level Gemini ``model`` is ready, unless mock mode is on.

    Returns:
        bool: True when mock mode is active, when a model already exists, or
        when this call created one. The failure paths report the problem
        through ``st.error`` and then call ``st.stop()``; False is returned
        only if execution continues past that call.
    """
    global model

    # Mock mode never needs a real client.
    if st.session_state.mock_api_call:
        print("Running in Mock Mode. Skipping Gemini initialization.")
        return True  # Allow proceeding in mock mode

    # A client created earlier is reused as-is.
    if model is not None:
        print("Gemini Model already initialized.")
        return True

    try:
        if 'GEMINI_API_KEY' not in st.secrets:
            st.error("π¨ Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
            st.stop()
        genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
        model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        print("Gemini Model Initialized.")
        return True
    except Exception as init_error:
        st.error(f"π¨ Error initializing Gemini SDK: {init_error}")
        st.stop()
        return False
# --- Helper Functions --- | |
def estimate_token_count(text):
    """Roughly estimate a token count at about three characters per token.

    Args:
        text: Either the text to measure, or an ``int`` that already holds a
            character count (the upload summary passes its running
            character total rather than the text itself).

    Returns:
        int: The character count floor-divided by three.
    """
    # Accept a ready-made character count so callers that only track totals
    # do not hit ``len()`` on an int.
    char_count = text if isinstance(text, int) else len(text)
    return char_count // 3
def process_zip_file(uploaded_file):
    """Extract the text of every recognised code file from an uploaded ZIP.

    Directories, members with a path component starting with ``.``, and
    members whose archive path contains ``__`` are skipped silently. Members
    whose suffix is not in ``CODE_EXTENSIONS`` are recorded in the ignored
    list. Content is decoded as UTF-8, falling back to Latin-1.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the archive.

    Returns:
        tuple: ``(code_files, total_chars, file_count, ignored_files)`` where
            code_files (dict): Mapping of archive paths to decoded content.
            total_chars (int): Total number of characters in included files.
            file_count (int): Count of processed code files.
            ignored_files (list): Descriptions of files skipped or unreadable.
        If the archive cannot be processed at all, an error is shown and
        ``(None, 0, 0, [])`` is returned.
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    try:
        with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
            for member in zip_ref.infolist():
                file_path = Path(member.filename)

                # Skip directories, hidden files, and files with '__' in the name
                if (
                    member.is_dir()
                    or any(part.startswith('.') for part in file_path.parts)
                    or '__' in member.filename
                ):
                    continue

                if file_path.suffix.lower() not in CODE_EXTENSIONS:
                    ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
                    continue

                try:
                    # Read the bytes exactly once: a second read() on the same
                    # member stream returns nothing, which would make the
                    # fallback decode below produce an empty file.
                    raw_bytes = zip_ref.read(member)
                except Exception as read_err:
                    ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                    continue

                try:
                    content = raw_bytes.decode('utf-8')
                except UnicodeDecodeError:
                    # Latin-1 maps every byte value, so this fallback cannot fail.
                    content = raw_bytes.decode('latin-1')

                code_files[member.filename] = content
                total_chars += len(content)
                file_count += 1
    except zipfile.BadZipFile:
        st.error("π¨ Invalid or corrupted ZIP file.")
        return None, 0, 0, []
    except Exception as e:
        st.error(f"π¨ Error processing ZIP file: {e}")
        return None, 0, 0, []
    return code_files, total_chars, file_count, ignored_files
def construct_analysis_prompt(code_files_dict, requested_analyses):
    """Assemble the Gemini prompt: intro, delimited file contents, JSON schema request.

    Files are added in dictionary order until the running token estimate
    would exceed ``MAX_PROMPT_TOKENS_ESTIMATE``; a warning is shown and the
    remaining files are left out.

    Args:
        code_files_dict: Mapping of file path to file content.
        requested_analyses: Collection of analysis keys; each selected key
            contributes one entry to the requested JSON structure.

    Returns:
        full_prompt (str): The complete prompt, or None if no file fit.
        included_files (list): List of file names included in the prompt.
    """
    intro = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
    tokens_so_far = estimate_token_count(intro)
    included_files = []
    code_segments = []

    for filename, content in code_files_dict.items():
        segment = (
            f"--- START FILE: {filename} ---\n"
            f"{content}\n"
            f"--- END FILE: {filename} ---\n\n"
        )
        segment_tokens = estimate_token_count(segment)
        if tokens_so_far + segment_tokens > MAX_PROMPT_TOKENS_ESTIMATE:
            st.warning(f"β οΈ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({tokens_so_far} tokens).")
            break
        code_segments.append(segment)
        tokens_so_far += segment_tokens
        included_files.append(filename)

    if not included_files:
        st.error("π¨ No code files could be included within the estimated token limit.")
        return None, []

    # One schema line per analysis, kept in this fixed order regardless of
    # the order in which the analyses were requested.
    schema_lines_by_analysis = (
        ("generate_docs", ' "documentation_suggestions": [{"file": "path/to/file", "line": number, "suggestion": "Suggested docstring/comment"}]'),
        ("find_bugs", ' "potential_bugs": [{"file": "path/to/file", "line": number, "description": "Description of potential bug/anti-pattern", "severity": "High/Medium/Low"}]'),
        ("check_style", ' "style_issues": [{"file": "path/to/file", "line": number, "description": "Description of style deviation"}]'),
        ("summarize_modules", ' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]'),
        ("suggest_refactoring", ' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]'),
    )
    structure_parts = [
        schema_line
        for analysis_key, schema_line in schema_lines_by_analysis
        if analysis_key in requested_analyses
    ]
    json_structure_description = "{\n" + ",\n".join(structure_parts) + "\n}"

    prompt_footer = f"""
**Analysis Task:**
Perform the analyses corresponding to the keys present in the JSON structure below, based *only* on the provided code files ({', '.join(included_files)}).
**Output Format:**
Respond ONLY with a single, valid JSON object adhering strictly to the following structure. If no issues/suggestions are found for a category, provide an empty list `[]`. Do not include explanations outside the JSON structure.
{json_structure_description}
**JSON Output Only:**
"""
    full_prompt = intro + "".join(code_segments) + prompt_footer
    return full_prompt, included_files
def _extract_json_object_text(response_text):
    """Return the outermost ``{...}`` span of *response_text*, or None if there is none."""
    text = response_text.strip()
    # Remove potential markdown code block fences
    if text.startswith("```json"):
        text = text[7:]
    if text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    # Extract JSON object boundaries
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        return None
    return text[start:end + 1]


def _get_block_reason(response):
    """Best-effort lookup of the prompt block reason; None when it cannot be read."""
    try:
        return response.prompt_feedback.block_reason or None
    except Exception:
        # Deliberately tolerant: this only runs while already reporting an error.
        return None


def _describe_api_error(exc):
    """Map an API exception to a user-facing message, falling back to its text."""
    # Not every exception has ``.message`` (and it need not be a str), so the
    # string form of the exception is always searched as well.
    detail = f"{getattr(exc, 'message', '')} {exc}"
    if "429" in detail:
        return "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
    if "API key not valid" in detail:
        return "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
    if "blocked" in detail.lower() or "block_reason: SAFETY" in detail:
        return "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
    return f"API call failed: {exc}"


def call_gemini_api(prompt):
    """Send *prompt* to Gemini (or simulate the call in mock mode) and parse the reply.

    Args:
        prompt: The full prompt text; a falsy value short-circuits with an error.

    Returns:
        insights (dict): The parsed JSON response, ``{"raw_response": ...}``
            when no JSON object could be located, or None on failure.
        error_message (str): A description of what went wrong, or None on success.
    """
    if not prompt:
        return None, "Prompt generation failed."

    # --- MOCK MODE LOGIC ---
    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(2)  # Simulate network/processing delay
        # Simulated successful response
        mock_insights = {
            "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
            "potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
            "style_issues": [{"file": "mock/core.py", "line": 5, "description": "Variable 'varName' does not follow snake_case convention."}],
            "module_summaries": [
                {"file": "mock/core.py", "summary": "This file contains the core mock processing logic."},
                {"file": "mock/utils.py", "summary": "Utility functions for mocking."}
            ],
            "refactoring_suggestions": [{"file": "mock/utils.py", "line": 30, "area": "calculate_metrics function", "suggestion": "Function is too long (> 50 lines), consider breaking it down."}]
        }
        st.success("Mock response generated successfully.")
        return mock_insights, None

    # --- REAL API CALL LOGIC ---
    if not initialize_gemini_model():
        return None, "Gemini Model Initialization Failed."
    if model is None:
        return None, "Gemini model not available."

    try:
        st.write(f"π‘ Sending request to {GEMINI_MODEL_NAME}...")
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.2),
            safety_settings=[
                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            ]
        )
        st.write("β Response received from AI.")
        try:
            final_json_text = _extract_json_object_text(response.text)
            if final_json_text is None:
                st.warning("β οΈ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
                return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
            return json.loads(final_json_text), None
        except json.JSONDecodeError as json_err:
            st.error(f"π¨ Error parsing JSON response from AI: {json_err}")
            st.error("Raw AI Response:")
            st.code(response.text, language='text')
            return None, f"AI response was not valid JSON: {json_err}"
        except AttributeError:
            st.error("π¨ Unexpected API response structure.")
            st.code(f"Response object: {response}", language='text')
            block_reason = _get_block_reason(response)
            if block_reason:
                return None, f"Content blocked by API. Reason: {block_reason}"
            return None, "Unexpected response structure from API."
        except Exception as e:
            st.error(f"π¨ Unexpected issue processing AI response: {e}")
            try:
                st.code(f"Response object: {response}", language='text')
            except Exception:
                # Rendering the response is only a debugging aid; ignore failures.
                pass
            # Reading the response text can itself fail when the prompt was
            # blocked, so surface the block reason when one is available.
            block_reason = _get_block_reason(response)
            if block_reason:
                return None, f"Content blocked by API. Reason: {block_reason}"
            return None, f"Unexpected response structure: {e}"
    except Exception as e:
        st.error(f"π¨ An error occurred during API call: {e}")
        return None, _describe_api_error(e)
def display_results(results_json, requested_analyses):
    """Render the analysis results in the Streamlit interface.

    Args:
        results_json: The parsed model output. Anything that is not a dict is
            shown as-is with an error; a dict holding ``"raw_response"`` is
            shown as raw text.
        requested_analyses: Iterable of analysis keys; one report section is
            rendered per key that has a display configuration.

    Returns:
        None. Output goes to the Streamlit page, ending with a download
        button for the full JSON report.
    """
    st.header("π Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format received.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    def display_list_items(items, fields):
        """Render one category: a bullet per item, then a divider."""
        if items:
            # The model does not always honour the requested schema: tolerate a
            # single object/string where a list was expected.
            if not isinstance(items, list):
                items = [items]
            for item in items:
                if not isinstance(item, dict):
                    # Entries without fields cannot be formatted; show them verbatim.
                    st.markdown(f"- {item}")
                    continue
                details = []
                for field_key, field_label in fields.items():
                    value = item.get(field_key, 'N/A')
                    if value != 'N/A':
                        details.append(f"**{field_label}:** {value}")
                st.markdown("- " + " - ".join(details))
                # Display multi-line outputs when applicable
                if 'suggestion' in item:
                    st.code(item['suggestion'], language='text')
                elif 'description' in item:
                    st.markdown(f" > {item['description']}")
                elif 'summary' in item:
                    st.markdown(f" > {item['summary']}")
        else:
            st.markdown("_No items found for this category._")
        st.divider()

    # Analysis key -> result key in the JSON, section title, and the fields
    # summarised on each bullet line.
    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"}
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"}
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"}
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"}
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"}
        }
    }

    any_results = False
    for analysis_key in requested_analyses:
        if analysis_key in display_config:
            config = display_config[analysis_key]
            st.subheader(config["title"])
            items = results_json.get(config["key"], [])
            display_list_items(items, config["fields"])
            if items:
                any_results = True
    if not any_results:
        st.info("No specific findings were identified in the analysis based on your selections.")

    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("π€ Codebase Audit & Documentation Assistant")
st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")

# Sidebar controls
with st.sidebar:
    st.header("βοΈ Analysis Controls")
    # The toggle is seeded from, and written back to, the session flag that
    # the API helpers consult.
    st.session_state.mock_api_call = st.toggle(
        "π§ͺ Enable Mock API Mode (for Testing)",
        value=st.session_state.mock_api_call,
        help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing."
    )
    if st.session_state.mock_api_call:
        st.info("Mock API Mode ACTIVE")
    else:
        st.info("Using REAL Gemini API")
    st.divider()

    st.header("π Select Analyses")
    # One checkbox per available analysis; the ticked keys drive both the
    # prompt and the report sections.
    selected_analyses = []
    for key, name in AVAILABLE_ANALYSES.items():
        if st.checkbox(name, value=True, key=f"cb_{key}"):
            selected_analyses.append(key)
    st.divider()

    st.header("π How To Use")
    st.info(
        "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select desired analyses.\n"
        "4. Create a **ZIP archive** of your codebase.\n"
        "5. Upload the `.zip` file.\n"
        "6. Click 'Analyze Codebase'.\n"
        "7. Review the report."
    )
    # CODE_EXTENSIONS is a set, so sort it to show the list in a stable order.
    st.info(f"**Note:** Only files with common code extensions ({', '.join(sorted(CODE_EXTENSIONS))}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
    st.divider()
    st.warning("β οΈ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode is OFF. Do not upload sensitive code if uncomfortable.")
# Main content area
uploaded_file = st.file_uploader("π Upload Codebase ZIP File", type=['zip'], key="file_uploader")
analysis_triggered = False
results_cache = None  # To store results briefly

if uploaded_file:
    st.success(f"β File '{uploaded_file.name}' uploaded.")
    with st.spinner("Inspecting ZIP file..."):
        code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)

    # code_files is None when ZIP processing failed (error already displayed).
    if code_files is not None:
        # estimate_token_count measures text, so estimate each file's content
        # instead of handing it the integer character total.
        estimated_tokens = sum(estimate_token_count(content) for content in code_files.values())
        st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimated_tokens:,}")
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')

        # The button is disabled unless there is something to analyse, so the
        # click handler below can rely on both preconditions.
        analyze_button_disabled = (not selected_analyses or file_count == 0)
        analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
        if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
            analysis_triggered = True
            st.divider()
            with st.spinner(f"π Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... This may take time."):
                analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                if analysis_prompt and included_files_in_prompt:
                    st.write(f"Analyzing {len(included_files_in_prompt)} files...")
                    results_json, error_message = call_gemini_api(analysis_prompt)
                    results_cache = (results_json, error_message)
                elif not included_files_in_prompt:
                    results_cache = (None, "Could not proceed: No files included in prompt (check token limits/errors).")
                else:
                    results_cache = (None, "Failed to generate analysis prompt.")

if analysis_triggered and results_cache:
    results_json, error_message = results_cache
    st.divider()
    if error_message:
        st.error(f"Analysis Failed: {error_message}")
        # A failed parse still carries the raw model text; show it for diagnosis.
        if isinstance(results_json, dict) and "raw_response" in results_json:
            st.subheader("Raw AI Response")
            st.code(results_json["raw_response"], language='text')
    elif results_json:
        display_results(results_json, selected_analyses)
    else:
        st.error("Analysis did not return results or an unknown error occurred.")
elif not uploaded_file:
    st.info("Upload a ZIP file containing your source code to begin.")

st.divider()
st.markdown("_Assistant powered by Google Gemini._")