"""Streamlit app that audits an uploaded codebase (ZIP) with Google Gemini.

Flow: the user uploads a ZIP archive, the app extracts the source files with
known code extensions, concatenates them into one prompt (bounded by a rough
token estimate), asks Gemini for a JSON report, and renders that report.
A "mock mode" toggle returns canned data so the UI can be exercised without
calling the real API.
"""

import io
import json
import os
import time  # Used to simulate latency in mock mode
import zipfile
from pathlib import Path

import google.generativeai as genai
import streamlit as st

# --- Configuration ---
GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Adjust as needed

# Analysis key -> label shown next to the sidebar checkbox.
AVAILABLE_ANALYSES = {
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities",
}

# Only files with one of these suffixes (case-insensitive) are analysed.
CODE_EXTENSIONS = {
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php',
    '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql',
}

# Analysis key -> one line of the JSON schema described to the model.
# Insertion order is the order the categories appear in the prompt.
_JSON_STRUCTURE_PARTS = {
    "generate_docs": ' "documentation_suggestions": [{"file": "path/to/file", "line": number, "suggestion": "Suggested docstring/comment"}]',
    "find_bugs": ' "potential_bugs": [{"file": "path/to/file", "line": number, "description": "Description of potential bug/anti-pattern", "severity": "High/Medium/Low"}]',
    "check_style": ' "style_issues": [{"file": "path/to/file", "line": number, "description": "Description of style deviation"}]',
    "summarize_modules": ' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]',
    "suggest_refactoring": ' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]',
}

# Analysis key -> how to render that category of the report:
# "key" is the field in the model's JSON, "fields" maps item keys to labels.
_DISPLAY_CONFIG = {
    "generate_docs": {
        "key": "documentation_suggestions",
        "title": AVAILABLE_ANALYSES["generate_docs"],
        "fields": {"file": "File", "line": "Line"},
    },
    "find_bugs": {
        "key": "potential_bugs",
        "title": AVAILABLE_ANALYSES["find_bugs"],
        "fields": {"file": "File", "line": "Line", "severity": "Severity"},
    },
    "check_style": {
        "key": "style_issues",
        "title": AVAILABLE_ANALYSES["check_style"],
        "fields": {"file": "File", "line": "Line"},
    },
    "summarize_modules": {
        "key": "module_summaries",
        "title": AVAILABLE_ANALYSES["summarize_modules"],
        "fields": {"file": "File"},
    },
    "suggest_refactoring": {
        "key": "refactoring_suggestions",
        "title": AVAILABLE_ANALYSES["suggest_refactoring"],
        "fields": {"file": "File", "line": "Line", "area": "Area"},
    },
}

# --- Session State Initialization ---
if 'mock_api_call' not in st.session_state:
    st.session_state.mock_api_call = False  # Default to using the real API

# --- Gemini API Setup ---
model = None


def initialize_gemini_model():
    """Create the module-level Gemini model unless mock mode is active.

    Returns:
        True when the app may proceed: mock mode is on, the model was already
        created, or it was created successfully by this call. On a missing
        API key or an SDK error the problem is reported with ``st.error`` and
        the script run is halted with ``st.stop()``.
    """
    global model

    if st.session_state.mock_api_call:
        print("Running in Mock Mode. Skipping Gemini initialization.")
        return True  # Allow proceeding in mock mode

    if model is not None:
        print("Gemini Model already initialized.")
        return True

    try:
        if 'GEMINI_API_KEY' not in st.secrets:
            st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
            st.stop()
        genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
        model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        print("Gemini Model Initialized.")
        return True
    except Exception as e:
        st.error(f"🚨 Error initializing Gemini SDK: {e}")
        st.stop()
        return False


# --- Helper Functions ---
def estimate_token_count(text):
    """Roughly estimate a token count, assuming about 3 characters per token.

    Args:
        text: Either the text itself or an ``int`` character count. The UI
            only has the total character count available, so both are
            accepted (calling ``len()`` on an int would raise ``TypeError``).

    Returns:
        The estimated number of tokens as an int.
    """
    char_count = text if isinstance(text, int) else len(text)
    return char_count // 3


def process_zip_file(uploaded_file):
    """Extract code files and their content from the uploaded ZIP file.

    Directories, any path with a component starting with '.', and any path
    containing '__' are skipped silently. Files whose suffix is not in
    ``CODE_EXTENSIONS`` are recorded in the ignored list.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the ZIP
            archive as bytes (the value returned by ``st.file_uploader``).

    Returns:
        A 4-tuple ``(code_files, total_chars, file_count, ignored_files)``:
        code_files (dict): Mapping of archive paths to decoded content.
        total_chars (int): Total number of characters in included files.
        file_count (int): Count of processed code files.
        ignored_files (list): Descriptions of files skipped or unreadable.
        If the archive cannot be processed, an error is shown and
        ``(None, 0, 0, [])`` is returned.
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []

    try:
        with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
            for member in zip_ref.infolist():
                file_path = Path(member.filename)

                # Skip directories, hidden files, and files with '__' in the name
                if (
                    member.is_dir()
                    or any(part.startswith('.') for part in file_path.parts)
                    or '__' in member.filename
                ):
                    continue

                if file_path.suffix.lower() not in CODE_EXTENSIONS:
                    ignored_files.append(
                        f"{member.filename} (Skipped Extension: {file_path.suffix})"
                    )
                    continue

                try:
                    # Read the bytes exactly once: a second read() on the same
                    # stream returns b"" and would silently yield empty content.
                    raw_bytes = zip_ref.read(member)
                except Exception as read_err:
                    ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                    continue

                try:
                    content = raw_bytes.decode('utf-8')
                except UnicodeDecodeError:
                    # latin-1 maps every byte to a character, so this cannot fail.
                    content = raw_bytes.decode('latin-1')

                code_files[member.filename] = content
                total_chars += len(content)
                file_count += 1
    except zipfile.BadZipFile:
        st.error("🚨 Invalid or corrupted ZIP file.")
        return None, 0, 0, []
    except Exception as e:
        st.error(f"🚨 Error processing ZIP file: {e}")
        return None, 0, 0, []

    return code_files, total_chars, file_count, ignored_files


def construct_analysis_prompt(code_files_dict, requested_analyses):
    """Build the Gemini prompt: code content plus the requested JSON schema.

    Files are appended in dict order until the running token estimate would
    exceed ``MAX_PROMPT_TOKENS_ESTIMATE``; the remaining files are dropped
    and a warning is shown.

    Args:
        code_files_dict: Mapping of file paths to file content.
        requested_analyses: Collection of keys from ``AVAILABLE_ANALYSES``.

    Returns:
        full_prompt (str): The complete prompt, or None if no file fits.
        included_files (list): File names included in the prompt.
    """
    prompt_header = (
        "Analyze the following codebase provided as a collection of file paths "
        "and their content.\n\n"
    )
    current_token_estimate = estimate_token_count(prompt_header)
    included_files = []
    segments = []

    for filename, content in code_files_dict.items():
        segment = (
            f"--- START FILE: {filename} ---\n"
            f"{content}\n"
            f"--- END FILE: {filename} ---\n\n"
        )
        segment_token_estimate = estimate_token_count(segment)

        if current_token_estimate + segment_token_estimate > MAX_PROMPT_TOKENS_ESTIMATE:
            st.warning(
                f"⚠️ Codebase may exceed context window estimate "
                f"(~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on "
                f"the first {len(included_files)} files ({current_token_estimate} tokens)."
            )
            break

        segments.append(segment)
        current_token_estimate += segment_token_estimate
        included_files.append(filename)

    if not included_files:
        st.error("🚨 No code files could be included within the estimated token limit.")
        return None, []

    # Build the expected JSON structure dynamically based on the selected analyses
    structure_parts = [
        part
        for analysis_key, part in _JSON_STRUCTURE_PARTS.items()
        if analysis_key in requested_analyses
    ]
    json_structure_description = "{\n" + ",\n".join(structure_parts) + "\n}"

    file_list = ', '.join(included_files)
    prompt_footer = "\n".join([
        "",
        "**Analysis Task:**",
        "Perform the analyses corresponding to the keys present in the JSON "
        f"structure below, based *only* on the provided code files ({file_list}).",
        "",
        "**Output Format:**",
        "Respond ONLY with a single, valid JSON object adhering strictly to the "
        "following structure. If no issues/suggestions are found for a category, "
        "provide an empty list `[]`. Do not include explanations outside the JSON "
        "structure.",
        "",
        json_structure_description,
        "",
        "**JSON Output Only:**",
        "",
    ])

    full_prompt = prompt_header + "".join(segments) + prompt_footer
    return full_prompt, included_files


def _mock_insights():
    """Return a canned report covering every analysis category (mock mode)."""
    return {
        "documentation_suggestions": [
            {
                "file": "mock/core.py",
                "line": 15,
                "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\"",
            }
        ],
        "potential_bugs": [
            {
                "file": "mock/utils.py",
                "line": 22,
                "description": "Potential division by zero if denominator is not checked.",
                "severity": "Medium",
            }
        ],
        "style_issues": [
            {
                "file": "mock/core.py",
                "line": 5,
                "description": "Variable 'varName' does not follow snake_case convention.",
            }
        ],
        "module_summaries": [
            {"file": "mock/core.py", "summary": "This file contains the core mock processing logic."},
            {"file": "mock/utils.py", "summary": "Utility functions for mocking."},
        ],
        "refactoring_suggestions": [
            {
                "file": "mock/utils.py",
                "line": 30,
                "area": "calculate_metrics function",
                "suggestion": "Function is too long (> 50 lines), consider breaking it down.",
            }
        ],
    }


def _extract_json_text(response_text):
    """Return the outermost ``{...}`` span of a model reply, or None if absent."""
    text = response_text.strip()

    # Remove potential markdown code block fences
    if text.startswith("```json"):
        text = text[7:]
    if text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]

    # Extract JSON object boundaries
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end <= start:
        return None
    return text[start:end + 1]


def _describe_api_error(error):
    """Map an exception raised by the API call to a user-facing message."""
    # Some SDK exceptions carry a `message` attribute; it is not guaranteed to
    # exist or to be a string, so search it together with str(error).
    message = getattr(error, 'message', None)
    details = f"{message} {error}" if isinstance(message, str) else str(error)

    if "429" in details:
        return "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
    if "API key not valid" in details:
        return "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
    if "blocked" in details.lower() or "block_reason: SAFETY" in details:
        return "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
    return f"API call failed: {error}"


def _parse_api_response(response):
    """Turn a Gemini response object into ``(insights, error_message)``."""
    try:
        raw_text = response.text
    except Exception as e:
        # Assumption (see SDK docs): `.text` raises when the reply has no
        # usable text, e.g. when the prompt was blocked.
        st.error(f"🚨 Unexpected issue processing AI response: {e}")
        try:
            st.code(f"Response object: {response}", language='text')
        except Exception:
            pass  # Best effort: showing the raw object must not mask the error.
        prompt_feedback = getattr(response, 'prompt_feedback', None)
        block_reason = getattr(prompt_feedback, 'block_reason', None)
        if block_reason:
            return None, f"Content blocked by API. Reason: {block_reason}"
        return None, f"Unexpected response structure: {e}"

    json_text = _extract_json_text(raw_text)
    if json_text is None:
        st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
        return {"raw_response": raw_text}, "AI response did not contain clear JSON object, showing raw text."

    try:
        return json.loads(json_text), None
    except json.JSONDecodeError as json_err:
        st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
        st.error("Raw AI Response:")
        st.code(raw_text, language='text')
        return None, f"AI response was not valid JSON: {json_err}"


def call_gemini_api(prompt):
    """Call the Gemini API (or simulate it in mock mode) with the prompt.

    Args:
        prompt: The full prompt text; a falsy value is treated as a failure.

    Returns:
        insights (dict): The parsed JSON response, ``{"raw_response": text}``
            when no JSON object could be located, or None on failure.
        error_message (str): Description of what went wrong, or None.
    """
    if not prompt:
        return None, "Prompt generation failed."

    # --- MOCK MODE LOGIC ---
    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(2)  # Simulate network/processing delay
        insights = _mock_insights()
        st.success("Mock response generated successfully.")
        return insights, None

    # --- REAL API CALL LOGIC ---
    if not initialize_gemini_model():
        return None, "Gemini Model Initialization Failed."
    if model is None:
        return None, "Gemini model not available."

    try:
        st.write(f"📡 Sending request to {GEMINI_MODEL_NAME}...")
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.2),
            safety_settings=[
                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            ],
        )
        st.write("✅ Response received from AI.")
    except Exception as e:
        st.error(f"🚨 An error occurred during API call: {e}")
        return None, _describe_api_error(e)

    return _parse_api_response(response)


def _display_list_items(items, fields):
    """Render one report category as a bullet list followed by a divider.

    Args:
        items: List of result dicts for the category (anything else is
            treated as "no items", since the model output is untrusted).
        fields: Mapping of item key -> label for the bullet's header line.
    """
    if not isinstance(items, list) or not items:
        st.markdown("_No items found for this category._")
        st.divider()
        return

    for item in items:
        if not isinstance(item, dict):
            # The model may return bare strings; show them rather than crash.
            st.markdown(f"- {item}")
            continue

        details = []
        for field_key, field_label in fields.items():
            value = item.get(field_key, 'N/A')
            if value != 'N/A':
                details.append(f"**{field_label}:** {value}")
        st.markdown("- " + " - ".join(details))

        # Display multi-line outputs when applicable
        if 'suggestion' in item:
            st.code(item['suggestion'], language='text')
        elif 'description' in item:
            st.markdown(f" > {item['description']}")
        elif 'summary' in item:
            st.markdown(f" > {item['summary']}")

    st.divider()


def display_results(results_json, requested_analyses):
    """Render the analysis results in the Streamlit interface.

    Args:
        results_json: Parsed report dict from ``call_gemini_api``.
        requested_analyses: Keys of the analyses to show, in display order.
    """
    st.header("📊 Analysis Report")

    if not isinstance(results_json, dict):
        st.error("Invalid results format received.")
        st.json(results_json)
        return

    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    any_results = False
    for analysis_key in requested_analyses:
        config = _DISPLAY_CONFIG.get(analysis_key)
        if config is None:
            continue
        st.subheader(config["title"])
        items = results_json.get(config["key"], [])
        _display_list_items(items, config["fields"])
        if items:
            any_results = True

    if not any_results:
        st.info("No specific findings were identified in the analysis based on your selections.")

    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json",
    )


# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("🤖 Codebase Audit & Documentation Assistant")
st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")

# Sidebar controls
with st.sidebar:
    st.header("⚙️ Analysis Controls")
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode (for Testing)",
        value=st.session_state.mock_api_call,
        help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing.",
    )
    if st.session_state.mock_api_call:
        st.info("Mock API Mode ACTIVE")
    else:
        st.info("Using REAL Gemini API")

    st.divider()
    st.header("🔎 Select Analyses")
    selected_analyses = []
    for key, name in AVAILABLE_ANALYSES.items():
        if st.checkbox(name, value=True, key=f"cb_{key}"):
            selected_analyses.append(key)

    st.divider()
    st.header("📄 How To Use")
    st.info(
        "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select desired analyses.\n"
        "4. Create a **ZIP archive** of your codebase.\n"
        "5. Upload the `.zip` file.\n"
        "6. Click 'Analyze Codebase'.\n"
        "7. Review the report."
    )
    # Sorted so the list reads the same on every run (set order is arbitrary).
    st.info(
        f"**Note:** Only files with common code extensions "
        f"({', '.join(sorted(CODE_EXTENSIONS))}) are processed. Analysis might be "
        f"limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens)."
    )
    st.divider()
    st.warning(
        "⚠️ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode "
        "is OFF. Do not upload sensitive code if uncomfortable."
    )

# Main content area
uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader")

analysis_triggered = False
results_cache = None  # (results_json, error_message) of the run triggered in this rerun

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    with st.spinner("Inspecting ZIP file..."):
        code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)

    # code_files is None when ZIP processing failed (error already displayed).
    if code_files is not None:
        st.info(
            f"Found **{file_count}** relevant code files ({total_chars:,} characters). "
            f"Est. tokens: ~{estimate_token_count(total_chars):,}"
        )
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')

        analyze_button_disabled = (not selected_analyses or file_count == 0)
        analyze_button_label = (
            "Analyze Codebase"
            if not analyze_button_disabled
            else "Select Analyses or Upload Valid Code"
        )

        if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
            analysis_triggered = True
            if not selected_analyses:
                st.warning("Please select at least one analysis type from the sidebar.")
            elif file_count == 0:
                st.warning("No relevant code files found in the ZIP archive to analyze.")
            else:
                st.divider()
                with st.spinner(
                    f"🚀 Preparing prompt & contacting AI "
                    f"({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... "
                    f"This may take time."
                ):
                    analysis_prompt, included_files_in_prompt = construct_analysis_prompt(
                        code_files, selected_analyses
                    )
                    if analysis_prompt and included_files_in_prompt:
                        st.write(f"Analyzing {len(included_files_in_prompt)} files...")
                        results_json, error_message = call_gemini_api(analysis_prompt)
                        results_cache = (results_json, error_message)
                    elif not included_files_in_prompt:
                        results_cache = (
                            None,
                            "Could not proceed: No files included in prompt (check token limits/errors).",
                        )
                    else:
                        results_cache = (None, "Failed to generate analysis prompt.")

if analysis_triggered and results_cache:
    results_json, error_message = results_cache
    st.divider()
    if error_message:
        st.error(f"Analysis Failed: {error_message}")
        if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
            st.subheader("Raw AI Response")
            st.code(results_json["raw_response"], language='text')
    elif results_json is not None:
        # An empty but valid report ({}) is still a result: show "no findings".
        display_results(results_json, selected_analyses)
    else:
        st.error("Analysis did not return results or an unknown error occurred.")
elif not uploaded_file:
    st.info("Upload a ZIP file containing your source code to begin.")

st.divider()
st.markdown("_Assistant powered by Google Gemini._")