import streamlit as st
import google.generativeai as genai
import zipfile
import io
import json
import os
from pathlib import Path
import time  # Added for simulating mock delay

# --- Configuration ---
GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Adjust as needed

AVAILABLE_ANALYSES = {
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities",
}

CODE_EXTENSIONS = {
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php',
    '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql',
}

# --- Session State Initialization ---
# Initialize session state for mock mode toggle if it doesn't exist
if 'mock_api_call' not in st.session_state:
    st.session_state.mock_api_call = False  # Default to using the real API

# --- Gemini API Setup ---
# Defer full initialization until needed if mock mode might be used first
model = None


def initialize_gemini_model():
    """Lazily initialize the global Gemini model.

    Returns True when the model is ready (or when mock mode makes a real
    model unnecessary); False if initialization failed. Calls st.stop()
    on fatal configuration errors (missing key, SDK failure).
    """
    global model
    if model is None and not st.session_state.mock_api_call:
        try:
            if 'GEMINI_API_KEY' not in st.secrets:
                st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
                st.stop()
            genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
            model = genai.GenerativeModel(GEMINI_MODEL_NAME)
            print("Gemini Model Initialized.")
            return True
        except Exception as e:
            st.error(f"🚨 Error initializing Gemini SDK: {e}")
            st.stop()
            return False
    elif st.session_state.mock_api_call:
        print("Running in Mock Mode. Skipping Gemini initialization.")
        return True  # Allow proceeding in mock mode
    elif model is not None:
        print("Gemini Model already initialized.")
        return True
    return False


# --- Helper Functions ---
def estimate_token_count(text):
    """Roughly estimate token count (~3 chars per token).

    Accepts either a string or a pre-computed character count (int), since
    the UI calls this with `total_chars` directly.
    """
    char_count = text if isinstance(text, int) else len(text)
    return char_count // 3


def process_zip_file(uploaded_file):
    """Extract code files and their content from the uploaded zip file.

    Returns (code_files_dict, total_chars, file_count, ignored_files); the
    dict is None when the archive itself could not be processed.
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    try:
        with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
            for member in zip_ref.infolist():
                # Skip directories, hidden paths (dot-prefixed parts) and
                # dunder paths such as __pycache__ / __MACOSX.
                if (member.is_dir()
                        or any(part.startswith('.') for part in Path(member.filename).parts)
                        or '__' in member.filename):
                    continue
                file_path = Path(member.filename)
                if file_path.suffix.lower() in CODE_EXTENSIONS:
                    try:
                        # Read the raw bytes ONCE; decoding retries must reuse
                        # them (a second file.read() would return b'').
                        with zip_ref.open(member) as file:
                            raw_bytes = file.read()
                        try:
                            content = raw_bytes.decode('utf-8')
                        except UnicodeDecodeError:
                            try:
                                content = raw_bytes.decode('latin-1')
                            except Exception as decode_err:
                                ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                continue
                        code_files[member.filename] = content
                        total_chars += len(content)
                        file_count += 1
                    except Exception as read_err:
                        ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                else:
                    # Only add to ignored if it's not explicitly ignored by path rules above
                    if not (any(part.startswith('.') for part in Path(member.filename).parts)
                            or '__' in member.filename):
                        ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
    except zipfile.BadZipFile:
        st.error("🚨 Invalid or corrupted ZIP file.")
        return None, 0, 0, []
    except Exception as e:
        st.error(f"🚨 Error processing ZIP file: {e}")
        return None, 0, 0, []
    return code_files, total_chars, file_count, ignored_files


def construct_analysis_prompt(code_files_dict, requested_analyses):
    """Construct the prompt for Gemini, including code content and the
    requested JSON output structure.

    Files are concatenated until the estimated token budget is reached;
    returns (full_prompt, included_files) or (None, []) if nothing fit.
    """
    prompt_content = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
    current_token_estimate = estimate_token_count(prompt_content)
    included_files = []
    concatenated_code = ""
    for filename, content in code_files_dict.items():
        file_marker = f"--- START FILE: {filename} ---\n"
        file_content = f"{content}\n"
        file_end_marker = f"--- END FILE: {filename} ---\n\n"
        segment = file_marker + file_content + file_end_marker
        segment_token_estimate = estimate_token_count(segment)
        if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
            concatenated_code += segment
            current_token_estimate += segment_token_estimate
            included_files.append(filename)
        else:
            st.warning(
                f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). "
                f"Analysis performed only on the first {len(included_files)} files ({current_token_estimate} tokens)."
            )
            break
    if not included_files:
        st.error("🚨 No code files could be included within the estimated token limit.")
        return None, []
    prompt_content += concatenated_code

    json_structure_description = "{\n"
    # Dynamically build the JSON structure based on selection
    structure_parts = []
    if "generate_docs" in requested_analyses:
        structure_parts.append('  "documentation_suggestions": [{"file": "path/to/file", "line": number, "suggestion": "Suggested docstring/comment"}]')
    if "find_bugs" in requested_analyses:
        structure_parts.append('  "potential_bugs": [{"file": "path/to/file", "line": number, "description": "Description of potential bug/anti-pattern", "severity": "High/Medium/Low"}]')
    if "check_style" in requested_analyses:
        structure_parts.append('  "style_issues": [{"file": "path/to/file", "line": number, "description": "Description of style deviation"}]')
    if "summarize_modules" in requested_analyses:
        structure_parts.append('  "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
    if "suggest_refactoring" in requested_analyses:
        structure_parts.append('  "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
    json_structure_description += ",\n".join(structure_parts)
    json_structure_description += "\n}"

    prompt_footer = f"""

**Analysis Task:** Perform the analyses corresponding to the keys present in the JSON structure below, based *only* on the provided code files ({', '.join(included_files)}).

**Output Format:** Respond ONLY with a single, valid JSON object adhering strictly to the following structure. If no issues/suggestions are found for a category, provide an empty list `[]`. Do not include explanations outside the JSON structure.

{json_structure_description}

**JSON Output Only:**
"""
    full_prompt = prompt_content + prompt_footer
    # print(f"--- PROMPT (First 500 chars): ---\n{full_prompt[:500]}\n--------------------------")
    # print(f"--- PROMPT (Last 500 chars): ---\n{full_prompt[-500:]}\n--------------------------")
    return full_prompt, included_files


def call_gemini_api(prompt):
    """Call the Gemini API (or return mock data, per session state).

    Returns (insights_dict_or_None, error_message_or_None).
    """
    if not prompt:
        return None, "Prompt generation failed."

    # --- MOCK MODE LOGIC ---
    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(2)  # Simulate network/processing delay

        # --- CHOOSE YOUR MOCK RESPONSE ---
        # Option 1: Simulate successful response with some data
        mock_json_response = json.dumps({
            "documentation_suggestions": [
                {"file": "mock/core.py", "line": 15,
                 "suggestion": "def process_data(data):\n    \"\"\"Processes the input data using mock logic.\"\"\""}
            ],
            "potential_bugs": [
                {"file": "mock/utils.py", "line": 22,
                 "description": "Potential division by zero if denominator is not checked.",
                 "severity": "Medium"}
            ],
            "style_issues": [
                {"file": "mock/core.py", "line": 5,
                 "description": "Variable 'varName' does not follow snake_case convention."}
            ],
            "module_summaries": [
                {"file": "mock/core.py", "summary": "This file contains the core mock processing logic."},
                {"file": "mock/utils.py", "summary": "Utility functions for mocking."}
            ],
            "refactoring_suggestions": [
                {"file": "mock/utils.py", "line": 30, "area": "calculate_metrics function",
                 "suggestion": "Function is too long (> 50 lines), consider breaking it down."}
            ]
        })
        st.success("Mock response generated successfully.")
        return json.loads(mock_json_response), None  # Return insights, no error

        # Option 2: Simulate API error
        # st.error("Simulating API error.")
        # return None, "MOCK ERROR: Simulated API Quota Exceeded."

        # Option 3: Simulate invalid JSON response
        # st.warning("Simulating invalid JSON response from AI.")
        # return {"raw_response": "{malformed json'"}, "AI response was not valid JSON, showing raw text."

        # Option 4: Simulate empty results
        # mock_empty_json = json.dumps({
        #     "documentation_suggestions": [], "potential_bugs": [], "style_issues": [],
        #     "module_summaries": [], "refactoring_suggestions": []
        # })
        # st.success("Mock response generated (empty results).")
        # return json.loads(mock_empty_json), None
    # --- END MOCK MODE LOGIC ---

    # --- REAL API CALL LOGIC ---
    else:
        if not initialize_gemini_model():  # Ensure model is ready
            return None, "Gemini Model Initialization Failed."
        if model is None:  # Should not happen if initialize check passed, but safeguard
            return None, "Gemini model not available."
        try:
            st.write(f"📡 Sending request to {GEMINI_MODEL_NAME}...")
            response = model.generate_content(
                prompt,
                generation_config=genai.types.GenerationConfig(temperature=0.2),
                safety_settings=[
                    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                ]
            )
            st.write("✅ Response received from AI.")
            # Debug: Print raw response text
            # print(f"--- RAW API RESPONSE ---\n{response.text}\n------------------------")
            try:
                # Try to extract JSON robustly
                json_response_text = response.text.strip()
                # Handle potential markdown code block fences
                if json_response_text.startswith("```json"):
                    json_response_text = json_response_text[7:]
                if json_response_text.startswith("```"):  # Handle case where ```json wasn't used
                    json_response_text = json_response_text[3:]
                if json_response_text.endswith("```"):
                    json_response_text = json_response_text[:-3]
                # Find the first '{' and the last '}'. Note: rfind('}') + 1
                # yields 0 (not -1) on failure, so json_end > json_start is
                # the correct guard for both "no '{'" and "no '}'".
                json_start = json_response_text.find('{')
                json_end = json_response_text.rfind('}') + 1
                if json_start != -1 and json_end > json_start:
                    final_json_text = json_response_text[json_start:json_end]
                    insights = json.loads(final_json_text)
                    return insights, None
                else:
                    st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
                    return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
            except json.JSONDecodeError as json_err:
                st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
                st.error("Raw AI Response:")
                st.code(response.text, language='text')
                return None, f"AI response was not valid JSON: {json_err}"
            except AttributeError:
                # Handle cases where response structure might be different (e.g. blocked)
                st.error(f"🚨 Unexpected API response structure.")
                st.code(f"Response object: {response}", language='text')  # Log the problematic response
                # Try to get blocked reason if available
                try:
                    block_reason = response.prompt_feedback.block_reason
                    if block_reason:
                        return None, f"Content blocked by API. Reason: {block_reason}"
                except Exception:
                    pass  # Ignore if feedback structure isn't as expected
                return None, "Unexpected response structure from API."
            except Exception as e:
                st.error(f"🚨 Unexpected issue processing AI response: {e}")
                try:
                    st.code(f"Response object: {response}", language='text')
                except Exception:
                    pass
                return None, f"Unexpected response structure: {e}"
        except Exception as e:
            st.error(f"🚨 An error occurred during API call: {e}")
            error_msg = f"API call failed: {e}"
            # Improved error identification
            if hasattr(e, 'message'):  # For google.api_core.exceptions
                if "429" in e.message:
                    error_msg = "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
                elif "API key not valid" in e.message:
                    error_msg = "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
                elif "blocked" in e.message.lower():  # General check for safety blocks
                    error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
            elif "block_reason: SAFETY" in str(e):  # Fallback check
                error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
            return None, error_msg


def display_results(results_json, requested_analyses):
    """Render the analysis results in Streamlit."""
    st.header("📊 Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format received.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    # Define display functions for clarity
    def display_list_items(items, fields):
        if items:
            for item in items:
                details = []
                for field_key, field_label in fields.items():
                    value = item.get(field_key, 'N/A')
                    if value != 'N/A':  # Only show if value exists
                        details.append(f"**{field_label}:** {value}")
                st.markdown("- " + " - ".join(details))
                # Handle specific multi-line outputs like suggestions/summaries
                if 'suggestion' in item:
                    st.code(item['suggestion'], language='text')
                elif 'description' in item:
                    st.markdown(f"  > {item['description']}")  # Indent description
                elif 'summary' in item:
                    st.markdown(f"  > {item['summary']}")  # Indent summary
        else:
            st.markdown("_No items found for this category._")
        st.divider()

    # Map keys to display configurations
    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"}  # Suggestion shown by st.code
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"}  # Description shown separately
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"}  # Description shown separately
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"}  # Summary shown separately
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"}  # Suggestion shown separately
        }
    }

    # Iterate and display selected sections
    any_results = False
    for analysis_key in requested_analyses:
        if analysis_key in display_config:
            config = display_config[analysis_key]
            st.subheader(config["title"])
            items = results_json.get(config["key"], [])
            display_list_items(items, config["fields"])
            if items:
                any_results = True
    if not any_results:
        st.info("No specific findings were identified in the analysis based on your selections.")

    # Download button
    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )


# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("🤖 Codebase Audit & Documentation Assistant")
st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")

# Sidebar controls
with st.sidebar:
    st.header("⚙️ Analysis Controls")
    # Mock Mode Toggle
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode (for Testing)",
        value=st.session_state.mock_api_call,
        help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing."
    )
    if st.session_state.mock_api_call:
        st.info("Mock API Mode ACTIVE")
    else:
        st.info("Using REAL Gemini API")
    st.divider()
    st.header("🔎 Select Analyses")
    selected_analyses = []
    for key, name in AVAILABLE_ANALYSES.items():
        if st.checkbox(name, value=True, key=f"cb_{key}"):
            selected_analyses.append(key)
    st.divider()
    st.header("📄 How To Use")
    st.info(
        "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select desired analyses.\n"
        "4. Create a **ZIP archive** of your codebase.\n"
        "5. Upload the `.zip` file.\n"
        "6. Click 'Analyze Codebase'.\n"
        "7. Review the report."
    )
    st.info(f"**Note:** Only files with common code extensions ({', '.join(CODE_EXTENSIONS)}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
    st.divider()
    st.warning("⚠️ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode is OFF. Do not upload sensitive code if uncomfortable.")

# Main content area
uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader")

analysis_triggered = False
results_cache = None  # To store results briefly

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    with st.spinner("Inspecting ZIP file..."):
        code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)
    if code_files is not None:
        st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                # Use st.code for better formatting of list
                st.code("\n".join(ignored_files), language='text')

        analyze_button_disabled = (not selected_analyses or file_count == 0)
        analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
        if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
            analysis_triggered = True
            if not selected_analyses:
                st.warning("Please select at least one analysis type from the sidebar.")
            elif file_count == 0:
                st.warning("No relevant code files found in the ZIP archive to analyze.")
            else:
                st.divider()
                with st.spinner(f"🚀 Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... This may take time."):
                    # 1. Construct Prompt
                    analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                    if analysis_prompt and included_files_in_prompt:
                        st.write(f"Analyzing {len(included_files_in_prompt)} files...")
                        # 2. Call API (Real or Mock)
                        results_json, error_message = call_gemini_api(analysis_prompt)
                        results_cache = (results_json, error_message)  # Store results
                    elif not included_files_in_prompt:
                        results_cache = (None, "Could not proceed: No files included in prompt (check token limits/errors).")
                    else:
                        results_cache = (None, "Failed to generate analysis prompt.")
    else:
        # Error during zip processing
        pass  # Error message already shown

# Display results outside the button click block if analysis was triggered
if analysis_triggered and results_cache:
    results_json, error_message = results_cache
    st.divider()
    if error_message:
        st.error(f"Analysis Failed: {error_message}")
        # Display partial results if available (e.g., raw response on JSON error)
        if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
            st.subheader("Raw AI Response")
            st.code(results_json["raw_response"], language='text')
    elif results_json:
        display_results(results_json, selected_analyses)
    else:
        st.error("Analysis did not return results or an unknown error occurred.")
elif not uploaded_file:
    st.info("Upload a ZIP file containing your source code to begin.")

st.divider()
st.markdown("_Assistant powered by Google Gemini._")