# --- Imports ---
import streamlit as st
import google.generativeai as genai
import zipfile
import io
import json
import os  # Still needed for API key potentially, but not model names
from pathlib import Path
import time
import plotly.express as px
import pandas as pd

# --- Configuration ---
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Keep this estimate
RESULTS_PAGE_SIZE = 25

# Analysis types the user can request: internal key -> display label.
AVAILABLE_ANALYSES = {
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities",
}

# File extensions treated as source code when scanning an uploaded ZIP.
CODE_EXTENSIONS = {
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php',
    '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
}

# --- Session State Initialization ---
# Seed every key this app reads so downstream code can assume presence.
_SESSION_DEFAULTS = {
    'mock_api_call': False,        # use fake data instead of calling the API
    'analysis_results': None,      # parsed JSON insights from the last run
    'error_message': None,         # last error string, if any
    'analysis_requested': False,   # set once the user clicks Analyze
    'selected_model_name': None,   # will hold the "models/..." name
    'available_models_dict': {},   # mapping display_name -> name
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# --- Gemini API Setup & Model Discovery ---
model = None  # Global variable for the initialized model instance


@st.cache_data(ttl=3600)
def get_available_models():
    """Lists models supporting 'generateContent' using the API key."""
    compatible = {}
    try:
        if 'GEMINI_API_KEY' not in st.secrets:
            print("API key not found in secrets during model listing attempt.")
            return {}
        genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
        print("Listing available models via API...")
        for candidate in genai.list_models():
            if 'generateContent' in candidate.supported_generation_methods:
                compatible[candidate.display_name] = candidate.name
        print(f"Found {len(compatible)} compatible models.")
        return compatible
    except Exception as e:
        st.error(f"🚨 Error listing available models: {e}")
        return {}


def initialize_gemini_model():
    """Initializes the Gemini model based on the selected name."""
    global model
    selected_name = st.session_state.get('selected_model_name')

    # Mock mode never touches the real API.
    if st.session_state.mock_api_call:
        return True

    # Reuse a live instance that already matches the selection; rebuild a
    # stale one by clearing it and recursing once.
    if model is not None:
        if model.model_name == selected_name:
            return True
        print("Model changed. Re-initializing...")
        model = None
        return initialize_gemini_model()

    # No instance yet: build one only if a model has actually been chosen.
    if selected_name:
        try:
            if 'GEMINI_API_KEY' not in st.secrets:
                st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
                st.stop()
            genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
            print(f"Initializing Gemini Model: {selected_name}")
            model = genai.GenerativeModel(model_name=selected_name)
            print(f"Gemini Model Initialized ({selected_name}).")
            return True
        except Exception as e:
            st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
            st.session_state.selected_model_name = None
            st.stop()
            return False

    return False
# --- Helper Functions ---
def estimate_token_count(text):
    """
    Estimate the token count for a prompt.

    Uses a rough 3-characters-per-token heuristic. Accepts either the text
    itself or a pre-computed character count.

    Args:
        text: A string to estimate, or an int total character count.

    Returns:
        int: Estimated number of tokens.
    """
    if isinstance(text, int):
        return text // 3
    return len(text) // 3


@st.cache_data(max_entries=5)
def process_zip_file_cached(file_id, file_size, file_content_bytes):
    """
    Process an uploaded ZIP archive and extract recognized code files.

    Entries with any hidden path part (starting with '.') or containing '__'
    are skipped silently; files whose suffix is not in CODE_EXTENSIONS are
    recorded in the ignored list. file_id/file_size exist purely to key the
    Streamlit cache.

    Returns:
        tuple: (code_files dict {path: text}, total_chars, file_count,
        ignored_files list of "name (reason)" strings), or (None, 0, 0, [])
        when the archive is invalid or unreadable.
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    status_placeholder = st.empty()
    progress_bar = status_placeholder.progress(0)
    try:
        with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
            members = zip_ref.infolist()
            total_members = len(members)
            for i, member in enumerate(members):
                if i % 10 == 0:  # throttle progress-bar updates
                    progress_bar.progress(int((i / total_members) * 100))
                # NOTE(review): the '__' filter also drops legitimate files
                # such as __init__.py — confirm this is intentional.
                if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename:
                    continue
                file_path = Path(member.filename)
                if file_path.suffix.lower() in CODE_EXTENSIONS:
                    try:
                        with zip_ref.open(member) as file:
                            file_bytes = file.read()
                            # Prefer UTF-8, fall back to latin-1 before giving up.
                            try:
                                content = file_bytes.decode('utf-8')
                            except UnicodeDecodeError:
                                try:
                                    content = file_bytes.decode('latin-1')
                                except Exception as decode_err:
                                    ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                    continue
                            code_files[member.filename] = content
                            total_chars += len(content)
                            file_count += 1
                    except Exception as read_err:
                        ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                else:
                    # Hidden/'__' paths were already skipped above, so this
                    # condition always holds here; kept for safety.
                    if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
                        ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
            progress_bar.progress(100)
            status_placeholder.empty()
    except zipfile.BadZipFile:
        status_placeholder.empty()
        st.error("🚨 Invalid ZIP.")
        return None, 0, 0, []
    except Exception as e:
        status_placeholder.empty()
        st.error(f"🚨 ZIP Error: {e}")
        return None, 0, 0, []
    if file_count == 0:
        if not ignored_files:
            st.warning("No code files found.")
        else:
            st.warning("No code files found; some skipped.")
    return code_files, total_chars, file_count, ignored_files


def construct_analysis_prompt(code_files_dict, requested_analyses):
    """
    Build the analysis prompt: a header, per-file code segments kept within
    the token budget, and a description of the JSON structure the model must
    return for the requested analyses.

    Args:
        code_files_dict: Mapping of filename -> file content.
        requested_analyses: Iterable of analysis keys (see AVAILABLE_ANALYSES).

    Returns:
        tuple: (full prompt string or None, list of filenames included).
    """
    prompt_parts = ["Analyze the following codebase...\n\n"]
    current_token_estimate = estimate_token_count(prompt_parts[0])
    included_files = []
    code_segments = []
    prompt_status = st.empty()
    if len(code_files_dict) > 50:
        prompt_status.info("Constructing prompt...")
    for filename, content in code_files_dict.items():
        # BUG FIX: the START/END markers previously emitted the literal text
        # "(unknown)" instead of the actual filename (the loop's `filename`
        # was never interpolated), so the model could not attribute any
        # finding to a file. Use the real name in both markers.
        segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
        segment_token_estimate = estimate_token_count(segment)
        if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
            code_segments.append(segment)
            current_token_estimate += segment_token_estimate
            included_files.append(filename)
        else:
            # Stop at the first file that would blow the budget.
            st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
            break
    prompt_status.empty()
    if not included_files:
        st.error("🚨 No code files included in prompt.")
        return None, []
    prompt_parts.append("".join(code_segments))

    # Describe only the JSON keys matching the requested analyses.
    json_structure_description = "{\n"
    structure_parts = []
    if "generate_docs" in requested_analyses:
        structure_parts.append(' "documentation_suggestions": [...]')
    if "find_bugs" in requested_analyses:
        structure_parts.append(' "potential_bugs": [...]')
    if "check_style" in requested_analyses:
        structure_parts.append(' "style_issues": [...]')
    if "summarize_modules" in requested_analyses:
        structure_parts.append(' "module_summaries": [...]')
    if "suggest_refactoring" in requested_analyses:
        structure_parts.append(' "refactoring_suggestions": [...]')
    json_structure_description += ",\n".join(structure_parts) + "\n}"

    prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
    prompt_parts.append(prompt_footer)
    full_prompt = "".join(prompt_parts)
    return full_prompt, included_files
def call_gemini_api(prompt):
    """
    Send the prompt to the selected Gemini model and parse its JSON reply.

    In mock mode, returns a canned empty-result payload without any network
    access. Otherwise initializes the model (if needed), calls
    generate_content, strips markdown fences, and extracts the outermost
    JSON object from the response text.

    Args:
        prompt: Full prompt string (falsy aborts immediately).

    Returns:
        tuple: (insights dict or None, error message string or None). When
        the reply contains no recognizable JSON object, returns
        ({"raw_response": ...}, error) so callers can show the raw text.
    """
    if not prompt:
        return None, "Prompt generation failed."

    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(1)
        mock_json_response = json.dumps({
            "documentation_suggestions": [],
            "potential_bugs": [],
            "style_issues": [],
            "module_summaries": [],
            "refactoring_suggestions": []
        })
        st.success("Mock response generated.")
        return json.loads(mock_json_response), None

    if not initialize_gemini_model():
        return None, "Gemini Model Initialization Failed."
    if model is None:
        return None, "Gemini model not selected or available."

    try:
        api_status = st.empty()
        api_status.info(f"📡 Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
        start_time = time.time()
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.2),
            safety_settings=[
                {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                for c in [
                    "HARM_CATEGORY_HARASSMENT",
                    "HARM_CATEGORY_HATE_SPEECH",
                    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "HARM_CATEGORY_DANGEROUS_CONTENT"
                ]
            ]
        )
        end_time = time.time()
        api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s.")
        time.sleep(1)
        api_status.empty()
        try:
            # Strip common markdown fences, then slice out the outermost {...}.
            json_response_text = response.text.strip()
            if json_response_text.startswith("```json"):
                json_response_text = json_response_text[7:]
            if json_response_text.startswith("```"):
                json_response_text = json_response_text[3:]
            if json_response_text.endswith("```"):
                json_response_text = json_response_text[:-3]
            json_start = json_response_text.find('{')
            json_end = json_response_text.rfind('}') + 1
            # BUG FIX: the old guard also tested `json_end != -1`, which is
            # always true because rfind's -1 sentinel has already had 1 added;
            # the meaningful check is json_end > json_start.
            if json_start != -1 and json_end > json_start:
                final_json_text = json_response_text[json_start:json_end]
                insights = json.loads(final_json_text)
                return insights, None
            st.warning("⚠️ Could not find valid JSON object.")
            return {"raw_response": response.text}, "AI response did not contain clear JSON object."
        except json.JSONDecodeError as json_err:
            st.error(f"🚨 Error parsing JSON: {json_err}")
            st.code(response.text, language='text')
            return None, f"AI response not valid JSON: {json_err}"
        except AttributeError:
            # response lacked .text — e.g. blocked or empty candidates.
            st.error("🚨 Unexpected API response structure (AttributeError).")
            st.code(f"Response object: {response}", language='text')
            return None, "Unexpected response structure (AttributeError)."
        except Exception as e:
            st.error(f"🚨 Unexpected issue processing response: {e}")
            try:
                st.code(f"Response object: {response}", language='text')
            except Exception:
                pass
            return None, f"Unexpected response structure: {e}"
    except Exception as e:
        api_status.empty()
        st.error(f"🚨 API call error: {e}")
        # BUG FIX: classification previously required the exception to expose
        # a `.message` attribute, so most Python exception types always fell
        # through to the generic message. Matching on str(e) covers both the
        # google-api errors (whose str includes the message) and plain ones.
        error_msg = f"API call failed: {e}"
        err_text = str(e)
        if "429" in err_text:
            error_msg = "API Quota Exceeded or Rate Limit hit."
        elif "API key not valid" in err_text:
            error_msg = "Invalid Gemini API Key."
        elif "permission denied" in err_text.lower():
            error_msg = f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
        elif "blocked" in err_text.lower() or "block_reason: SAFETY" in err_text:
            error_msg = "Content blocked due to safety settings."
        return None, error_msg


def display_results(results_json, requested_analyses):
    """
    Render the analysis report with per-category pagination and a JSON
    download button.

    Args:
        results_json: Parsed insights dict from call_gemini_api, or a dict
            holding "raw_response" when JSON parsing failed upstream.
        requested_analyses: Analysis keys the user selected; only their
            sections are rendered.
    """
    st.header("📊 Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    # Maps analysis key -> JSON result key, section title, and fields to show.
    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"}
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"}
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"}
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"}
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"}
        },
    }

    any_results_found = False
    for analysis_key in requested_analyses:
        if analysis_key not in display_config:
            continue
        config = display_config[analysis_key]
        items = results_json.get(config["key"], [])
        total_items = len(items)
        st.subheader(f"{config['title']} ({total_items} found)")
        if items:
            any_results_found = True
            # Per-section "show more" pagination state lives in session state.
            state_key = f"visible_{analysis_key}"
            if state_key not in st.session_state:
                st.session_state[state_key] = RESULTS_PAGE_SIZE
            visible_count = st.session_state[state_key]
            for item in items[:visible_count]:
                # File names are rendered as inline code; other fields plain.
                details = [
                    f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file'
                    else f"**{field_label}:** {item.get(field_key, 'N/A')}"
                    for field_key, field_label in config["fields"].items()
                    if item.get(field_key, 'N/A') != 'N/A'
                ]
                st.markdown("- " + " - ".join(details))
                if 'suggestion' in item:
                    st.code(item['suggestion'], language='text')
                elif 'description' in item:
                    st.markdown(f" > {item['description']}")
                elif 'summary' in item:
                    st.markdown(f" > {item['summary']}")
            if total_items > visible_count:
                if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
                    st.session_state[state_key] += RESULTS_PAGE_SIZE
                    st.rerun()
        else:
            st.markdown("_No items found for this category._")
        st.divider()

    if not any_results_found:
        st.info("No specific findings were identified.")

    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("🤖 Codebase Audit & Documentation Assistant")

# --- Sidebar with Enhancements ---
with st.sidebar:
    # Dark Mode Toggle
    dark_mode = st.checkbox("Enable Dark Mode", value=False)
    if dark_mode:
        # NOTE(review): the injected markdown block is empty, so this toggle
        # currently has no visual effect — confirm whether CSS was lost.
        st.markdown(""" """, unsafe_allow_html=True)

    st.header("⚙️ Analysis Controls")
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode",
        value=st.session_state.mock_api_call,
        help="Use fake data instead of calling Gemini API."
    )
    st.divider()

    st.header("♊ Select Model")
    if not st.session_state.mock_api_call:
        st.session_state.available_models_dict = get_available_models()
        display_names = list(st.session_state.available_models_dict.keys())
        if display_names:
            # Recover the display name matching the previously selected model
            # so the selectbox keeps its position across reruns.
            current_display = None
            if st.session_state.selected_model_name:
                for disp, internal in st.session_state.available_models_dict.items():
                    if internal == st.session_state.selected_model_name:
                        current_display = disp
                        break
            try:
                default_index = display_names.index(current_display) if current_display in display_names else 0
            except ValueError:
                default_index = 0
            chosen_display = st.selectbox(
                "Choose Gemini model:",
                options=display_names,
                index=default_index,
                key="model_selector",
                help="Select the Gemini model to use for analysis."
            )
            st.session_state.selected_model_name = st.session_state.available_models_dict.get(chosen_display)
            st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
        elif 'GEMINI_API_KEY' in st.secrets:
            st.warning("No compatible models found or error listing models. Check API Key permissions.")
            st.session_state.selected_model_name = None
        else:
            st.warning("Add GEMINI_API_KEY to secrets to list models.")
            st.session_state.selected_model_name = None
    else:
        st.info("Mock API Mode ACTIVE")
        st.session_state.selected_model_name = "mock_model"
    st.divider()

    st.header("🔎 Select Analyses")
    selected_analyses = [
        key for key, name in AVAILABLE_ANALYSES.items()
        if st.checkbox(name, value=True, key=f"cb_{key}")
    ]
    st.divider()

    st.header("📄 How To Use")
    st.info(
        "1. Set API Key.\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select Model (if not Mock).\n"
        "4. Select analyses.\n"
        "5. Upload ZIP.\n"
        "6. Click 'Analyze'.\n"
        "7. Review report."
    )
    st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
    st.divider()
    st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")

# Update title dynamically based on selected model
if st.session_state.selected_model_name and not st.session_state.mock_api_call:
    st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
elif st.session_state.mock_api_call:
    st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
else:
    st.markdown("Upload codebase (`.zip`) for analysis.")

# --- Main Content Area ---
uploaded_file = st.file_uploader(
    "📁 Upload Codebase ZIP File",
    type=['zip'],
    key="file_uploader",
    # A fresh upload invalidates any previous run.
    on_change=lambda: st.session_state.update(
        analysis_results=None, error_message=None, analysis_requested=False
    )
)

analysis_button_placeholder = st.empty()
results_placeholder = st.container()

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    uploaded_file_bytes = uploaded_file.getvalue()
    file_id = f"{uploaded_file.name}-{uploaded_file.size}"
    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
        file_id, uploaded_file.size, uploaded_file_bytes
    )
    if code_files is not None:
        st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")

        # --- Interactive Metrics Visualization ---
        metrics = {
            "Metric": ["Files Analyzed", "Total Characters", "Token Estimate", "Ignored Files"],
            "Value": [file_count, total_chars, estimate_token_count(total_chars), len(ignored_files)]
        }
        df_metrics = pd.DataFrame(metrics)
        fig = px.bar(df_metrics, x="Metric", y="Value", title="Upload Summary Metrics")
        st.plotly_chart(fig)
        # --- End Metrics Visualization ---

        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')

        model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
        analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
        analyze_button_label = "Analyze Codebase"
        if not model_ready:
            analyze_button_label = "Select Model First"
        elif analyze_button_disabled:
            analyze_button_label = "Select Analyses or Upload Valid Code"

        if analysis_button_placeholder.button(
            analyze_button_label, type="primary", disabled=analyze_button_disabled
        ):
            st.session_state.analysis_requested = True
            st.session_state.analysis_results = None
            st.session_state.error_message = None
            if not selected_analyses:
                st.warning("Please select analysis types.")
            elif file_count == 0:
                st.warning("No relevant code files found.")
            elif not model_ready:
                st.warning("Please select a Gemini model from the sidebar.")
            else:
                with results_placeholder:
                    spinner_model_name = (
                        st.session_state.selected_model_name
                        if not st.session_state.mock_api_call else "Mock Mode"
                    )
                    spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
                    with st.spinner(spinner_msg):
                        analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                        if analysis_prompt and included_files_in_prompt:
                            results_json, error_msg = call_gemini_api(analysis_prompt)
                            st.session_state.analysis_results = results_json
                            st.session_state.error_message = error_msg
                        elif not included_files_in_prompt:
                            st.session_state.error_message = "Could not proceed: No files included."
                        else:
                            st.session_state.error_message = "Failed to generate analysis prompt."
                    st.rerun()

if st.session_state.analysis_requested:
    with results_placeholder:
        st.divider()
        if st.session_state.error_message:
            st.error(f"Analysis Failed: {st.session_state.error_message}")
            if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
                st.subheader("Raw AI Response")
                st.code(st.session_state.analysis_results["raw_response"], language='text')
        elif st.session_state.analysis_results:
            display_results(st.session_state.analysis_results, selected_analyses)
        else:
            st.info("Analysis initiated, but no results/errors stored.")
elif not uploaded_file:
    results_placeholder.info("Upload a ZIP file to begin.")

results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")