Spaces:

mgbam
/

AuditXCodeInsights

Sleeping

App Files Community

mgbam committed on Apr 7

Commit

94dc448

verified ·

1 Parent(s): 95d7700

Update app.py

Browse files

Files changed (1) hide show

app.py +360 -168

app.py CHANGED Viewed

@@ -3,23 +3,26 @@ import google.generativeai as genai
 import zipfile
 import io
 import json
-import os # Still needed for API key potentially, but not model names
 from pathlib import Path
 import time
 # --- Configuration ---
 # Model names are now discovered dynamically. Remove hardcoded names.
-MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Keep this estimate
 RESULTS_PAGE_SIZE = 25
-AVAILABLE_ANALYSES = { # Keep analyses config
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",
     "summarize_modules": "Summarize Complex Modules/Files",
-    "suggest_refactoring": "Suggest Refactoring Opportunities"
 }
-CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'} # Keep extensions
 # --- Session State Initialization ---
 # (Keep most session state, add one for the selected model)
@@ -30,17 +33,17 @@ if 'analysis_results' not in st.session_state:
 if 'error_message' not in st.session_state:
     st.session_state.error_message = None
 if 'analysis_requested' not in st.session_state:
-     st.session_state.analysis_requested = False
 if 'selected_model_name' not in st.session_state:
-     st.session_state.selected_model_name = None # Will hold the "models/..." name
 if 'available_models_dict' not in st.session_state:
-     st.session_state.available_models_dict = {} # Store display_name -> name mapping
 # --- Gemini API Setup & Model Discovery ---
-model = None # Global variable for the initialized model instance
 # --- NEW: Function to list available models ---
-@st.cache_data(ttl=3600) # Cache model list for an hour
 def get_available_models():
     """Lists models supporting 'generateContent' using the API key."""
     model_dict = {}
@@ -61,7 +64,7 @@ def get_available_models():
         return model_dict
     except Exception as e:
         st.error(f"🚨 Error listing available models: {e}")
-        return {} # Return empty on error
 def initialize_gemini_model():
     """Initializes the Gemini model based on the selected name."""
@@ -72,7 +75,7 @@ def initialize_gemini_model():
         try:
             if 'GEMINI_API_KEY' not in st.secrets:
                 st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
-                st.stop() # Stop if key missing for initialization
             # Configure API key (might be redundant if list_models worked, but safe)
             genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
             print(f"Initializing Gemini Model: {selected_name}")
@@ -82,170 +85,319 @@ def initialize_gemini_model():
             return True
         except Exception as e:
             st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
-            st.session_state.selected_model_name = None # Reset selection on error
             st.stop()
             return False
     elif st.session_state.mock_api_call:
-        return True # No init needed for mock
     elif model is not None and model.model_name == selected_name:
-         return True # Already initialized with the correct model
     elif model is not None and model.model_name != selected_name:
-         print(f"Model changed. Re-initializing...")
-         model = None # Reset model instance
-         return initialize_gemini_model() # Recurse to re-initialize with new name
     elif not selected_name and not st.session_state.mock_api_call:
-         # This case happens if no model is selected yet
-         return False # Cannot initialize without a selection
-    return False # Default case
 # --- Helper Functions ---
 # (estimate_token_count, process_zip_file_cached, construct_analysis_prompt,
 #  call_gemini_api, display_results - remain the same as the optimized version)
-# estimate_token_count
-def estimate_token_count(text): return len(text) // 3
-# process_zip_file_cached (no changes)
 @st.cache_data(max_entries=5)
 def process_zip_file_cached(file_id, file_size, file_content_bytes):
-    # ... (keep the exact same implementation as the previous optimized version) ...
-    code_files = {}; total_chars = 0; file_count = 0; ignored_files = []
-    status_placeholder = st.empty(); progress_bar = status_placeholder.progress(0)
     try:
         with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
-            members = zip_ref.infolist(); total_members = len(members)
             for i, member in enumerate(members):
-                if i % 10 == 0: progress_bar.progress(int((i / total_members) * 100))
-                if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename: continue
                 file_path = Path(member.filename)
                 if file_path.suffix.lower() in CODE_EXTENSIONS:
                     try:
                         with zip_ref.open(member) as file:
                             file_bytes = file.read()
-                            try: content = file_bytes.decode('utf-8')
                             except UnicodeDecodeError:
-                                try: content = file_bytes.decode('latin-1')
-                                except Exception as decode_err: ignored_files.append(f"{member.filename} (Decode Error: {decode_err})"); continue
-                            code_files[member.filename] = content; total_chars += len(content); file_count += 1
-                    except Exception as read_err: ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                 else:
                     if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
-                       ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
-            progress_bar.progress(100); status_placeholder.empty()
-    except zipfile.BadZipFile: status_placeholder.empty(); st.error("🚨 Invalid ZIP."); return None, 0, 0, []
-    except Exception as e: status_placeholder.empty(); st.error(f"🚨 ZIP Error: {e}"); return None, 0, 0, []
-    if file_count == 0 and not ignored_files: st.warning("No code files found.")
-    elif file_count == 0 and ignored_files: st.warning("No code files found; some skipped.")
     return code_files, total_chars, file_count, ignored_files
-# construct_analysis_prompt (no changes)
 def construct_analysis_prompt(code_files_dict, requested_analyses):
-    # ... (keep the exact same implementation as the previous optimized version) ...
-    prompt_parts = ["Analyze the following codebase...\n\n"]; current_token_estimate = estimate_token_count(prompt_parts[0])
-    included_files = []; code_segments = []; prompt_status = st.empty()
-    if len(code_files_dict) > 50: prompt_status.info("Constructing prompt...")
     for filename, content in code_files_dict.items():
         segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
         segment_token_estimate = estimate_token_count(segment)
         if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
-            code_segments.append(segment); current_token_estimate += segment_token_estimate; included_files.append(filename)
-        else: st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens)."); break
     prompt_status.empty()
-    if not included_files: st.error("🚨 No code files included in prompt."); return None, []
     prompt_parts.append("".join(code_segments))
-    json_structure_description = "{\n"; structure_parts = []
-    if "generate_docs" in requested_analyses: structure_parts.append('    "documentation_suggestions": [...]') # Use shorthand for brevity
-    if "find_bugs" in requested_analyses: structure_parts.append('    "potential_bugs": [...]')
-    if "check_style" in requested_analyses: structure_parts.append('    "style_issues": [...]')
-    if "summarize_modules" in requested_analyses: structure_parts.append('    "module_summaries": [...]')
-    if "suggest_refactoring" in requested_analyses: structure_parts.append('    "refactoring_suggestions": [...]')
     json_structure_description += ",\n".join(structure_parts) + "\n}"
     prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
     prompt_parts.append(prompt_footer)
     full_prompt = "".join(prompt_parts)
     return full_prompt, included_files
-# call_gemini_api (no changes other than relying on the globally selected model)
 def call_gemini_api(prompt):
-    # ... (keep the exact same implementation as the previous optimized version,
-    #      it implicitly uses the 'model' variable initialized by initialize_gemini_model) ...
-    if not prompt: return None, "Prompt generation failed."
     # MOCK MODE
     if st.session_state.mock_api_call:
-        st.info(" MOCK MODE: Simulating API call..."); time.sleep(1)
-        mock_json_response = json.dumps({"documentation_suggestions": [],"potential_bugs": [],"style_issues": [],"module_summaries": [],"refactoring_suggestions": []})
-        st.success("Mock response generated."); return json.loads(mock_json_response), None
     # REAL API CALL
     else:
-        if not initialize_gemini_model(): return None, "Gemini Model Initialization Failed."
-        if model is None: return None, "Gemini model not selected or available." # Added check
         try:
             api_status = st.empty()
-            # Include model name in status message
             api_status.info(f"📡 Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
-            start_time = time.time(); response = model.generate_content(prompt, generation_config=genai.types.GenerationConfig(temperature=0.2), safety_settings=[{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]])
-            end_time = time.time(); api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s."); time.sleep(1); api_status.empty()
-            try: # Keep JSON parsing logic
                 json_response_text = response.text.strip()
-                # ... (rest of JSON parsing identical to previous version) ...
-                if json_response_text.startswith("```json"): json_response_text = json_response_text[7:]
-                if json_response_text.startswith("```"): json_response_text = json_response_text[3:]
-                if json_response_text.endswith("```"): json_response_text = json_response_text[:-3]
-                json_start = json_response_text.find('{'); json_end = json_response_text.rfind('}') + 1
                 if json_start != -1 and json_end != -1 and json_end > json_start:
-                    final_json_text = json_response_text[json_start:json_end]; insights = json.loads(final_json_text); return insights, None
-                else: st.warning("⚠️ Could not find valid JSON object."); return {"raw_response": response.text}, "AI response did not contain clear JSON object."
-            # ... (keep error handling for JSONDecodeError, AttributeError etc. identical) ...
-            except json.JSONDecodeError as json_err: st.error(f"🚨 Error parsing JSON: {json_err}"); st.code(response.text, language='text'); return None, f"AI response not valid JSON: {json_err}"
-            except AttributeError: st.error(f"🚨 Unexpected API response structure (AttributeError)."); st.code(f"Response object: {response}", language='text'); return None, "Unexpected response structure (AttributeError)." # Simplified message
-            except Exception as e: st.error(f"🚨 Unexpected issue processing response: {e}"); try: st.code(f"Response object: {response}", language='text'); except: pass; return None, f"Unexpected response structure: {e}"
-        except Exception as e: # Keep API call error handling
-            api_status.empty(); st.error(f"🚨 API call error: {e}"); error_msg = f"API call failed: {e}"
-            # ... (keep specific error message logic identical) ...
             if hasattr(e, 'message'):
-                 if "429" in e.message: error_msg = "API Quota Exceeded or Rate Limit hit."
-                 elif "API key not valid" in e.message: error_msg = "Invalid Gemini API Key."
-                 elif "permission denied" in e.message.lower(): error_msg = f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
-                 elif "blocked" in e.message.lower(): error_msg = "Content blocked due to safety settings."
-            elif "block_reason: SAFETY" in str(e): error_msg = "Content blocked due to safety settings."
             return None, error_msg
-# display_results (no changes needed from optimized version)
 def display_results(results_json, requested_analyses):
-    # ... (keep the exact same implementation as the previous optimized version with pagination) ...
     st.header("📊 Analysis Report")
-    if not isinstance(results_json, dict): st.error("Invalid results format."); st.json(results_json); return
-    if "raw_response" in results_json: st.subheader("Raw AI Response (JSON Parsing Failed)"); st.code(results_json["raw_response"], language='text'); return
-    display_config = { # Keep config same
-        "generate_docs": {"key": "documentation_suggestions", "title": AVAILABLE_ANALYSES["generate_docs"], "fields": {"file": "File", "line": "Line"}},
-        "find_bugs": {"key": "potential_bugs", "title": AVAILABLE_ANALYSES["find_bugs"], "fields": {"file": "File", "line": "Line", "severity": "Severity"}},
-        "check_style": {"key": "style_issues", "title": AVAILABLE_ANALYSES["check_style"], "fields": {"file": "File", "line": "Line"}},
-        "summarize_modules": {"key": "module_summaries", "title": AVAILABLE_ANALYSES["summarize_modules"], "fields": {"file": "File"}},
-        "suggest_refactoring": {"key": "refactoring_suggestions", "title": AVAILABLE_ANALYSES["suggest_refactoring"], "fields": {"file": "File", "line": "Line", "area": "Area"}}
     }
     any_results_found = False
     for analysis_key in requested_analyses:
         if analysis_key in display_config:
-            config = display_config[analysis_key]; items = results_json.get(config["key"], [])
-            total_items = len(items); st.subheader(f"{config['title']} ({total_items} found)")
             if items:
-                any_results_found = True; state_key = f"visible_{analysis_key}"
-                if state_key not in st.session_state: st.session_state[state_key] = RESULTS_PAGE_SIZE
-                visible_count = st.session_state[state_key]; items_to_display = items[:visible_count]
-                for item in items_to_display: # Keep item display logic
-                    details = [f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file' else f"**{field_label}:** {item.get(field_key, 'N/A')}" for field_key, field_label in config["fields"].items() if item.get(field_key, 'N/A') != 'N/A']
                     st.markdown("- " + " - ".join(details))
-                    if 'suggestion' in item: st.code(item['suggestion'], language='text')
-                    elif 'description' in item: st.markdown(f"  > {item['description']}")
-                    elif 'summary' in item: st.markdown(f"  > {item['summary']}")
-                if total_items > visible_count: # Keep "Show More" logic
                     if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
-                        st.session_state[state_key] += RESULTS_PAGE_SIZE; st.rerun()
-            else: st.markdown("_No items found for this category._")
             st.divider()
-    if not any_results_found: st.info("No specific findings were identified.")
-    st.download_button(label="Download Full Report (JSON)", data=json.dumps(results_json, indent=4), file_name="code_audit_report.json", mime="application/json")
 # --- Streamlit App Main Interface ---
 st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
@@ -254,7 +406,11 @@ st.title("🤖 Codebase Audit & Documentation Assistant")
 # --- Sidebar ---
 with st.sidebar:
     st.header("⚙️ Analysis Controls")
-    st.session_state.mock_api_call = st.toggle("🧪 Enable Mock API Mode", value=st.session_state.mock_api_call, help="Use fake data instead of calling Gemini API.")
     st.divider()
     st.header("♊ Select Model")
@@ -268,17 +424,16 @@ with st.sidebar:
             # Try to find the index of the previously selected model
             current_model_display_name = None
             if st.session_state.selected_model_name:
-                 # Find display name matching the stored internal name
-                 for disp_name, internal_name in st.session_state.available_models_dict.items():
-                      if internal_name == st.session_state.selected_model_name:
-                           current_model_display_name = disp_name
-                           break
             try:
                 selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
             except ValueError:
-                 selected_index = 0 # Default to first if previous selection not found
             selected_display_name = st.selectbox(
                 "Choose Gemini model:",
@@ -291,42 +446,56 @@ with st.sidebar:
             st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
             st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
         elif 'GEMINI_API_KEY' in st.secrets:
-             st.warning("No compatible models found or error listing models. Check API Key permissions.")
-             st.session_state.selected_model_name = None # Ensure no model selected
         else:
-             st.warning("Add GEMINI_API_KEY to secrets to list models.")
-             st.session_state.selected_model_name = None
-    else: # Mock mode is active
         st.info("Mock API Mode ACTIVE")
-        st.session_state.selected_model_name = "mock_model" # Use a placeholder name for mock mode
     # --- End Dynamic Model Selection ---
     st.divider()
     st.header("🔎 Select Analyses")
-    selected_analyses = [key for key, name in AVAILABLE_ANALYSES.items() if st.checkbox(name, value=True, key=f"cb_{key}")]
     st.divider()
-    st.header("📄 How To Use") # Keep help text
-    st.info("1. Set API Key.\n2. Toggle Mock Mode if needed.\n3. Select Model (if not Mock).\n4. Select analyses.\n5. Upload ZIP.\n6. Click 'Analyze'.\n7. Review report.")
     st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
     st.divider()
     st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")
 # Update title dynamically based on selected model
 if st.session_state.selected_model_name and not st.session_state.mock_api_call:
     st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
 elif st.session_state.mock_api_call:
-     st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
 else:
-     st.markdown("Upload codebase (`.zip`) for analysis.")
 # --- Main Content Area ---
-# (Keep the file uploader, button logic, and results display structure the same)
-uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader",
-                                 on_change=lambda: st.session_state.update(analysis_results=None, error_message=None, analysis_requested=False))
 analysis_button_placeholder = st.empty()
 results_placeholder = st.container()
@@ -334,50 +503,73 @@ if uploaded_file:
     st.success(f"✅ File '{uploaded_file.name}' uploaded.")
     uploaded_file_bytes = uploaded_file.getvalue()
     file_id = f"{uploaded_file.name}-{uploaded_file.size}"
-    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(file_id, uploaded_file.size, uploaded_file_bytes)
     if code_files is not None:
         st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")
         if ignored_files:
-             with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"): st.code("\n".join(ignored_files), language='text')
         # Disable button if no model selected (and not in mock mode)
         model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
         analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
         analyze_button_label = "Analyze Codebase"
-        if not model_ready: analyze_button_label = "Select Model First"
-        elif analyze_button_disabled: analyze_button_label = "Select Analyses or Upload Valid Code"
-        if analysis_button_placeholder.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
-            st.session_state.analysis_requested = True; st.session_state.analysis_results = None; st.session_state.error_message = None
-            if not selected_analyses: st.warning("Please select analysis types.")
-            elif file_count == 0: st.warning("No relevant code files found.")
-            elif not model_ready: st.warning("Please select a Gemini model from the sidebar.") # Should be disabled, but safety check
             else:
                 with results_placeholder:
-                     spinner_model_name = st.session_state.selected_model_name if not st.session_state.mock_api_call else "Mock Mode"
-                     spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
-                     with st.spinner(spinner_msg):
                         analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                         if analysis_prompt and included_files_in_prompt:
                             results_json, error_msg = call_gemini_api(analysis_prompt)
-                            st.session_state.analysis_results = results_json; st.session_state.error_message = error_msg
-                        elif not included_files_in_prompt: st.session_state.error_message = "Could not proceed: No files included."
-                        else: st.session_state.error_message = "Failed to generate analysis prompt."
                 st.rerun()
 # Display results (Keep the same logic)
 if st.session_state.analysis_requested:
-     with results_placeholder:
-         st.divider()
-         if st.session_state.error_message:
-             st.error(f"Analysis Failed: {st.session_state.error_message}")
-             if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
-                 st.subheader("Raw AI Response"); st.code(st.session_state.analysis_results["raw_response"], language='text')
-         elif st.session_state.analysis_results:
-             # Pass selected_analyses to display_results in case it's needed later
-             display_results(st.session_state.analysis_results, selected_analyses)
-         else: st.info("Analysis initiated, but no results/errors stored.")
-elif not uploaded_file: results_placeholder.info("Upload a ZIP file to begin.")
 results_placeholder.divider()
-results_placeholder.markdown("_Assistant powered by Google Gemini._")

 import zipfile
 import io
 import json
+import os  # Still needed for API key potentially, but not model names
 from pathlib import Path
 import time
 # --- Configuration ---
 # Model names are now discovered dynamically. Remove hardcoded names.
+MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Keep this estimate
 RESULTS_PAGE_SIZE = 25
+AVAILABLE_ANALYSES = {  # Keep analyses config
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",
     "summarize_modules": "Summarize Complex Modules/Files",
+    "suggest_refactoring": "Suggest Refactoring Opportunities",
 }
+CODE_EXTENSIONS = {
+    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb',
+    '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
+}  # Keep extensions
 # --- Session State Initialization ---
 # (Keep most session state, add one for the selected model)
 if 'error_message' not in st.session_state:
     st.session_state.error_message = None
 if 'analysis_requested' not in st.session_state:
+    st.session_state.analysis_requested = False
 if 'selected_model_name' not in st.session_state:
+    st.session_state.selected_model_name = None  # Will hold the "models/..." name
 if 'available_models_dict' not in st.session_state:
+    st.session_state.available_models_dict = {}  # Store display_name -> name mapping
 # --- Gemini API Setup & Model Discovery ---
+model = None  # Global variable for the initialized model instance
 # --- NEW: Function to list available models ---
+@st.cache_data(ttl=3600)  # Cache model list for an hour
 def get_available_models():
     """Lists models supporting 'generateContent' using the API key."""
     model_dict = {}
         return model_dict
     except Exception as e:
         st.error(f"🚨 Error listing available models: {e}")
+        return {}  # Return empty on error
 def initialize_gemini_model():
     """Initializes the Gemini model based on the selected name."""
         try:
             if 'GEMINI_API_KEY' not in st.secrets:
                 st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
+                st.stop()  # Stop if key missing for initialization
             # Configure API key (might be redundant if list_models worked, but safe)
             genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
             print(f"Initializing Gemini Model: {selected_name}")
             return True
         except Exception as e:
             st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
+            st.session_state.selected_model_name = None  # Reset selection on error
             st.stop()
             return False
     elif st.session_state.mock_api_call:
+        return True  # No init needed for mock mode
     elif model is not None and model.model_name == selected_name:
+        return True  # Already initialized with the correct model
     elif model is not None and model.model_name != selected_name:
+        print("Model changed. Re-initializing...")
+        model = None  # Reset model instance
+        return initialize_gemini_model()  # Recurse to re-initialize with new name
     elif not selected_name and not st.session_state.mock_api_call:
+        # This case happens if no model is selected yet
+        return False  # Cannot initialize without a selection
+    return False  # Default case
 # --- Helper Functions ---
 # (estimate_token_count, process_zip_file_cached, construct_analysis_prompt,
 #  call_gemini_api, display_results - remain the same as the optimized version)
+def estimate_token_count(text):
+    """Estimates the number of tokens based on text length."""
+    return len(text) // 3
 @st.cache_data(max_entries=5)
 def process_zip_file_cached(file_id, file_size, file_content_bytes):
+    """
+    Processes a ZIP file and extracts code files.
+    Returns a tuple of (code_files dict, total_chars, file_count, ignored_files list).
+    """
+    code_files = {}
+    total_chars = 0
+    file_count = 0
+    ignored_files = []
+    status_placeholder = st.empty()
+    progress_bar = status_placeholder.progress(0)
     try:
         with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
+            members = zip_ref.infolist()
+            total_members = len(members)
             for i, member in enumerate(members):
+                if i % 10 == 0:
+                    progress_bar.progress(int((i / total_members) * 100))
+                if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename:
+                    continue
                 file_path = Path(member.filename)
                 if file_path.suffix.lower() in CODE_EXTENSIONS:
                     try:
                         with zip_ref.open(member) as file:
                             file_bytes = file.read()
+                            try:
+                                content = file_bytes.decode('utf-8')
                             except UnicodeDecodeError:
+                                try:
+                                    content = file_bytes.decode('latin-1')
+                                except Exception as decode_err:
+                                    ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
+                                    continue
+                            code_files[member.filename] = content
+                            total_chars += len(content)
+                            file_count += 1
+                    except Exception as read_err:
+                        ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                 else:
                     if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
+                        ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
+            progress_bar.progress(100)
+            status_placeholder.empty()
+    except zipfile.BadZipFile:
+        status_placeholder.empty()
+        st.error("🚨 Invalid ZIP.")
+        return None, 0, 0, []
+    except Exception as e:
+        status_placeholder.empty()
+        st.error(f"🚨 ZIP Error: {e}")
+        return None, 0, 0, []
+    if file_count == 0:
+        if not ignored_files:
+            st.warning("No code files found.")
+        else:
+            st.warning("No code files found; some skipped.")
     return code_files, total_chars, file_count, ignored_files
 def construct_analysis_prompt(code_files_dict, requested_analyses):
     """
     Assemble the analysis prompt from the code files and the chosen analyses.

     Files are taken in the mapping's iteration order, each wrapped in
     START FILE / END FILE markers. As soon as one file would push the running
     token estimate above MAX_PROMPT_TOKENS_ESTIMATE, a warning is shown and
     that file and all later ones are left out. The prompt ends with a JSON
     skeleton listing one key per requested analysis.

     Returns:
         (full_prompt, included_files), or (None, []) when no file fit.
     """
     prompt_header = "Analyze the following codebase...\n\n"
     current_token_estimate = estimate_token_count(prompt_header)
     included_files = []
     file_blocks = []
     progress_slot = st.empty()
     # A progress note is only shown for large uploads.
     if len(code_files_dict) > 50:
         progress_slot.info("Constructing prompt...")
     for filename, content in code_files_dict.items():
         block = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
         block_tokens = estimate_token_count(block)
         if current_token_estimate + block_tokens > MAX_PROMPT_TOKENS_ESTIMATE:
             # Budget exhausted: stop at the first file that does not fit.
             st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
             break
         file_blocks.append(block)
         current_token_estimate += block_tokens
         included_files.append(filename)
     progress_slot.empty()
     if not included_files:
         st.error("🚨 No code files included in prompt.")
         return None, []
     # Analysis key -> skeleton line, listed in the fixed order used in the prompt.
     skeleton_table = (
         ("generate_docs", '    "documentation_suggestions": [...]'),
         ("find_bugs", '    "potential_bugs": [...]'),
         ("check_style", '    "style_issues": [...]'),
         ("summarize_modules", '    "module_summaries": [...]'),
         ("suggest_refactoring", '    "refactoring_suggestions": [...]'),
     )
     skeleton_lines = [
         line for analysis_key, line in skeleton_table
         if analysis_key in requested_analyses
     ]
     json_structure_description = "{\n" + ",\n".join(skeleton_lines) + "\n}"
     prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
     full_prompt = prompt_header + "".join(file_blocks) + prompt_footer
     return full_prompt, included_files
 def _extract_json_object_text(text):
     """Return the outermost '{...}' span of *text* (code fences removed), or None if there is none."""
     cleaned = text.strip()
     # Remove markdown code fences if present
     if cleaned.startswith("```json"):
         cleaned = cleaned[7:]
     if cleaned.startswith("```"):
         cleaned = cleaned[3:]
     if cleaned.endswith("```"):
         cleaned = cleaned[:-3]
     start = cleaned.find('{')
     end = cleaned.rfind('}') + 1  # 0 when no closing brace exists
     if start == -1 or end <= start:
         return None
     return cleaned[start:end]
 def _describe_api_error(exc):
     """Translate an exception raised by the API call into a user-facing error message."""
     raw_message = getattr(exc, 'message', None)
     # Many exceptions have no string `.message`; fall back to str(exc) so
     # they are still classified instead of being reported generically.
     text = raw_message if isinstance(raw_message, str) else str(exc)
     lowered = text.lower()
     if "429" in text:
         return "API Quota Exceeded or Rate Limit hit."
     if "API key not valid" in text:
         return "Invalid Gemini API Key."
     if "permission denied" in lowered:
         return f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
     # The SAFETY marker is checked against str(exc) regardless of `.message`.
     if "blocked" in lowered or "block_reason: SAFETY" in str(exc):
         return "Content blocked due to safety settings."
     return f"API call failed: {exc}"
 def call_gemini_api(prompt):
     """
     Send the prompt to Gemini (or simulate it in mock mode) and parse the JSON reply.

     Args:
         prompt: Full prompt text. A falsy value is rejected immediately.

     Returns:
         A tuple (insights, error_message):
         - (dict, None) when a JSON object was parsed from the reply (or in mock mode);
         - ({"raw_response": text}, message) when the reply holds no JSON object;
         - (None, message) for every other failure.
     """
     if not prompt:
         return None, "Prompt generation failed."
     # MOCK MODE
     if st.session_state.mock_api_call:
         st.info(" MOCK MODE: Simulating API call...")
         time.sleep(1)
         mock_insights = {
             "documentation_suggestions": [],
             "potential_bugs": [],
             "style_issues": [],
             "module_summaries": [],
             "refactoring_suggestions": [],
         }
         st.success("Mock response generated.")
         return mock_insights, None
     # REAL API CALL
     if not initialize_gemini_model():
         return None, "Gemini Model Initialization Failed."
     if model is None:
         return None, "Gemini model not selected or available."
     # Created before the try block so the except handler can always clear it.
     api_status = st.empty()
     try:
         api_status.info(f"📡 Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
         start_time = time.time()
         response = model.generate_content(
             prompt,
             generation_config=genai.types.GenerationConfig(temperature=0.2),
             safety_settings=[
                 {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                 for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
                           "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]
             ]
         )
         end_time = time.time()
         api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s.")
         # Leave the success note visible briefly before clearing it.
         time.sleep(1)
         api_status.empty()
         try:
             final_json_text = _extract_json_object_text(response.text)
             if final_json_text is None:
                 st.warning("⚠️ Could not find valid JSON object.")
                 return {"raw_response": response.text}, "AI response did not contain clear JSON object."
             return json.loads(final_json_text), None
         except json.JSONDecodeError as json_err:
             st.error(f"🚨 Error parsing JSON: {json_err}")
             st.code(response.text, language='text')
             return None, f"AI response not valid JSON: {json_err}"
         except AttributeError:
             st.error("🚨 Unexpected API response structure (AttributeError).")
             st.code(f"Response object: {response}", language='text')
             return None, "Unexpected response structure (AttributeError)."
         except Exception as e:
             st.error(f"🚨 Unexpected issue processing response: {e}")
             try:
                 st.code(f"Response object: {response}", language='text')
             except Exception:
                 # Best effort only: showing the raw object is a debugging aid.
                 pass
             return None, f"Unexpected response structure: {e}"
     except Exception as e:
         api_status.empty()
         st.error(f"🚨 API call error: {e}")
         return None, _describe_api_error(e)
 def display_results(results_json, requested_analyses):
     """
     Render the analysis report with per-category pagination and a JSON download.

     Args:
         results_json: Parsed AI response. Expected to be a dict whose category
             keys map to lists of finding dicts. A non-dict is shown as an
             error, and a dict carrying "raw_response" is shown verbatim.
         requested_analyses: Analysis keys to render, in display order. Keys
             without an entry in the display configuration are ignored.

     The report content comes from a language model, so its shape is not
     trusted: a category that is not a list is skipped with a warning, and an
     item that is not a dict is rendered as plain text.
     """
     st.header("📊 Analysis Report")
     if not isinstance(results_json, dict):
         st.error("Invalid results format.")
         st.json(results_json)
         return
     if "raw_response" in results_json:
         st.subheader("Raw AI Response (JSON Parsing Failed)")
         st.code(results_json["raw_response"], language='text')
         return
     # Per analysis: the JSON key holding its findings, the section title, and
     # the item fields shown on the summary line (field key -> label).
     display_config = {
         "generate_docs": {
             "key": "documentation_suggestions",
             "title": AVAILABLE_ANALYSES["generate_docs"],
             "fields": {"file": "File", "line": "Line"}
         },
         "find_bugs": {
             "key": "potential_bugs",
             "title": AVAILABLE_ANALYSES["find_bugs"],
             "fields": {"file": "File", "line": "Line", "severity": "Severity"}
         },
         "check_style": {
             "key": "style_issues",
             "title": AVAILABLE_ANALYSES["check_style"],
             "fields": {"file": "File", "line": "Line"}
         },
         "summarize_modules": {
             "key": "module_summaries",
             "title": AVAILABLE_ANALYSES["summarize_modules"],
             "fields": {"file": "File"}
         },
         "suggest_refactoring": {
             "key": "refactoring_suggestions",
             "title": AVAILABLE_ANALYSES["suggest_refactoring"],
             "fields": {"file": "File", "line": "Line", "area": "Area"}
         },
     }
     any_results_found = False
     for analysis_key in requested_analyses:
         if analysis_key not in display_config:
             continue
         config = display_config[analysis_key]
         raw_items = results_json.get(config["key"], [])
         if isinstance(raw_items, list):
             items = raw_items
         else:
             # A category may come back as null, a string or an object;
             # len() or slicing on those would crash or mislead.
             if raw_items is not None:
                 st.warning(f"⚠️ Unexpected format for '{config['key']}' in AI response; skipping.")
             items = []
         total_items = len(items)
         st.subheader(f"{config['title']} ({total_items} found)")
         if items:
             any_results_found = True
             state_key = f"visible_{analysis_key}"
             if state_key not in st.session_state:
                 st.session_state[state_key] = RESULTS_PAGE_SIZE
             visible_count = st.session_state[state_key]
             for item in items[:visible_count]:
                 if not isinstance(item, dict):
                     # Malformed entry: show it as-is rather than failing on .get().
                     st.markdown(f"- {item}")
                     continue
                 details = []
                 for field_key, field_label in config["fields"].items():
                     value = item.get(field_key, 'N/A')
                     if value == 'N/A':
                         continue
                     # File paths are rendered as inline code.
                     if field_key == 'file':
                         details.append(f"**{field_label}:** `{value}`")
                     else:
                         details.append(f"**{field_label}:** {value}")
                 st.markdown("- " + " - ".join(details))
                 if 'suggestion' in item:
                     st.code(item['suggestion'], language='text')
                 elif 'description' in item:
                     st.markdown(f"  > {item['description']}")
                 elif 'summary' in item:
                     st.markdown(f"  > {item['summary']}")
             if total_items > visible_count:
                 if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
                     st.session_state[state_key] += RESULTS_PAGE_SIZE
                     st.rerun()
         else:
             st.markdown("_No items found for this category._")
         st.divider()
     if not any_results_found:
         st.info("No specific findings were identified.")
     st.download_button(
         label="Download Full Report (JSON)",
         data=json.dumps(results_json, indent=4),
         file_name="code_audit_report.json",
         mime="application/json"
     )
 # --- Streamlit App Main Interface ---
 # Page-level configuration: browser tab title and wide layout.
 st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
 # --- Sidebar ---
 # The sidebar holds the mock-mode toggle, the model picker, the analysis
 # checkboxes and the usage notes.
 with st.sidebar:
     st.header("⚙️ Analysis Controls")
     # Store the toggle's value back into session state; the toggle is
     # initialised from that same session-state value.
+    st.session_state.mock_api_call = st.toggle(
+        "🧪 Enable Mock API Mode",
+        value=st.session_state.mock_api_call,
+        help="Use fake data instead of calling Gemini API."
+    )
     st.divider()
     st.header("♊ Select Model")
     # NOTE(review): the diff view elides lines here. The statements below are
     # indented under conditions that are not shown, and `model_display_names`
     # is defined in the hidden part.
             # Try to find the index of the previously selected model
             current_model_display_name = None
             if st.session_state.selected_model_name:
+                # Find display name matching the stored internal name
+                for disp_name, internal_name in st.session_state.available_models_dict.items():
+                    if internal_name == st.session_state.selected_model_name:
+                        current_model_display_name = disp_name
+                        break
             # Use the position of the previous selection, or the first entry.
             try:
                 selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
             except ValueError:
+                selected_index = 0  # Default to first if previous selection not found
             selected_display_name = st.selectbox(
                 "Choose Gemini model:",
             # NOTE(review): the remaining selectbox arguments and its closing
             # parenthesis are elided by the diff view.
             # Map the chosen display name back to the stored model name.
             st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
             st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
         # An API key is configured, but the (hidden) preceding condition failed.
         elif 'GEMINI_API_KEY' in st.secrets:
+            st.warning("No compatible models found or error listing models. Check API Key permissions.")
+            st.session_state.selected_model_name = None  # Ensure no model selected
         # No API key in secrets.
         else:
+            st.warning("Add GEMINI_API_KEY to secrets to list models.")
+            st.session_state.selected_model_name = None
+    else:  # Mock mode is active
         st.info("Mock API Mode ACTIVE")
+        st.session_state.selected_model_name = "mock_model"  # Use a placeholder name for mock mode
     # --- End Dynamic Model Selection ---
     st.divider()
     st.header("🔎 Select Analyses")
     # One checkbox per available analysis, all ticked by default; the list
     # keeps the keys of the ticked ones.
+    selected_analyses = [
+        key for key, name in AVAILABLE_ANALYSES.items()
+        if st.checkbox(name, value=True, key=f"cb_{key}")
+    ]
     st.divider()
+    st.header("📄 How To Use")
+    st.info(
+        "1. Set API Key.\n"
+        "2. Toggle Mock Mode if needed.\n"
+        "3. Select Model (if not Mock).\n"
+        "4. Select analyses.\n"
+        "5. Upload ZIP.\n"
+        "6. Click 'Analyze'.\n"
+        "7. Review report."
+    )
     st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
     st.divider()
     st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")
 # Caption under the title: mock mode, a concrete model name, or neither.
 if st.session_state.mock_api_call:
     st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
 elif st.session_state.selected_model_name:
     st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
 else:
     st.markdown("Upload codebase (`.zip`) for analysis.")
 # --- Main Content Area ---
 # ZIP uploader. Changing the uploaded file clears any stored results, error
 # message and the "analysis requested" flag in session state.
+uploaded_file = st.file_uploader(
+    "📁 Upload Codebase ZIP File",
+    type=['zip'],
+    key="file_uploader",
+    on_change=lambda: st.session_state.update(
+        analysis_results=None,
+        error_message=None,
+        analysis_requested=False
+    )
+)
 # Slots created up front so the button and the results can be filled in later.
 analysis_button_placeholder = st.empty()
 results_placeholder = st.container()
 # NOTE(review): the diff view elides the statement that opens this indented
 # section (presumably a check that a file was uploaded — confirm in the full file).
     st.success(f"✅ File '{uploaded_file.name}' uploaded.")
     uploaded_file_bytes = uploaded_file.getvalue()
     # Identifier built from the file name and size, passed to the ZIP processor.
     file_id = f"{uploaded_file.name}-{uploaded_file.size}"
+    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
+        file_id, uploaded_file.size, uploaded_file_bytes
+    )
     # process_zip_file_cached returns None for code_files when the ZIP could not be read.
     if code_files is not None:
         # NOTE(review): estimate_token_count receives a character count here but
         # strings elsewhere in this file — confirm it accepts both.
         st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")
         if ignored_files:
+            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
+                st.code("\n".join(ignored_files), language='text')
         # Disable button if no model selected (and not in mock mode)
         model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
         analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
         # The label explains why the button is disabled, when it is.
         analyze_button_label = "Analyze Codebase"
+        if not model_ready:
+            analyze_button_label = "Select Model First"
+        elif analyze_button_disabled:
+            analyze_button_label = "Select Analyses or Upload Valid Code"
+        if analysis_button_placeholder.button(
+            analyze_button_label,
+            type="primary",
+            disabled=analyze_button_disabled
+        ):
             # Button clicked: mark the request and clear the previous outcome.
+            st.session_state.analysis_requested = True
+            st.session_state.analysis_results = None
+            st.session_state.error_message = None
             # These checks repeat the conditions that disable the button.
+            if not selected_analyses:
+                st.warning("Please select analysis types.")
+            elif file_count == 0:
+                st.warning("No relevant code files found.")
+            elif not model_ready:
+                st.warning("Please select a Gemini model from the sidebar.")
             else:
                 with results_placeholder:
+                    spinner_model_name = (
+                        st.session_state.selected_model_name
+                        if not st.session_state.mock_api_call
+                        else "Mock Mode"
+                    )
+                    spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
+                    with st.spinner(spinner_msg):
                         # Build the prompt, call the API, and store the outcome
                         # in session state so it is available after the rerun.
                         analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                         if analysis_prompt and included_files_in_prompt:
                             results_json, error_msg = call_gemini_api(analysis_prompt)
+                            st.session_state.analysis_results = results_json
+                            st.session_state.error_message = error_msg
+                        elif not included_files_in_prompt:
+                            st.session_state.error_message = "Could not proceed: No files included."
+                        else:
+                            st.session_state.error_message = "Failed to generate analysis prompt."
                 # Rerun the script so the stored outcome is rendered by the results section.
                 st.rerun()
 # Show the stored outcome of the last requested analysis, then the footer.
 if st.session_state.analysis_requested:
     with results_placeholder:
         st.divider()
         stored_results = st.session_state.analysis_results
         if st.session_state.error_message:
             st.error(f"Analysis Failed: {st.session_state.error_message}")
             # A failed parse may still carry the raw model output.
             has_raw_response = isinstance(stored_results, dict) and "raw_response" in stored_results
             if has_raw_response:
                 st.subheader("Raw AI Response")
                 st.code(stored_results["raw_response"], language='text')
         elif stored_results:
             display_results(stored_results, selected_analyses)
         else:
             st.info("Analysis initiated, but no results/errors stored.")
 elif not uploaded_file:
     results_placeholder.info("Upload a ZIP file to begin.")
 results_placeholder.divider()
 results_placeholder.markdown("_Assistant powered by Google Gemini._")