Spaces:

mgbam
/

AuditXCodeInsights

Sleeping

App Files Files Community

mgbam committed on Apr 6

Commit

95b3eeb

verified ·

1 Parent(s): 5477235

Update app.py

Browse files

Files changed (1) hide show

app.py +185 -201

app.py CHANGED Viewed

@@ -5,34 +5,38 @@ import io
 import json
 import os
 from pathlib import Path
-import time  # Added for simulating mock delay
 # --- Configuration ---
 GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
-MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Adjust as needed
 AVAILABLE_ANALYSES = {
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",
     "summarize_modules": "Summarize Complex Modules/Files",
     "suggest_refactoring": "Suggest Refactoring Opportunities"
 }
-CODE_EXTENSIONS = {
-    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go',
-    '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
-}
 # --- Session State Initialization ---
 if 'mock_api_call' not in st.session_state:
-    st.session_state.mock_api_call = False  # Default to using the real API
 # --- Gemini API Setup ---
 model = None
 def initialize_gemini_model():
-    """Initializes the Gemini model if not in mock mode."""
     global model
     if model is None and not st.session_state.mock_api_call:
         try:
@@ -48,38 +52,39 @@ def initialize_gemini_model():
             st.stop()
             return False
     elif st.session_state.mock_api_call:
-        print("Running in Mock Mode. Skipping Gemini initialization.")
         return True  # Allow proceeding in mock mode
     elif model is not None:
-        print("Gemini Model already initialized.")
         return True
     return False
 # --- Helper Functions ---
 def estimate_token_count(text):
-    """Roughly estimate token count (assumes ~3-4 characters per token)."""
     return len(text) // 3
-def process_zip_file(uploaded_file):
-    """
-    Extracts code files and their content from the uploaded ZIP file.
-    Returns:
-        code_files (dict): Mapping of file paths to content.
-        total_chars (int): Total number of characters in included files.
-        file_count (int): Count of processed code files.
-        ignored_files (list): List of files skipped or not processed.
-    """
     code_files = {}
     total_chars = 0
     file_count = 0
     ignored_files = []
     try:
-        with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
-            for member in zip_ref.infolist():
-                # Skip directories, hidden files, and files with '__' in the name
                 if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
                     continue
@@ -87,11 +92,12 @@ def process_zip_file(uploaded_file):
                 if file_path.suffix.lower() in CODE_EXTENSIONS:
                     try:
                         with zip_ref.open(member) as file:
                             try:
-                                content = file.read().decode('utf-8')
                             except UnicodeDecodeError:
                                 try:
-                                    content = file.read().decode('latin-1')
                                 except Exception as decode_err:
                                     ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                     continue
@@ -102,31 +108,42 @@ def process_zip_file(uploaded_file):
                     except Exception as read_err:
                         ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                 else:
-                    # Only add to ignored if it's not explicitly ignored by path rules above
                     if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
                         ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
     except zipfile.BadZipFile:
         st.error("🚨 Invalid or corrupted ZIP file.")
         return None, 0, 0, []
     except Exception as e:
         st.error(f"🚨 Error processing ZIP file: {e}")
         return None, 0, 0, []
     return code_files, total_chars, file_count, ignored_files
 def construct_analysis_prompt(code_files_dict, requested_analyses):
-    """
-    Constructs the prompt for Gemini, including code content and a JSON structure request.
-    Returns:
-        full_prompt (str): The complete prompt.
-        included_files (list): List of file names included in the prompt.
-    """
-    prompt_content = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
-    current_token_estimate = estimate_token_count(prompt_content)
     included_files = []
-    concatenated_code = ""
     for filename, content in code_files_dict.items():
         file_marker = f"--- START FILE: {filename} ---\n"
@@ -136,20 +153,23 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
         segment_token_estimate = estimate_token_count(segment)
         if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
-            concatenated_code += segment
             current_token_estimate += segment_token_estimate
             included_files.append(filename)
         else:
-            st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate} tokens).")
             break
     if not included_files:
         st.error("🚨 No code files could be included within the estimated token limit.")
         return None, []
-    prompt_content += concatenated_code
-    # Build the expected JSON structure dynamically based on the selected analyses
     json_structure_description = "{\n"
     structure_parts = []
     if "generate_docs" in requested_analyses:
@@ -162,6 +182,7 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
         structure_parts.append('    "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
     if "suggest_refactoring" in requested_analyses:
         structure_parts.append('    "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
     json_structure_description += ",\n".join(structure_parts)
     json_structure_description += "\n}"
@@ -176,40 +197,32 @@ Respond ONLY with a single, valid JSON object adhering strictly to the following
 **JSON Output Only:**
 """
-    full_prompt = prompt_content + prompt_footer
     return full_prompt, included_files
 def call_gemini_api(prompt):
-    """
-    Calls the Gemini API (or simulates it in mock mode) with the provided prompt.
-    Returns:
-        insights (dict): The parsed JSON response from the API.
-        error_message (str): An error message if something went wrong.
-    """
     if not prompt:
         return None, "Prompt generation failed."
-    # --- MOCK MODE LOGIC ---
     if st.session_state.mock_api_call:
-        st.info(" MOCK MODE: Simulating API call...")
-        time.sleep(2)  # Simulate network/processing delay
-        # Simulated successful response
         mock_json_response = json.dumps({
             "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n    \"\"\"Processes the input data using mock logic.\"\"\""}],
             "potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
-            "style_issues": [{"file": "mock/core.py", "line": 5, "description": "Variable 'varName' does not follow snake_case convention."}],
-            "module_summaries": [
-                {"file": "mock/core.py", "summary": "This file contains the core mock processing logic."},
-                {"file": "mock/utils.py", "summary": "Utility functions for mocking."}
-            ],
-            "refactoring_suggestions": [{"file": "mock/utils.py", "line": 30, "area": "calculate_metrics function", "suggestion": "Function is too long (> 50 lines), consider breaking it down."}]
         })
-        st.success("Mock response generated successfully.")
         return json.loads(mock_json_response), None
-    # --- REAL API CALL LOGIC ---
     else:
         if not initialize_gemini_model():
             return None, "Gemini Model Initialization Failed."
@@ -217,7 +230,10 @@ def call_gemini_api(prompt):
             return None, "Gemini model not available."
         try:
-            st.write(f"📡 Sending request to {GEMINI_MODEL_NAME}...")
             response = model.generate_content(
                 prompt,
                 generation_config=genai.types.GenerationConfig(temperature=0.2),
@@ -228,37 +244,34 @@ def call_gemini_api(prompt):
                     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                 ]
             )
-            st.write("✅ Response received from AI.")
             try:
                 json_response_text = response.text.strip()
-                # Remove potential markdown code block fences
                 if json_response_text.startswith("```json"):
                     json_response_text = json_response_text[7:]
                 if json_response_text.startswith("```"):
                     json_response_text = json_response_text[3:]
                 if json_response_text.endswith("```"):
                     json_response_text = json_response_text[:-3]
-                # Extract JSON object boundaries
                 json_start = json_response_text.find('{')
                 json_end = json_response_text.rfind('}') + 1
                 if json_start != -1 and json_end != -1 and json_end > json_start:
                     final_json_text = json_response_text[json_start:json_end]
                     insights = json.loads(final_json_text)
                     return insights, None
                 else:
-                    st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
                     return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
             except json.JSONDecodeError as json_err:
                 st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
-                st.error("Raw AI Response:")
                 st.code(response.text, language='text')
                 return None, f"AI response was not valid JSON: {json_err}"
             except AttributeError:
-                st.error("🚨 Unexpected API response structure.")
                 st.code(f"Response object: {response}", language='text')
                 try:
                     block_reason = response.prompt_feedback.block_reason
@@ -266,7 +279,7 @@ def call_gemini_api(prompt):
                         return None, f"Content blocked by API. Reason: {block_reason}"
                 except Exception:
                     pass
-                return None, "Unexpected response structure from API."
             except Exception as e:
                 st.error(f"🚨 Unexpected issue processing AI response: {e}")
                 try:
@@ -274,95 +287,82 @@ def call_gemini_api(prompt):
                 except Exception:
                     pass
                 return None, f"Unexpected response structure: {e}"
         except Exception as e:
             st.error(f"🚨 An error occurred during API call: {e}")
             error_msg = f"API call failed: {e}"
             if hasattr(e, 'message'):
                 if "429" in e.message:
-                    error_msg = "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
                 elif "API key not valid" in e.message:
-                    error_msg = "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
                 elif "blocked" in e.message.lower():
-                    error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
             elif "block_reason: SAFETY" in str(e):
-                error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
             return None, error_msg
 def display_results(results_json, requested_analyses):
-    """Renders the analysis results in the Streamlit interface."""
     st.header("📊 Analysis Report")
     if not isinstance(results_json, dict):
         st.error("Invalid results format received.")
         st.json(results_json)
         return
     if "raw_response" in results_json:
         st.subheader("Raw AI Response (JSON Parsing Failed)")
         st.code(results_json["raw_response"], language='text')
         return
-    def display_list_items(items, fields):
-        if items:
-            for item in items:
-                details = []
-                for field_key, field_label in fields.items():
-                    value = item.get(field_key, 'N/A')
-                    if value != 'N/A':
-                        details.append(f"**{field_label}:** {value}")
-                st.markdown("- " + " - ".join(details))
-                # Display multi-line outputs when applicable
-                if 'suggestion' in item:
-                    st.code(item['suggestion'], language='text')
-                elif 'description' in item:
-                    st.markdown(f"  > {item['description']}")
-                elif 'summary' in item:
-                    st.markdown(f"  > {item['summary']}")
-        else:
-            st.markdown("_No items found for this category._")
-        st.divider()
     display_config = {
-        "generate_docs": {
-            "key": "documentation_suggestions",
-            "title": AVAILABLE_ANALYSES["generate_docs"],
-            "fields": {"file": "File", "line": "Line"}
-        },
-        "find_bugs": {
-            "key": "potential_bugs",
-            "title": AVAILABLE_ANALYSES["find_bugs"],
-            "fields": {"file": "File", "line": "Line", "severity": "Severity"}
-        },
-        "check_style": {
-            "key": "style_issues",
-            "title": AVAILABLE_ANALYSES["check_style"],
-            "fields": {"file": "File", "line": "Line"}
-        },
-        "summarize_modules": {
-            "key": "module_summaries",
-            "title": AVAILABLE_ANALYSES["summarize_modules"],
-            "fields": {"file": "File"}
-        },
-        "suggest_refactoring": {
-            "key": "refactoring_suggestions",
-            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
-            "fields": {"file": "File", "line": "Line", "area": "Area"}
-        }
     }
-    any_results = False
     for analysis_key in requested_analyses:
         if analysis_key in display_config:
             config = display_config[analysis_key]
-            st.subheader(config["title"])
             items = results_json.get(config["key"], [])
-            display_list_items(items, config["fields"])
             if items:
-                any_results = True
-    if not any_results:
         st.info("No specific findings were identified in the analysis based on your selections.")
     st.download_button(
@@ -374,54 +374,36 @@ def display_results(results_json, requested_analyses):
 # --- Streamlit App Main Interface ---
 st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
-st.title("🤖 Codebase Audit & Documentation Assistant")
-st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")
-# Sidebar controls
 with st.sidebar:
     st.header("⚙️ Analysis Controls")
-    st.session_state.mock_api_call = st.toggle(
-        "🧪 Enable Mock API Mode (for Testing)",
-        value=st.session_state.mock_api_call,
-        help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing."
-    )
-    if st.session_state.mock_api_call:
-        st.info("Mock API Mode ACTIVE")
-    else:
-        st.info("Using REAL Gemini API")
     st.divider()
     st.header("🔎 Select Analyses")
-    selected_analyses = []
-    for key, name in AVAILABLE_ANALYSES.items():
-        if st.checkbox(name, value=True, key=f"cb_{key}"):
-            selected_analyses.append(key)
     st.divider()
     st.header("📄 How To Use")
-    st.info(
-        "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
-        "2. Toggle Mock Mode if needed.\n"
-        "3. Select desired analyses.\n"
-        "4. Create a **ZIP archive** of your codebase.\n"
-        "5. Upload the `.zip` file.\n"
-        "6. Click 'Analyze Codebase'.\n"
-        "7. Review the report."
-    )
-    st.info(f"**Note:** Only files with common code extensions ({', '.join(CODE_EXTENSIONS)}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
     st.divider()
-    st.warning("⚠️ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode is OFF. Do not upload sensitive code if uncomfortable.")
-# Main content area
-uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader")
-analysis_triggered = False
-results_cache = None  # To store results briefly
 if uploaded_file:
     st.success(f"✅ File '{uploaded_file.name}' uploaded.")
-    with st.spinner("Inspecting ZIP file..."):
-        code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)
     if code_files is not None:
         st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")
@@ -431,42 +413,44 @@ if uploaded_file:
         analyze_button_disabled = (not selected_analyses or file_count == 0)
         analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
-        if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
-            analysis_triggered = True
             if not selected_analyses:
-                st.warning("Please select at least one analysis type from the sidebar.")
             elif file_count == 0:
-                st.warning("No relevant code files found in the ZIP archive to analyze.")
             else:
-                st.divider()
-                with st.spinner(f"🚀 Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... This may take time."):
-                    analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
-                    if analysis_prompt and included_files_in_prompt:
-                        st.write(f"Analyzing {len(included_files_in_prompt)} files...")
-                        results_json, error_message = call_gemini_api(analysis_prompt)
-                        results_cache = (results_json, error_message)
-                    elif not included_files_in_prompt:
-                        results_cache = (None, "Could not proceed: No files included in prompt (check token limits/errors).")
-                    else:
-                        results_cache = (None, "Failed to generate analysis prompt.")
-    else:
-        # Error during ZIP processing (error already displayed)
-        pass
-if analysis_triggered and results_cache:
-    results_json, error_message = results_cache
-    st.divider()
-    if error_message:
-        st.error(f"Analysis Failed: {error_message}")
-        if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
-            st.subheader("Raw AI Response")
-            st.code(results_json["raw_response"], language='text')
-    elif results_json:
-        display_results(results_json, selected_analyses)
-    else:
-        st.error("Analysis did not return results or an unknown error occurred.")
 elif not uploaded_file:
-    st.info("Upload a ZIP file containing your source code to begin.")
-st.divider()
-st.markdown("_Assistant powered by Google Gemini._")

 import json
 import os
 from pathlib import Path
+import time
 # --- Configuration ---
 GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
+MAX_PROMPT_TOKENS_ESTIMATE = 800000
+RESULTS_PAGE_SIZE = 25  # Number of items to show per category initially
 AVAILABLE_ANALYSES = {
+    # ... (keep the same)
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",
     "summarize_modules": "Summarize Complex Modules/Files",
     "suggest_refactoring": "Suggest Refactoring Opportunities"
 }
+CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'}
 # --- Session State Initialization ---
 if 'mock_api_call' not in st.session_state:
+    st.session_state.mock_api_call = False
+if 'analysis_results' not in st.session_state:
+    st.session_state.analysis_results = None  # Store results here
+if 'error_message' not in st.session_state:
+    st.session_state.error_message = None
+if 'analysis_requested' not in st.session_state:
+    st.session_state.analysis_requested = False  # Flag to know when analysis is done
 # --- Gemini API Setup ---
 model = None
 def initialize_gemini_model():
+    """Initializes the Gemini API model unless running in mock mode."""
     global model
     if model is None and not st.session_state.mock_api_call:
         try:
             st.stop()
             return False
     elif st.session_state.mock_api_call:
+        # Running in Mock Mode. Skipping Gemini initialization.
         return True  # Allow proceeding in mock mode
     elif model is not None:
+        # Gemini Model already initialized.
         return True
     return False
 # --- Helper Functions ---
 def estimate_token_count(text):
+    """Roughly estimate token count (assuming ~3 characters per token)."""
     return len(text) // 3
+# --- OPTIMIZATION: Cache ZIP processing ---
+@st.cache_data(max_entries=5)  # Cache results for recent uploads
+def process_zip_file_cached(file_id, file_size, file_content_bytes):
+    """Extracts code files and their content. Cached function."""
     code_files = {}
     total_chars = 0
     file_count = 0
     ignored_files = []
+    status_placeholder = st.empty()  # For progress bar
+    progress_bar = status_placeholder.progress(0)
     try:
+        with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
+            members = zip_ref.infolist()
+            total_members = len(members)
+            for i, member in enumerate(members):
+                # Update progress bar periodically (every 10 files)
+                if i % 10 == 0:
+                    progress_bar.progress(int((i / total_members) * 100))
                 if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
                     continue
                 if file_path.suffix.lower() in CODE_EXTENSIONS:
                     try:
                         with zip_ref.open(member) as file:
+                            file_bytes = file.read()
                             try:
+                                content = file_bytes.decode('utf-8')
                             except UnicodeDecodeError:
                                 try:
+                                    content = file_bytes.decode('latin-1')
                                 except Exception as decode_err:
                                     ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                     continue
                     except Exception as read_err:
                         ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                 else:
                     if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
                         ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
+            progress_bar.progress(100)  # Ensure it completes
+            status_placeholder.empty()  # Remove progress bar after completion
     except zipfile.BadZipFile:
+        status_placeholder.empty()
         st.error("🚨 Invalid or corrupted ZIP file.")
         return None, 0, 0, []
     except Exception as e:
+        status_placeholder.empty()
         st.error(f"🚨 Error processing ZIP file: {e}")
         return None, 0, 0, []
+    if file_count == 0 and not ignored_files:
+        st.warning("No files with recognized code extensions found in the ZIP.")
+    elif file_count == 0 and ignored_files:
+        st.warning("No files with recognized code extensions found. Some files were skipped.")
+    print(f"Cache miss or new file: Processed ZIP {file_id}")  # Debug print
     return code_files, total_chars, file_count, ignored_files
 def construct_analysis_prompt(code_files_dict, requested_analyses):
+    """Constructs the prompt for Gemini, including code content and JSON structure request."""
+    prompt_parts = ["Analyze the following codebase provided as a collection of file paths and their content.\n\n"]
+    current_token_estimate = estimate_token_count(prompt_parts[0])
     included_files = []
+    # Use join for potentially faster concatenation
+    code_segments = []
+    # Provide feedback for large codebases
+    prompt_status = st.empty()
+    if len(code_files_dict) > 50:
+        prompt_status.write("Constructing prompt (processing files)...")
     for filename, content in code_files_dict.items():
         file_marker = f"--- START FILE: {filename} ---\n"
         segment_token_estimate = estimate_token_count(segment)
         if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
+            code_segments.append(segment)
             current_token_estimate += segment_token_estimate
             included_files.append(filename)
         else:
+            st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate:,} tokens).")
             break
+    prompt_status.empty()  # Clear status message
     if not included_files:
         st.error("🚨 No code files could be included within the estimated token limit.")
         return None, []
+    concatenated_code = "".join(code_segments)
+    prompt_parts.append(concatenated_code)
+    # Generate the expected JSON structure description based on selected analyses
     json_structure_description = "{\n"
     structure_parts = []
     if "generate_docs" in requested_analyses:
         structure_parts.append('    "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
     if "suggest_refactoring" in requested_analyses:
         structure_parts.append('    "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
     json_structure_description += ",\n".join(structure_parts)
     json_structure_description += "\n}"
 **JSON Output Only:**
 """
+    prompt_parts.append(prompt_footer)
+    full_prompt = "".join(prompt_parts)
     return full_prompt, included_files
 def call_gemini_api(prompt):
+    """Calls the Gemini API or returns mock data based on session state."""
     if not prompt:
         return None, "Prompt generation failed."
+    # MOCK MODE LOGIC
     if st.session_state.mock_api_call:
+        st.info("MOCK MODE: Simulating API call...")
+        st.write("...")  # Minimal feedback in mock mode
+        time.sleep(1)  # Shorter mock delay
         mock_json_response = json.dumps({
             "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n    \"\"\"Processes the input data using mock logic.\"\"\""}],
             "potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
+            "style_issues": [],
+            "module_summaries": [],
+            "refactoring_suggestions": []
         })
+        st.success("Mock response generated.")
         return json.loads(mock_json_response), None
+    # REAL API CALL LOGIC
     else:
         if not initialize_gemini_model():
             return None, "Gemini Model Initialization Failed."
             return None, "Gemini model not available."
         try:
+            api_status = st.empty()
+            token_estimate = estimate_token_count(prompt)
+            api_status.info(f"📡 Sending request to {GEMINI_MODEL_NAME} (Estimated prompt tokens: {token_estimate:,})... This can take several minutes depending on code size and model load.")
+            start_time = time.time()
             response = model.generate_content(
                 prompt,
                 generation_config=genai.types.GenerationConfig(temperature=0.2),
                     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                 ]
             )
+            end_time = time.time()
+            api_status.success(f"✅ Response received from AI in {end_time - start_time:.2f} seconds.")
+            time.sleep(1)
+            api_status.empty()
             try:
                 json_response_text = response.text.strip()
                 if json_response_text.startswith("```json"):
                     json_response_text = json_response_text[7:]
                 if json_response_text.startswith("```"):
                     json_response_text = json_response_text[3:]
                 if json_response_text.endswith("```"):
                     json_response_text = json_response_text[:-3]
                 json_start = json_response_text.find('{')
                 json_end = json_response_text.rfind('}') + 1
                 if json_start != -1 and json_end != -1 and json_end > json_start:
                     final_json_text = json_response_text[json_start:json_end]
                     insights = json.loads(final_json_text)
                     return insights, None
                 else:
+                    st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response.")
                     return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
             except json.JSONDecodeError as json_err:
                 st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
                 st.code(response.text, language='text')
                 return None, f"AI response was not valid JSON: {json_err}"
             except AttributeError:
+                st.error("🚨 Unexpected API response structure (AttributeError).")
                 st.code(f"Response object: {response}", language='text')
                 try:
                     block_reason = response.prompt_feedback.block_reason
                         return None, f"Content blocked by API. Reason: {block_reason}"
                 except Exception:
                     pass
+                return None, "Unexpected response structure from API (AttributeError)."
             except Exception as e:
                 st.error(f"🚨 Unexpected issue processing AI response: {e}")
                 try:
                 except Exception:
                     pass
                 return None, f"Unexpected response structure: {e}"
         except Exception as e:
+            api_status.empty()
             st.error(f"🚨 An error occurred during API call: {e}")
             error_msg = f"API call failed: {e}"
             if hasattr(e, 'message'):
                 if "429" in e.message:
+                    error_msg = "API Quota Exceeded or Rate Limit hit."
                 elif "API key not valid" in e.message:
+                    error_msg = "Invalid Gemini API Key."
                 elif "blocked" in e.message.lower():
+                    error_msg = "Content blocked due to safety settings."
             elif "block_reason: SAFETY" in str(e):
+                error_msg = "Content blocked due to safety settings."
             return None, error_msg
 def display_results(results_json, requested_analyses):
     """Render the analysis report, one paginated section per requested analysis.

     A non-dict response is reported as invalid and dumped as JSON; a dict
     carrying a ``"raw_response"`` entry is shown verbatim. Otherwise each
     requested analysis gets a sub-header with its item count, the currently
     visible slice of its items, and a "Show more" button that reveals
     another ``RESULTS_PAGE_SIZE`` items by bumping the per-analysis counter
     kept in ``st.session_state["visible_<analysis_key>"]``.

     Category values come straight from model-generated JSON, so they are
     normalised before rendering: a missing, ``null`` or non-list category is
     treated as empty, entries that are not dicts are skipped, and fields
     whose value is ``None`` are omitted just like absent ones.

     Args:
         results_json: Parsed model response, expected to be a dict mapping
             result categories (e.g. ``"potential_bugs"``) to lists of
             finding dicts.
         requested_analyses: Iterable of analysis keys selecting which
             sections to render; keys without a display configuration are
             ignored.
     """
     st.header("📊 Analysis Report")
     if not isinstance(results_json, dict):
         st.error("Invalid results format received.")
         st.json(results_json)
         return
     if "raw_response" in results_json:
         st.subheader("Raw AI Response (JSON Parsing Failed)")
         st.code(results_json["raw_response"], language='text')
         return

     # Per analysis: the response key holding its items, the section title,
     # and the item fields (with their labels) shown on the bullet line.
     display_config = {
         "generate_docs": {"key": "documentation_suggestions", "title": AVAILABLE_ANALYSES["generate_docs"], "fields": {"file": "File", "line": "Line"}},
         "find_bugs": {"key": "potential_bugs", "title": AVAILABLE_ANALYSES["find_bugs"], "fields": {"file": "File", "line": "Line", "severity": "Severity"}},
         "check_style": {"key": "style_issues", "title": AVAILABLE_ANALYSES["check_style"], "fields": {"file": "File", "line": "Line"}},
         "summarize_modules": {"key": "module_summaries", "title": AVAILABLE_ANALYSES["summarize_modules"], "fields": {"file": "File"}},
         "suggest_refactoring": {"key": "refactoring_suggestions", "title": AVAILABLE_ANALYSES["suggest_refactoring"], "fields": {"file": "File", "line": "Line", "area": "Area"}}
     }

     any_results_found = False
     for analysis_key in requested_analyses:
         config = display_config.get(analysis_key)
         if config is None:
             continue

         # The model may emit null, a string or an object where a list of
         # finding objects is expected; never let that crash the report.
         raw_items = results_json.get(config["key"])
         if isinstance(raw_items, list):
             items = [entry for entry in raw_items if isinstance(entry, dict)]
         else:
             items = []

         total_items = len(items)
         st.subheader(f"{config['title']} ({total_items} found)")
         if items:
             any_results_found = True
             state_key = f"visible_{analysis_key}"
             if state_key not in st.session_state:
                 st.session_state[state_key] = RESULTS_PAGE_SIZE
             visible_count = st.session_state[state_key]

             for item in items[:visible_count]:
                 details = []
                 for field_key, field_label in config["fields"].items():
                     value = item.get(field_key)
                     # Absent, null and the literal 'N/A' all mean "no value".
                     if value is None or value == 'N/A':
                         continue
                     if field_key == 'file':
                         details.append(f"**{field_label}:** `{value}`")
                     else:
                         details.append(f"**{field_label}:** {value}")
                 st.markdown("- " + " - ".join(details))

                 # Only the first available body field is shown per item.
                 if 'suggestion' in item:
                     st.code(item['suggestion'], language='text')
                 elif 'description' in item:
                     st.markdown(f"  > {item['description']}")
                 elif 'summary' in item:
                     st.markdown(f"  > {item['summary']}")

             if total_items > visible_count:
                 remaining = total_items - visible_count
                 if st.button(f"Show more ({remaining} remaining)", key=f"more_{analysis_key}"):
                     st.session_state[state_key] += RESULTS_PAGE_SIZE
                     st.rerun()
         else:
             st.markdown("_No items found for this category._")
         st.divider()

     if not any_results_found:
         st.info("No specific findings were identified in the analysis based on your selections.")
     st.download_button(
 # --- Streamlit App Main Interface ---
 # Page setup: wide layout, a title, and a one-line intro naming the configured model.
 st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
+st.title("🤖 Codebase Audit Assistant")
+st.markdown(f"Upload codebase (`.zip`) for analysis via **{GEMINI_MODEL_NAME}**.")
 # Sidebar: mock-mode toggle, analysis selection and usage notes.
 with st.sidebar:
     st.header("⚙️ Analysis Controls")
     # The toggle is seeded from session state and its value is written straight back.
+    st.session_state.mock_api_call = st.toggle("🧪 Enable Mock API Mode", value=st.session_state.mock_api_call, help="Use fake data instead of calling Gemini API.")
+    st.info("Mock API Mode ACTIVE" if st.session_state.mock_api_call else "Using REAL Gemini API")
     st.divider()
     st.header("🔎 Select Analyses")
     # One checkbox per AVAILABLE_ANALYSES entry (checked by default); keep the keys of the ticked ones.
+    selected_analyses = [key for key, name in AVAILABLE_ANALYSES.items() if st.checkbox(name, value=True, key=f"cb_{key}")]
     st.divider()
     st.header("📄 How To Use")
+    st.info("1. Set API Key (if not in Mock Mode).\n2. Toggle Mock Mode if needed.\n3. Select analyses.\n4. Create & Upload a **ZIP** of your code.\n5. Click 'Analyze Codebase'.\n6. Review the report.")
+    st.info(f"Note: Only common code extensions are supported. Analysis is limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} estimated tokens).")
     st.divider()
+    st.warning("⚠️ **Privacy:** Code is sent to the Google API if Mock Mode is OFF.")
 # The on_change callback clears any stored results/error and resets the
 # analysis_requested flag, so output from a previous upload is not shown.
+uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader",
+                                 on_change=lambda: st.session_state.update(analysis_results=None, error_message=None, analysis_requested=False))
+analysis_button_placeholder = st.empty()  # Placeholder for the button
+results_placeholder = st.container()        # Container for results display
 if uploaded_file:
     st.success(f"✅ File '{uploaded_file.name}' uploaded.")
     # file_id combines the upload's name and size; presumably it acts as the
     # cache key inside process_zip_file_cached (confirm against that helper).
+    uploaded_file_bytes = uploaded_file.getvalue()
+    file_id = f"{uploaded_file.name}-{uploaded_file.size}"
     # ignored_files is unpacked but not referenced again in this section.
+    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(file_id, uploaded_file.size, uploaded_file_bytes)
     # NOTE(review): code_files being None presumably signals that ZIP processing failed; confirm.
     if code_files is not None:
         # NOTE(review): total_chars is formatted as a number here, yet it is passed to
         # estimate_token_count, which (as defined near the top of the file) computes
         # len(text) // 3. Confirm the helper accepts an int.
         st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")
         # The button is disabled (and relabelled) when nothing is selected or no code files were found.
         analyze_button_disabled = (not selected_analyses or file_count == 0)
         analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
+        if analysis_button_placeholder.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
             # On click: flag the request and drop any previous outcome before running.
+            st.session_state.analysis_requested = True
+            st.session_state.analysis_results = None
+            st.session_state.error_message = None
             # These two checks repeat the disabled condition above as a defensive guard.
             if not selected_analyses:
+                st.warning("Please select analysis types.")
             elif file_count == 0:
+                st.warning("No relevant code files found.")
             else:
                 # Build the prompt and call the API under a spinner drawn in the results
                 # container; the outcome is stored in session state, not rendered here.
+                with results_placeholder:
+                    with st.spinner(f"🚀 Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... Please wait."):
+                        analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                         # Call the API only when both the prompt and the included-files value are truthy.
+                        if analysis_prompt and included_files_in_prompt:
+                            results_json, error_msg = call_gemini_api(analysis_prompt)
+                            st.session_state.analysis_results = results_json
+                            st.session_state.error_message = error_msg
+                        elif not included_files_in_prompt:
+                            st.session_state.error_message = "Could not proceed: No files included (check token limits/errors)."
+                        else:
+                            st.session_state.error_message = "Failed to generate analysis prompt."
                 # Rerun so the stored outcome is rendered by the section below.
+                st.rerun()
 # Render the stored outcome: an error (plus the raw response when one was kept),
 # the report, or a fallback notice when neither was stored.
+if st.session_state.analysis_requested:
+    with results_placeholder:
+        st.divider()
+        if st.session_state.error_message:
+            st.error(f"Analysis Failed: {st.session_state.error_message}")
+            if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
+                st.subheader("Raw AI Response")
+                st.code(st.session_state.analysis_results["raw_response"], language='text')
+        elif st.session_state.analysis_results:
             # Uses the sidebar selection of the current run, which may differ from
             # the selection that was active when the analysis was executed.
+            display_results(st.session_state.analysis_results, selected_analyses)
+        else:
+            st.info("Analysis initiated, but no results or errors were stored. Please try again.")
 # No analysis requested and nothing uploaded yet: prompt for an upload.
 elif not uploaded_file:
+    results_placeholder.info("Upload a ZIP file containing your source code to begin.")
 # Footer, appended to the results container on every run.
+results_placeholder.divider()
+results_placeholder.markdown("_Assistant powered by Google Gemini._")