mgbam committed on
Commit 95b3eeb · verified · 1 Parent(s): 5477235

Update app.py

Files changed (1)
  1. app.py +185 -201
app.py CHANGED
@@ -5,34 +5,38 @@ import io
  import json
  import os
  from pathlib import Path
- import time # Added for simulating mock delay

  # --- Configuration ---
  GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
- MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Adjust as needed

  AVAILABLE_ANALYSES = {
  "generate_docs": "Generate Missing Docstrings/Comments",
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
  "check_style": "Check Style Guide Compliance (General)",
  "summarize_modules": "Summarize Complex Modules/Files",
  "suggest_refactoring": "Suggest Refactoring Opportunities"
  }
-
- CODE_EXTENSIONS = {
- '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go',
- '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
- }

  # --- Session State Initialization ---
  if 'mock_api_call' not in st.session_state:
- st.session_state.mock_api_call = False # Default to using the real API

  # --- Gemini API Setup ---
  model = None

  def initialize_gemini_model():
- """Initializes the Gemini model if not in mock mode."""
  global model
  if model is None and not st.session_state.mock_api_call:
  try:
@@ -48,38 +52,39 @@ def initialize_gemini_model():
  st.stop()
  return False
  elif st.session_state.mock_api_call:
- print("Running in Mock Mode. Skipping Gemini initialization.")
  return True # Allow proceeding in mock mode
  elif model is not None:
- print("Gemini Model already initialized.")
  return True
  return False

  # --- Helper Functions ---

  def estimate_token_count(text):
- """Roughly estimate token count (assumes ~3-4 characters per token)."""
  return len(text) // 3

- def process_zip_file(uploaded_file):
- """
- Extracts code files and their content from the uploaded ZIP file.
-
- Returns:
- code_files (dict): Mapping of file paths to content.
- total_chars (int): Total number of characters in included files.
- file_count (int): Count of processed code files.
- ignored_files (list): List of files skipped or not processed.
- """
  code_files = {}
  total_chars = 0
  file_count = 0
  ignored_files = []

  try:
- with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
- for member in zip_ref.infolist():
- # Skip directories, hidden files, and files with '__' in the name
  if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
  continue

@@ -87,11 +92,12 @@ def process_zip_file(uploaded_file):
  if file_path.suffix.lower() in CODE_EXTENSIONS:
  try:
  with zip_ref.open(member) as file:
  try:
- content = file.read().decode('utf-8')
  except UnicodeDecodeError:
  try:
- content = file.read().decode('latin-1')
  except Exception as decode_err:
  ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
  continue
@@ -102,31 +108,42 @@ def process_zip_file(uploaded_file):
  except Exception as read_err:
  ignored_files.append(f"{member.filename} (Read Error: {read_err})")
  else:
- # Only add to ignored if it's not explicitly ignored by path rules above
  if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
  ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")

  except zipfile.BadZipFile:
  st.error("🚨 Invalid or corrupted ZIP file.")
  return None, 0, 0, []
  except Exception as e:
  st.error(f"🚨 Error processing ZIP file: {e}")
  return None, 0, 0, []

  return code_files, total_chars, file_count, ignored_files

  def construct_analysis_prompt(code_files_dict, requested_analyses):
- """
- Constructs the prompt for Gemini, including code content and a JSON structure request.
-
- Returns:
- full_prompt (str): The complete prompt.
- included_files (list): List of file names included in the prompt.
- """
- prompt_content = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
- current_token_estimate = estimate_token_count(prompt_content)
  included_files = []
- concatenated_code = ""

  for filename, content in code_files_dict.items():
  file_marker = f"--- START FILE: {filename} ---\n"
@@ -136,20 +153,23 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
  segment_token_estimate = estimate_token_count(segment)

  if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
- concatenated_code += segment
  current_token_estimate += segment_token_estimate
  included_files.append(filename)
  else:
- st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate} tokens).")
  break

  if not included_files:
  st.error("🚨 No code files could be included within the estimated token limit.")
  return None, []

- prompt_content += concatenated_code

- # Build the expected JSON structure dynamically based on the selected analyses
  json_structure_description = "{\n"
  structure_parts = []
  if "generate_docs" in requested_analyses:
@@ -162,6 +182,7 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
  structure_parts.append(' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
  if "suggest_refactoring" in requested_analyses:
  structure_parts.append(' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
  json_structure_description += ",\n".join(structure_parts)
  json_structure_description += "\n}"

@@ -176,40 +197,32 @@ Respond ONLY with a single, valid JSON object adhering strictly to the following

  **JSON Output Only:**
  """
- full_prompt = prompt_content + prompt_footer
  return full_prompt, included_files

  def call_gemini_api(prompt):
- """
- Calls the Gemini API (or simulates it in mock mode) with the provided prompt.
-
- Returns:
- insights (dict): The parsed JSON response from the API.
- error_message (str): An error message if something went wrong.
- """
  if not prompt:
  return None, "Prompt generation failed."

- # --- MOCK MODE LOGIC ---
  if st.session_state.mock_api_call:
- st.info(" MOCK MODE: Simulating API call...")
- time.sleep(2) # Simulate network/processing delay

- # Simulated successful response
  mock_json_response = json.dumps({
  "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
  "potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
- "style_issues": [{"file": "mock/core.py", "line": 5, "description": "Variable 'varName' does not follow snake_case convention."}],
- "module_summaries": [
- {"file": "mock/core.py", "summary": "This file contains the core mock processing logic."},
- {"file": "mock/utils.py", "summary": "Utility functions for mocking."}
- ],
- "refactoring_suggestions": [{"file": "mock/utils.py", "line": 30, "area": "calculate_metrics function", "suggestion": "Function is too long (> 50 lines), consider breaking it down."}]
  })
- st.success("Mock response generated successfully.")
  return json.loads(mock_json_response), None

- # --- REAL API CALL LOGIC ---
  else:
  if not initialize_gemini_model():
  return None, "Gemini Model Initialization Failed."
@@ -217,7 +230,10 @@ def call_gemini_api(prompt):
  return None, "Gemini model not available."

  try:
- st.write(f"📡 Sending request to {GEMINI_MODEL_NAME}...")
  response = model.generate_content(
  prompt,
  generation_config=genai.types.GenerationConfig(temperature=0.2),
@@ -228,37 +244,34 @@ def call_gemini_api(prompt):
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
  ]
  )
- st.write("✅ Response received from AI.")

  try:
  json_response_text = response.text.strip()
- # Remove potential markdown code block fences
  if json_response_text.startswith("```json"):
  json_response_text = json_response_text[7:]
  if json_response_text.startswith("```"):
  json_response_text = json_response_text[3:]
  if json_response_text.endswith("```"):
  json_response_text = json_response_text[:-3]
-
- # Extract JSON object boundaries
  json_start = json_response_text.find('{')
  json_end = json_response_text.rfind('}') + 1
-
  if json_start != -1 and json_end != -1 and json_end > json_start:
  final_json_text = json_response_text[json_start:json_end]
  insights = json.loads(final_json_text)
  return insights, None
  else:
- st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
  return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
-
  except json.JSONDecodeError as json_err:
  st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
- st.error("Raw AI Response:")
  st.code(response.text, language='text')
  return None, f"AI response was not valid JSON: {json_err}"
  except AttributeError:
- st.error("🚨 Unexpected API response structure.")
  st.code(f"Response object: {response}", language='text')
  try:
  block_reason = response.prompt_feedback.block_reason
@@ -266,7 +279,7 @@ def call_gemini_api(prompt):
  return None, f"Content blocked by API. Reason: {block_reason}"
  except Exception:
  pass
- return None, "Unexpected response structure from API."
  except Exception as e:
  st.error(f"🚨 Unexpected issue processing AI response: {e}")
  try:
@@ -274,95 +287,82 @@ def call_gemini_api(prompt):
  except Exception:
  pass
  return None, f"Unexpected response structure: {e}"
-
  except Exception as e:
  st.error(f"🚨 An error occurred during API call: {e}")
  error_msg = f"API call failed: {e}"
  if hasattr(e, 'message'):
  if "429" in e.message:
- error_msg = "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
  elif "API key not valid" in e.message:
- error_msg = "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
  elif "blocked" in e.message.lower():
- error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
  elif "block_reason: SAFETY" in str(e):
- error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
-
  return None, error_msg

  def display_results(results_json, requested_analyses):
- """Renders the analysis results in the Streamlit interface."""
  st.header("📊 Analysis Report")
-
  if not isinstance(results_json, dict):
  st.error("Invalid results format received.")
  st.json(results_json)
  return
-
  if "raw_response" in results_json:
  st.subheader("Raw AI Response (JSON Parsing Failed)")
  st.code(results_json["raw_response"], language='text')
  return

- def display_list_items(items, fields):
- if items:
- for item in items:
- details = []
- for field_key, field_label in fields.items():
- value = item.get(field_key, 'N/A')
- if value != 'N/A':
- details.append(f"**{field_label}:** {value}")
- st.markdown("- " + " - ".join(details))
- # Display multi-line outputs when applicable
- if 'suggestion' in item:
- st.code(item['suggestion'], language='text')
- elif 'description' in item:
- st.markdown(f" > {item['description']}")
- elif 'summary' in item:
- st.markdown(f" > {item['summary']}")
- else:
- st.markdown("_No items found for this category._")
- st.divider()
-
  display_config = {
- "generate_docs": {
- "key": "documentation_suggestions",
- "title": AVAILABLE_ANALYSES["generate_docs"],
- "fields": {"file": "File", "line": "Line"}
- },
- "find_bugs": {
- "key": "potential_bugs",
- "title": AVAILABLE_ANALYSES["find_bugs"],
- "fields": {"file": "File", "line": "Line", "severity": "Severity"}
- },
- "check_style": {
- "key": "style_issues",
- "title": AVAILABLE_ANALYSES["check_style"],
- "fields": {"file": "File", "line": "Line"}
- },
- "summarize_modules": {
- "key": "module_summaries",
- "title": AVAILABLE_ANALYSES["summarize_modules"],
- "fields": {"file": "File"}
- },
- "suggest_refactoring": {
- "key": "refactoring_suggestions",
- "title": AVAILABLE_ANALYSES["suggest_refactoring"],
- "fields": {"file": "File", "line": "Line", "area": "Area"}
- }
  }

- any_results = False
  for analysis_key in requested_analyses:
  if analysis_key in display_config:
  config = display_config[analysis_key]
- st.subheader(config["title"])
  items = results_json.get(config["key"], [])
- display_list_items(items, config["fields"])
  if items:
- any_results = True

- if not any_results:
  st.info("No specific findings were identified in the analysis based on your selections.")

  st.download_button(
@@ -374,54 +374,36 @@ def display_results(results_json, requested_analyses):

  # --- Streamlit App Main Interface ---
  st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")

- st.title("🤖 Codebase Audit & Documentation Assistant")
- st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")
-
- # Sidebar controls
  with st.sidebar:
  st.header("⚙️ Analysis Controls")
- st.session_state.mock_api_call = st.toggle(
- "🧪 Enable Mock API Mode (for Testing)",
- value=st.session_state.mock_api_call,
- help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing."
- )
- if st.session_state.mock_api_call:
- st.info("Mock API Mode ACTIVE")
- else:
- st.info("Using REAL Gemini API")
-
  st.divider()
  st.header("🔎 Select Analyses")
- selected_analyses = []
- for key, name in AVAILABLE_ANALYSES.items():
- if st.checkbox(name, value=True, key=f"cb_{key}"):
- selected_analyses.append(key)
-
  st.divider()
  st.header("📄 How To Use")
- st.info(
- "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
- "2. Toggle Mock Mode if needed.\n"
- "3. Select desired analyses.\n"
- "4. Create a **ZIP archive** of your codebase.\n"
- "5. Upload the `.zip` file.\n"
- "6. Click 'Analyze Codebase'.\n"
- "7. Review the report."
- )
- st.info(f"**Note:** Only files with common code extensions ({', '.join(CODE_EXTENSIONS)}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
  st.divider()
- st.warning("⚠️ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode is OFF. Do not upload sensitive code if uncomfortable.")

- # Main content area
- uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader")
- analysis_triggered = False
- results_cache = None # To store results briefly

  if uploaded_file:
  st.success(f"✅ File '{uploaded_file.name}' uploaded.")
- with st.spinner("Inspecting ZIP file..."):
- code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)

  if code_files is not None:
  st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")
@@ -431,42 +413,44 @@ if uploaded_file:

  analyze_button_disabled = (not selected_analyses or file_count == 0)
  analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
- if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
- analysis_triggered = True
  if not selected_analyses:
- st.warning("Please select at least one analysis type from the sidebar.")
  elif file_count == 0:
- st.warning("No relevant code files found in the ZIP archive to analyze.")
  else:
- st.divider()
- with st.spinner(f"🚀 Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... This may take time."):
- analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
- if analysis_prompt and included_files_in_prompt:
- st.write(f"Analyzing {len(included_files_in_prompt)} files...")
- results_json, error_message = call_gemini_api(analysis_prompt)
- results_cache = (results_json, error_message)
- elif not included_files_in_prompt:
- results_cache = (None, "Could not proceed: No files included in prompt (check token limits/errors).")
- else:
- results_cache = (None, "Failed to generate analysis prompt.")
- else:
- # Error during ZIP processing (error already displayed)
- pass

- if analysis_triggered and results_cache:
- results_json, error_message = results_cache
- st.divider()
- if error_message:
- st.error(f"Analysis Failed: {error_message}")
- if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
- st.subheader("Raw AI Response")
- st.code(results_json["raw_response"], language='text')
- elif results_json:
- display_results(results_json, selected_analyses)
- else:
- st.error("Analysis did not return results or an unknown error occurred.")
  elif not uploaded_file:
- st.info("Upload a ZIP file containing your source code to begin.")

- st.divider()
- st.markdown("_Assistant powered by Google Gemini._")
  import json
  import os
  from pathlib import Path
+ import time

  # --- Configuration ---
  GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
+ MAX_PROMPT_TOKENS_ESTIMATE = 800000
+ RESULTS_PAGE_SIZE = 25 # Number of items to show per category initially

  AVAILABLE_ANALYSES = {
+ # ... (keep the same)
  "generate_docs": "Generate Missing Docstrings/Comments",
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
  "check_style": "Check Style Guide Compliance (General)",
  "summarize_modules": "Summarize Complex Modules/Files",
  "suggest_refactoring": "Suggest Refactoring Opportunities"
  }
+ CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'}

  # --- Session State Initialization ---
  if 'mock_api_call' not in st.session_state:
+ st.session_state.mock_api_call = False
+ if 'analysis_results' not in st.session_state:
+ st.session_state.analysis_results = None # Store results here
+ if 'error_message' not in st.session_state:
+ st.session_state.error_message = None
+ if 'analysis_requested' not in st.session_state:
+ st.session_state.analysis_requested = False # Flag to know when analysis is done

  # --- Gemini API Setup ---
  model = None

  def initialize_gemini_model():
+ """Initializes the Gemini API model unless running in mock mode."""
  global model
  if model is None and not st.session_state.mock_api_call:
  try:

  st.stop()
  return False
  elif st.session_state.mock_api_call:
+ # Running in Mock Mode. Skipping Gemini initialization.
  return True # Allow proceeding in mock mode
  elif model is not None:
+ # Gemini Model already initialized.
  return True
  return False

  # --- Helper Functions ---

  def estimate_token_count(text):
+ """Roughly estimate token count (assuming ~3 characters per token)."""
  return len(text) // 3

+ # --- OPTIMIZATION: Cache ZIP processing ---
+ @st.cache_data(max_entries=5) # Cache results for recent uploads
+ def process_zip_file_cached(file_id, file_size, file_content_bytes):
+ """Extracts code files and their content. Cached function."""
  code_files = {}
  total_chars = 0
  file_count = 0
  ignored_files = []
+ status_placeholder = st.empty() # For progress bar
+ progress_bar = status_placeholder.progress(0)

  try:
+ with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
+ members = zip_ref.infolist()
+ total_members = len(members)
+ for i, member in enumerate(members):
+ # Update progress bar periodically (every 10 files)
+ if i % 10 == 0:
+ progress_bar.progress(int((i / total_members) * 100))
+
  if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
  continue

  if file_path.suffix.lower() in CODE_EXTENSIONS:
  try:
  with zip_ref.open(member) as file:
+ file_bytes = file.read()
  try:
+ content = file_bytes.decode('utf-8')
  except UnicodeDecodeError:
  try:
+ content = file_bytes.decode('latin-1')
  except Exception as decode_err:
  ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
  continue

  except Exception as read_err:
  ignored_files.append(f"{member.filename} (Read Error: {read_err})")
  else:
  if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
  ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")

+ progress_bar.progress(100) # Ensure it completes
+ status_placeholder.empty() # Remove progress bar after completion
+
  except zipfile.BadZipFile:
+ status_placeholder.empty()
  st.error("🚨 Invalid or corrupted ZIP file.")
  return None, 0, 0, []
  except Exception as e:
+ status_placeholder.empty()
  st.error(f"🚨 Error processing ZIP file: {e}")
  return None, 0, 0, []

+ if file_count == 0 and not ignored_files:
+ st.warning("No files with recognized code extensions found in the ZIP.")
+ elif file_count == 0 and ignored_files:
+ st.warning("No files with recognized code extensions found. Some files were skipped.")
+
+ print(f"Cache miss or new file: Processed ZIP {file_id}") # Debug print
  return code_files, total_chars, file_count, ignored_files

  def construct_analysis_prompt(code_files_dict, requested_analyses):
+ """Constructs the prompt for Gemini, including code content and JSON structure request."""
+ prompt_parts = ["Analyze the following codebase provided as a collection of file paths and their content.\n\n"]
+ current_token_estimate = estimate_token_count(prompt_parts[0])
  included_files = []
+
+ # Use join for potentially faster concatenation
+ code_segments = []
+
+ # Provide feedback for large codebases
+ prompt_status = st.empty()
+ if len(code_files_dict) > 50:
+ prompt_status.write("Constructing prompt (processing files)...")

  for filename, content in code_files_dict.items():
  file_marker = f"--- START FILE: {filename} ---\n"

  segment_token_estimate = estimate_token_count(segment)

  if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
+ code_segments.append(segment)
  current_token_estimate += segment_token_estimate
  included_files.append(filename)
  else:
+ st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate:,} tokens).")
  break

+ prompt_status.empty() # Clear status message
+
  if not included_files:
  st.error("🚨 No code files could be included within the estimated token limit.")
  return None, []

+ concatenated_code = "".join(code_segments)
+ prompt_parts.append(concatenated_code)

+ # Generate the expected JSON structure description based on selected analyses
  json_structure_description = "{\n"
  structure_parts = []
  if "generate_docs" in requested_analyses:

  structure_parts.append(' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
  if "suggest_refactoring" in requested_analyses:
  structure_parts.append(' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
+
  json_structure_description += ",\n".join(structure_parts)
  json_structure_description += "\n}"


  **JSON Output Only:**
  """
+ prompt_parts.append(prompt_footer)
+ full_prompt = "".join(prompt_parts)
  return full_prompt, included_files

  def call_gemini_api(prompt):
+ """Calls the Gemini API or returns mock data based on session state."""
  if not prompt:
  return None, "Prompt generation failed."

+ # MOCK MODE LOGIC
  if st.session_state.mock_api_call:
+ st.info("MOCK MODE: Simulating API call...")
+ st.write("...") # Minimal feedback in mock mode
+ time.sleep(1) # Shorter mock delay

  mock_json_response = json.dumps({
  "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
  "potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
+ "style_issues": [],
+ "module_summaries": [],
+ "refactoring_suggestions": []
  })
+ st.success("Mock response generated.")
  return json.loads(mock_json_response), None

+ # REAL API CALL LOGIC
  else:
  if not initialize_gemini_model():
  return None, "Gemini Model Initialization Failed."

  return None, "Gemini model not available."

  try:
+ api_status = st.empty()
+ token_estimate = estimate_token_count(prompt)
+ api_status.info(f"📡 Sending request to {GEMINI_MODEL_NAME} (Estimated prompt tokens: {token_estimate:,})... This can take several minutes depending on code size and model load.")
+ start_time = time.time()
  response = model.generate_content(
  prompt,
  generation_config=genai.types.GenerationConfig(temperature=0.2),

  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
  ]
  )
+ end_time = time.time()
+ api_status.success(f"✅ Response received from AI in {end_time - start_time:.2f} seconds.")
+ time.sleep(1)
+ api_status.empty()

  try:
  json_response_text = response.text.strip()

  if json_response_text.startswith("```json"):
  json_response_text = json_response_text[7:]
  if json_response_text.startswith("```"):
  json_response_text = json_response_text[3:]
  if json_response_text.endswith("```"):
  json_response_text = json_response_text[:-3]

  json_start = json_response_text.find('{')
  json_end = json_response_text.rfind('}') + 1

  if json_start != -1 and json_end != -1 and json_end > json_start:
  final_json_text = json_response_text[json_start:json_end]
  insights = json.loads(final_json_text)
  return insights, None
  else:
+ st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response.")
  return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."

  except json.JSONDecodeError as json_err:
  st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
  st.code(response.text, language='text')
  return None, f"AI response was not valid JSON: {json_err}"
  except AttributeError:
+ st.error("🚨 Unexpected API response structure (AttributeError).")
  st.code(f"Response object: {response}", language='text')
  try:
  block_reason = response.prompt_feedback.block_reason

  return None, f"Content blocked by API. Reason: {block_reason}"
  except Exception:
  pass
+ return None, "Unexpected response structure from API (AttributeError)."
  except Exception as e:
  st.error(f"🚨 Unexpected issue processing AI response: {e}")
  try:

  except Exception:
  pass
  return None, f"Unexpected response structure: {e}"

  except Exception as e:
+ api_status.empty()
  st.error(f"🚨 An error occurred during API call: {e}")
  error_msg = f"API call failed: {e}"
  if hasattr(e, 'message'):
  if "429" in e.message:
+ error_msg = "API Quota Exceeded or Rate Limit hit."
  elif "API key not valid" in e.message:
+ error_msg = "Invalid Gemini API Key."
  elif "blocked" in e.message.lower():
+ error_msg = "Content blocked due to safety settings."
  elif "block_reason: SAFETY" in str(e):
+ error_msg = "Content blocked due to safety settings."
  return None, error_msg

  def display_results(results_json, requested_analyses):
+ """Renders the analysis results with pagination."""
  st.header("📊 Analysis Report")
  if not isinstance(results_json, dict):
  st.error("Invalid results format received.")
  st.json(results_json)
  return
  if "raw_response" in results_json:
  st.subheader("Raw AI Response (JSON Parsing Failed)")
  st.code(results_json["raw_response"], language='text')
  return

  display_config = {
+ "generate_docs": {"key": "documentation_suggestions", "title": AVAILABLE_ANALYSES["generate_docs"], "fields": {"file": "File", "line": "Line"}},
+ "find_bugs": {"key": "potential_bugs", "title": AVAILABLE_ANALYSES["find_bugs"], "fields": {"file": "File", "line": "Line", "severity": "Severity"}},
+ "check_style": {"key": "style_issues", "title": AVAILABLE_ANALYSES["check_style"], "fields": {"file": "File", "line": "Line"}},
+ "summarize_modules": {"key": "module_summaries", "title": AVAILABLE_ANALYSES["summarize_modules"], "fields": {"file": "File"}},
+ "suggest_refactoring": {"key": "refactoring_suggestions", "title": AVAILABLE_ANALYSES["suggest_refactoring"], "fields": {"file": "File", "line": "Line", "area": "Area"}}
  }

+ any_results_found = False
  for analysis_key in requested_analyses:
  if analysis_key in display_config:
  config = display_config[analysis_key]
  items = results_json.get(config["key"], [])
+ total_items = len(items)
+
+ st.subheader(f"{config['title']} ({total_items} found)")
+
  if items:
+ any_results_found = True
+ state_key = f"visible_{analysis_key}"
+ if state_key not in st.session_state:
+ st.session_state[state_key] = RESULTS_PAGE_SIZE
+
+ visible_count = st.session_state[state_key]
+ items_to_display = items[:visible_count]
+
+ for item in items_to_display:
+ details = []
+ for field_key, field_label in config["fields"].items():
+ value = item.get(field_key, 'N/A')
+ if value != 'N/A':
+ details.append(f"**{field_label}:** `{value}`" if field_key == 'file' else f"**{field_label}:** {value}")
+ st.markdown("- " + " - ".join(details))
+ if 'suggestion' in item:
+ st.code(item['suggestion'], language='text')
+ elif 'description' in item:
+ st.markdown(f" > {item['description']}")
+ elif 'summary' in item:
+ st.markdown(f" > {item['summary']}")
+
+ if total_items > visible_count:
+ if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
+ st.session_state[state_key] += RESULTS_PAGE_SIZE
+ st.rerun()
+ else:
+ st.markdown("_No items found for this category._")
+ st.divider()

+ if not any_results_found:
  st.info("No specific findings were identified in the analysis based on your selections.")

  st.download_button(

  # --- Streamlit App Main Interface ---
  st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
+ st.title("🤖 Codebase Audit Assistant")
+ st.markdown(f"Upload codebase (`.zip`) for analysis via **{GEMINI_MODEL_NAME}**.")

  with st.sidebar:
  st.header("⚙️ Analysis Controls")
+ st.session_state.mock_api_call = st.toggle("🧪 Enable Mock API Mode", value=st.session_state.mock_api_call, help="Use fake data instead of calling Gemini API.")
+ st.info("Mock API Mode ACTIVE" if st.session_state.mock_api_call else "Using REAL Gemini API")
  st.divider()
  st.header("🔎 Select Analyses")
+ selected_analyses = [key for key, name in AVAILABLE_ANALYSES.items() if st.checkbox(name, value=True, key=f"cb_{key}")]
  st.divider()
  st.header("📄 How To Use")
+ st.info("1. Set API Key (if not in Mock Mode).\n2. Toggle Mock Mode if needed.\n3. Select analyses.\n4. Create & Upload a **ZIP** of your code.\n5. Click 'Analyze Codebase'.\n6. Review the report.")
+ st.info(f"Note: Only common code extensions are supported. Analysis is limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} estimated tokens).")
  st.divider()
+ st.warning("⚠️ **Privacy:** Code is sent to the Google API if Mock Mode is OFF.")

+ uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader",
+ on_change=lambda: st.session_state.update(analysis_results=None, error_message=None, analysis_requested=False))
+
+ analysis_button_placeholder = st.empty() # Placeholder for the button
+ results_placeholder = st.container() # Container for results display

  if uploaded_file:
  st.success(f"✅ File '{uploaded_file.name}' uploaded.")
+
+ uploaded_file_bytes = uploaded_file.getvalue()
+ file_id = f"{uploaded_file.name}-{uploaded_file.size}"
+
+ code_files, total_chars, file_count, ignored_files = process_zip_file_cached(file_id, uploaded_file.size, uploaded_file_bytes)

  if code_files is not None:
  st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")

  analyze_button_disabled = (not selected_analyses or file_count == 0)
  analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
+ if analysis_button_placeholder.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
+ st.session_state.analysis_requested = True
+ st.session_state.analysis_results = None
+ st.session_state.error_message = None
+
  if not selected_analyses:
+ st.warning("Please select analysis types.")
  elif file_count == 0:
+ st.warning("No relevant code files found.")
  else:
+ with results_placeholder:
+ with st.spinner(f"🚀 Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... Please wait."):
+ analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
+ if analysis_prompt and included_files_in_prompt:
+ results_json, error_msg = call_gemini_api(analysis_prompt)
+ st.session_state.analysis_results = results_json
+ st.session_state.error_message = error_msg
+ elif not included_files_in_prompt:
+ st.session_state.error_message = "Could not proceed: No files included (check token limits/errors)."
+ else:
+ st.session_state.error_message = "Failed to generate analysis prompt."
+ st.rerun()
+
+ if st.session_state.analysis_requested:
+ with results_placeholder:
+ st.divider()
+ if st.session_state.error_message:
+ st.error(f"Analysis Failed: {st.session_state.error_message}")
+ if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
+ st.subheader("Raw AI Response")
+ st.code(st.session_state.analysis_results["raw_response"], language='text')
+ elif st.session_state.analysis_results:
+ display_results(st.session_state.analysis_results, selected_analyses)
+ else:
+ st.info("Analysis initiated, but no results or errors were stored. Please try again.")

  elif not uploaded_file:
+ results_placeholder.info("Upload a ZIP file containing your source code to begin.")

+ results_placeholder.divider()
+ results_placeholder.markdown("_Assistant powered by Google Gemini._")
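
For context, a minimal sketch of the st.cache_data pattern that the new process_zip_file_cached relies on. This is illustrative only and not part of the commit; the function name expensive_zip_scan and the assumption of a Streamlit version that provides st.cache_data (1.18 or newer) are ours. Every argument is hashed into the cache key, which is why the app passes the raw ZIP bytes together with a file_id and file_size: re-uploading the same archive reuses the cached extraction instead of re-reading the ZIP.

import streamlit as st

# Illustrative sketch (not from the commit): st.cache_data hashes every argument
# into the cache key, so identical bytes + file_id + file_size reuse the cached result.
@st.cache_data(max_entries=5)  # keep only the five most recent uploads cached
def expensive_zip_scan(file_id: str, file_size: int, file_content_bytes: bytes) -> int:
    # Stand-in for the real ZIP extraction work; the return value is what gets cached.
    return len(file_content_bytes)

# Same arguments -> cache hit; a new upload (different bytes or file_id) recomputes.
# size = expensive_zip_scan("example.zip-1024", 1024, b"...")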