mgbam committed on
Commit
94dc448
·
verified ·
1 Parent(s): 95d7700

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +360 -168
app.py CHANGED
@@ -3,23 +3,26 @@ import google.generativeai as genai
3
  import zipfile
4
  import io
5
  import json
6
- import os # Still needed for API key potentially, but not model names
7
  from pathlib import Path
8
  import time
9
 
10
  # --- Configuration ---
11
  # Model names are now discovered dynamically. Remove hardcoded names.
12
- MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Keep this estimate
13
  RESULTS_PAGE_SIZE = 25
14
 
15
- AVAILABLE_ANALYSES = { # Keep analyses config
16
  "generate_docs": "Generate Missing Docstrings/Comments",
17
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
18
  "check_style": "Check Style Guide Compliance (General)",
19
  "summarize_modules": "Summarize Complex Modules/Files",
20
- "suggest_refactoring": "Suggest Refactoring Opportunities"
21
  }
22
- CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'} # Keep extensions
 
 
 
23
 
24
  # --- Session State Initialization ---
25
  # (Keep most session state, add one for the selected model)
@@ -30,17 +33,17 @@ if 'analysis_results' not in st.session_state:
30
  if 'error_message' not in st.session_state:
31
  st.session_state.error_message = None
32
  if 'analysis_requested' not in st.session_state:
33
- st.session_state.analysis_requested = False
34
  if 'selected_model_name' not in st.session_state:
35
- st.session_state.selected_model_name = None # Will hold the "models/..." name
36
  if 'available_models_dict' not in st.session_state:
37
- st.session_state.available_models_dict = {} # Store display_name -> name mapping
38
 
39
  # --- Gemini API Setup & Model Discovery ---
40
- model = None # Global variable for the initialized model instance
41
 
42
  # --- NEW: Function to list available models ---
43
- @st.cache_data(ttl=3600) # Cache model list for an hour
44
  def get_available_models():
45
  """Lists models supporting 'generateContent' using the API key."""
46
  model_dict = {}
@@ -61,7 +64,7 @@ def get_available_models():
61
  return model_dict
62
  except Exception as e:
63
  st.error(f"🚨 Error listing available models: {e}")
64
- return {} # Return empty on error
65
 
66
  def initialize_gemini_model():
67
  """Initializes the Gemini model based on the selected name."""
@@ -72,7 +75,7 @@ def initialize_gemini_model():
72
  try:
73
  if 'GEMINI_API_KEY' not in st.secrets:
74
  st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
75
- st.stop() # Stop if key missing for initialization
76
  # Configure API key (might be redundant if list_models worked, but safe)
77
  genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
78
  print(f"Initializing Gemini Model: {selected_name}")
@@ -82,170 +85,319 @@ def initialize_gemini_model():
82
  return True
83
  except Exception as e:
84
  st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
85
- st.session_state.selected_model_name = None # Reset selection on error
86
  st.stop()
87
  return False
88
  elif st.session_state.mock_api_call:
89
- return True # No init needed for mock
90
  elif model is not None and model.model_name == selected_name:
91
- return True # Already initialized with the correct model
92
  elif model is not None and model.model_name != selected_name:
93
- print(f"Model changed. Re-initializing...")
94
- model = None # Reset model instance
95
- return initialize_gemini_model() # Recurse to re-initialize with new name
96
  elif not selected_name and not st.session_state.mock_api_call:
97
- # This case happens if no model is selected yet
98
- return False # Cannot initialize without a selection
99
- return False # Default case
100
 
101
  # --- Helper Functions ---
102
  # (estimate_token_count, process_zip_file_cached, construct_analysis_prompt,
103
  # call_gemini_api, display_results - remain the same as the optimized version)
104
- # estimate_token_count
105
- def estimate_token_count(text): return len(text) // 3
106
 
107
- # process_zip_file_cached (no changes)
 
 
 
108
  @st.cache_data(max_entries=5)
109
  def process_zip_file_cached(file_id, file_size, file_content_bytes):
110
- # ... (keep the exact same implementation as the previous optimized version) ...
111
- code_files = {}; total_chars = 0; file_count = 0; ignored_files = []
112
- status_placeholder = st.empty(); progress_bar = status_placeholder.progress(0)
 
 
 
 
 
 
 
113
  try:
114
  with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
115
- members = zip_ref.infolist(); total_members = len(members)
 
116
  for i, member in enumerate(members):
117
- if i % 10 == 0: progress_bar.progress(int((i / total_members) * 100))
118
- if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename: continue
 
 
119
  file_path = Path(member.filename)
120
  if file_path.suffix.lower() in CODE_EXTENSIONS:
121
  try:
122
  with zip_ref.open(member) as file:
123
  file_bytes = file.read()
124
- try: content = file_bytes.decode('utf-8')
 
125
  except UnicodeDecodeError:
126
- try: content = file_bytes.decode('latin-1')
127
- except Exception as decode_err: ignored_files.append(f"{member.filename} (Decode Error: {decode_err})"); continue
128
- code_files[member.filename] = content; total_chars += len(content); file_count += 1
129
- except Exception as read_err: ignored_files.append(f"{member.filename} (Read Error: {read_err})")
 
 
 
 
 
 
130
  else:
131
  if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
132
- ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
133
- progress_bar.progress(100); status_placeholder.empty()
134
- except zipfile.BadZipFile: status_placeholder.empty(); st.error("🚨 Invalid ZIP."); return None, 0, 0, []
135
- except Exception as e: status_placeholder.empty(); st.error(f"🚨 ZIP Error: {e}"); return None, 0, 0, []
136
- if file_count == 0 and not ignored_files: st.warning("No code files found.")
137
- elif file_count == 0 and ignored_files: st.warning("No code files found; some skipped.")
 
 
 
 
 
 
 
 
 
 
138
  return code_files, total_chars, file_count, ignored_files
139
 
140
-
141
- # construct_analysis_prompt (no changes)
142
  def construct_analysis_prompt(code_files_dict, requested_analyses):
143
- # ... (keep the exact same implementation as the previous optimized version) ...
144
- prompt_parts = ["Analyze the following codebase...\n\n"]; current_token_estimate = estimate_token_count(prompt_parts[0])
145
- included_files = []; code_segments = []; prompt_status = st.empty()
146
- if len(code_files_dict) > 50: prompt_status.info("Constructing prompt...")
 
 
 
 
 
 
 
 
 
147
  for filename, content in code_files_dict.items():
148
  segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
149
  segment_token_estimate = estimate_token_count(segment)
150
  if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
151
- code_segments.append(segment); current_token_estimate += segment_token_estimate; included_files.append(filename)
152
- else: st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens)."); break
 
 
 
 
153
  prompt_status.empty()
154
- if not included_files: st.error("🚨 No code files included in prompt."); return None, []
 
 
 
 
155
  prompt_parts.append("".join(code_segments))
156
- json_structure_description = "{\n"; structure_parts = []
157
- if "generate_docs" in requested_analyses: structure_parts.append(' "documentation_suggestions": [...]') # Use shorthand for brevity
158
- if "find_bugs" in requested_analyses: structure_parts.append(' "potential_bugs": [...]')
159
- if "check_style" in requested_analyses: structure_parts.append(' "style_issues": [...]')
160
- if "summarize_modules" in requested_analyses: structure_parts.append(' "module_summaries": [...]')
161
- if "suggest_refactoring" in requested_analyses: structure_parts.append(' "refactoring_suggestions": [...]')
 
 
 
 
 
 
 
 
162
  json_structure_description += ",\n".join(structure_parts) + "\n}"
163
  prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
164
  prompt_parts.append(prompt_footer)
 
165
  full_prompt = "".join(prompt_parts)
166
  return full_prompt, included_files
167
 
168
- # call_gemini_api (no changes other than relying on the globally selected model)
169
  def call_gemini_api(prompt):
170
- # ... (keep the exact same implementation as the previous optimized version,
171
- # it implicitly uses the 'model' variable initialized by initialize_gemini_model) ...
172
- if not prompt: return None, "Prompt generation failed."
 
 
 
 
173
  # MOCK MODE
174
  if st.session_state.mock_api_call:
175
- st.info(" MOCK MODE: Simulating API call..."); time.sleep(1)
176
- mock_json_response = json.dumps({"documentation_suggestions": [],"potential_bugs": [],"style_issues": [],"module_summaries": [],"refactoring_suggestions": []})
177
- st.success("Mock response generated."); return json.loads(mock_json_response), None
 
 
 
 
 
 
 
 
178
  # REAL API CALL
179
  else:
180
- if not initialize_gemini_model(): return None, "Gemini Model Initialization Failed."
181
- if model is None: return None, "Gemini model not selected or available." # Added check
 
 
182
  try:
183
  api_status = st.empty()
184
- # Include model name in status message
185
  api_status.info(f"📡 Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
186
- start_time = time.time(); response = model.generate_content(prompt, generation_config=genai.types.GenerationConfig(temperature=0.2), safety_settings=[{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"} for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH", "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]])
187
- end_time = time.time(); api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s."); time.sleep(1); api_status.empty()
188
- try: # Keep JSON parsing logic
 
 
 
 
 
 
 
 
 
 
 
 
189
  json_response_text = response.text.strip()
190
- # ... (rest of JSON parsing identical to previous version) ...
191
- if json_response_text.startswith("```json"): json_response_text = json_response_text[7:]
192
- if json_response_text.startswith("```"): json_response_text = json_response_text[3:]
193
- if json_response_text.endswith("```"): json_response_text = json_response_text[:-3]
194
- json_start = json_response_text.find('{'); json_end = json_response_text.rfind('}') + 1
 
 
 
 
195
  if json_start != -1 and json_end != -1 and json_end > json_start:
196
- final_json_text = json_response_text[json_start:json_end]; insights = json.loads(final_json_text); return insights, None
197
- else: st.warning("⚠️ Could not find valid JSON object."); return {"raw_response": response.text}, "AI response did not contain clear JSON object."
198
- # ... (keep error handling for JSONDecodeError, AttributeError etc. identical) ...
199
- except json.JSONDecodeError as json_err: st.error(f"🚨 Error parsing JSON: {json_err}"); st.code(response.text, language='text'); return None, f"AI response not valid JSON: {json_err}"
200
- except AttributeError: st.error(f"🚨 Unexpected API response structure (AttributeError)."); st.code(f"Response object: {response}", language='text'); return None, "Unexpected response structure (AttributeError)." # Simplified message
201
- except Exception as e: st.error(f"🚨 Unexpected issue processing response: {e}"); try: st.code(f"Response object: {response}", language='text'); except: pass; return None, f"Unexpected response structure: {e}"
202
- except Exception as e: # Keep API call error handling
203
- api_status.empty(); st.error(f"🚨 API call error: {e}"); error_msg = f"API call failed: {e}"
204
- # ... (keep specific error message logic identical) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  if hasattr(e, 'message'):
206
- if "429" in e.message: error_msg = "API Quota Exceeded or Rate Limit hit."
207
- elif "API key not valid" in e.message: error_msg = "Invalid Gemini API Key."
208
- elif "permission denied" in e.message.lower(): error_msg = f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
209
- elif "blocked" in e.message.lower(): error_msg = "Content blocked due to safety settings."
210
- elif "block_reason: SAFETY" in str(e): error_msg = "Content blocked due to safety settings."
 
 
 
 
 
211
  return None, error_msg
212
 
213
- # display_results (no changes needed from optimized version)
214
  def display_results(results_json, requested_analyses):
215
- # ... (keep the exact same implementation as the previous optimized version with pagination) ...
 
 
216
  st.header("📊 Analysis Report")
217
- if not isinstance(results_json, dict): st.error("Invalid results format."); st.json(results_json); return
218
- if "raw_response" in results_json: st.subheader("Raw AI Response (JSON Parsing Failed)"); st.code(results_json["raw_response"], language='text'); return
219
- display_config = { # Keep config same
220
- "generate_docs": {"key": "documentation_suggestions", "title": AVAILABLE_ANALYSES["generate_docs"], "fields": {"file": "File", "line": "Line"}},
221
- "find_bugs": {"key": "potential_bugs", "title": AVAILABLE_ANALYSES["find_bugs"], "fields": {"file": "File", "line": "Line", "severity": "Severity"}},
222
- "check_style": {"key": "style_issues", "title": AVAILABLE_ANALYSES["check_style"], "fields": {"file": "File", "line": "Line"}},
223
- "summarize_modules": {"key": "module_summaries", "title": AVAILABLE_ANALYSES["summarize_modules"], "fields": {"file": "File"}},
224
- "suggest_refactoring": {"key": "refactoring_suggestions", "title": AVAILABLE_ANALYSES["suggest_refactoring"], "fields": {"file": "File", "line": "Line", "area": "Area"}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  }
226
  any_results_found = False
227
  for analysis_key in requested_analyses:
228
  if analysis_key in display_config:
229
- config = display_config[analysis_key]; items = results_json.get(config["key"], [])
230
- total_items = len(items); st.subheader(f"{config['title']} ({total_items} found)")
 
 
231
  if items:
232
- any_results_found = True; state_key = f"visible_{analysis_key}"
233
- if state_key not in st.session_state: st.session_state[state_key] = RESULTS_PAGE_SIZE
234
- visible_count = st.session_state[state_key]; items_to_display = items[:visible_count]
235
- for item in items_to_display: # Keep item display logic
236
- details = [f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file' else f"**{field_label}:** {item.get(field_key, 'N/A')}" for field_key, field_label in config["fields"].items() if item.get(field_key, 'N/A') != 'N/A']
 
 
 
 
 
 
 
 
237
  st.markdown("- " + " - ".join(details))
238
- if 'suggestion' in item: st.code(item['suggestion'], language='text')
239
- elif 'description' in item: st.markdown(f" > {item['description']}")
240
- elif 'summary' in item: st.markdown(f" > {item['summary']}")
241
- if total_items > visible_count: # Keep "Show More" logic
 
 
 
242
  if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
243
- st.session_state[state_key] += RESULTS_PAGE_SIZE; st.rerun()
244
- else: st.markdown("_No items found for this category._")
 
 
245
  st.divider()
246
- if not any_results_found: st.info("No specific findings were identified.")
247
- st.download_button(label="Download Full Report (JSON)", data=json.dumps(results_json, indent=4), file_name="code_audit_report.json", mime="application/json")
248
-
 
 
 
 
 
249
 
250
  # --- Streamlit App Main Interface ---
251
  st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
@@ -254,7 +406,11 @@ st.title("🤖 Codebase Audit & Documentation Assistant")
254
  # --- Sidebar ---
255
  with st.sidebar:
256
  st.header("⚙️ Analysis Controls")
257
- st.session_state.mock_api_call = st.toggle("🧪 Enable Mock API Mode", value=st.session_state.mock_api_call, help="Use fake data instead of calling Gemini API.")
 
 
 
 
258
 
259
  st.divider()
260
  st.header("♊ Select Model")
@@ -268,17 +424,16 @@ with st.sidebar:
268
  # Try to find the index of the previously selected model
269
  current_model_display_name = None
270
  if st.session_state.selected_model_name:
271
- # Find display name matching the stored internal name
272
- for disp_name, internal_name in st.session_state.available_models_dict.items():
273
- if internal_name == st.session_state.selected_model_name:
274
- current_model_display_name = disp_name
275
- break
276
 
277
  try:
278
  selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
279
  except ValueError:
280
- selected_index = 0 # Default to first if previous selection not found
281
-
282
 
283
  selected_display_name = st.selectbox(
284
  "Choose Gemini model:",
@@ -291,42 +446,56 @@ with st.sidebar:
291
  st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
292
  st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
293
  elif 'GEMINI_API_KEY' in st.secrets:
294
- st.warning("No compatible models found or error listing models. Check API Key permissions.")
295
- st.session_state.selected_model_name = None # Ensure no model selected
296
  else:
297
- st.warning("Add GEMINI_API_KEY to secrets to list models.")
298
- st.session_state.selected_model_name = None
299
-
300
- else: # Mock mode is active
301
  st.info("Mock API Mode ACTIVE")
302
- st.session_state.selected_model_name = "mock_model" # Use a placeholder name for mock mode
303
  # --- End Dynamic Model Selection ---
304
 
305
-
306
  st.divider()
307
  st.header("🔎 Select Analyses")
308
- selected_analyses = [key for key, name in AVAILABLE_ANALYSES.items() if st.checkbox(name, value=True, key=f"cb_{key}")]
 
 
 
309
  st.divider()
310
- st.header("📄 How To Use") # Keep help text
311
- st.info("1. Set API Key.\n2. Toggle Mock Mode if needed.\n3. Select Model (if not Mock).\n4. Select analyses.\n5. Upload ZIP.\n6. Click 'Analyze'.\n7. Review report.")
 
 
 
 
 
 
 
 
312
  st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
313
  st.divider()
314
  st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")
315
 
316
-
317
  # Update title dynamically based on selected model
318
  if st.session_state.selected_model_name and not st.session_state.mock_api_call:
319
  st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
320
  elif st.session_state.mock_api_call:
321
- st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
322
  else:
323
- st.markdown("Upload codebase (`.zip`) for analysis.")
324
-
325
 
326
  # --- Main Content Area ---
327
- # (Keep the file uploader, button logic, and results display structure the same)
328
- uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader",
329
- on_change=lambda: st.session_state.update(analysis_results=None, error_message=None, analysis_requested=False))
 
 
 
 
 
 
 
330
  analysis_button_placeholder = st.empty()
331
  results_placeholder = st.container()
332
 
@@ -334,50 +503,73 @@ if uploaded_file:
334
  st.success(f"✅ File '{uploaded_file.name}' uploaded.")
335
  uploaded_file_bytes = uploaded_file.getvalue()
336
  file_id = f"{uploaded_file.name}-{uploaded_file.size}"
337
- code_files, total_chars, file_count, ignored_files = process_zip_file_cached(file_id, uploaded_file.size, uploaded_file_bytes)
 
 
338
  if code_files is not None:
339
  st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")
340
  if ignored_files:
341
- with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"): st.code("\n".join(ignored_files), language='text')
 
342
 
343
  # Disable button if no model selected (and not in mock mode)
344
  model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
345
  analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
346
  analyze_button_label = "Analyze Codebase"
347
- if not model_ready: analyze_button_label = "Select Model First"
348
- elif analyze_button_disabled: analyze_button_label = "Select Analyses or Upload Valid Code"
349
-
350
- if analysis_button_placeholder.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
351
- st.session_state.analysis_requested = True; st.session_state.analysis_results = None; st.session_state.error_message = None
352
- if not selected_analyses: st.warning("Please select analysis types.")
353
- elif file_count == 0: st.warning("No relevant code files found.")
354
- elif not model_ready: st.warning("Please select a Gemini model from the sidebar.") # Should be disabled, but safety check
 
 
 
 
 
 
 
 
 
 
 
355
  else:
356
  with results_placeholder:
357
- spinner_model_name = st.session_state.selected_model_name if not st.session_state.mock_api_call else "Mock Mode"
358
- spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
359
- with st.spinner(spinner_msg):
 
 
 
 
360
  analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
361
  if analysis_prompt and included_files_in_prompt:
362
  results_json, error_msg = call_gemini_api(analysis_prompt)
363
- st.session_state.analysis_results = results_json; st.session_state.error_message = error_msg
364
- elif not included_files_in_prompt: st.session_state.error_message = "Could not proceed: No files included."
365
- else: st.session_state.error_message = "Failed to generate analysis prompt."
 
 
 
366
  st.rerun()
367
 
368
  # Display results (Keep the same logic)
369
  if st.session_state.analysis_requested:
370
- with results_placeholder:
371
- st.divider()
372
- if st.session_state.error_message:
373
- st.error(f"Analysis Failed: {st.session_state.error_message}")
374
- if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
375
- st.subheader("Raw AI Response"); st.code(st.session_state.analysis_results["raw_response"], language='text')
376
- elif st.session_state.analysis_results:
377
- # Pass selected_analyses to display_results in case it's needed later
378
- display_results(st.session_state.analysis_results, selected_analyses)
379
- else: st.info("Analysis initiated, but no results/errors stored.")
380
- elif not uploaded_file: results_placeholder.info("Upload a ZIP file to begin.")
 
 
381
 
382
  results_placeholder.divider()
383
- results_placeholder.markdown("_Assistant powered by Google Gemini._")
 
3
  import zipfile
4
  import io
5
  import json
6
+ import os # Still needed for API key potentially, but not model names
7
  from pathlib import Path
8
  import time
9
 
10
  # --- Configuration ---
11
  # Model names are now discovered dynamically. Remove hardcoded names.
12
+ MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Keep this estimate
13
  RESULTS_PAGE_SIZE = 25
14
 
15
+ AVAILABLE_ANALYSES = { # Keep analyses config
16
  "generate_docs": "Generate Missing Docstrings/Comments",
17
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
18
  "check_style": "Check Style Guide Compliance (General)",
19
  "summarize_modules": "Summarize Complex Modules/Files",
20
+ "suggest_refactoring": "Suggest Refactoring Opportunities",
21
  }
22
+ CODE_EXTENSIONS = {
23
+ '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb',
24
+ '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
25
+ } # Keep extensions
26
 
27
  # --- Session State Initialization ---
28
  # (Keep most session state, add one for the selected model)
 
33
  if 'error_message' not in st.session_state:
34
  st.session_state.error_message = None
35
  if 'analysis_requested' not in st.session_state:
36
+ st.session_state.analysis_requested = False
37
  if 'selected_model_name' not in st.session_state:
38
+ st.session_state.selected_model_name = None # Will hold the "models/..." name
39
  if 'available_models_dict' not in st.session_state:
40
+ st.session_state.available_models_dict = {} # Store display_name -> name mapping
41
 
42
  # --- Gemini API Setup & Model Discovery ---
43
+ model = None # Global variable for the initialized model instance
44
 
45
  # --- NEW: Function to list available models ---
46
+ @st.cache_data(ttl=3600) # Cache model list for an hour
47
  def get_available_models():
48
  """Lists models supporting 'generateContent' using the API key."""
49
  model_dict = {}
 
64
  return model_dict
65
  except Exception as e:
66
  st.error(f"🚨 Error listing available models: {e}")
67
+ return {} # Return empty on error
68
 
69
  def initialize_gemini_model():
70
  """Initializes the Gemini model based on the selected name."""
 
75
  try:
76
  if 'GEMINI_API_KEY' not in st.secrets:
77
  st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
78
+ st.stop() # Stop if key missing for initialization
79
  # Configure API key (might be redundant if list_models worked, but safe)
80
  genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
81
  print(f"Initializing Gemini Model: {selected_name}")
 
85
  return True
86
  except Exception as e:
87
  st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
88
+ st.session_state.selected_model_name = None # Reset selection on error
89
  st.stop()
90
  return False
91
  elif st.session_state.mock_api_call:
92
+ return True # No init needed for mock mode
93
  elif model is not None and model.model_name == selected_name:
94
+ return True # Already initialized with the correct model
95
  elif model is not None and model.model_name != selected_name:
96
+ print("Model changed. Re-initializing...")
97
+ model = None # Reset model instance
98
+ return initialize_gemini_model() # Recurse to re-initialize with new name
99
  elif not selected_name and not st.session_state.mock_api_call:
100
+ # This case happens if no model is selected yet
101
+ return False # Cannot initialize without a selection
102
+ return False # Default case
103
 
104
  # --- Helper Functions ---
105
  # (estimate_token_count, process_zip_file_cached, construct_analysis_prompt,
106
  # call_gemini_api, display_results - remain the same as the optimized version)
 
 
107
 
108
+ def estimate_token_count(text):
109
+ """Estimates the number of tokens based on text length."""
110
+ return len(text) // 3
111
+
112
  @st.cache_data(max_entries=5)
113
  def process_zip_file_cached(file_id, file_size, file_content_bytes):
114
+ """
115
+ Processes a ZIP file and extracts code files.
116
+ Returns a tuple of (code_files dict, total_chars, file_count, ignored_files list).
117
+ """
118
+ code_files = {}
119
+ total_chars = 0
120
+ file_count = 0
121
+ ignored_files = []
122
+ status_placeholder = st.empty()
123
+ progress_bar = status_placeholder.progress(0)
124
  try:
125
  with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
126
+ members = zip_ref.infolist()
127
+ total_members = len(members)
128
  for i, member in enumerate(members):
129
+ if i % 10 == 0:
130
+ progress_bar.progress(int((i / total_members) * 100))
131
+ if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename:
132
+ continue
133
  file_path = Path(member.filename)
134
  if file_path.suffix.lower() in CODE_EXTENSIONS:
135
  try:
136
  with zip_ref.open(member) as file:
137
  file_bytes = file.read()
138
+ try:
139
+ content = file_bytes.decode('utf-8')
140
  except UnicodeDecodeError:
141
+ try:
142
+ content = file_bytes.decode('latin-1')
143
+ except Exception as decode_err:
144
+ ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
145
+ continue
146
+ code_files[member.filename] = content
147
+ total_chars += len(content)
148
+ file_count += 1
149
+ except Exception as read_err:
150
+ ignored_files.append(f"{member.filename} (Read Error: {read_err})")
151
  else:
152
  if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
153
+ ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
154
+ progress_bar.progress(100)
155
+ status_placeholder.empty()
156
+ except zipfile.BadZipFile:
157
+ status_placeholder.empty()
158
+ st.error("🚨 Invalid ZIP.")
159
+ return None, 0, 0, []
160
+ except Exception as e:
161
+ status_placeholder.empty()
162
+ st.error(f"🚨 ZIP Error: {e}")
163
+ return None, 0, 0, []
164
+ if file_count == 0:
165
+ if not ignored_files:
166
+ st.warning("No code files found.")
167
+ else:
168
+ st.warning("No code files found; some skipped.")
169
  return code_files, total_chars, file_count, ignored_files
170
 
 
 
171
  def construct_analysis_prompt(code_files_dict, requested_analyses):
172
+ """
173
+ Constructs the prompt for analysis by including code files and JSON structure for expected output.
174
+ Returns the full prompt and a list of included files.
175
+ """
176
+ prompt_parts = ["Analyze the following codebase...\n\n"]
177
+ current_token_estimate = estimate_token_count(prompt_parts[0])
178
+ included_files = []
179
+ code_segments = []
180
+ prompt_status = st.empty()
181
+
182
+ if len(code_files_dict) > 50:
183
+ prompt_status.info("Constructing prompt...")
184
+
185
  for filename, content in code_files_dict.items():
186
  segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
187
  segment_token_estimate = estimate_token_count(segment)
188
  if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
189
+ code_segments.append(segment)
190
+ current_token_estimate += segment_token_estimate
191
+ included_files.append(filename)
192
+ else:
193
+ st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
194
+ break
195
  prompt_status.empty()
196
+
197
+ if not included_files:
198
+ st.error("🚨 No code files included in prompt.")
199
+ return None, []
200
+
201
  prompt_parts.append("".join(code_segments))
202
+ json_structure_description = "{\n"
203
+ structure_parts = []
204
+
205
+ if "generate_docs" in requested_analyses:
206
+ structure_parts.append(' "documentation_suggestions": [...]')
207
+ if "find_bugs" in requested_analyses:
208
+ structure_parts.append(' "potential_bugs": [...]')
209
+ if "check_style" in requested_analyses:
210
+ structure_parts.append(' "style_issues": [...]')
211
+ if "summarize_modules" in requested_analyses:
212
+ structure_parts.append(' "module_summaries": [...]')
213
+ if "suggest_refactoring" in requested_analyses:
214
+ structure_parts.append(' "refactoring_suggestions": [...]')
215
+
216
  json_structure_description += ",\n".join(structure_parts) + "\n}"
217
  prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
218
  prompt_parts.append(prompt_footer)
219
+
220
  full_prompt = "".join(prompt_parts)
221
  return full_prompt, included_files
222
 
 
223
def _extract_json_object(text):
    """Pull the first JSON object out of a model reply.

    The reply may wrap the JSON in a markdown code fence or surround it with
    prose, so decoding starts at the first ``{`` and stops at the end of that
    object; anything before or after it is ignored.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded object, or None when the text has no ``{`` that is
        followed somewhere by a ``}``.

    Raises:
        json.JSONDecodeError: If braces are present but the text starting at
            the first ``{`` is not valid JSON.
    """
    start = text.find("{")
    if start == -1 or text.rfind("}") < start:
        return None
    # raw_decode stops at the end of the first complete value, so a closing
    # fence or trailing commentary (even one containing braces) is harmless.
    parsed, _end = json.JSONDecoder().raw_decode(text, start)
    return parsed


def _classify_api_error(exc, model_name):
    """Translate an exception raised by the API call into a user-facing message.

    Both the optional ``message`` attribute and ``str(exc)`` are inspected, so
    exceptions without a ``message`` attribute are still classified.

    Args:
        exc: The exception raised while calling the API.
        model_name: Name of the selected model, used in the permission message.

    Returns:
        A short description of the failure.
    """
    details = f"{getattr(exc, 'message', '')} {exc}"
    lowered = details.lower()
    if "429" in details:
        return "API Quota Exceeded or Rate Limit hit."
    if "API key not valid" in details:
        return "Invalid Gemini API Key."
    if "permission denied" in lowered:
        return f"Permission Denied for model '{model_name}'. Check API key access."
    if "blocked" in lowered or "block_reason: SAFETY" in details:
        return "Content blocked due to safety settings."
    return f"API call failed: {exc}"


def call_gemini_api(prompt):
    """Send the analysis prompt to Gemini (or the mock) and parse the reply.

    Args:
        prompt: The full prompt text. A falsy prompt is rejected immediately.

    Returns:
        A ``(insights, error_message)`` tuple:

        * ``(dict, None)`` when a JSON object was parsed from the reply.
        * ``({"raw_response": text}, message)`` when the reply contained no
          JSON object at all.
        * ``(None, message)`` for every other failure (no prompt, model
          initialisation failure, invalid JSON, API errors).
    """
    if not prompt:
        return None, "Prompt generation failed."

    # MOCK MODE: return an empty-but-well-formed report without any network use.
    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(1)  # simulated latency
        mock_insights = {
            "documentation_suggestions": [],
            "potential_bugs": [],
            "style_issues": [],
            "module_summaries": [],
            "refactoring_suggestions": [],
        }
        st.success("Mock response generated.")
        return mock_insights, None

    # REAL API CALL
    if not initialize_gemini_model():
        return None, "Gemini Model Initialization Failed."
    if model is None:
        return None, "Gemini model not selected or available."

    # Created before the try block so the error handler can always clear it.
    api_status = st.empty()
    try:
        api_status.info(
            f"📡 Sending request to {model.model_name} "
            f"(Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait."
        )
        harm_categories = (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
        started = time.perf_counter()
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.2),
            safety_settings=[
                {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                for category in harm_categories
            ],
        )
        elapsed = time.perf_counter() - started
        api_status.success(
            f"✅ Response received from AI ({model.model_name}) in {elapsed:.2f}s."
        )
        time.sleep(1)  # leave the success notice visible briefly
        api_status.empty()

        try:
            raw_text = response.text
            insights = _extract_json_object(raw_text)
            if insights is None:
                st.warning("⚠️ Could not find valid JSON object.")
                return {"raw_response": raw_text}, "AI response did not contain clear JSON object."
            return insights, None
        except json.JSONDecodeError as json_err:
            st.error(f"🚨 Error parsing JSON: {json_err}")
            st.code(raw_text, language='text')
            return None, f"AI response not valid JSON: {json_err}"
        except AttributeError:
            st.error("🚨 Unexpected API response structure (AttributeError).")
            st.code(f"Response object: {response}", language='text')
            return None, "Unexpected response structure (AttributeError)."
        except Exception as e:
            st.error(f"🚨 Unexpected issue processing response: {e}")
            try:
                st.code(f"Response object: {response}", language='text')
            except Exception:
                # Best-effort debug output only; the failure is reported below.
                pass
            return None, f"Unexpected response structure: {e}"
    except Exception as e:
        api_status.empty()
        st.error(f"🚨 API call error: {e}")
        return None, _classify_api_error(e, st.session_state.selected_model_name)
316
 
 
317
def _normalize_items(raw_items):
    """Coerce one category of the AI report into a list of dict items.

    The report is model output, so a category may come back as ``null``, as a
    single object instead of an array, or as an array of plain strings.

    Args:
        raw_items: The value stored under a category key in the report.

    Returns:
        A list of dicts. ``None`` becomes an empty list, a lone value is
        wrapped in a list, and any non-dict entry is kept as
        ``{"description": str(entry)}`` so nothing is silently dropped.
    """
    if raw_items is None:
        return []
    if not isinstance(raw_items, (list, tuple)):
        raw_items = [raw_items]
    return [
        entry if isinstance(entry, dict) else {"description": str(entry)}
        for entry in raw_items
    ]


def _format_item_details(item, fields):
    """Build the ``**Label:** value`` markdown fragments for one finding.

    Args:
        item: A single finding (dict).
        fields: Mapping of item key -> display label, in display order.

    Returns:
        A list of markdown fragments; fields that are missing or equal to
        ``'N/A'`` are omitted, and the ``file`` value is rendered as code.
    """
    fragments = []
    for field_key, field_label in fields.items():
        value = item.get(field_key, 'N/A')
        if value == 'N/A':
            continue
        if field_key == 'file':
            fragments.append(f"**{field_label}:** `{value}`")
        else:
            fragments.append(f"**{field_label}:** {value}")
    return fragments


def display_results(results_json, requested_analyses):
    """Render the analysis report with per-category paging and a JSON download.

    Args:
        results_json: Parsed report. Anything other than a dict is shown as
            an error; a dict carrying ``"raw_response"`` is shown verbatim.
        requested_analyses: Analysis keys (as used in ``AVAILABLE_ANALYSES``)
            whose categories should be rendered, in order.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.header("📊 Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    # Maps each analysis key to the report key it reads, its section title,
    # and the item fields shown on the summary line.
    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"},
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"},
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"},
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"},
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"},
        },
    }

    any_results_found = False
    for analysis_key in requested_analyses:
        config = display_config.get(analysis_key)
        if config is None:
            continue

        items = _normalize_items(results_json.get(config["key"]))
        total_items = len(items)
        st.subheader(f"{config['title']} ({total_items} found)")

        if not items:
            st.markdown("_No items found for this category._")
            st.divider()
            continue

        any_results_found = True
        # How many items of this category are currently expanded.
        state_key = f"visible_{analysis_key}"
        if state_key not in st.session_state:
            st.session_state[state_key] = RESULTS_PAGE_SIZE
        visible_count = st.session_state[state_key]

        for item in items[:visible_count]:
            st.markdown("- " + " - ".join(_format_item_details(item, config["fields"])))
            # Only the first body field present is shown.
            if 'suggestion' in item:
                st.code(item['suggestion'], language='text')
            elif 'description' in item:
                st.markdown(f" > {item['description']}")
            elif 'summary' in item:
                st.markdown(f" > {item['summary']}")

        if total_items > visible_count:
            remaining = total_items - visible_count
            if st.button(f"Show more ({remaining} remaining)", key=f"more_{analysis_key}"):
                st.session_state[state_key] += RESULTS_PAGE_SIZE
                st.rerun()
        st.divider()

    if not any_results_found:
        st.info("No specific findings were identified.")

    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json",
    )
401
 
402
  # --- Streamlit App Main Interface ---
403
  st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
 
406
  # --- Sidebar ---
407
  with st.sidebar:
408
  st.header("βš™οΈ Analysis Controls")
409
+ st.session_state.mock_api_call = st.toggle(
410
+ "πŸ§ͺ Enable Mock API Mode",
411
+ value=st.session_state.mock_api_call,
412
+ help="Use fake data instead of calling Gemini API."
413
+ )
414
 
415
  st.divider()
416
  st.header("β™Š Select Model")
 
424
  # Try to find the index of the previously selected model
425
  current_model_display_name = None
426
  if st.session_state.selected_model_name:
427
+ # Find display name matching the stored internal name
428
+ for disp_name, internal_name in st.session_state.available_models_dict.items():
429
+ if internal_name == st.session_state.selected_model_name:
430
+ current_model_display_name = disp_name
431
+ break
432
 
433
  try:
434
  selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
435
  except ValueError:
436
+ selected_index = 0 # Default to first if previous selection not found
 
437
 
438
  selected_display_name = st.selectbox(
439
  "Choose Gemini model:",
 
446
  st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
447
  st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
448
  elif 'GEMINI_API_KEY' in st.secrets:
449
+ st.warning("No compatible models found or error listing models. Check API Key permissions.")
450
+ st.session_state.selected_model_name = None # Ensure no model selected
451
  else:
452
+ st.warning("Add GEMINI_API_KEY to secrets to list models.")
453
+ st.session_state.selected_model_name = None
454
+ else: # Mock mode is active
 
455
  st.info("Mock API Mode ACTIVE")
456
+ st.session_state.selected_model_name = "mock_model" # Use a placeholder name for mock mode
457
  # --- End Dynamic Model Selection ---
458
 
 
459
  st.divider()
460
  st.header("πŸ”Ž Select Analyses")
461
+ selected_analyses = [
462
+ key for key, name in AVAILABLE_ANALYSES.items()
463
+ if st.checkbox(name, value=True, key=f"cb_{key}")
464
+ ]
465
  st.divider()
466
+ st.header("πŸ“„ How To Use")
467
+ st.info(
468
+ "1. Set API Key.\n"
469
+ "2. Toggle Mock Mode if needed.\n"
470
+ "3. Select Model (if not Mock).\n"
471
+ "4. Select analyses.\n"
472
+ "5. Upload ZIP.\n"
473
+ "6. Click 'Analyze'.\n"
474
+ "7. Review report."
475
+ )
476
  st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
477
  st.divider()
478
  st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")
479
 
 
480
# Intro line under the title: reflects mock mode or the selected model.
if st.session_state.mock_api_call:
    st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
elif st.session_state.selected_model_name:
    st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
else:
    st.markdown("Upload codebase (`.zip`) for analysis.")


# --- Main Content Area ---
def _reset_analysis_state():
    """Clear stored results, error and the requested flag when the upload changes."""
    st.session_state.update(
        analysis_results=None,
        error_message=None,
        analysis_requested=False,
    )


uploaded_file = st.file_uploader(
    "📁 Upload Codebase ZIP File",
    type=['zip'],
    key="file_uploader",
    on_change=_reset_analysis_state,
)
# Slot for the analyze button and a container for results/status output.
analysis_button_placeholder = st.empty()
results_placeholder = st.container()
501
 
 
503
  st.success(f"βœ… File '{uploaded_file.name}' uploaded.")
504
  uploaded_file_bytes = uploaded_file.getvalue()
505
  file_id = f"{uploaded_file.name}-{uploaded_file.size}"
506
+ code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
507
+ file_id, uploaded_file.size, uploaded_file_bytes
508
+ )
509
  if code_files is not None:
510
  st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")
511
  if ignored_files:
512
+ with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
513
+ st.code("\n".join(ignored_files), language='text')
514
 
515
  # Disable button if no model selected (and not in mock mode)
516
  model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
517
  analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
518
  analyze_button_label = "Analyze Codebase"
519
+ if not model_ready:
520
+ analyze_button_label = "Select Model First"
521
+ elif analyze_button_disabled:
522
+ analyze_button_label = "Select Analyses or Upload Valid Code"
523
+
524
+ if analysis_button_placeholder.button(
525
+ analyze_button_label,
526
+ type="primary",
527
+ disabled=analyze_button_disabled
528
+ ):
529
+ st.session_state.analysis_requested = True
530
+ st.session_state.analysis_results = None
531
+ st.session_state.error_message = None
532
+ if not selected_analyses:
533
+ st.warning("Please select analysis types.")
534
+ elif file_count == 0:
535
+ st.warning("No relevant code files found.")
536
+ elif not model_ready:
537
+ st.warning("Please select a Gemini model from the sidebar.")
538
  else:
539
  with results_placeholder:
540
+ spinner_model_name = (
541
+ st.session_state.selected_model_name
542
+ if not st.session_state.mock_api_call
543
+ else "Mock Mode"
544
+ )
545
+ spinner_msg = f"πŸš€ Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
546
+ with st.spinner(spinner_msg):
547
  analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
548
  if analysis_prompt and included_files_in_prompt:
549
  results_json, error_msg = call_gemini_api(analysis_prompt)
550
+ st.session_state.analysis_results = results_json
551
+ st.session_state.error_message = error_msg
552
+ elif not included_files_in_prompt:
553
+ st.session_state.error_message = "Could not proceed: No files included."
554
+ else:
555
+ st.session_state.error_message = "Failed to generate analysis prompt."
556
  st.rerun()
557
 
558
# --- Results Area ---
# Show the outcome of the most recent analysis request: the error (plus the
# raw model reply when one was kept), the report, or a notice when neither
# was stored.
if st.session_state.analysis_requested:
    with results_placeholder:
        st.divider()
        stored_results = st.session_state.analysis_results
        if st.session_state.error_message:
            st.error(f"Analysis Failed: {st.session_state.error_message}")
            if isinstance(stored_results, dict) and "raw_response" in stored_results:
                st.subheader("Raw AI Response")
                st.code(stored_results["raw_response"], language='text')
        elif stored_results is not None:
            # Compare against None rather than truthiness so a valid but
            # empty report ({}) still renders instead of being treated as
            # missing.
            display_results(stored_results, selected_analyses)
        else:
            st.info("Analysis initiated, but no results/errors stored.")
elif not uploaded_file:
    results_placeholder.info("Upload a ZIP file to begin.")

results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")