mgbam committed on
Commit
3a80282
·
verified ·
1 Parent(s): ae429aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -214
app.py CHANGED
@@ -5,15 +5,12 @@ import io
5
  import json
6
  import os
7
  from pathlib import Path
 
8
 
9
  # --- Configuration ---
10
  GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
11
- # Maximum estimated tokens to try fitting into a single prompt
12
- # Adjust based on typical file sizes and Gemini limits/performance
13
- # 1M tokens is roughly 4MB-5MB of text, but structure matters. Start lower.
14
- MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Be conservative initially
15
 
16
- # Define the types of analysis available
17
  AVAILABLE_ANALYSES = {
18
  "generate_docs": "Generate Missing Docstrings/Comments",
19
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
@@ -22,26 +19,45 @@ AVAILABLE_ANALYSES = {
22
  "suggest_refactoring": "Suggest Refactoring Opportunities"
23
  }
24
 
25
- # Define common code file extensions to include
26
  CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'}
27
 
 
 
 
 
 
28
  # --- Gemini API Setup ---
29
- try:
30
- if 'GEMINI_API_KEY' not in st.secrets:
31
- st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
32
- st.stop()
33
- genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
34
- model = genai.GenerativeModel(GEMINI_MODEL_NAME)
35
- print("Gemini Model Initialized.")
36
- except Exception as e:
37
- st.error(f"🚨 Error initializing Gemini SDK: {e}")
38
- st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  # --- Helper Functions ---
41
 
42
  def estimate_token_count(text):
43
- """Roughly estimate token count (4 chars per token is a common rule of thumb)."""
44
- return len(text) // 3 # Be generous here
45
 
46
  def process_zip_file(uploaded_file):
47
  """Extracts code files and their content from the uploaded zip file."""
@@ -53,16 +69,13 @@ def process_zip_file(uploaded_file):
53
  try:
54
  with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
55
  for member in zip_ref.infolist():
56
- # Skip directories and files in hidden folders like .git, __pycache__
57
- if member.is_dir() or member.filename.startswith('.') or '__' in member.filename:
58
  continue
59
 
60
  file_path = Path(member.filename)
61
- # Check if the file extension is in our allowed list
62
  if file_path.suffix.lower() in CODE_EXTENSIONS:
63
  try:
64
  with zip_ref.open(member) as file:
65
- # Decode defensively, try common encodings
66
  try:
67
  content = file.read().decode('utf-8')
68
  except UnicodeDecodeError:
@@ -70,7 +83,7 @@ def process_zip_file(uploaded_file):
70
  content = file.read().decode('latin-1')
71
  except Exception as decode_err:
72
  ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
73
- continue # Skip if undecodable
74
 
75
  code_files[member.filename] = content
76
  total_chars += len(content)
@@ -78,7 +91,9 @@ def process_zip_file(uploaded_file):
78
  except Exception as read_err:
79
  ignored_files.append(f"{member.filename} (Read Error: {read_err})")
80
  else:
81
- ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
 
 
82
 
83
  except zipfile.BadZipFile:
84
  st.error("🚨 Invalid or corrupted ZIP file.")
@@ -93,16 +108,14 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
93
  """Constructs the prompt for Gemini, including code content and JSON structure request."""
94
  prompt_content = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
95
  current_token_estimate = estimate_token_count(prompt_content)
96
-
97
- # Concatenate file content with markers
98
  included_files = []
99
  concatenated_code = ""
 
100
  for filename, content in code_files_dict.items():
101
  file_marker = f"--- START FILE: {filename} ---\n"
102
  file_content = f"{content}\n"
103
  file_end_marker = f"--- END FILE: {filename} ---\n\n"
104
  segment = file_marker + file_content + file_end_marker
105
-
106
  segment_token_estimate = estimate_token_count(segment)
107
 
108
  if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
@@ -110,8 +123,8 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
110
  current_token_estimate += segment_token_estimate
111
  included_files.append(filename)
112
  else:
113
- st.warning(f"⚠️ Codebase likely exceeds context window estimate ({MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis will be performed only on the first {len(included_files)} files ({current_token_estimate} tokens). Consider analyzing smaller parts separately.")
114
- break # Stop adding files if limit reached
115
 
116
  if not included_files:
117
  st.error("🚨 No code files could be included within the estimated token limit.")
@@ -119,25 +132,22 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
119
 
120
  prompt_content += concatenated_code
121
 
122
- # Define the requested JSON structure based on selections
123
  json_structure_description = "{\n"
 
 
124
  if "generate_docs" in requested_analyses:
125
- json_structure_description += ' "documentation_suggestions": [{"file": "path/to/file", "line": number, "suggestion": "Suggested docstring/comment"}],\n'
126
  if "find_bugs" in requested_analyses:
127
- json_structure_description += ' "potential_bugs": [{"file": "path/to/file", "line": number, "description": "Description of potential bug/anti-pattern", "severity": "High/Medium/Low"}],\n'
128
  if "check_style" in requested_analyses:
129
- json_structure_description += ' "style_issues": [{"file": "path/to/file", "line": number, "description": "Description of style deviation"}],\n'
130
  if "summarize_modules" in requested_analyses:
131
- json_structure_description += ' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}],\n'
132
  if "suggest_refactoring" in requested_analyses:
133
- json_structure_description += ' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}],\n'
134
-
135
- # Remove trailing comma and add closing brace
136
- if json_structure_description.endswith(',\n'):
137
- json_structure_description = json_structure_description[:-2] + "\n}"
138
- else:
139
- json_structure_description += "}" # Handle case where no sections selected (though UI should prevent)
140
 
 
 
141
 
142
  prompt_footer = f"""
143
  **Analysis Task:**
@@ -151,72 +161,136 @@ Respond ONLY with a single, valid JSON object adhering strictly to the following
151
  **JSON Output Only:**
152
  """
153
  full_prompt = prompt_content + prompt_footer
154
- # print(f"--- PROMPT (First 500 chars): ---\n{full_prompt[:500]}\n--------------------------") # Debug: Print start of prompt
 
155
  return full_prompt, included_files
156
 
157
 
158
  def call_gemini_api(prompt):
159
- """Calls the Gemini API and attempts to parse the JSON response."""
160
  if not prompt:
161
  return None, "Prompt generation failed."
162
 
163
- try:
164
- st.write(f"📡 Sending request to {GEMINI_MODEL_NAME}...") # Progress update
165
- response = model.generate_content(
166
- prompt,
167
- generation_config=genai.types.GenerationConfig(
168
- # candidate_count=1, # Default is 1
169
- # stop_sequences=['...'], # Optional stop sequences
170
- # max_output_tokens=..., # Can be useful, but JSON structure might vary
171
- temperature=0.2 # Lower temperature for more deterministic code analysis
172
- ),
173
- safety_settings=[ # Adjust as needed, might need to be less strict for code
174
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
175
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
176
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
177
- # Be cautious with dangerous content, code analysis might trigger it
178
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
179
- ]
180
- )
181
- st.write("✅ Response received from AI.")
182
-
183
- # Debug: Print raw response
184
- # print(f"--- RAW API RESPONSE ---\n{response.text}\n------------------------")
185
-
186
- # Attempt to parse the JSON response - more robust extraction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  try:
188
- # Find the start and end of the JSON block
189
- json_start = response.text.find('{')
190
- json_end = response.text.rfind('}') + 1
191
- if json_start != -1 and json_end != -1:
192
- json_response_text = response.text[json_start:json_end]
193
- insights = json.loads(json_response_text)
194
- return insights, None
195
- else:
196
- # Fallback if no {} found - maybe simple text response?
197
- st.warning("⚠️ Could not find JSON structure in response. Displaying raw text.")
198
- return {"raw_response": response.text}, "AI response was not valid JSON, showing raw text."
199
-
200
- except json.JSONDecodeError as json_err:
201
- st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
202
- st.error("Raw AI Response:")
203
- st.code(response.text, language='text')
204
- return None, f"AI response was not valid JSON: {json_err}"
205
- except Exception as e:
206
- st.error(f"🚨 Unexpected issue processing AI response: {e}")
207
- try: st.code(f"Response object: {response}", language='text')
208
- except: pass
209
- return None, f"Unexpected response structure: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
- except Exception as e:
212
- st.error(f"🚨 An error occurred during API call: {e}")
213
- # Add specific error checks if possible (e.g., quota, safety blocks)
214
- error_msg = f"API call failed: {e}"
215
- if "block_reason: SAFETY" in str(e):
216
- error_msg = "Content blocked due to safety settings. Code or AI response may have triggered filters."
217
- elif "429" in str(e): # Crude check for quota error
218
- error_msg = "API Quota Exceeded or Rate Limit hit. Check your Google AI Studio dashboard."
219
- return None, error_msg
 
 
 
 
 
 
220
 
221
 
222
  def display_results(results_json, requested_analyses):
@@ -225,72 +299,73 @@ def display_results(results_json, requested_analyses):
225
 
226
  if not isinstance(results_json, dict):
227
  st.error("Invalid results format received.")
228
- st.json(results_json) # Show what was received
229
  return
230
 
231
- # Handle raw response fallback
232
  if "raw_response" in results_json:
233
  st.subheader("Raw AI Response (JSON Parsing Failed)")
234
  st.code(results_json["raw_response"], language='text')
235
  return
236
 
237
- # Display each requested section
238
- if "generate_docs" in requested_analyses:
239
- st.subheader(AVAILABLE_ANALYSES["generate_docs"])
240
- suggestions = results_json.get("documentation_suggestions", [])
241
- if suggestions:
242
- for item in suggestions:
243
- st.markdown(f"- **File:** `{item.get('file', 'N/A')}` (Line: {item.get('line', 'N/A')})")
244
- st.code(item.get('suggestion', ''), language='text') # Show suggestion as code/text
245
- else:
246
- st.markdown("_No documentation suggestions provided._")
247
- st.divider()
248
-
249
- if "find_bugs" in requested_analyses:
250
- st.subheader(AVAILABLE_ANALYSES["find_bugs"])
251
- bugs = results_json.get("potential_bugs", [])
252
- if bugs:
253
- for item in bugs:
254
- st.markdown(f"- **File:** `{item.get('file', 'N/A')}` (Line: {item.get('line', 'N/A')}) - **Severity:** {item.get('severity', 'Unknown')}")
255
- st.markdown(f" Description: {item.get('description', 'N/A')}")
256
- else:
257
- st.markdown("_No potential bugs identified._")
258
- st.divider()
259
-
260
- if "check_style" in requested_analyses:
261
- st.subheader(AVAILABLE_ANALYSES["check_style"])
262
- issues = results_json.get("style_issues", [])
263
- if issues:
264
- for item in issues:
265
- st.markdown(f"- **File:** `{item.get('file', 'N/A')}` (Line: {item.get('line', 'N/A')})")
266
- st.markdown(f" Issue: {item.get('description', 'N/A')}")
267
- else:
268
- st.markdown("_No style issues identified._")
269
- st.divider()
270
-
271
- if "summarize_modules" in requested_analyses:
272
- st.subheader(AVAILABLE_ANALYSES["summarize_modules"])
273
- summaries = results_json.get("module_summaries", [])
274
- if summaries:
275
- for item in summaries:
276
- st.markdown(f"**File:** `{item.get('file', 'N/A')}`")
277
- st.markdown(f"> {item.get('summary', 'N/A')}") # Blockquote for summary
278
- else:
279
- st.markdown("_No module summaries provided._")
280
- st.divider()
281
-
282
- if "suggest_refactoring" in requested_analyses:
283
- st.subheader(AVAILABLE_ANALYSES["suggest_refactoring"])
284
- suggestions = results_json.get("refactoring_suggestions", [])
285
- if suggestions:
286
- for item in suggestions:
287
- st.markdown(f"- **File:** `{item.get('file', 'N/A')}` (Line: {item.get('line', 'N/A')}) - **Area:** {item.get('area', 'N/A')}")
288
- st.markdown(f" Suggestion: {item.get('suggestion', 'N/A')}")
289
  else:
290
- st.markdown("_No refactoring suggestions provided._")
291
  st.divider()
292
 
293
- # Option to download the raw JSON results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  st.download_button(
295
  label="Download Full Report (JSON)",
296
  data=json.dumps(results_json, indent=4),
@@ -298,85 +373,112 @@ def display_results(results_json, requested_analyses):
298
  mime="application/json"
299
  )
300
 
301
-
302
  # --- Streamlit App Main Interface ---
303
  st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
304
 
305
  st.title("🤖 Codebase Audit & Documentation Assistant")
306
  st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")
307
- st.warning("⚠️ **Privacy Notice:** Your code content will be sent to the Google Gemini API for analysis. Do not upload highly sensitive or proprietary code if you are not comfortable with this.")
308
-
309
- # Sidebar for options
310
- st.sidebar.header("🛠️ Analysis Options")
311
- selected_analyses = []
312
- for key, name in AVAILABLE_ANALYSES.items():
313
- if st.sidebar.checkbox(name, value=True, key=f"cb_{key}"):
314
- selected_analyses.append(key)
315
-
316
- st.sidebar.header("📄 How To Use")
317
- st.sidebar.info(
318
- "1. Ensure `GEMINI_API_KEY` is in `.streamlit/secrets.toml`.\n"
319
- "2. Select desired analyses in the sidebar.\n"
320
- "3. Create a **ZIP archive** of your codebase.\n"
321
- "4. Upload the `.zip` file below.\n"
322
- "5. Click 'Analyze Codebase'.\n"
323
- "6. Review the report generated."
324
- )
325
- st.sidebar.info(f"**Note:** Only files with common code extensions ({', '.join(CODE_EXTENSIONS)}) within the ZIP will be processed. Analysis might be limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
 
328
  # Main content area
329
- uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'])
 
 
 
330
 
331
  if uploaded_file:
332
- st.success(f"✅ File '{uploaded_file.name}' uploaded successfully.")
333
 
334
- # Process the zip file immediately to give feedback
335
  with st.spinner("Inspecting ZIP file..."):
336
  code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)
337
 
338
  if code_files is not None:
339
- st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Estimated tokens: ~{estimate_token_count(total_chars):,}")
340
  if ignored_files:
341
  with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
342
- st.json(ignored_files)
343
-
344
- # Analysis Button
345
- analyze_button = st.button("Analyze Codebase", type="primary", disabled=(not selected_analyses or file_count == 0))
346
-
347
- if not selected_analyses and analyze_button:
348
- st.warning("Please select at least one analysis type from the sidebar.")
349
- elif file_count == 0 and analyze_button:
350
- st.warning("No relevant code files found in the ZIP archive to analyze.")
351
-
352
-
353
- if analyze_button and selected_analyses and file_count > 0:
354
- st.divider()
355
- with st.spinner(f"🚀 Preparing prompt and contacting {GEMINI_MODEL_NAME}... This may take several minutes for large codebases."):
356
- # 1. Construct the prompt
357
- analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
358
-
359
- if analysis_prompt and included_files_in_prompt:
360
- st.write(f"Analyzing {len(included_files_in_prompt)} files...")
361
- # 2. Call the API
362
- results_json, error_message = call_gemini_api(analysis_prompt)
363
-
364
- # 3. Display Results
365
- if error_message:
366
- st.error(f"Analysis Failed: {error_message}")
367
- elif results_json:
368
- display_results(results_json, selected_analyses)
369
- else:
370
- st.error("Analysis did not return results or an unknown error occurred.")
371
- elif not included_files_in_prompt:
372
- st.error("Could not proceed: No files were included in the prompt (likely due to token limits or processing errors).")
373
-
374
-
 
 
 
 
 
 
 
 
 
 
 
375
  else:
376
- # Error message already shown by process_zip_file
377
- pass
378
 
379
- else:
380
  st.info("Upload a ZIP file containing your source code to begin.")
381
 
382
  st.divider()
 
5
  import json
6
  import os
7
  from pathlib import Path
8
+ import time # Added for simulating mock delay
9
 
10
  # --- Configuration ---
11
  GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
12
+ MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Adjust as needed
 
 
 
13
 
 
14
  AVAILABLE_ANALYSES = {
15
  "generate_docs": "Generate Missing Docstrings/Comments",
16
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
 
19
  "suggest_refactoring": "Suggest Refactoring Opportunities"
20
  }
21
 
 
22
  CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'}
23
 
24
+ # --- Session State Initialization ---
25
+ # Initialize session state for mock mode toggle if it doesn't exist
26
+ if 'mock_api_call' not in st.session_state:
27
+ st.session_state.mock_api_call = False # Default to using the real API
28
+
29
  # --- Gemini API Setup ---
30
+ # Defer full initialization until needed if mock mode might be used first
31
+ model = None
32
+ def initialize_gemini_model():
33
+ global model
34
+ if model is None and not st.session_state.mock_api_call:
35
+ try:
36
+ if 'GEMINI_API_KEY' not in st.secrets:
37
+ st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
38
+ st.stop()
39
+ genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
40
+ model = genai.GenerativeModel(GEMINI_MODEL_NAME)
41
+ print("Gemini Model Initialized.")
42
+ return True
43
+ except Exception as e:
44
+ st.error(f"🚨 Error initializing Gemini SDK: {e}")
45
+ st.stop()
46
+ return False
47
+ elif st.session_state.mock_api_call:
48
+ print("Running in Mock Mode. Skipping Gemini initialization.")
49
+ return True # Allow proceeding in mock mode
50
+ elif model is not None:
51
+ print("Gemini Model already initialized.")
52
+ return True
53
+ return False
54
+
55
 
56
  # --- Helper Functions ---
57
 
58
  def estimate_token_count(text):
59
+ """Roughly estimate token count (3-4 chars per token)."""
60
+ return len(text) // 3
61
 
62
  def process_zip_file(uploaded_file):
63
  """Extracts code files and their content from the uploaded zip file."""
 
69
  try:
70
  with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
71
  for member in zip_ref.infolist():
72
+ if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
 
73
  continue
74
 
75
  file_path = Path(member.filename)
 
76
  if file_path.suffix.lower() in CODE_EXTENSIONS:
77
  try:
78
  with zip_ref.open(member) as file:
 
79
  try:
80
  content = file.read().decode('utf-8')
81
  except UnicodeDecodeError:
 
83
  content = file.read().decode('latin-1')
84
  except Exception as decode_err:
85
  ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
86
+ continue
87
 
88
  code_files[member.filename] = content
89
  total_chars += len(content)
 
91
  except Exception as read_err:
92
  ignored_files.append(f"{member.filename} (Read Error: {read_err})")
93
  else:
94
+ # Only add to ignored if it's not explicitly ignored by path rules above
95
+ if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
96
+ ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
97
 
98
  except zipfile.BadZipFile:
99
  st.error("🚨 Invalid or corrupted ZIP file.")
 
108
  """Constructs the prompt for Gemini, including code content and JSON structure request."""
109
  prompt_content = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
110
  current_token_estimate = estimate_token_count(prompt_content)
 
 
111
  included_files = []
112
  concatenated_code = ""
113
+
114
  for filename, content in code_files_dict.items():
115
  file_marker = f"--- START FILE: {filename} ---\n"
116
  file_content = f"{content}\n"
117
  file_end_marker = f"--- END FILE: {filename} ---\n\n"
118
  segment = file_marker + file_content + file_end_marker
 
119
  segment_token_estimate = estimate_token_count(segment)
120
 
121
  if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
 
123
  current_token_estimate += segment_token_estimate
124
  included_files.append(filename)
125
  else:
126
+ st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate} tokens).")
127
+ break
128
 
129
  if not included_files:
130
  st.error("🚨 No code files could be included within the estimated token limit.")
 
132
 
133
  prompt_content += concatenated_code
134
 
 
135
  json_structure_description = "{\n"
136
+ # Dynamically build the JSON structure based on selection
137
+ structure_parts = []
138
  if "generate_docs" in requested_analyses:
139
+ structure_parts.append(' "documentation_suggestions": [{"file": "path/to/file", "line": number, "suggestion": "Suggested docstring/comment"}]')
140
  if "find_bugs" in requested_analyses:
141
+ structure_parts.append(' "potential_bugs": [{"file": "path/to/file", "line": number, "description": "Description of potential bug/anti-pattern", "severity": "High/Medium/Low"}]')
142
  if "check_style" in requested_analyses:
143
+ structure_parts.append(' "style_issues": [{"file": "path/to/file", "line": number, "description": "Description of style deviation"}]')
144
  if "summarize_modules" in requested_analyses:
145
+ structure_parts.append(' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
146
  if "suggest_refactoring" in requested_analyses:
147
+ structure_parts.append(' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
 
 
 
 
 
 
148
 
149
+ json_structure_description += ",\n".join(structure_parts)
150
+ json_structure_description += "\n}"
151
 
152
  prompt_footer = f"""
153
  **Analysis Task:**
 
161
  **JSON Output Only:**
162
  """
163
  full_prompt = prompt_content + prompt_footer
164
+ # print(f"--- PROMPT (First 500 chars): ---\n{full_prompt[:500]}\n--------------------------")
165
+ # print(f"--- PROMPT (Last 500 chars): ---\n{full_prompt[-500:]}\n--------------------------")
166
  return full_prompt, included_files
167
 
168
 
169
  def call_gemini_api(prompt):
170
+ """Calls the Gemini API or returns mock data based on session state."""
171
  if not prompt:
172
  return None, "Prompt generation failed."
173
 
174
+ # --- MOCK MODE LOGIC ---
175
+ if st.session_state.mock_api_call:
176
+ st.info(" MOCK MODE: Simulating API call...")
177
+ time.sleep(2) # Simulate network/processing delay
178
+
179
+ # --- CHOOSE YOUR MOCK RESPONSE ---
180
+ # Option 1: Simulate successful response with some data
181
+ mock_json_response = json.dumps({
182
+ "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
183
+ "potential_bugs": [{"file":"mock/utils.py", "line": 22, "description":"Potential division by zero if denominator is not checked.", "severity":"Medium"}],
184
+ "style_issues": [{"file": "mock/core.py", "line": 5, "description": "Variable 'varName' does not follow snake_case convention."}],
185
+ "module_summaries": [{"file": "mock/core.py", "summary": "This file contains the core mock processing logic."}, {"file":"mock/utils.py", "summary": "Utility functions for mocking."}],
186
+ "refactoring_suggestions": [{"file":"mock/utils.py", "line": 30, "area":"calculate_metrics function", "suggestion": "Function is too long (> 50 lines), consider breaking it down."}]
187
+ })
188
+ st.success("Mock response generated successfully.")
189
+ return json.loads(mock_json_response), None # Return insights, no error
190
+
191
+ # Option 2: Simulate API error
192
+ # st.error("Simulating API error.")
193
+ # return None, "MOCK ERROR: Simulated API Quota Exceeded."
194
+
195
+ # Option 3: Simulate invalid JSON response
196
+ # st.warning("Simulating invalid JSON response from AI.")
197
+ # return {"raw_response": "{malformed json'"}, "AI response was not valid JSON, showing raw text."
198
+ #
199
+ # Option 4: Simulate empty results
200
+ # mock_empty_json = json.dumps({
201
+ # "documentation_suggestions": [], "potential_bugs": [], "style_issues": [],
202
+ # "module_summaries": [], "refactoring_suggestions": []
203
+ # })
204
+ # st.success("Mock response generated (empty results).")
205
+ # return json.loads(mock_empty_json), None
206
+ # --- END MOCK MODE LOGIC ---
207
+
208
+
209
+ # --- REAL API CALL LOGIC ---
210
+ else:
211
+ if not initialize_gemini_model(): # Ensure model is ready
212
+ return None, "Gemini Model Initialization Failed."
213
+ if model is None: # Should not happen if initialize check passed, but safeguard
214
+ return None, "Gemini model not available."
215
+
216
  try:
217
+ st.write(f"📡 Sending request to {GEMINI_MODEL_NAME}...")
218
+ response = model.generate_content(
219
+ prompt,
220
+ generation_config=genai.types.GenerationConfig(temperature=0.2),
221
+ safety_settings=[
222
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
223
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
224
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
225
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
226
+ ]
227
+ )
228
+ st.write("✅ Response received from AI.")
229
+
230
+ # Debug: Print raw response text
231
+ # print(f"--- RAW API RESPONSE ---\n{response.text}\n------------------------")
232
+
233
+ try:
234
+ # Try to extract JSON robustly
235
+ json_response_text = response.text.strip()
236
+ # Handle potential markdown code block fences
237
+ if json_response_text.startswith("```json"):
238
+ json_response_text = json_response_text[7:]
239
+ if json_response_text.startswith("```"): # Handle case where ```json wasn't used
240
+ json_response_text = json_response_text[3:]
241
+ if json_response_text.endswith("```"):
242
+ json_response_text = json_response_text[:-3]
243
+
244
+ # Find the first '{' and the last '}'
245
+ json_start = json_response_text.find('{')
246
+ json_end = json_response_text.rfind('}') + 1
247
+
248
+ if json_start != -1 and json_end != -1 and json_end > json_start:
249
+ final_json_text = json_response_text[json_start:json_end]
250
+ insights = json.loads(final_json_text)
251
+ return insights, None
252
+ else:
253
+ st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
254
+ return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
255
+
256
+ except json.JSONDecodeError as json_err:
257
+ st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
258
+ st.error("Raw AI Response:")
259
+ st.code(response.text, language='text')
260
+ return None, f"AI response was not valid JSON: {json_err}"
261
+ except AttributeError:
262
+ # Handle cases where response structure might be different (e.g. blocked)
263
+ st.error(f"🚨 Unexpected API response structure.")
264
+ st.code(f"Response object: {response}", language='text') # Log the problematic response
265
+ # Try to get blocked reason if available
266
+ try:
267
+ block_reason = response.prompt_feedback.block_reason
268
+ if block_reason:
269
+ return None, f"Content blocked by API. Reason: {block_reason}"
270
+ except Exception:
271
+ pass # Ignore if feedback structure isn't as expected
272
+ return None, "Unexpected response structure from API."
273
+ except Exception as e:
274
+ st.error(f"🚨 Unexpected issue processing AI response: {e}")
275
+ try: st.code(f"Response object: {response}", language='text')
276
+ except: pass
277
+ return None, f"Unexpected response structure: {e}"
278
 
279
+ except Exception as e:
280
+ st.error(f"🚨 An error occurred during API call: {e}")
281
+ error_msg = f"API call failed: {e}"
282
+ # Improved error identification
283
+ if hasattr(e, 'message'): # For google.api_core.exceptions
284
+ if "429" in e.message:
285
+ error_msg = "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
286
+ elif "API key not valid" in e.message:
287
+ error_msg = "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
288
+ elif "blocked" in e.message.lower(): # General check for safety blocks
289
+ error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
290
+ elif "block_reason: SAFETY" in str(e): # Fallback check
291
+ error_msg = "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
292
+
293
+ return None, error_msg
294
 
295
 
296
  def display_results(results_json, requested_analyses):
 
299
 
300
  if not isinstance(results_json, dict):
301
  st.error("Invalid results format received.")
302
+ st.json(results_json)
303
  return
304
 
 
305
  if "raw_response" in results_json:
306
  st.subheader("Raw AI Response (JSON Parsing Failed)")
307
  st.code(results_json["raw_response"], language='text')
308
  return
309
 
310
+ # Define display functions for clarity
311
+ def display_list_items(items, fields):
312
+ if items:
313
+ for item in items:
314
+ details = []
315
+ for field_key, field_label in fields.items():
316
+ value = item.get(field_key, 'N/A')
317
+ if value != 'N/A': # Only show if value exists
318
+ details.append(f"**{field_label}:** {value}")
319
+ st.markdown("- " + " - ".join(details))
320
+ # Handle specific multi-line outputs like suggestions/summaries
321
+ if 'suggestion' in item:
322
+ st.code(item['suggestion'], language='text')
323
+ elif 'description' in item:
324
+ st.markdown(f" > {item['description']}") # Indent description
325
+ elif 'summary' in item:
326
+ st.markdown(f" > {item['summary']}") # Indent summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  else:
328
+ st.markdown("_No items found for this category._")
329
  st.divider()
330
 
331
+ # Map keys to display configurations
332
+ display_config = {
333
+ "generate_docs": {
334
+ "key": "documentation_suggestions", "title": AVAILABLE_ANALYSES["generate_docs"],
335
+ "fields": {"file": "File", "line": "Line"} # Suggestion shown by st.code
336
+ },
337
+ "find_bugs": {
338
+ "key": "potential_bugs", "title": AVAILABLE_ANALYSES["find_bugs"],
339
+ "fields": {"file": "File", "line": "Line", "severity": "Severity"} # Description shown separately
340
+ },
341
+ "check_style": {
342
+ "key": "style_issues", "title": AVAILABLE_ANALYSES["check_style"],
343
+ "fields": {"file": "File", "line": "Line"} # Description shown separately
344
+ },
345
+ "summarize_modules": {
346
+ "key": "module_summaries", "title": AVAILABLE_ANALYSES["summarize_modules"],
347
+ "fields": {"file": "File"} # Summary shown separately
348
+ },
349
+ "suggest_refactoring": {
350
+ "key": "refactoring_suggestions", "title": AVAILABLE_ANALYSES["suggest_refactoring"],
351
+ "fields": {"file": "File", "line": "Line", "area": "Area"} # Suggestion shown separately
352
+ }
353
+ }
354
+
355
+ # Iterate and display selected sections
356
+ any_results = False
357
+ for analysis_key in requested_analyses:
358
+ if analysis_key in display_config:
359
+ config = display_config[analysis_key]
360
+ st.subheader(config["title"])
361
+ items = results_json.get(config["key"], [])
362
+ display_list_items(items, config["fields"])
363
+ if items: any_results = True
364
+
365
+ if not any_results:
366
+ st.info("No specific findings were identified in the analysis based on your selections.")
367
+
368
+ # Download button
369
  st.download_button(
370
  label="Download Full Report (JSON)",
371
  data=json.dumps(results_json, indent=4),
 
373
  mime="application/json"
374
  )
375
 
 
376
# --- Streamlit App Main Interface ---
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the statement order below is also the on-page layout order.
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")

st.title("🤖 Codebase Audit & Documentation Assistant")
st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")

# Sidebar controls
with st.sidebar:
    st.header("⚙️ Analysis Controls")
    # Mock Mode Toggle
    # NOTE(review): reads st.session_state.mock_api_call before assigning it,
    # so this assumes the key is initialised earlier in the file - confirm.
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode (for Testing)",
        value=st.session_state.mock_api_call,
        help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing.",
    )
    if st.session_state.mock_api_call:
        st.info("Mock API Mode ACTIVE")
    else:
        st.info("Using REAL Gemini API")

    st.divider()
    st.header("🔎 Select Analyses")
    # One checkbox per analysis, all ticked by default; keep the ticked keys.
    selected_analyses = [
        key
        for key, name in AVAILABLE_ANALYSES.items()
        if st.checkbox(name, value=True, key=f"cb_{key}")
    ]

    st.divider()
    st.header("📄 How To Use")
    st.info(
        "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select desired analyses.\n"
        "4. Create a **ZIP archive** of your codebase.\n"
        "5. Upload the `.zip` file.\n"
        "6. Click 'Analyze Codebase'.\n"
        "7. Review the report."
    )
    # CODE_EXTENSIONS is a set: sort it so the note reads the same on every run.
    st.info(f"**Note:** Only files with common code extensions ({', '.join(sorted(CODE_EXTENSIONS))}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")

    st.divider()
    st.warning("⚠️ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode is OFF. Do not upload sensitive code if uncomfortable.")


# Main content area
uploaded_file = st.file_uploader("📁 Upload Codebase ZIP File", type=['zip'], key="file_uploader")

analysis_triggered = False
# (results_json, error_message) produced by the current run, if any.
# NOTE(review): these are plain locals, so any rerun after the click (e.g.
# pressing the download button) starts with no results to show.
results_cache = None

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")

    with st.spinner("Inspecting ZIP file..."):
        code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)

    # code_files of None marks a ZIP-processing error; per the original
    # comment the message has already been shown, so nothing is added here.
    if code_files is not None:
        # NOTE(review): total_chars is formatted as a number here yet is also
        # passed to estimate_token_count - confirm that helper accepts a
        # character count rather than the text itself.
        st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                # Use st.code for better formatting of list
                st.code("\n".join(ignored_files), language='text')

        analyze_button_disabled = (not selected_analyses or file_count == 0)
        analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
        if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
            analysis_triggered = True

            st.divider()
            with st.spinner(f"🚀 Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... This may take time."):
                # 1. Construct Prompt
                analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)

                if analysis_prompt and included_files_in_prompt:
                    st.write(f"Analyzing {len(included_files_in_prompt)} files...")
                    # 2. Call API (Real or Mock)
                    results_json, error_message = call_gemini_api(analysis_prompt)
                    results_cache = (results_json, error_message)  # Store results
                elif not included_files_in_prompt:
                    results_cache = (None, "Could not proceed: No files included in prompt (check token limits/errors).")
                else:
                    results_cache = (None, "Failed to generate analysis prompt.")

        # The button is disabled in exactly these two situations, so the
        # explanation has to live outside the click handler to ever be seen.
        if not selected_analyses:
            st.warning("Please select at least one analysis type from the sidebar.")
        elif file_count == 0:
            st.warning("No relevant code files found in the ZIP archive to analyze.")

    # Display results outside the button click block if analysis was triggered
    if analysis_triggered and results_cache:
        results_json, error_message = results_cache
        st.divider()
        if error_message:
            st.error(f"Analysis Failed: {error_message}")
            # Display partial results if available (e.g., raw response on JSON error)
            if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
                st.subheader("Raw AI Response")
                st.code(results_json["raw_response"], language='text')
        elif results_json:
            display_results(results_json, selected_analyses)
        else:
            st.error("Analysis did not return results or an unknown error occurred.")

else:
    st.info("Upload a ZIP file containing your source code to begin.")

st.divider()