Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,34 +5,38 @@ import io
|
|
5 |
import json
|
6 |
import os
|
7 |
from pathlib import Path
|
8 |
-
import time
|
9 |
|
10 |
# --- Configuration ---
|
11 |
GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
|
12 |
-
MAX_PROMPT_TOKENS_ESTIMATE = 800000
|
|
|
13 |
|
14 |
AVAILABLE_ANALYSES = {
|
|
|
15 |
"generate_docs": "Generate Missing Docstrings/Comments",
|
16 |
"find_bugs": "Identify Potential Bugs & Anti-patterns",
|
17 |
"check_style": "Check Style Guide Compliance (General)",
|
18 |
"summarize_modules": "Summarize Complex Modules/Files",
|
19 |
"suggest_refactoring": "Suggest Refactoring Opportunities"
|
20 |
}
|
21 |
-
|
22 |
-
CODE_EXTENSIONS = {
|
23 |
-
'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go',
|
24 |
-
'.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
|
25 |
-
}
|
26 |
|
27 |
# --- Session State Initialization ---
|
28 |
if 'mock_api_call' not in st.session_state:
|
29 |
-
st.session_state.mock_api_call = False
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# --- Gemini API Setup ---
|
32 |
model = None
|
33 |
|
34 |
def initialize_gemini_model():
|
35 |
-
"""Initializes the Gemini model
|
36 |
global model
|
37 |
if model is None and not st.session_state.mock_api_call:
|
38 |
try:
|
@@ -48,38 +52,39 @@ def initialize_gemini_model():
|
|
48 |
st.stop()
|
49 |
return False
|
50 |
elif st.session_state.mock_api_call:
|
51 |
-
|
52 |
return True # Allow proceeding in mock mode
|
53 |
elif model is not None:
|
54 |
-
|
55 |
return True
|
56 |
return False
|
57 |
|
58 |
# --- Helper Functions ---
|
59 |
|
60 |
def estimate_token_count(text):
|
61 |
-
"""Roughly estimate token count (
|
62 |
return len(text) // 3
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
Returns:
|
69 |
-
code_files (dict): Mapping of file paths to content.
|
70 |
-
total_chars (int): Total number of characters in included files.
|
71 |
-
file_count (int): Count of processed code files.
|
72 |
-
ignored_files (list): List of files skipped or not processed.
|
73 |
-
"""
|
74 |
code_files = {}
|
75 |
total_chars = 0
|
76 |
file_count = 0
|
77 |
ignored_files = []
|
|
|
|
|
78 |
|
79 |
try:
|
80 |
-
with zipfile.ZipFile(io.BytesIO(
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
83 |
if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
|
84 |
continue
|
85 |
|
@@ -87,11 +92,12 @@ def process_zip_file(uploaded_file):
|
|
87 |
if file_path.suffix.lower() in CODE_EXTENSIONS:
|
88 |
try:
|
89 |
with zip_ref.open(member) as file:
|
|
|
90 |
try:
|
91 |
-
content =
|
92 |
except UnicodeDecodeError:
|
93 |
try:
|
94 |
-
content =
|
95 |
except Exception as decode_err:
|
96 |
ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
|
97 |
continue
|
@@ -102,31 +108,42 @@ def process_zip_file(uploaded_file):
|
|
102 |
except Exception as read_err:
|
103 |
ignored_files.append(f"{member.filename} (Read Error: {read_err})")
|
104 |
else:
|
105 |
-
# Only add to ignored if it's not explicitly ignored by path rules above
|
106 |
if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
|
107 |
ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
|
108 |
|
|
|
|
|
|
|
109 |
except zipfile.BadZipFile:
|
|
|
110 |
st.error("🚨 Invalid or corrupted ZIP file.")
|
111 |
return None, 0, 0, []
|
112 |
except Exception as e:
|
|
|
113 |
st.error(f"🚨 Error processing ZIP file: {e}")
|
114 |
return None, 0, 0, []
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
return code_files, total_chars, file_count, ignored_files
|
117 |
|
118 |
def construct_analysis_prompt(code_files_dict, requested_analyses):
|
119 |
-
"""
|
120 |
-
|
121 |
-
|
122 |
-
Returns:
|
123 |
-
full_prompt (str): The complete prompt.
|
124 |
-
included_files (list): List of file names included in the prompt.
|
125 |
-
"""
|
126 |
-
prompt_content = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
|
127 |
-
current_token_estimate = estimate_token_count(prompt_content)
|
128 |
included_files = []
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
for filename, content in code_files_dict.items():
|
132 |
file_marker = f"--- START FILE: {filename} ---\n"
|
@@ -136,20 +153,23 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
|
|
136 |
segment_token_estimate = estimate_token_count(segment)
|
137 |
|
138 |
if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
|
139 |
-
|
140 |
current_token_estimate += segment_token_estimate
|
141 |
included_files.append(filename)
|
142 |
else:
|
143 |
-
st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate} tokens).")
|
144 |
break
|
145 |
|
|
|
|
|
146 |
if not included_files:
|
147 |
st.error("🚨 No code files could be included within the estimated token limit.")
|
148 |
return None, []
|
149 |
|
150 |
-
|
|
|
151 |
|
152 |
-
#
|
153 |
json_structure_description = "{\n"
|
154 |
structure_parts = []
|
155 |
if "generate_docs" in requested_analyses:
|
@@ -162,6 +182,7 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
|
|
162 |
structure_parts.append(' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
|
163 |
if "suggest_refactoring" in requested_analyses:
|
164 |
structure_parts.append(' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
|
|
|
165 |
json_structure_description += ",\n".join(structure_parts)
|
166 |
json_structure_description += "\n}"
|
167 |
|
@@ -176,40 +197,32 @@ Respond ONLY with a single, valid JSON object adhering strictly to the following
|
|
176 |
|
177 |
**JSON Output Only:**
|
178 |
"""
|
179 |
-
|
|
|
180 |
return full_prompt, included_files
|
181 |
|
182 |
def call_gemini_api(prompt):
|
183 |
-
"""
|
184 |
-
Calls the Gemini API (or simulates it in mock mode) with the provided prompt.
|
185 |
-
|
186 |
-
Returns:
|
187 |
-
insights (dict): The parsed JSON response from the API.
|
188 |
-
error_message (str): An error message if something went wrong.
|
189 |
-
"""
|
190 |
if not prompt:
|
191 |
return None, "Prompt generation failed."
|
192 |
|
193 |
-
#
|
194 |
if st.session_state.mock_api_call:
|
195 |
-
st.info("
|
196 |
-
|
|
|
197 |
|
198 |
-
# Simulated successful response
|
199 |
mock_json_response = json.dumps({
|
200 |
"documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
|
201 |
"potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
|
202 |
-
"style_issues": [
|
203 |
-
"module_summaries": [
|
204 |
-
|
205 |
-
{"file": "mock/utils.py", "summary": "Utility functions for mocking."}
|
206 |
-
],
|
207 |
-
"refactoring_suggestions": [{"file": "mock/utils.py", "line": 30, "area": "calculate_metrics function", "suggestion": "Function is too long (> 50 lines), consider breaking it down."}]
|
208 |
})
|
209 |
-
st.success("Mock response generated
|
210 |
return json.loads(mock_json_response), None
|
211 |
|
212 |
-
#
|
213 |
else:
|
214 |
if not initialize_gemini_model():
|
215 |
return None, "Gemini Model Initialization Failed."
|
@@ -217,7 +230,10 @@ def call_gemini_api(prompt):
|
|
217 |
return None, "Gemini model not available."
|
218 |
|
219 |
try:
|
220 |
-
st.
|
|
|
|
|
|
|
221 |
response = model.generate_content(
|
222 |
prompt,
|
223 |
generation_config=genai.types.GenerationConfig(temperature=0.2),
|
@@ -228,37 +244,34 @@ def call_gemini_api(prompt):
|
|
228 |
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
229 |
]
|
230 |
)
|
231 |
-
|
|
|
|
|
|
|
232 |
|
233 |
try:
|
234 |
json_response_text = response.text.strip()
|
235 |
-
# Remove potential markdown code block fences
|
236 |
if json_response_text.startswith("```json"):
|
237 |
json_response_text = json_response_text[7:]
|
238 |
if json_response_text.startswith("```"):
|
239 |
json_response_text = json_response_text[3:]
|
240 |
if json_response_text.endswith("```"):
|
241 |
json_response_text = json_response_text[:-3]
|
242 |
-
|
243 |
-
# Extract JSON object boundaries
|
244 |
json_start = json_response_text.find('{')
|
245 |
json_end = json_response_text.rfind('}') + 1
|
246 |
-
|
247 |
if json_start != -1 and json_end != -1 and json_end > json_start:
|
248 |
final_json_text = json_response_text[json_start:json_end]
|
249 |
insights = json.loads(final_json_text)
|
250 |
return insights, None
|
251 |
else:
|
252 |
-
st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response.
|
253 |
return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
|
254 |
-
|
255 |
except json.JSONDecodeError as json_err:
|
256 |
st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
|
257 |
-
st.error("Raw AI Response:")
|
258 |
st.code(response.text, language='text')
|
259 |
return None, f"AI response was not valid JSON: {json_err}"
|
260 |
except AttributeError:
|
261 |
-
st.error("🚨 Unexpected API response structure.")
|
262 |
st.code(f"Response object: {response}", language='text')
|
263 |
try:
|
264 |
block_reason = response.prompt_feedback.block_reason
|
@@ -266,7 +279,7 @@ def call_gemini_api(prompt):
|
|
266 |
return None, f"Content blocked by API. Reason: {block_reason}"
|
267 |
except Exception:
|
268 |
pass
|
269 |
-
return None, "Unexpected response structure from API."
|
270 |
except Exception as e:
|
271 |
st.error(f"🚨 Unexpected issue processing AI response: {e}")
|
272 |
try:
|
@@ -274,95 +287,82 @@ def call_gemini_api(prompt):
|
|
274 |
except Exception:
|
275 |
pass
|
276 |
return None, f"Unexpected response structure: {e}"
|
277 |
-
|
278 |
except Exception as e:
|
|
|
279 |
st.error(f"🚨 An error occurred during API call: {e}")
|
280 |
error_msg = f"API call failed: {e}"
|
281 |
if hasattr(e, 'message'):
|
282 |
if "429" in e.message:
|
283 |
-
error_msg = "API Quota Exceeded or Rate Limit hit.
|
284 |
elif "API key not valid" in e.message:
|
285 |
-
error_msg = "Invalid Gemini API Key.
|
286 |
elif "blocked" in e.message.lower():
|
287 |
-
error_msg = "Content blocked due to safety settings.
|
288 |
elif "block_reason: SAFETY" in str(e):
|
289 |
-
error_msg = "Content blocked due to safety settings.
|
290 |
-
|
291 |
return None, error_msg
|
292 |
|
293 |
def display_results(results_json, requested_analyses):
|
294 |
-
"""Renders the analysis results
|
295 |
st.header("π Analysis Report")
|
296 |
-
|
297 |
if not isinstance(results_json, dict):
|
298 |
st.error("Invalid results format received.")
|
299 |
st.json(results_json)
|
300 |
return
|
301 |
-
|
302 |
if "raw_response" in results_json:
|
303 |
st.subheader("Raw AI Response (JSON Parsing Failed)")
|
304 |
st.code(results_json["raw_response"], language='text')
|
305 |
return
|
306 |
|
307 |
-
def display_list_items(items, fields):
|
308 |
-
if items:
|
309 |
-
for item in items:
|
310 |
-
details = []
|
311 |
-
for field_key, field_label in fields.items():
|
312 |
-
value = item.get(field_key, 'N/A')
|
313 |
-
if value != 'N/A':
|
314 |
-
details.append(f"**{field_label}:** {value}")
|
315 |
-
st.markdown("- " + " - ".join(details))
|
316 |
-
# Display multi-line outputs when applicable
|
317 |
-
if 'suggestion' in item:
|
318 |
-
st.code(item['suggestion'], language='text')
|
319 |
-
elif 'description' in item:
|
320 |
-
st.markdown(f" > {item['description']}")
|
321 |
-
elif 'summary' in item:
|
322 |
-
st.markdown(f" > {item['summary']}")
|
323 |
-
else:
|
324 |
-
st.markdown("_No items found for this category._")
|
325 |
-
st.divider()
|
326 |
-
|
327 |
display_config = {
|
328 |
-
"generate_docs": {
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
}
|
333 |
-
"find_bugs": {
|
334 |
-
"key": "potential_bugs",
|
335 |
-
"title": AVAILABLE_ANALYSES["find_bugs"],
|
336 |
-
"fields": {"file": "File", "line": "Line", "severity": "Severity"}
|
337 |
-
},
|
338 |
-
"check_style": {
|
339 |
-
"key": "style_issues",
|
340 |
-
"title": AVAILABLE_ANALYSES["check_style"],
|
341 |
-
"fields": {"file": "File", "line": "Line"}
|
342 |
-
},
|
343 |
-
"summarize_modules": {
|
344 |
-
"key": "module_summaries",
|
345 |
-
"title": AVAILABLE_ANALYSES["summarize_modules"],
|
346 |
-
"fields": {"file": "File"}
|
347 |
-
},
|
348 |
-
"suggest_refactoring": {
|
349 |
-
"key": "refactoring_suggestions",
|
350 |
-
"title": AVAILABLE_ANALYSES["suggest_refactoring"],
|
351 |
-
"fields": {"file": "File", "line": "Line", "area": "Area"}
|
352 |
-
}
|
353 |
}
|
354 |
|
355 |
-
|
356 |
for analysis_key in requested_analyses:
|
357 |
if analysis_key in display_config:
|
358 |
config = display_config[analysis_key]
|
359 |
-
st.subheader(config["title"])
|
360 |
items = results_json.get(config["key"], [])
|
361 |
-
|
|
|
|
|
|
|
362 |
if items:
|
363 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
|
365 |
-
if not
|
366 |
st.info("No specific findings were identified in the analysis based on your selections.")
|
367 |
|
368 |
st.download_button(
|
@@ -374,54 +374,36 @@ def display_results(results_json, requested_analyses):
|
|
374 |
|
375 |
# --- Streamlit App Main Interface ---
|
376 |
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
|
|
|
|
|
377 |
|
378 |
-
st.title("🤖 Codebase Audit & Documentation Assistant")
|
379 |
-
st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")
|
380 |
-
|
381 |
-
# Sidebar controls
|
382 |
with st.sidebar:
|
383 |
st.header("⚙️ Analysis Controls")
|
384 |
-
st.session_state.mock_api_call = st.toggle(
|
385 |
-
|
386 |
-
value=st.session_state.mock_api_call,
|
387 |
-
help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing."
|
388 |
-
)
|
389 |
-
if st.session_state.mock_api_call:
|
390 |
-
st.info("Mock API Mode ACTIVE")
|
391 |
-
else:
|
392 |
-
st.info("Using REAL Gemini API")
|
393 |
-
|
394 |
st.divider()
|
395 |
st.header("π Select Analyses")
|
396 |
-
selected_analyses = []
|
397 |
-
for key, name in AVAILABLE_ANALYSES.items():
|
398 |
-
if st.checkbox(name, value=True, key=f"cb_{key}"):
|
399 |
-
selected_analyses.append(key)
|
400 |
-
|
401 |
st.divider()
|
402 |
st.header("π How To Use")
|
403 |
-
st.info(
|
404 |
-
|
405 |
-
"2. Toggle Mock Mode if needed.\n"
|
406 |
-
"3. Select desired analyses.\n"
|
407 |
-
"4. Create a **ZIP archive** of your codebase.\n"
|
408 |
-
"5. Upload the `.zip` file.\n"
|
409 |
-
"6. Click 'Analyze Codebase'.\n"
|
410 |
-
"7. Review the report."
|
411 |
-
)
|
412 |
-
st.info(f"**Note:** Only files with common code extensions ({', '.join(CODE_EXTENSIONS)}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
|
413 |
st.divider()
|
414 |
-
st.warning("⚠️ **Privacy:** Code
|
415 |
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
|
|
420 |
|
421 |
if uploaded_file:
|
422 |
st.success(f"✅ File '{uploaded_file.name}' uploaded.")
|
423 |
-
|
424 |
-
|
|
|
|
|
|
|
425 |
|
426 |
if code_files is not None:
|
427 |
st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimate_token_count(total_chars):,}")
|
@@ -431,42 +413,44 @@ if uploaded_file:
|
|
431 |
|
432 |
analyze_button_disabled = (not selected_analyses or file_count == 0)
|
433 |
analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
|
434 |
-
if
|
435 |
-
|
|
|
|
|
|
|
436 |
if not selected_analyses:
|
437 |
-
st.warning("Please select
|
438 |
elif file_count == 0:
|
439 |
-
st.warning("No relevant code files found
|
440 |
else:
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
|
456 |
-
if analysis_triggered and results_cache:
|
457 |
-
results_json, error_message = results_cache
|
458 |
-
st.divider()
|
459 |
-
if error_message:
|
460 |
-
st.error(f"Analysis Failed: {error_message}")
|
461 |
-
if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
|
462 |
-
st.subheader("Raw AI Response")
|
463 |
-
st.code(results_json["raw_response"], language='text')
|
464 |
-
elif results_json:
|
465 |
-
display_results(results_json, selected_analyses)
|
466 |
-
else:
|
467 |
-
st.error("Analysis did not return results or an unknown error occurred.")
|
468 |
elif not uploaded_file:
|
469 |
-
|
470 |
|
471 |
-
|
472 |
-
|
|
|
5 |
import json
|
6 |
import os
|
7 |
from pathlib import Path
|
8 |
+
import time
|
9 |
|
10 |
# --- Configuration ---
|
11 |
GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
|
12 |
+
MAX_PROMPT_TOKENS_ESTIMATE = 800000
|
13 |
+
RESULTS_PAGE_SIZE = 25 # Number of items to show per category initially
|
14 |
|
15 |
AVAILABLE_ANALYSES = {
|
16 |
+
# ... (keep the same)
|
17 |
"generate_docs": "Generate Missing Docstrings/Comments",
|
18 |
"find_bugs": "Identify Potential Bugs & Anti-patterns",
|
19 |
"check_style": "Check Style Guide Compliance (General)",
|
20 |
"summarize_modules": "Summarize Complex Modules/Files",
|
21 |
"suggest_refactoring": "Suggest Refactoring Opportunities"
|
22 |
}
|
23 |
+
CODE_EXTENSIONS = {'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'}
|
|
|
|
|
|
|
|
|
24 |
|
25 |
# --- Session State Initialization ---
|
26 |
if 'mock_api_call' not in st.session_state:
|
27 |
+
st.session_state.mock_api_call = False
|
28 |
+
if 'analysis_results' not in st.session_state:
|
29 |
+
st.session_state.analysis_results = None # Store results here
|
30 |
+
if 'error_message' not in st.session_state:
|
31 |
+
st.session_state.error_message = None
|
32 |
+
if 'analysis_requested' not in st.session_state:
|
33 |
+
st.session_state.analysis_requested = False # Flag to know when analysis is done
|
34 |
|
35 |
# --- Gemini API Setup ---
|
36 |
model = None
|
37 |
|
38 |
def initialize_gemini_model():
|
39 |
+
"""Initializes the Gemini API model unless running in mock mode."""
|
40 |
global model
|
41 |
if model is None and not st.session_state.mock_api_call:
|
42 |
try:
|
|
|
52 |
st.stop()
|
53 |
return False
|
54 |
elif st.session_state.mock_api_call:
|
55 |
+
# Running in Mock Mode. Skipping Gemini initialization.
|
56 |
return True # Allow proceeding in mock mode
|
57 |
elif model is not None:
|
58 |
+
# Gemini Model already initialized.
|
59 |
return True
|
60 |
return False
|
61 |
|
62 |
# --- Helper Functions ---
|
63 |
|
64 |
def estimate_token_count(text):
    """Roughly estimate a token count, assuming ~3 characters per token.

    Args:
        text: Either the text to measure (any object supporting ``len()``,
            typically a ``str``) or an already-computed character count
            given as an ``int``.

    Returns:
        int: The character count floor-divided by 3.

    Raises:
        TypeError: If ``text`` is neither an ``int`` nor a sized object.
    """
    # This file calls the helper both with prompt strings and with the
    # integer ``total_chars`` running total; ``len()`` on an int raises
    # TypeError, so an int is treated as the character count itself.
    char_count = text if isinstance(text, int) else len(text)
    return char_count // 3
|
67 |
|
68 |
+
# --- OPTIMIZATION: Cache ZIP processing ---
|
69 |
+
@st.cache_data(max_entries=5) # Cache results for recent uploads
|
70 |
+
def process_zip_file_cached(file_id, file_size, file_content_bytes):
|
71 |
+
"""Extracts code files and their content. Cached function."""
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
code_files = {}
|
73 |
total_chars = 0
|
74 |
file_count = 0
|
75 |
ignored_files = []
|
76 |
+
status_placeholder = st.empty() # For progress bar
|
77 |
+
progress_bar = status_placeholder.progress(0)
|
78 |
|
79 |
try:
|
80 |
+
with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
|
81 |
+
members = zip_ref.infolist()
|
82 |
+
total_members = len(members)
|
83 |
+
for i, member in enumerate(members):
|
84 |
+
# Update progress bar periodically (every 10 files)
|
85 |
+
if i % 10 == 0:
|
86 |
+
progress_bar.progress(int((i / total_members) * 100))
|
87 |
+
|
88 |
if member.is_dir() or any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename:
|
89 |
continue
|
90 |
|
|
|
92 |
if file_path.suffix.lower() in CODE_EXTENSIONS:
|
93 |
try:
|
94 |
with zip_ref.open(member) as file:
|
95 |
+
file_bytes = file.read()
|
96 |
try:
|
97 |
+
content = file_bytes.decode('utf-8')
|
98 |
except UnicodeDecodeError:
|
99 |
try:
|
100 |
+
content = file_bytes.decode('latin-1')
|
101 |
except Exception as decode_err:
|
102 |
ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
|
103 |
continue
|
|
|
108 |
except Exception as read_err:
|
109 |
ignored_files.append(f"{member.filename} (Read Error: {read_err})")
|
110 |
else:
|
|
|
111 |
if not (any(part.startswith('.') for part in Path(member.filename).parts) or '__' in member.filename):
|
112 |
ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
|
113 |
|
114 |
+
progress_bar.progress(100) # Ensure it completes
|
115 |
+
status_placeholder.empty() # Remove progress bar after completion
|
116 |
+
|
117 |
except zipfile.BadZipFile:
|
118 |
+
status_placeholder.empty()
|
119 |
st.error("🚨 Invalid or corrupted ZIP file.")
|
120 |
return None, 0, 0, []
|
121 |
except Exception as e:
|
122 |
+
status_placeholder.empty()
|
123 |
st.error(f"🚨 Error processing ZIP file: {e}")
|
124 |
return None, 0, 0, []
|
125 |
|
126 |
+
if file_count == 0 and not ignored_files:
|
127 |
+
st.warning("No files with recognized code extensions found in the ZIP.")
|
128 |
+
elif file_count == 0 and ignored_files:
|
129 |
+
st.warning("No files with recognized code extensions found. Some files were skipped.")
|
130 |
+
|
131 |
+
print(f"Cache miss or new file: Processed ZIP {file_id}") # Debug print
|
132 |
return code_files, total_chars, file_count, ignored_files
|
133 |
|
134 |
def construct_analysis_prompt(code_files_dict, requested_analyses):
|
135 |
+
"""Constructs the prompt for Gemini, including code content and JSON structure request."""
|
136 |
+
prompt_parts = ["Analyze the following codebase provided as a collection of file paths and their content.\n\n"]
|
137 |
+
current_token_estimate = estimate_token_count(prompt_parts[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
included_files = []
|
139 |
+
|
140 |
+
# Use join for potentially faster concatenation
|
141 |
+
code_segments = []
|
142 |
+
|
143 |
+
# Provide feedback for large codebases
|
144 |
+
prompt_status = st.empty()
|
145 |
+
if len(code_files_dict) > 50:
|
146 |
+
prompt_status.write("Constructing prompt (processing files)...")
|
147 |
|
148 |
for filename, content in code_files_dict.items():
|
149 |
file_marker = f"--- START FILE: {filename} ---\n"
|
|
|
153 |
segment_token_estimate = estimate_token_count(segment)
|
154 |
|
155 |
if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
|
156 |
+
code_segments.append(segment)
|
157 |
current_token_estimate += segment_token_estimate
|
158 |
included_files.append(filename)
|
159 |
else:
|
160 |
+
st.warning(f"⚠️ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({current_token_estimate:,} tokens).")
|
161 |
break
|
162 |
|
163 |
+
prompt_status.empty() # Clear status message
|
164 |
+
|
165 |
if not included_files:
|
166 |
st.error("🚨 No code files could be included within the estimated token limit.")
|
167 |
return None, []
|
168 |
|
169 |
+
concatenated_code = "".join(code_segments)
|
170 |
+
prompt_parts.append(concatenated_code)
|
171 |
|
172 |
+
# Generate the expected JSON structure description based on selected analyses
|
173 |
json_structure_description = "{\n"
|
174 |
structure_parts = []
|
175 |
if "generate_docs" in requested_analyses:
|
|
|
182 |
structure_parts.append(' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]')
|
183 |
if "suggest_refactoring" in requested_analyses:
|
184 |
structure_parts.append(' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]')
|
185 |
+
|
186 |
json_structure_description += ",\n".join(structure_parts)
|
187 |
json_structure_description += "\n}"
|
188 |
|
|
|
197 |
|
198 |
**JSON Output Only:**
|
199 |
"""
|
200 |
+
prompt_parts.append(prompt_footer)
|
201 |
+
full_prompt = "".join(prompt_parts)
|
202 |
return full_prompt, included_files
|
203 |
|
204 |
def call_gemini_api(prompt):
|
205 |
+
"""Calls the Gemini API or returns mock data based on session state."""
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
if not prompt:
|
207 |
return None, "Prompt generation failed."
|
208 |
|
209 |
+
# MOCK MODE LOGIC
|
210 |
if st.session_state.mock_api_call:
|
211 |
+
st.info("MOCK MODE: Simulating API call...")
|
212 |
+
st.write("...") # Minimal feedback in mock mode
|
213 |
+
time.sleep(1) # Shorter mock delay
|
214 |
|
|
|
215 |
mock_json_response = json.dumps({
|
216 |
"documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
|
217 |
"potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
|
218 |
+
"style_issues": [],
|
219 |
+
"module_summaries": [],
|
220 |
+
"refactoring_suggestions": []
|
|
|
|
|
|
|
221 |
})
|
222 |
+
st.success("Mock response generated.")
|
223 |
return json.loads(mock_json_response), None
|
224 |
|
225 |
+
# REAL API CALL LOGIC
|
226 |
else:
|
227 |
if not initialize_gemini_model():
|
228 |
return None, "Gemini Model Initialization Failed."
|
|
|
230 |
return None, "Gemini model not available."
|
231 |
|
232 |
try:
|
233 |
+
api_status = st.empty()
|
234 |
+
token_estimate = estimate_token_count(prompt)
|
235 |
+
api_status.info(f"📡 Sending request to {GEMINI_MODEL_NAME} (Estimated prompt tokens: {token_estimate:,})... This can take several minutes depending on code size and model load.")
|
236 |
+
start_time = time.time()
|
237 |
response = model.generate_content(
|
238 |
prompt,
|
239 |
generation_config=genai.types.GenerationConfig(temperature=0.2),
|
|
|
244 |
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
245 |
]
|
246 |
)
|
247 |
+
end_time = time.time()
|
248 |
+
api_status.success(f"✅ Response received from AI in {end_time - start_time:.2f} seconds.")
|
249 |
+
time.sleep(1)
|
250 |
+
api_status.empty()
|
251 |
|
252 |
try:
|
253 |
json_response_text = response.text.strip()
|
|
|
254 |
if json_response_text.startswith("```json"):
|
255 |
json_response_text = json_response_text[7:]
|
256 |
if json_response_text.startswith("```"):
|
257 |
json_response_text = json_response_text[3:]
|
258 |
if json_response_text.endswith("```"):
|
259 |
json_response_text = json_response_text[:-3]
|
|
|
|
|
260 |
json_start = json_response_text.find('{')
|
261 |
json_end = json_response_text.rfind('}') + 1
|
|
|
262 |
if json_start != -1 and json_end != -1 and json_end > json_start:
|
263 |
final_json_text = json_response_text[json_start:json_end]
|
264 |
insights = json.loads(final_json_text)
|
265 |
return insights, None
|
266 |
else:
|
267 |
+
st.warning("⚠️ Could not find valid JSON object boundaries ({...}) in response.")
|
268 |
return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
|
|
|
269 |
except json.JSONDecodeError as json_err:
|
270 |
st.error(f"🚨 Error parsing JSON response from AI: {json_err}")
|
|
|
271 |
st.code(response.text, language='text')
|
272 |
return None, f"AI response was not valid JSON: {json_err}"
|
273 |
except AttributeError:
|
274 |
+
st.error("🚨 Unexpected API response structure (AttributeError).")
|
275 |
st.code(f"Response object: {response}", language='text')
|
276 |
try:
|
277 |
block_reason = response.prompt_feedback.block_reason
|
|
|
279 |
return None, f"Content blocked by API. Reason: {block_reason}"
|
280 |
except Exception:
|
281 |
pass
|
282 |
+
return None, "Unexpected response structure from API (AttributeError)."
|
283 |
except Exception as e:
|
284 |
st.error(f"🚨 Unexpected issue processing AI response: {e}")
|
285 |
try:
|
|
|
287 |
except Exception:
|
288 |
pass
|
289 |
return None, f"Unexpected response structure: {e}"
|
|
|
290 |
except Exception as e:
|
291 |
+
api_status.empty()
|
292 |
st.error(f"🚨 An error occurred during API call: {e}")
|
293 |
error_msg = f"API call failed: {e}"
|
294 |
if hasattr(e, 'message'):
|
295 |
if "429" in e.message:
|
296 |
+
error_msg = "API Quota Exceeded or Rate Limit hit."
|
297 |
elif "API key not valid" in e.message:
|
298 |
+
error_msg = "Invalid Gemini API Key."
|
299 |
elif "blocked" in e.message.lower():
|
300 |
+
error_msg = "Content blocked due to safety settings."
|
301 |
elif "block_reason: SAFETY" in str(e):
|
302 |
+
error_msg = "Content blocked due to safety settings."
|
|
|
303 |
return None, error_msg
|
304 |
|
305 |
def _normalize_findings(raw_items):
    """Coerce one category of the model response into a list of findings."""
    if isinstance(raw_items, list):
        return raw_items
    if isinstance(raw_items, dict):
        # A single finding returned as a bare object instead of a one-item list.
        return [raw_items]
    # null, strings, numbers: nothing that can be rendered as findings.
    return []


def _render_finding(item, fields):
    """Render one finding: a metadata bullet followed by its main text."""
    if not isinstance(item, dict):
        # The model sometimes returns plain strings; show them instead of crashing.
        st.markdown(f"- {item}")
        return

    details = []
    for field_key, field_label in fields.items():
        value = item.get(field_key, 'N/A')
        if value is None or value == 'N/A':
            continue
        if field_key == 'file':
            details.append(f"**{field_label}:** `{value}`")
        else:
            details.append(f"**{field_label}:** {value}")
    if details:
        st.markdown("- " + " - ".join(details))

    # Only the first available text field is shown, in this priority order.
    if 'suggestion' in item:
        st.code(item['suggestion'], language='text')
    elif 'description' in item:
        st.markdown(f" > {item['description']}")
    elif 'summary' in item:
        st.markdown(f" > {item['summary']}")


def display_results(results_json, requested_analyses):
    """Render the analysis report, one paginated section per requested analysis.

    Args:
        results_json: Parsed model response. Expected to be a dict mapping
            result keys (e.g. "potential_bugs") to lists of finding dicts.
            A non-dict value is reported as an error and dumped as-is; a dict
            carrying "raw_response" is shown as raw text.
        requested_analyses: Iterable of analysis keys (keys of
            AVAILABLE_ANALYSES); keys without a display configuration are
            skipped.

    Each section shows RESULTS_PAGE_SIZE findings at first. The number of
    visible findings is kept in ``st.session_state["visible_<analysis_key>"]``
    and grows by one page each time the section's "Show more" button is
    pressed.
    """
    st.header("π Analysis Report")

    if not isinstance(results_json, dict):
        st.error("Invalid results format received.")
        st.json(results_json)
        return

    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    # Maps each analysis key to the response key holding its findings and to
    # the metadata fields (response field -> label) shown for every finding.
    display_config = {
        "generate_docs": {"key": "documentation_suggestions", "title": AVAILABLE_ANALYSES["generate_docs"], "fields": {"file": "File", "line": "Line"}},
        "find_bugs": {"key": "potential_bugs", "title": AVAILABLE_ANALYSES["find_bugs"], "fields": {"file": "File", "line": "Line", "severity": "Severity"}},
        "check_style": {"key": "style_issues", "title": AVAILABLE_ANALYSES["check_style"], "fields": {"file": "File", "line": "Line"}},
        "summarize_modules": {"key": "module_summaries", "title": AVAILABLE_ANALYSES["summarize_modules"], "fields": {"file": "File"}},
        "suggest_refactoring": {"key": "refactoring_suggestions", "title": AVAILABLE_ANALYSES["suggest_refactoring"], "fields": {"file": "File", "line": "Line", "area": "Area"}}
    }

    any_results_found = False
    for analysis_key in requested_analyses:
        if analysis_key in display_config:
            config = display_config[analysis_key]
            # The response comes from an LLM, so its shape is not guaranteed:
            # normalise null / bare-object values before counting or slicing.
            items = _normalize_findings(results_json.get(config["key"]))
            total_items = len(items)

            st.subheader(f"{config['title']} ({total_items} found)")

            if items:
                any_results_found = True
                state_key = f"visible_{analysis_key}"
                if state_key not in st.session_state:
                    st.session_state[state_key] = RESULTS_PAGE_SIZE

                visible_count = st.session_state[state_key]
                for item in items[:visible_count]:
                    _render_finding(item, config["fields"])

                if total_items > visible_count:
                    if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
                        st.session_state[state_key] += RESULTS_PAGE_SIZE
                        # Rerun so the enlarged page is rendered immediately.
                        st.rerun()
            else:
                st.markdown("_No items found for this category._")
            st.divider()

    if not any_results_found:
        st.info("No specific findings were identified in the analysis based on your selections.")
|
367 |
|
368 |
st.download_button(
|
|
|
374 |
|
375 |
# --- Streamlit App Main Interface ---
st.set_page_config(layout="wide", page_title="Codebase Audit Assistant")
st.title("π€ Codebase Audit Assistant")
st.markdown(f"Upload codebase (`.zip`) for analysis via **{GEMINI_MODEL_NAME}**.")

with st.sidebar:
    # Mode switch: fake data vs. the real Gemini API. The toggle value is
    # written back to session state so it persists across reruns.
    st.header("βοΈ Analysis Controls")
    mock_enabled = st.toggle(
        "π§ͺ Enable Mock API Mode",
        value=st.session_state.mock_api_call,
        help="Use fake data instead of calling Gemini API.",
    )
    st.session_state.mock_api_call = mock_enabled
    if mock_enabled:
        st.info("Mock API Mode ACTIVE")
    else:
        st.info("Using REAL Gemini API")
    st.divider()

    # One checkbox per available analysis, all ticked by default; the keys of
    # the ticked ones are collected in declaration order.
    st.header("π Select Analyses")
    selected_analyses = []
    for analysis_key, analysis_label in AVAILABLE_ANALYSES.items():
        if st.checkbox(analysis_label, value=True, key=f"cb_{analysis_key}"):
            selected_analyses.append(analysis_key)
    st.divider()

    # Usage instructions and the token-budget note.
    st.header("π How To Use")
    st.info("1. Set API Key (if not in Mock Mode).\n2. Toggle Mock Mode if needed.\n3. Select analyses.\n4. Create & Upload a **ZIP** of your code.\n5. Click 'Analyze Codebase'.\n6. Review the report.")
    st.info(f"Note: Only common code extensions are supported. Analysis is limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} estimated tokens).")
    st.divider()

    st.warning("β οΈ **Privacy:** Code is sent to the Google API if Mock Mode is OFF.")
|
393 |
|
394 |
+
def _reset_analysis_state():
    """Clear the stored analysis outcome whenever the uploaded file changes."""
    st.session_state.update(analysis_results=None, error_message=None, analysis_requested=False)


uploaded_file = st.file_uploader(
    "π Upload Codebase ZIP File",
    type=['zip'],
    key="file_uploader",
    on_change=_reset_analysis_state,
)

analysis_button_placeholder = st.empty()  # Placeholder for the button
results_placeholder = st.container()  # Container for results display

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")

    uploaded_file_bytes = uploaded_file.getvalue()
    # Name + size is passed as an identifier for the cached ZIP processing.
    file_id = f"{uploaded_file.name}-{uploaded_file.size}"

    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(file_id, uploaded_file.size, uploaded_file_bytes)

    if code_files is not None:
        # total_chars is already a character count, so it cannot be passed to
        # estimate_token_count(), which calls len() on its argument. Apply the
        # same ~3-characters-per-token heuristic directly.
        estimated_tokens = total_chars // 3
        st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimated_tokens:,}")
|
|
|
413 |
|
414 |
# The analyze button is disabled when no analysis is selected or the archive
# contained no code files; its label explains why in that case.
analyze_button_disabled = (not selected_analyses or file_count == 0)
analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
if analysis_button_placeholder.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
    # Start a fresh run: flag the request and drop any previous outcome.
    st.session_state.analysis_requested = True
    st.session_state.analysis_results = None
    st.session_state.error_message = None

    # Defensive re-checks of the same conditions that disable the button.
    if not selected_analyses:
        st.warning("Please select analysis types.")
    elif file_count == 0:
        st.warning("No relevant code files found.")
    else:
        with results_placeholder:
            with st.spinner(f"π Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... Please wait."):
                analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                if analysis_prompt and included_files_in_prompt:
                    # Store both the parsed results and any error so the
                    # rendering section below can read them after the rerun.
                    results_json, error_msg = call_gemini_api(analysis_prompt)
                    st.session_state.analysis_results = results_json
                    st.session_state.error_message = error_msg
                elif not included_files_in_prompt:
                    st.session_state.error_message = "Could not proceed: No files included (check token limits/errors)."
                else:
                    st.session_state.error_message = "Failed to generate analysis prompt."
        # NOTE(review): exact nesting of this rerun could not be confirmed from
        # the available view; it is assumed to follow the analysis branch.
        st.rerun()

# Rendering is driven purely by session state, so it also runs on reruns that
# were not triggered by the analyze button (e.g. "Show more" clicks).
if st.session_state.analysis_requested:
    with results_placeholder:
        st.divider()
        if st.session_state.error_message:
            st.error(f"Analysis Failed: {st.session_state.error_message}")
            # An error may still come with the unparsed model output; show it.
            if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
                st.subheader("Raw AI Response")
                st.code(st.session_state.analysis_results["raw_response"], language='text')
        elif st.session_state.analysis_results:
            display_results(st.session_state.analysis_results, selected_analyses)
        else:
            # Requested, but neither results nor an error message were stored.
            st.info("Analysis initiated, but no results or errors were stored. Please try again.")
|
451 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
452 |
# Continuation of the `if uploaded_file:` statement: prompt for an upload when
# no file is present.
elif not uploaded_file:
    results_placeholder.info("Upload a ZIP file containing your source code to begin.")

# Footer, appended to the results container on every run.
results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")
|