Update app.py
app.py
CHANGED

@@ -3,23 +3,26 @@ import google.generativeai as genai
import zipfile
import io
import json
import os  # Still needed for API key potentially, but not model names
from pathlib import Path
import time

# --- Configuration ---
# Model names are now discovered dynamically. Remove hardcoded names.
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Keep this estimate
RESULTS_PAGE_SIZE = 25

AVAILABLE_ANALYSES = {  # Keep analyses config
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities",
}
CODE_EXTENSIONS = {
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb',
    '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
}  # Keep extensions

# --- Session State Initialization ---
# (Keep most session state, add one for the selected model)

@@ -30,17 +33,17 @@ if 'analysis_results' not in st.session_state:
if 'error_message' not in st.session_state:
    st.session_state.error_message = None
if 'analysis_requested' not in st.session_state:
    st.session_state.analysis_requested = False
if 'selected_model_name' not in st.session_state:
    st.session_state.selected_model_name = None  # Will hold the "models/..." name
if 'available_models_dict' not in st.session_state:
    st.session_state.available_models_dict = {}  # Store display_name -> name mapping

# --- Gemini API Setup & Model Discovery ---
model = None  # Global variable for the initialized model instance

# --- NEW: Function to list available models ---
@st.cache_data(ttl=3600)  # Cache model list for an hour
def get_available_models():
    """Lists models supporting 'generateContent' using the API key."""
    model_dict = {}

@@ -61,7 +64,7 @@ def get_available_models():
        return model_dict
    except Exception as e:
        st.error(f"🚨 Error listing available models: {e}")
        return {}  # Return empty on error
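The body of get_available_models is collapsed by the diff viewer (the elided lines are unchanged context). For orientation, a minimal sketch of what such a listing loop typically looks like with the google.generativeai SDK; the names below follow the visible docstring and locals, not the hidden lines:

    # Sketch only (not the committed body): keep models usable for generation.
    for m in genai.list_models():
        if "generateContent" in m.supported_generation_methods:
            model_dict[m.display_name] = m.name  # e.g. "Gemini 1.5 Pro" -> "models/gemini-1.5-pro"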

def initialize_gemini_model():
    """Initializes the Gemini model based on the selected name."""

@@ -72,7 +75,7 @@ def initialize_gemini_model():
        try:
            if 'GEMINI_API_KEY' not in st.secrets:
                st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
                st.stop()  # Stop if key missing for initialization
            # Configure API key (might be redundant if list_models worked, but safe)
            genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
            print(f"Initializing Gemini Model: {selected_name}")

@@ -82,170 +85,319 @@ def initialize_gemini_model():
            return True
        except Exception as e:
            st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
            st.session_state.selected_model_name = None  # Reset selection on error
            st.stop()
            return False
    elif st.session_state.mock_api_call:
        return True  # No init needed for mock mode
    elif model is not None and model.model_name == selected_name:
        return True  # Already initialized with the correct model
    elif model is not None and model.model_name != selected_name:
        print("Model changed. Re-initializing...")
        model = None  # Reset model instance
        return initialize_gemini_model()  # Recurse to re-initialize with new name
    elif not selected_name and not st.session_state.mock_api_call:
        # This case happens if no model is selected yet
        return False  # Cannot initialize without a selection
    return False  # Default case

# --- Helper Functions ---
# (estimate_token_count, process_zip_file_cached, construct_analysis_prompt,
#  call_gemini_api, display_results - remain the same as the optimized version)

def estimate_token_count(text):
    """Estimates the number of tokens based on text length."""
    return len(text) // 3
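len(text) // 3 is a rough chars-per-token heuristic (about three characters per token, deliberately conservative for source code). Where an exact count matters, the SDK can ask the service itself; a sketch, assuming an initialized GenerativeModel and one extra API round-trip:

    exact_tokens = model.count_tokens(prompt).total_tokens  # exact, but costs a request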

@st.cache_data(max_entries=5)
def process_zip_file_cached(file_id, file_size, file_content_bytes):
    """
    Processes a ZIP file and extracts code files.
    Returns a tuple of (code_files dict, total_chars, file_count, ignored_files list).
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    status_placeholder = st.empty()
    progress_bar = status_placeholder.progress(0)
    try:
        with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
            members = zip_ref.infolist()
            total_members = len(members)
            for i, member in enumerate(members):
                if i % 10 == 0:
                    progress_bar.progress(int((i / total_members) * 100))
                if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename:
                    continue
                file_path = Path(member.filename)
                if file_path.suffix.lower() in CODE_EXTENSIONS:
                    try:
                        with zip_ref.open(member) as file:
                            file_bytes = file.read()
                            try:
                                content = file_bytes.decode('utf-8')
                            except UnicodeDecodeError:
                                try:
                                    content = file_bytes.decode('latin-1')
                                except Exception as decode_err:
                                    ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                    continue
                            code_files[member.filename] = content
                            total_chars += len(content)
                            file_count += 1
                    except Exception as read_err:
                        ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                else:
                    if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
                        ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
        progress_bar.progress(100)
        status_placeholder.empty()
    except zipfile.BadZipFile:
        status_placeholder.empty()
        st.error("🚨 Invalid ZIP.")
        return None, 0, 0, []
    except Exception as e:
        status_placeholder.empty()
        st.error(f"🚨 ZIP Error: {e}")
        return None, 0, 0, []
    if file_count == 0:
        if not ignored_files:
            st.warning("No code files found.")
        else:
            st.warning("No code files found; some skipped.")
    return code_files, total_chars, file_count, ignored_files

def construct_analysis_prompt(code_files_dict, requested_analyses):
    """
    Constructs the prompt for analysis by including code files and JSON structure for expected output.
    Returns the full prompt and a list of included files.
    """
    prompt_parts = ["Analyze the following codebase...\n\n"]
    current_token_estimate = estimate_token_count(prompt_parts[0])
    included_files = []
    code_segments = []
    prompt_status = st.empty()

    if len(code_files_dict) > 50:
        prompt_status.info("Constructing prompt...")

    for filename, content in code_files_dict.items():
        segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
        segment_token_estimate = estimate_token_count(segment)
        if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
            code_segments.append(segment)
            current_token_estimate += segment_token_estimate
            included_files.append(filename)
        else:
            st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
            break
    prompt_status.empty()

    if not included_files:
        st.error("🚨 No code files included in prompt.")
        return None, []

    prompt_parts.append("".join(code_segments))
    json_structure_description = "{\n"
    structure_parts = []

    if "generate_docs" in requested_analyses:
        structure_parts.append(' "documentation_suggestions": [...]')
    if "find_bugs" in requested_analyses:
        structure_parts.append(' "potential_bugs": [...]')
    if "check_style" in requested_analyses:
        structure_parts.append(' "style_issues": [...]')
    if "summarize_modules" in requested_analyses:
        structure_parts.append(' "module_summaries": [...]')
    if "suggest_refactoring" in requested_analyses:
        structure_parts.append(' "refactoring_suggestions": [...]')

    json_structure_description += ",\n".join(structure_parts) + "\n}"
    prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
    prompt_parts.append(prompt_footer)

    full_prompt = "".join(prompt_parts)
    return full_prompt, included_files
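A worked example of the skeleton this assembles: with requested_analyses = ["generate_docs", "find_bugs"], the footer embeds

    {
     "documentation_suggestions": [...],
     "potential_bugs": [...]
    }

so the model is steered toward exactly one top-level key per selected analysis, matching the keys display_results reads back.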

def call_gemini_api(prompt):
    """
    Calls the Gemini API using the provided prompt.
    Returns the parsed JSON insights or an error message.
    """
    if not prompt:
        return None, "Prompt generation failed."

    # MOCK MODE
    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(1)
        mock_json_response = json.dumps({
            "documentation_suggestions": [],
            "potential_bugs": [],
            "style_issues": [],
            "module_summaries": [],
            "refactoring_suggestions": []
        })
        st.success("Mock response generated.")
        return json.loads(mock_json_response), None
    # REAL API CALL
    else:
        if not initialize_gemini_model():
            return None, "Gemini Model Initialization Failed."
        if model is None:
            return None, "Gemini model not selected or available."  # Added check
        try:
            api_status = st.empty()
            api_status.info(f"📡 Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
            start_time = time.time()
            response = model.generate_content(
                prompt,
                generation_config=genai.types.GenerationConfig(temperature=0.2),
                safety_settings=[
                    {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                    for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
                              "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]
                ]
            )
            end_time = time.time()
            api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s.")
            time.sleep(1)
            api_status.empty()
            try:
                json_response_text = response.text.strip()
                # Remove markdown code fences if present
                if json_response_text.startswith("```json"):
                    json_response_text = json_response_text[7:]
                if json_response_text.startswith("```"):
                    json_response_text = json_response_text[3:]
                if json_response_text.endswith("```"):
                    json_response_text = json_response_text[:-3]
                json_start = json_response_text.find('{')
                json_end = json_response_text.rfind('}') + 1
                if json_start != -1 and json_end != -1 and json_end > json_start:
                    final_json_text = json_response_text[json_start:json_end]
                    insights = json.loads(final_json_text)
                    return insights, None
                else:
                    st.warning("⚠️ Could not find valid JSON object.")
                    return {"raw_response": response.text}, "AI response did not contain clear JSON object."
            except json.JSONDecodeError as json_err:
                st.error(f"🚨 Error parsing JSON: {json_err}")
                st.code(response.text, language='text')
                return None, f"AI response not valid JSON: {json_err}"
            except AttributeError:
                st.error("🚨 Unexpected API response structure (AttributeError).")
                st.code(f"Response object: {response}", language='text')
                return None, "Unexpected response structure (AttributeError)."
            except Exception as e:
                st.error(f"🚨 Unexpected issue processing response: {e}")
                try:
                    st.code(f"Response object: {response}", language='text')
                except Exception:
                    pass
                return None, f"Unexpected response structure: {e}"
        except Exception as e:
            api_status.empty()
            st.error(f"🚨 API call error: {e}")
            error_msg = f"API call failed: {e}"
            if hasattr(e, 'message'):
                if "429" in e.message:
                    error_msg = "API Quota Exceeded or Rate Limit hit."
                elif "API key not valid" in e.message:
                    error_msg = "Invalid Gemini API Key."
                elif "permission denied" in e.message.lower():
                    error_msg = f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
                elif "blocked" in e.message.lower():
                    error_msg = "Content blocked due to safety settings."
            elif "block_reason: SAFETY" in str(e):
                error_msg = "Content blocked due to safety settings."
            return None, error_msg
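The function's contract is (insights, error): a parsed dict and None on success, None and a message on failure, or a {"raw_response": ...} dict plus a message when the reply was not clean JSON. A minimal caller sketch:

    insights, err = call_gemini_api(analysis_prompt)
    if err:
        st.error(err)  # message is already user-readable
    elif insights:
        display_results(insights, selected_analyses)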

def display_results(results_json, requested_analyses):
    """
    Displays the analysis results with pagination and allows JSON download.
    """
    st.header("📊 Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"}
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"}
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"}
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"}
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"}
        },
    }
    any_results_found = False
    for analysis_key in requested_analyses:
        if analysis_key in display_config:
            config = display_config[analysis_key]
            items = results_json.get(config["key"], [])
            total_items = len(items)
            st.subheader(f"{config['title']} ({total_items} found)")
            if items:
                any_results_found = True
                state_key = f"visible_{analysis_key}"
                if state_key not in st.session_state:
                    st.session_state[state_key] = RESULTS_PAGE_SIZE
                visible_count = st.session_state[state_key]
                items_to_display = items[:visible_count]
                for item in items_to_display:
                    details = [
                        f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file'
                        else f"**{field_label}:** {item.get(field_key, 'N/A')}"
                        for field_key, field_label in config["fields"].items()
                        if item.get(field_key, 'N/A') != 'N/A'
                    ]
                    st.markdown("- " + " - ".join(details))
                    if 'suggestion' in item:
                        st.code(item['suggestion'], language='text')
                    elif 'description' in item:
                        st.markdown(f" > {item['description']}")
                    elif 'summary' in item:
                        st.markdown(f" > {item['summary']}")
                if total_items > visible_count:
                    if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
                        st.session_state[state_key] += RESULTS_PAGE_SIZE
                        st.rerun()
            else:
                st.markdown("_No items found for this category._")
            st.divider()
    if not any_results_found:
        st.info("No specific findings were identified.")
    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )
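display_results leans on a small Streamlit pagination idiom: keep the visible count in session state, bump it on a button press, and rerun. Stripped to its core (a sketch, not part of the commit):

    key = "visible_items"
    st.session_state.setdefault(key, RESULTS_PAGE_SIZE)
    for item in items[: st.session_state[key]]:
        st.write(item)
    if len(items) > st.session_state[key] and st.button("Show more"):
        st.session_state[key] += RESULTS_PAGE_SIZE
        st.rerun()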
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")

@@ -254,7 +406,11 @@ st.title("🤖 Codebase Audit & Documentation Assistant")
# --- Sidebar ---
with st.sidebar:
    st.header("⚙️ Analysis Controls")
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode",
        value=st.session_state.mock_api_call,
        help="Use fake data instead of calling Gemini API."
    )

    st.divider()
    st.header("♊ Select Model")

@@ -268,17 +424,16 @@ with st.sidebar:
            # Try to find the index of the previously selected model
            current_model_display_name = None
            if st.session_state.selected_model_name:
                # Find display name matching the stored internal name
                for disp_name, internal_name in st.session_state.available_models_dict.items():
                    if internal_name == st.session_state.selected_model_name:
                        current_model_display_name = disp_name
                        break
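The reverse lookup above (internal "models/..." name back to its display name) can also be written as a single expression; an equivalent sketch:

    current_model_display_name = next(
        (disp for disp, name in st.session_state.available_models_dict.items()
         if name == st.session_state.selected_model_name),
        None,
    )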

            try:
                selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
            except ValueError:
                selected_index = 0  # Default to first if previous selection not found

            selected_display_name = st.selectbox(
                "Choose Gemini model:",

@@ -291,42 +446,56 @@ with st.sidebar:
            st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
            st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
        elif 'GEMINI_API_KEY' in st.secrets:
            st.warning("No compatible models found or error listing models. Check API Key permissions.")
            st.session_state.selected_model_name = None  # Ensure no model selected
        else:
            st.warning("Add GEMINI_API_KEY to secrets to list models.")
            st.session_state.selected_model_name = None
    else:  # Mock mode is active
        st.info("Mock API Mode ACTIVE")
        st.session_state.selected_model_name = "mock_model"  # Use a placeholder name for mock mode
    # --- End Dynamic Model Selection ---

    st.divider()
    st.header("🔍 Select Analyses")
    selected_analyses = [
        key for key, name in AVAILABLE_ANALYSES.items()
        if st.checkbox(name, value=True, key=f"cb_{key}")
    ]
    st.divider()
    st.header("📖 How To Use")
    st.info(
        "1. Set API Key.\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select Model (if not Mock).\n"
        "4. Select analyses.\n"
        "5. Upload ZIP.\n"
        "6. Click 'Analyze'.\n"
        "7. Review report."
    )
    st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
    st.divider()
    st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")

# Update title dynamically based on selected model
if st.session_state.selected_model_name and not st.session_state.mock_api_call:
    st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
elif st.session_state.mock_api_call:
    st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
else:
    st.markdown("Upload codebase (`.zip`) for analysis.")

# --- Main Content Area ---
uploaded_file = st.file_uploader(
    "📁 Upload Codebase ZIP File",
    type=['zip'],
    key="file_uploader",
    on_change=lambda: st.session_state.update(
        analysis_results=None,
        error_message=None,
        analysis_requested=False
    )
)
analysis_button_placeholder = st.empty()
results_placeholder = st.container()

@@ -334,50 +503,73 @@ if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    uploaded_file_bytes = uploaded_file.getvalue()
    file_id = f"{uploaded_file.name}-{uploaded_file.size}"
    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
        file_id, uploaded_file.size, uploaded_file_bytes
    )
    if code_files is not None:
        st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')

        # Disable button if no model selected (and not in mock mode)
        model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
        analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
        analyze_button_label = "Analyze Codebase"
        if not model_ready:
            analyze_button_label = "Select Model First"
        elif analyze_button_disabled:
            analyze_button_label = "Select Analyses or Upload Valid Code"

        if analysis_button_placeholder.button(
            analyze_button_label,
            type="primary",
            disabled=analyze_button_disabled
        ):
            st.session_state.analysis_requested = True
            st.session_state.analysis_results = None
            st.session_state.error_message = None
            if not selected_analyses:
                st.warning("Please select analysis types.")
            elif file_count == 0:
                st.warning("No relevant code files found.")
            elif not model_ready:
                st.warning("Please select a Gemini model from the sidebar.")
            else:
                with results_placeholder:
                    spinner_model_name = (
                        st.session_state.selected_model_name
                        if not st.session_state.mock_api_call
                        else "Mock Mode"
                    )
                    spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
                    with st.spinner(spinner_msg):
                        analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                        if analysis_prompt and included_files_in_prompt:
                            results_json, error_msg = call_gemini_api(analysis_prompt)
                            st.session_state.analysis_results = results_json
                            st.session_state.error_message = error_msg
                        elif not included_files_in_prompt:
                            st.session_state.error_message = "Could not proceed: No files included."
                        else:
                            st.session_state.error_message = "Failed to generate analysis prompt."
            st.rerun()
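The button handler stores the results and error message in session state and immediately calls st.rerun(); the display block below picks them up on the fresh run. The store-then-rerun idiom in miniature (names other than session_state and rerun are illustrative):

    if st.button("Analyze"):
        st.session_state.analysis_results = run_analysis()  # hypothetical helper
        st.rerun()  # the next run falls through to the display branch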

# Display results (Keep the same logic)
if st.session_state.analysis_requested:
    with results_placeholder:
        st.divider()
        if st.session_state.error_message:
            st.error(f"Analysis Failed: {st.session_state.error_message}")
            if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
                st.subheader("Raw AI Response")
                st.code(st.session_state.analysis_results["raw_response"], language='text')
        elif st.session_state.analysis_results:
            display_results(st.session_state.analysis_results, selected_analyses)
        else:
            st.info("Analysis initiated, but no results/errors stored.")
elif not uploaded_file:
    results_placeholder.info("Upload a ZIP file to begin.")

results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")