Update app.py

app.py CHANGED

@@ -3,17 +3,17 @@ import google.generativeai as genai
 import zipfile
 import io
 import json
-import os #
+import os # For API key usage
 from pathlib import Path
 import time
 import plotly.express as px
 import pandas as pd
 
 # --- Configuration ---
-MAX_PROMPT_TOKENS_ESTIMATE = 800000 #
+MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Estimated token limit for the prompt
 RESULTS_PAGE_SIZE = 25
 
-AVAILABLE_ANALYSES = {
+AVAILABLE_ANALYSES = {
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",

@@ -35,9 +35,9 @@ if 'error_message' not in st.session_state:
 if 'analysis_requested' not in st.session_state:
     st.session_state.analysis_requested = False
 if 'selected_model_name' not in st.session_state:
-    st.session_state.selected_model_name = None #
+    st.session_state.selected_model_name = None # Holds internal model name
 if 'available_models_dict' not in st.session_state:
-    st.session_state.available_models_dict = {} # Mapping display_name -> name
+    st.session_state.available_models_dict = {} # Mapping: display_name -> internal name
 
 # --- Gemini API Setup & Model Discovery ---
 model = None # Global variable for the initialized model instance

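The guarded `if key not in st.session_state` pattern above keeps Streamlit reruns from resetting user state. A minimal sketch of the same idea written once as a loop (the `SESSION_DEFAULTS` name is hypothetical, not from this app):

    import streamlit as st

    # Hypothetical helper: the diff's guarded-default pattern, applied in one loop.
    SESSION_DEFAULTS = {
        "analysis_requested": False,
        "selected_model_name": None,  # internal model name
        "available_models_dict": {},  # display_name -> internal name
    }
    for key, default in SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = default
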
@@ -97,7 +97,7 @@ def estimate_token_count(text):
     """
     Estimates the token count.
     If a string is provided, calculates based on its length.
-    If an integer (
+    If an integer (total char count) is provided, uses that directly.
     """
     if isinstance(text, int):
         return text // 3

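`estimate_token_count` uses a rough 3-characters-per-token heuristic, so the 800,000-token budget corresponds to roughly 2.4 million characters of prompt. A self-contained sketch of the function as the docstring describes it (string length, or an integer that is already a character count):

    def estimate_token_count(text):
        """Rough estimate: ~3 characters per token."""
        if isinstance(text, int):
            return text // 3  # already a total character count
        return len(text) // 3

    # 800,000 tokens ~= 2.4M characters under this heuristic.
    assert estimate_token_count("x" * 2_400_000) == 800_000
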
@@ -164,18 +164,22 @@ def process_zip_file_cached(file_id, file_size, file_content_bytes):
 
 def construct_analysis_prompt(code_files_dict, requested_analyses):
     """
-    Constructs the prompt for analysis by including code files and
+    Constructs the prompt for analysis by including code files and structured instructions.
+    The prompt now requests detailed feedback, including line references, severity, and recommended fixes.
     Returns the full prompt and a list of included files.
     """
-    prompt_parts = [
-
+    prompt_parts = [
+        "You are a highly skilled code auditor. Analyze the following codebase in detail.\n",
+        "For each issue, provide:\n",
+        " - A short summary with line references (or approximate line references).\n",
+        " - A severity level (Low, Medium, High).\n",
+        " - A recommended fix or code snippet if applicable.\n\n",
+        "Here is the code:\n\n"
+    ]
+    current_token_estimate = estimate_token_count("".join(prompt_parts))
     included_files = []
     code_segments = []
-    prompt_status = st.empty()
 
-    if len(code_files_dict) > 50:
-        prompt_status.info("Constructing prompt...")
-
     for filename, content in code_files_dict.items():
         segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
         segment_token_estimate = estimate_token_count(segment)

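The loop started above (and continued in the next hunk) packs whole file segments into the prompt until the estimated budget would be exceeded, then stops. A standalone sketch of that greedy strategy (the `pack_files` helper and `budget` parameter are illustrative names, not from the app):

    def pack_files(code_files_dict, budget):
        """Greedily include whole file segments until the token budget is hit."""
        code_segments, included_files = [], []
        used = 0
        for filename, content in code_files_dict.items():
            segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
            cost = estimate_token_count(segment)
            if used + cost > budget:
                break  # the first file that no longer fits ends the packing
            code_segments.append(segment)
            included_files.append(filename)
            used += cost
        return code_segments, included_files
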
@@ -184,32 +188,47 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
             current_token_estimate += segment_token_estimate
             included_files.append(filename)
         else:
-            st.warning(f"⚠️
+            st.warning(f"⚠️ Exceeded context limit after {len(included_files)} files.")
             break
-    prompt_status.empty()
 
     if not included_files:
         st.error("🚨 No code files included in prompt.")
         return None, []
 
     prompt_parts.append("".join(code_segments))
-
-    structure_parts = []
-
+    prompt_parts.append("\n\nYour tasks are:\n")
     if "generate_docs" in requested_analyses:
-
+        prompt_parts.append(
+            "1) Generate missing docstrings/comments using PEP 257 style. Provide recommended text and line references.\n"
+        )
     if "find_bugs" in requested_analyses:
-
+        prompt_parts.append(
+            "2) Identify potential bugs & anti-patterns. For each, include severity, line references, and a recommended fix.\n"
+        )
     if "check_style" in requested_analyses:
-
+        prompt_parts.append(
+            "3) Check style guide compliance (PEP 8 or similar). Include line references, severity, and suggested changes.\n"
+        )
     if "summarize_modules" in requested_analyses:
-
+        prompt_parts.append(
+            "4) Summarize each module/file by describing its primary responsibilities.\n"
+        )
     if "suggest_refactoring" in requested_analyses:
-
-
-
-
-    prompt_parts.append(
+        prompt_parts.append(
+            "5) Suggest refactoring opportunities with code snippets and justification, including line references.\n"
+        )
+
+    prompt_parts.append(
+        "\nFormat your response in valid JSON with the following structure:\n"
+        "{\n"
+        " \"documentation_suggestions\": [ {\"file\": \"...\", \"line\": \"...\", \"summary\": \"...\", \"severity\": \"Low|Medium|High\", \"suggestion\": \"...\"}, ... ],\n"
+        " \"potential_bugs\": [ {\"file\": \"...\", \"line\": \"...\", \"summary\": \"...\", \"severity\": \"Low|Medium|High\", \"suggestion\": \"...\"}, ... ],\n"
+        " \"style_issues\": [ ... ],\n"
+        " \"module_summaries\": [ {\"file\": \"...\", \"summary\": \"...\"}, ... ],\n"
+        " \"refactoring_suggestions\": [ {\"file\": \"...\", \"line\": \"...\", \"area\": \"...\", \"summary\": \"...\", \"suggestion\": \"...\"}, ... ]\n"
+        "}\n"
+        "Only output valid JSON (no markdown formatting)!\n"
+    )
 
     full_prompt = "".join(prompt_parts)
     return full_prompt, included_files

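The prompt insists on bare JSON, but models sometimes wrap replies in markdown fences regardless; a defensive parsing sketch (hypothetical helper, not part of this commit):

    import json

    def parse_model_reply(reply):
        """Parse the model's JSON reply, tolerating stray ```json fences."""
        text = reply.strip()
        if text.startswith("```") and "\n" in text:
            # drop the opening fence line (with optional "json" tag) and the closing fence
            text = text.split("\n", 1)[1].rsplit("```", 1)[0]
        return json.loads(text)
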
@@ -307,7 +326,7 @@ def call_gemini_api(prompt):
 
 def display_results(results_json, requested_analyses):
     """
-    Displays the analysis results with pagination and
+    Displays the analysis results with pagination and a JSON download option.
     """
     st.header("Analysis Report")
     if not isinstance(results_json, dict):

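With RESULTS_PAGE_SIZE = 25, pagination reduces to slicing each findings list; a minimal sketch assuming a zero-based page index (the `page_slice` name is illustrative):

    RESULTS_PAGE_SIZE = 25

    def page_slice(items, page, size=RESULTS_PAGE_SIZE):
        """Return the items belonging to a zero-based page."""
        start = page * size
        return items[start:start + size]

    # 60 findings split into pages of 25, 25, and 10.
    assert len(page_slice(list(range(60)), 2)) == 10
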
@@ -416,7 +435,6 @@ with st.sidebar:
         value=st.session_state.mock_api_call,
         help="Use fake data instead of calling Gemini API."
     )
-
     st.divider()
     st.header("Select Model")
     if not st.session_state.mock_api_call:

@@ -451,11 +469,10 @@ with st.sidebar:
     else:
         st.info("Mock API Mode ACTIVE")
         st.session_state.selected_model_name = "mock_model"
-
     st.divider()
     st.header("Select Analyses")
     selected_analyses = [
-        key for key, name in AVAILABLE_ANALYSES.items()
+        key for key, name in AVAILABLE_ANALYSES.items()
         if st.checkbox(name, value=True, key=f"cb_{key}")
     ]
     st.divider()

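When Mock API Mode is active the app never calls Gemini; a sketch of a canned response shaped like the JSON contract above (all names and values illustrative only):

    # Illustrative mock payload matching the structure requested in the prompt.
    MOCK_RESPONSE = {
        "documentation_suggestions": [
            {"file": "app.py", "line": "42", "summary": "Missing docstring",
             "severity": "Low", "suggestion": "Add a one-line PEP 257 docstring."},
        ],
        "potential_bugs": [],
        "style_issues": [],
        "module_summaries": [
            {"file": "app.py", "summary": "Streamlit UI plus Gemini analysis calls."},
        ],
        "refactoring_suggestions": [],
    }

    def call_gemini_api_mock(prompt):
        """Hypothetical stand-in for the real API call when mock mode is active."""
        return MOCK_RESPONSE
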
@@ -524,7 +541,7 @@ if uploaded_file:
         analyze_button_label = "Select Model First"
     elif analyze_button_disabled:
         analyze_button_label = "Select Analyses or Upload Valid Code"
-
+
     if analysis_button_placeholder.button(
         analyze_button_label,
         type="primary",

@@ -542,8 +559,8 @@ if uploaded_file:
     else:
         with results_placeholder:
             spinner_model_name = (
-                st.session_state.selected_model_name
-                if not st.session_state.mock_api_call
+                st.session_state.selected_model_name
+                if not st.session_state.mock_api_call
                 else "Mock Mode"
             )
             spinner_msg = f"Preparing prompt & contacting AI ({spinner_model_name})... Please wait."