mgbam committed on
Commit
f25bc7a
·
verified ·
1 Parent(s): f8f2363

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -34
app.py CHANGED
@@ -3,17 +3,17 @@ import google.generativeai as genai
3
  import zipfile
4
  import io
5
  import json
6
- import os # Still needed for API key potentially, but not model names
7
  from pathlib import Path
8
  import time
9
  import plotly.express as px
10
  import pandas as pd
11
 
12
  # --- Configuration ---
13
- MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Keep this estimate
14
  RESULTS_PAGE_SIZE = 25
15
 
16
- AVAILABLE_ANALYSES = { # Keep analyses config
17
  "generate_docs": "Generate Missing Docstrings/Comments",
18
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
19
  "check_style": "Check Style Guide Compliance (General)",
@@ -35,9 +35,9 @@ if 'error_message' not in st.session_state:
35
  if 'analysis_requested' not in st.session_state:
36
  st.session_state.analysis_requested = False
37
  if 'selected_model_name' not in st.session_state:
38
- st.session_state.selected_model_name = None # Will hold the "models/..." name
39
  if 'available_models_dict' not in st.session_state:
40
- st.session_state.available_models_dict = {} # Mapping display_name -> name
41
 
42
  # --- Gemini API Setup & Model Discovery ---
43
  model = None # Global variable for the initialized model instance
@@ -97,7 +97,7 @@ def estimate_token_count(text):
97
  """
98
  Estimates the token count.
99
  If a string is provided, calculates based on its length.
100
- If an integer (e.g. total char count) is provided, uses that directly.
101
  """
102
  if isinstance(text, int):
103
  return text // 3
@@ -164,18 +164,22 @@ def process_zip_file_cached(file_id, file_size, file_content_bytes):
164
 
165
  def construct_analysis_prompt(code_files_dict, requested_analyses):
166
  """
167
- Constructs the prompt for analysis by including code files and a JSON structure for output.
 
168
  Returns the full prompt and a list of included files.
169
  """
170
- prompt_parts = ["Analyze the following codebase...\n\n"]
171
- current_token_estimate = estimate_token_count(prompt_parts[0])
 
 
 
 
 
 
 
172
  included_files = []
173
  code_segments = []
174
- prompt_status = st.empty()
175
 
176
- if len(code_files_dict) > 50:
177
- prompt_status.info("Constructing prompt...")
178
-
179
  for filename, content in code_files_dict.items():
180
  segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
181
  segment_token_estimate = estimate_token_count(segment)
@@ -184,32 +188,47 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
184
  current_token_estimate += segment_token_estimate
185
  included_files.append(filename)
186
  else:
187
- st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
188
  break
189
- prompt_status.empty()
190
 
191
  if not included_files:
192
  st.error("🚨 No code files included in prompt.")
193
  return None, []
194
 
195
  prompt_parts.append("".join(code_segments))
196
- json_structure_description = "{\n"
197
- structure_parts = []
198
-
199
  if "generate_docs" in requested_analyses:
200
- structure_parts.append(' "documentation_suggestions": [...]')
 
 
201
  if "find_bugs" in requested_analyses:
202
- structure_parts.append(' "potential_bugs": [...]')
 
 
203
  if "check_style" in requested_analyses:
204
- structure_parts.append(' "style_issues": [...]')
 
 
205
  if "summarize_modules" in requested_analyses:
206
- structure_parts.append(' "module_summaries": [...]')
 
 
207
  if "suggest_refactoring" in requested_analyses:
208
- structure_parts.append(' "refactoring_suggestions": [...]')
209
-
210
- json_structure_description += ",\n".join(structure_parts) + "\n}"
211
- prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
212
- prompt_parts.append(prompt_footer)
 
 
 
 
 
 
 
 
 
 
213
 
214
  full_prompt = "".join(prompt_parts)
215
  return full_prompt, included_files
@@ -307,7 +326,7 @@ def call_gemini_api(prompt):
307
 
308
  def display_results(results_json, requested_analyses):
309
  """
310
- Displays the analysis results with pagination and allows JSON download.
311
  """
312
  st.header("πŸ“Š Analysis Report")
313
  if not isinstance(results_json, dict):
@@ -416,7 +435,6 @@ with st.sidebar:
416
  value=st.session_state.mock_api_call,
417
  help="Use fake data instead of calling Gemini API."
418
  )
419
-
420
  st.divider()
421
  st.header("β™Š Select Model")
422
  if not st.session_state.mock_api_call:
@@ -451,11 +469,10 @@ with st.sidebar:
451
  else:
452
  st.info("Mock API Mode ACTIVE")
453
  st.session_state.selected_model_name = "mock_model"
454
-
455
  st.divider()
456
  st.header("πŸ”Ž Select Analyses")
457
  selected_analyses = [
458
- key for key, name in AVAILABLE_ANALYSES.items()
459
  if st.checkbox(name, value=True, key=f"cb_{key}")
460
  ]
461
  st.divider()
@@ -524,7 +541,7 @@ if uploaded_file:
524
  analyze_button_label = "Select Model First"
525
  elif analyze_button_disabled:
526
  analyze_button_label = "Select Analyses or Upload Valid Code"
527
-
528
  if analysis_button_placeholder.button(
529
  analyze_button_label,
530
  type="primary",
@@ -542,8 +559,8 @@ if uploaded_file:
542
  else:
543
  with results_placeholder:
544
  spinner_model_name = (
545
- st.session_state.selected_model_name
546
- if not st.session_state.mock_api_call
547
  else "Mock Mode"
548
  )
549
  spinner_msg = f"πŸš€ Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
 
3
  import zipfile
4
  import io
5
  import json
6
+ import os # For API key usage
7
  from pathlib import Path
8
  import time
9
  import plotly.express as px
10
  import pandas as pd
11
 
12
  # --- Configuration ---
13
+ MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Estimated token limit for the prompt
14
  RESULTS_PAGE_SIZE = 25
15
 
16
+ AVAILABLE_ANALYSES = {
17
  "generate_docs": "Generate Missing Docstrings/Comments",
18
  "find_bugs": "Identify Potential Bugs & Anti-patterns",
19
  "check_style": "Check Style Guide Compliance (General)",
 
35
  if 'analysis_requested' not in st.session_state:
36
  st.session_state.analysis_requested = False
37
  if 'selected_model_name' not in st.session_state:
38
+ st.session_state.selected_model_name = None # Holds internal model name
39
  if 'available_models_dict' not in st.session_state:
40
+ st.session_state.available_models_dict = {} # Mapping: display_name -> internal name
41
 
42
  # --- Gemini API Setup & Model Discovery ---
43
  model = None # Global variable for the initialized model instance
 
97
  """
98
  Estimates the token count.
99
  If a string is provided, calculates based on its length.
100
+ If an integer (total char count) is provided, uses that directly.
101
  """
102
  if isinstance(text, int):
103
  return text // 3
 
164
 
165
  def construct_analysis_prompt(code_files_dict, requested_analyses):
166
  """
167
+ Constructs the prompt for analysis by including code files and structured instructions.
168
+ The prompt now requests detailed feedback, including line references, severity, and recommended fixes.
169
  Returns the full prompt and a list of included files.
170
  """
171
+ prompt_parts = [
172
+ "You are a highly skilled code auditor. Analyze the following codebase in detail.\n",
173
+ "For each issue, provide:\n",
174
+ " - A short summary with line references (or approximate line references).\n",
175
+ " - A severity level (Low, Medium, High).\n",
176
+ " - A recommended fix or code snippet if applicable.\n\n",
177
+ "Here is the code:\n\n"
178
+ ]
179
+ current_token_estimate = estimate_token_count("".join(prompt_parts))
180
  included_files = []
181
  code_segments = []
 
182
 
 
 
 
183
  for filename, content in code_files_dict.items():
184
  segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
185
  segment_token_estimate = estimate_token_count(segment)
 
188
  current_token_estimate += segment_token_estimate
189
  included_files.append(filename)
190
  else:
191
+ st.warning(f"⚠️ Exceeded context limit after {len(included_files)} files.")
192
  break
 
193
 
194
  if not included_files:
195
  st.error("🚨 No code files included in prompt.")
196
  return None, []
197
 
198
  prompt_parts.append("".join(code_segments))
199
+ prompt_parts.append("\n\nYour tasks are:\n")
 
 
200
  if "generate_docs" in requested_analyses:
201
+ prompt_parts.append(
202
+ "1) Generate missing docstrings/comments using PEP 257 style. Provide recommended text and line references.\n"
203
+ )
204
  if "find_bugs" in requested_analyses:
205
+ prompt_parts.append(
206
+ "2) Identify potential bugs & anti-patterns. For each, include severity, line references, and a recommended fix.\n"
207
+ )
208
  if "check_style" in requested_analyses:
209
+ prompt_parts.append(
210
+ "3) Check style guide compliance (PEP 8 or similar). Include line references, severity, and suggested changes.\n"
211
+ )
212
  if "summarize_modules" in requested_analyses:
213
+ prompt_parts.append(
214
+ "4) Summarize each module/file by describing its primary responsibilities.\n"
215
+ )
216
  if "suggest_refactoring" in requested_analyses:
217
+ prompt_parts.append(
218
+ "5) Suggest refactoring opportunities with code snippets and justification, including line references.\n"
219
+ )
220
+
221
+ prompt_parts.append(
222
+ "\nFormat your response in valid JSON with the following structure:\n"
223
+ "{\n"
224
+ " \"documentation_suggestions\": [ {\"file\": \"...\", \"line\": \"...\", \"summary\": \"...\", \"severity\": \"Low|Medium|High\", \"suggestion\": \"...\"}, ... ],\n"
225
+ " \"potential_bugs\": [ {\"file\": \"...\", \"line\": \"...\", \"summary\": \"...\", \"severity\": \"Low|Medium|High\", \"suggestion\": \"...\"}, ... ],\n"
226
+ " \"style_issues\": [ ... ],\n"
227
+ " \"module_summaries\": [ {\"file\": \"...\", \"summary\": \"...\"}, ... ],\n"
228
+ " \"refactoring_suggestions\": [ {\"file\": \"...\", \"line\": \"...\", \"area\": \"...\", \"summary\": \"...\", \"suggestion\": \"...\"}, ... ]\n"
229
+ "}\n"
230
+ "Only output valid JSON (no markdown formatting)!\n"
231
+ )
232
 
233
  full_prompt = "".join(prompt_parts)
234
  return full_prompt, included_files
 
326
 
327
  def display_results(results_json, requested_analyses):
328
  """
329
+ Displays the analysis results with pagination and a JSON download option.
330
  """
331
  st.header("πŸ“Š Analysis Report")
332
  if not isinstance(results_json, dict):
 
435
  value=st.session_state.mock_api_call,
436
  help="Use fake data instead of calling Gemini API."
437
  )
 
438
  st.divider()
439
  st.header("β™Š Select Model")
440
  if not st.session_state.mock_api_call:
 
469
  else:
470
  st.info("Mock API Mode ACTIVE")
471
  st.session_state.selected_model_name = "mock_model"
 
472
  st.divider()
473
  st.header("πŸ”Ž Select Analyses")
474
  selected_analyses = [
475
+ key for key, name in AVAILABLE_ANALYSES.items()
476
  if st.checkbox(name, value=True, key=f"cb_{key}")
477
  ]
478
  st.divider()
 
541
  analyze_button_label = "Select Model First"
542
  elif analyze_button_disabled:
543
  analyze_button_label = "Select Analyses or Upload Valid Code"
544
+
545
  if analysis_button_placeholder.button(
546
  analyze_button_label,
547
  type="primary",
 
559
  else:
560
  with results_placeholder:
561
  spinner_model_name = (
562
+ st.session_state.selected_model_name
563
+ if not st.session_state.mock_api_call
564
  else "Mock Mode"
565
  )
566
  spinner_msg = f"πŸš€ Preparing prompt & contacting AI ({spinner_model_name})... Please wait."