mgbam's picture
Update app.py
ad3750e verified
raw
history blame
26.9 kB
import streamlit as st
import google.generativeai as genai
import zipfile
import io
import json
import os # Still needed for API key potentially, but not model names
from pathlib import Path
import time
# --- Configuration ---
# Model names are now discovered dynamically. Remove hardcoded names.
MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Keep this estimate
RESULTS_PAGE_SIZE = 25
AVAILABLE_ANALYSES = { # Keep analyses config
"generate_docs": "Generate Missing Docstrings/Comments",
"find_bugs": "Identify Potential Bugs & Anti-patterns",
"check_style": "Check Style Guide Compliance (General)",
"summarize_modules": "Summarize Complex Modules/Files",
"suggest_refactoring": "Suggest Refactoring Opportunities",
}
CODE_EXTENSIONS = {
'.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb',
'.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
} # Keep extensions
# --- Session State Initialization ---
# (Keep most session state, add one for the selected model)
if 'mock_api_call' not in st.session_state:
st.session_state.mock_api_call = False
if 'analysis_results' not in st.session_state:
st.session_state.analysis_results = None
if 'error_message' not in st.session_state:
st.session_state.error_message = None
if 'analysis_requested' not in st.session_state:
st.session_state.analysis_requested = False
if 'selected_model_name' not in st.session_state:
st.session_state.selected_model_name = None # Will hold the "models/..." name
if 'available_models_dict' not in st.session_state:
st.session_state.available_models_dict = {} # Store display_name -> name mapping
# --- Gemini API Setup & Model Discovery ---
model = None # Global variable for the initialized model instance
# --- NEW: Function to list available models ---
@st.cache_data(ttl=3600) # Cache model list for an hour
def get_available_models():
"""Lists models supporting 'generateContent' using the API key."""
model_dict = {}
try:
if 'GEMINI_API_KEY' not in st.secrets:
# Don't stop here, let the main part handle it, but return empty
print("API key not found in secrets during model listing attempt.")
return {}
# Configure API key temporarily just for listing
genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
print("Listing available models via API...")
for m in genai.list_models():
# Check if the model supports the 'generateContent' method
if 'generateContent' in m.supported_generation_methods:
# Store mapping: user-friendly name -> internal name
model_dict[m.display_name] = m.name
print(f"Found {len(model_dict)} compatible models.")
return model_dict
except Exception as e:
st.error(f"🚨 Error listing available models: {e}")
return {} # Return empty on error
def initialize_gemini_model():
"""Initializes the Gemini model based on the selected name."""
global model
selected_name = st.session_state.get('selected_model_name')
if selected_name and model is None and not st.session_state.mock_api_call:
try:
if 'GEMINI_API_KEY' not in st.secrets:
st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
st.stop() # Stop if key missing for initialization
# Configure API key (might be redundant if list_models worked, but safe)
genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
print(f"Initializing Gemini Model: {selected_name}")
# Use the selected model name from session state
model = genai.GenerativeModel(model_name=selected_name)
print(f"Gemini Model Initialized ({selected_name}).")
return True
except Exception as e:
st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
st.session_state.selected_model_name = None # Reset selection on error
st.stop()
return False
elif st.session_state.mock_api_call:
return True # No init needed for mock mode
elif model is not None and model.model_name == selected_name:
return True # Already initialized with the correct model
elif model is not None and model.model_name != selected_name:
print("Model changed. Re-initializing...")
model = None # Reset model instance
return initialize_gemini_model() # Recurse to re-initialize with new name
elif not selected_name and not st.session_state.mock_api_call:
# This case happens if no model is selected yet
return False # Cannot initialize without a selection
return False # Default case
# --- Helper Functions ---
# Updated estimate_token_count to support integers and strings
def estimate_token_count(text):
"""
Estimates the token count.
If a string is provided, it calculates based on its length.
If an integer is provided (e.g., total character count), it uses that directly.
"""
if isinstance(text, int):
return text // 3
return len(text) // 3
@st.cache_data(max_entries=5)
def process_zip_file_cached(file_id, file_size, file_content_bytes):
"""
Processes a ZIP file and extracts code files.
Returns a tuple of (code_files dict, total_chars, file_count, ignored_files list).
"""
code_files = {}
total_chars = 0
file_count = 0
ignored_files = []
status_placeholder = st.empty()
progress_bar = status_placeholder.progress(0)
try:
with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
members = zip_ref.infolist()
total_members = len(members)
for i, member in enumerate(members):
if i % 10 == 0:
progress_bar.progress(int((i / total_members) * 100))
if member.is_dir() or any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename:
continue
file_path = Path(member.filename)
if file_path.suffix.lower() in CODE_EXTENSIONS:
try:
with zip_ref.open(member) as file:
file_bytes = file.read()
try:
content = file_bytes.decode('utf-8')
except UnicodeDecodeError:
try:
content = file_bytes.decode('latin-1')
except Exception as decode_err:
ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
continue
code_files[member.filename] = content
total_chars += len(content)
file_count += 1
except Exception as read_err:
ignored_files.append(f"{member.filename} (Read Error: {read_err})")
else:
if not (any(p.startswith('.') for p in Path(member.filename).parts) or '__' in member.filename):
ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
progress_bar.progress(100)
status_placeholder.empty()
except zipfile.BadZipFile:
status_placeholder.empty()
st.error("🚨 Invalid ZIP.")
return None, 0, 0, []
except Exception as e:
status_placeholder.empty()
st.error(f"🚨 ZIP Error: {e}")
return None, 0, 0, []
if file_count == 0:
if not ignored_files:
st.warning("No code files found.")
else:
st.warning("No code files found; some skipped.")
return code_files, total_chars, file_count, ignored_files
def construct_analysis_prompt(code_files_dict, requested_analyses):
"""
Constructs the prompt for analysis by including code files and JSON structure for expected output.
Returns the full prompt and a list of included files.
"""
prompt_parts = ["Analyze the following codebase...\n\n"]
current_token_estimate = estimate_token_count(prompt_parts[0])
included_files = []
code_segments = []
prompt_status = st.empty()
if len(code_files_dict) > 50:
prompt_status.info("Constructing prompt...")
for filename, content in code_files_dict.items():
segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
segment_token_estimate = estimate_token_count(segment)
if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
code_segments.append(segment)
current_token_estimate += segment_token_estimate
included_files.append(filename)
else:
st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
break
prompt_status.empty()
if not included_files:
st.error("🚨 No code files included in prompt.")
return None, []
prompt_parts.append("".join(code_segments))
json_structure_description = "{\n"
structure_parts = []
if "generate_docs" in requested_analyses:
structure_parts.append(' "documentation_suggestions": [...]')
if "find_bugs" in requested_analyses:
structure_parts.append(' "potential_bugs": [...]')
if "check_style" in requested_analyses:
structure_parts.append(' "style_issues": [...]')
if "summarize_modules" in requested_analyses:
structure_parts.append(' "module_summaries": [...]')
if "suggest_refactoring" in requested_analyses:
structure_parts.append(' "refactoring_suggestions": [...]')
json_structure_description += ",\n".join(structure_parts) + "\n}"
prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
prompt_parts.append(prompt_footer)
full_prompt = "".join(prompt_parts)
return full_prompt, included_files
def call_gemini_api(prompt):
"""
Calls the Gemini API using the provided prompt.
Returns the parsed JSON insights or an error message.
"""
if not prompt:
return None, "Prompt generation failed."
# MOCK MODE
if st.session_state.mock_api_call:
st.info(" MOCK MODE: Simulating API call...")
time.sleep(1)
mock_json_response = json.dumps({
"documentation_suggestions": [],
"potential_bugs": [],
"style_issues": [],
"module_summaries": [],
"refactoring_suggestions": []
})
st.success("Mock response generated.")
return json.loads(mock_json_response), None
# REAL API CALL
else:
if not initialize_gemini_model():
return None, "Gemini Model Initialization Failed."
if model is None:
return None, "Gemini model not selected or available." # Added check
try:
api_status = st.empty()
api_status.info(f"πŸ“‘ Sending request to {model.model_name} (Est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
start_time = time.time()
response = model.generate_content(
prompt,
generation_config=genai.types.GenerationConfig(temperature=0.2),
safety_settings=[
{"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
"HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]
]
)
end_time = time.time()
api_status.success(f"βœ… Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s.")
time.sleep(1)
api_status.empty()
try:
json_response_text = response.text.strip()
# Remove markdown code fences if present
if json_response_text.startswith("```json"):
json_response_text = json_response_text[7:]
if json_response_text.startswith("```"):
json_response_text = json_response_text[3:]
if json_response_text.endswith("```"):
json_response_text = json_response_text[:-3]
json_start = json_response_text.find('{')
json_end = json_response_text.rfind('}') + 1
if json_start != -1 and json_end != -1 and json_end > json_start:
final_json_text = json_response_text[json_start:json_end]
insights = json.loads(final_json_text)
return insights, None
else:
st.warning("⚠️ Could not find valid JSON object.")
return {"raw_response": response.text}, "AI response did not contain clear JSON object."
except json.JSONDecodeError as json_err:
st.error(f"🚨 Error parsing JSON: {json_err}")
st.code(response.text, language='text')
return None, f"AI response not valid JSON: {json_err}"
except AttributeError:
st.error("🚨 Unexpected API response structure (AttributeError).")
st.code(f"Response object: {response}", language='text')
return None, "Unexpected response structure (AttributeError)."
except Exception as e:
st.error(f"🚨 Unexpected issue processing response: {e}")
try:
st.code(f"Response object: {response}", language='text')
except Exception:
pass
return None, f"Unexpected response structure: {e}"
except Exception as e:
api_status.empty()
st.error(f"🚨 API call error: {e}")
error_msg = f"API call failed: {e}"
if hasattr(e, 'message'):
if "429" in e.message:
error_msg = "API Quota Exceeded or Rate Limit hit."
elif "API key not valid" in e.message:
error_msg = "Invalid Gemini API Key."
elif "permission denied" in e.message.lower():
error_msg = f"Permission Denied for model '{st.session_state.selected_model_name}'. Check API key access."
elif "blocked" in e.message.lower():
error_msg = "Content blocked due to safety settings."
elif "block_reason: SAFETY" in str(e):
error_msg = "Content blocked due to safety settings."
return None, error_msg
def display_results(results_json, requested_analyses):
"""
Displays the analysis results with pagination and allows JSON download.
"""
st.header("πŸ“Š Analysis Report")
if not isinstance(results_json, dict):
st.error("Invalid results format.")
st.json(results_json)
return
if "raw_response" in results_json:
st.subheader("Raw AI Response (JSON Parsing Failed)")
st.code(results_json["raw_response"], language='text')
return
display_config = {
"generate_docs": {
"key": "documentation_suggestions",
"title": AVAILABLE_ANALYSES["generate_docs"],
"fields": {"file": "File", "line": "Line"}
},
"find_bugs": {
"key": "potential_bugs",
"title": AVAILABLE_ANALYSES["find_bugs"],
"fields": {"file": "File", "line": "Line", "severity": "Severity"}
},
"check_style": {
"key": "style_issues",
"title": AVAILABLE_ANALYSES["check_style"],
"fields": {"file": "File", "line": "Line"}
},
"summarize_modules": {
"key": "module_summaries",
"title": AVAILABLE_ANALYSES["summarize_modules"],
"fields": {"file": "File"}
},
"suggest_refactoring": {
"key": "refactoring_suggestions",
"title": AVAILABLE_ANALYSES["suggest_refactoring"],
"fields": {"file": "File", "line": "Line", "area": "Area"}
},
}
any_results_found = False
for analysis_key in requested_analyses:
if analysis_key in display_config:
config = display_config[analysis_key]
items = results_json.get(config["key"], [])
total_items = len(items)
st.subheader(f"{config['title']} ({total_items} found)")
if items:
any_results_found = True
state_key = f"visible_{analysis_key}"
if state_key not in st.session_state:
st.session_state[state_key] = RESULTS_PAGE_SIZE
visible_count = st.session_state[state_key]
items_to_display = items[:visible_count]
for item in items_to_display:
details = [
f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file'
else f"**{field_label}:** {item.get(field_key, 'N/A')}"
for field_key, field_label in config["fields"].items()
if item.get(field_key, 'N/A') != 'N/A'
]
st.markdown("- " + " - ".join(details))
if 'suggestion' in item:
st.code(item['suggestion'], language='text')
elif 'description' in item:
st.markdown(f" > {item['description']}")
elif 'summary' in item:
st.markdown(f" > {item['summary']}")
if total_items > visible_count:
if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
st.session_state[state_key] += RESULTS_PAGE_SIZE
st.rerun()
else:
st.markdown("_No items found for this category._")
st.divider()
if not any_results_found:
st.info("No specific findings were identified.")
st.download_button(
label="Download Full Report (JSON)",
data=json.dumps(results_json, indent=4),
file_name="code_audit_report.json",
mime="application/json"
)
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("πŸ€– Codebase Audit & Documentation Assistant")
# --- Sidebar ---
with st.sidebar:
st.header("βš™οΈ Analysis Controls")
st.session_state.mock_api_call = st.toggle(
"πŸ§ͺ Enable Mock API Mode",
value=st.session_state.mock_api_call,
help="Use fake data instead of calling Gemini API."
)
st.divider()
st.header("β™Š Select Model")
# --- NEW: Dynamic Model Selection ---
if not st.session_state.mock_api_call:
# Get available models (uses cache)
st.session_state.available_models_dict = get_available_models()
model_display_names = list(st.session_state.available_models_dict.keys())
if model_display_names:
# Try to find the index of the previously selected model
current_model_display_name = None
if st.session_state.selected_model_name:
# Find display name matching the stored internal name
for disp_name, internal_name in st.session_state.available_models_dict.items():
if internal_name == st.session_state.selected_model_name:
current_model_display_name = disp_name
break
try:
selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
except ValueError:
selected_index = 0 # Default to first if previous selection not found
selected_display_name = st.selectbox(
"Choose Gemini model:",
options=model_display_names,
index=selected_index,
key="model_selector",
help="Select the Gemini model to use for analysis."
)
# Update session state with the internal name based on selection
st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
elif 'GEMINI_API_KEY' in st.secrets:
st.warning("No compatible models found or error listing models. Check API Key permissions.")
st.session_state.selected_model_name = None # Ensure no model selected
else:
st.warning("Add GEMINI_API_KEY to secrets to list models.")
st.session_state.selected_model_name = None
else: # Mock mode is active
st.info("Mock API Mode ACTIVE")
st.session_state.selected_model_name = "mock_model" # Use a placeholder name for mock mode
# --- End Dynamic Model Selection ---
st.divider()
st.header("πŸ”Ž Select Analyses")
selected_analyses = [
key for key, name in AVAILABLE_ANALYSES.items()
if st.checkbox(name, value=True, key=f"cb_{key}")
]
st.divider()
st.header("πŸ“„ How To Use")
st.info(
"1. Set API Key.\n"
"2. Toggle Mock Mode if needed.\n"
"3. Select Model (if not Mock).\n"
"4. Select analyses.\n"
"5. Upload ZIP.\n"
"6. Click 'Analyze'.\n"
"7. Review report."
)
st.info(f"Note: Limited by token estimates (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
st.divider()
st.warning("⚠️ **Privacy:** Code sent to Google API if Mock Mode is OFF.")
# Update title dynamically based on selected model
if st.session_state.selected_model_name and not st.session_state.mock_api_call:
st.markdown(f"Upload codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
elif st.session_state.mock_api_call:
st.markdown("Upload codebase (`.zip`) for analysis (Using **Mock Data**).")
else:
st.markdown("Upload codebase (`.zip`) for analysis.")
# --- Main Content Area ---
uploaded_file = st.file_uploader(
"πŸ“ Upload Codebase ZIP File",
type=['zip'],
key="file_uploader",
on_change=lambda: st.session_state.update(
analysis_results=None,
error_message=None,
analysis_requested=False
)
)
analysis_button_placeholder = st.empty()
results_placeholder = st.container()
if uploaded_file:
st.success(f"βœ… File '{uploaded_file.name}' uploaded.")
uploaded_file_bytes = uploaded_file.getvalue()
file_id = f"{uploaded_file.name}-{uploaded_file.size}"
code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
file_id, uploaded_file.size, uploaded_file_bytes
)
if code_files is not None:
st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Est. tokens: ~{estimate_token_count(total_chars):,}")
if ignored_files:
with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
st.code("\n".join(ignored_files), language='text')
# Disable button if no model selected (and not in mock mode)
model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
analyze_button_label = "Analyze Codebase"
if not model_ready:
analyze_button_label = "Select Model First"
elif analyze_button_disabled:
analyze_button_label = "Select Analyses or Upload Valid Code"
if analysis_button_placeholder.button(
analyze_button_label,
type="primary",
disabled=analyze_button_disabled
):
st.session_state.analysis_requested = True
st.session_state.analysis_results = None
st.session_state.error_message = None
if not selected_analyses:
st.warning("Please select analysis types.")
elif file_count == 0:
st.warning("No relevant code files found.")
elif not model_ready:
st.warning("Please select a Gemini model from the sidebar.")
else:
with results_placeholder:
spinner_model_name = (
st.session_state.selected_model_name
if not st.session_state.mock_api_call
else "Mock Mode"
)
spinner_msg = f"πŸš€ Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
with st.spinner(spinner_msg):
analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
if analysis_prompt and included_files_in_prompt:
results_json, error_msg = call_gemini_api(analysis_prompt)
st.session_state.analysis_results = results_json
st.session_state.error_message = error_msg
elif not included_files_in_prompt:
st.session_state.error_message = "Could not proceed: No files included."
else:
st.session_state.error_message = "Failed to generate analysis prompt."
st.rerun()
# Display results (Keep the same logic)
if st.session_state.analysis_requested:
with results_placeholder:
st.divider()
if st.session_state.error_message:
st.error(f"Analysis Failed: {st.session_state.error_message}")
if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
st.subheader("Raw AI Response")
st.code(st.session_state.analysis_results["raw_response"], language='text')
elif st.session_state.analysis_results:
display_results(st.session_state.analysis_results, selected_analyses)
else:
st.info("Analysis initiated, but no results/errors stored.")
elif not uploaded_file:
results_placeholder.info("Upload a ZIP file to begin.")
results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")