import streamlit as st
import google.generativeai as genai
import zipfile
import io
import json
import os  # Potentially still needed for the API key, though not for model names
from pathlib import Path
import time
# --- Configuration ---
# Model names are discovered dynamically at runtime; no hardcoded names.
MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Rough prompt-size budget, in estimated tokens
RESULTS_PAGE_SIZE = 25

AVAILABLE_ANALYSES = {  # Analysis types offered in the sidebar
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities",
}

CODE_EXTENSIONS = {  # File extensions treated as source code
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go', '.rb',
    '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
}
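# Note: extension matching below uses Path.suffix.lower(), so this set is
# effectively case-insensitive (e.g. 'MAIN.PY' is treated as Python).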
# --- Session State Initialization ---
if 'mock_api_call' not in st.session_state:
    st.session_state.mock_api_call = False
if 'analysis_results' not in st.session_state:
    st.session_state.analysis_results = None
if 'error_message' not in st.session_state:
    st.session_state.error_message = None
if 'analysis_requested' not in st.session_state:
    st.session_state.analysis_requested = False
if 'selected_model_name' not in st.session_state:
    st.session_state.selected_model_name = None  # Will hold the "models/..." name
if 'available_models_dict' not in st.session_state:
    st.session_state.available_models_dict = {}  # display_name -> internal name mapping
# --- Gemini API Setup & Model Discovery ---
model = None  # Global holder for the initialized GenerativeModel instance


@st.cache_data(ttl=3600)  # Cache the model list for an hour (decorator was missing)
def get_available_models():
"""Lists models supporting 'generateContent' using the API key.""" | |
model_dict = {} | |
try: | |
if 'GEMINI_API_KEY' not in st.secrets: | |
# Don't stop here, let the main part handle it, but return empty | |
print("API key not found in secrets during model listing attempt.") | |
return {} | |
# Configure API key temporarily just for listing | |
genai.configure(api_key=st.secrets["GEMINI_API_KEY"]) | |
print("Listing available models via API...") | |
for m in genai.list_models(): | |
# Check if the model supports the 'generateContent' method | |
if 'generateContent' in m.supported_generation_methods: | |
# Store mapping: user-friendly name -> internal name | |
model_dict[m.display_name] = m.name | |
print(f"Found {len(model_dict)} compatible models.") | |
return model_dict | |
except Exception as e: | |
st.error(f"π¨ Error listing available models: {e}") | |
return {} # Return empty on error | |
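
# Illustrative shape of the returned mapping (actual entries depend on which
# models the API key can access; these names are examples, not guarantees):
#   {"Gemini 1.5 Pro": "models/gemini-1.5-pro-latest", ...}
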
def initialize_gemini_model():
    """Initializes the global Gemini model instance from the selected name."""
    global model
    selected_name = st.session_state.get('selected_model_name')
    if selected_name and model is None and not st.session_state.mock_api_call:
        try:
            if 'GEMINI_API_KEY' not in st.secrets:
                st.error("🚨 Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
                st.stop()  # Cannot initialize without a key
            # Configure the API key (redundant if list_models already ran, but safe)
            genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
            print(f"Initializing Gemini Model: {selected_name}")
            # Use the selected model name from session state
            model = genai.GenerativeModel(model_name=selected_name)
            print(f"Gemini Model Initialized ({selected_name}).")
            return True
        except Exception as e:
            st.error(f"🚨 Error initializing selected Gemini model '{selected_name}': {e}")
            st.session_state.selected_model_name = None  # Reset selection on error
            st.stop()
            return False
    elif st.session_state.mock_api_call:
        return True  # No initialization needed in mock mode
    elif model is not None and model.model_name == selected_name:
        return True  # Already initialized with the correct model
    elif model is not None and model.model_name != selected_name:
        print("Model changed. Re-initializing...")
        model = None  # Reset the instance and re-initialize with the new name
        return initialize_gemini_model()
    elif not selected_name and not st.session_state.mock_api_call:
        return False  # Cannot initialize without a selection
    return False  # Default case
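
# State machine summary: mock mode short-circuits to True; a missing selection
# returns False; a changed selection resets the global and recurses exactly once.
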
# --- Helper Functions ---
def estimate_token_count(text):
    """
    Estimates the token count using a ~3 characters/token heuristic.
    Accepts either a string (counts its length) or an integer
    (e.g. a precomputed total character count).
    """
    if isinstance(text, int):
        return text // 3
    return len(text) // 3
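
# Worked example: a 9,000-character file estimates to 9000 // 3 = 3,000 tokens.
# The divisor 3 is a deliberately rough heuristic, not a real tokenizer.
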
# Assumed intent: the _cached suffix and the (file_id, file_size) key-style
# parameters suggest this was meant to be cached; the decorator was missing.
@st.cache_data(show_spinner=False)
def process_zip_file_cached(file_id, file_size, file_content_bytes):
    """
    Extracts code files from an uploaded ZIP archive.
    Returns a tuple: (code_files dict, total_chars, file_count, ignored_files list).
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    status_placeholder = st.empty()
    progress_bar = status_placeholder.progress(0)
    try:
        with zipfile.ZipFile(io.BytesIO(file_content_bytes), 'r') as zip_ref:
            members = zip_ref.infolist()
            total_members = len(members)
            for i, member in enumerate(members):
                if i % 10 == 0:  # Update the progress bar every 10 entries
                    progress_bar.progress(int((i / total_members) * 100))
                # Skip directories, hidden paths, and dunder paths (e.g. __pycache__)
                is_hidden = (any(p.startswith('.') for p in Path(member.filename).parts)
                             or '__' in member.filename)
                if member.is_dir() or is_hidden:
                    continue
                file_path = Path(member.filename)
                if file_path.suffix.lower() in CODE_EXTENSIONS:
                    try:
                        with zip_ref.open(member) as file:
                            file_bytes = file.read()
                            try:
                                content = file_bytes.decode('utf-8')
                            except UnicodeDecodeError:
                                try:
                                    # Fall back to latin-1 for non-UTF-8 files
                                    content = file_bytes.decode('latin-1')
                                except Exception as decode_err:
                                    ignored_files.append(f"{member.filename} (Decode Error: {decode_err})")
                                    continue
                            code_files[member.filename] = content
                            total_chars += len(content)
                            file_count += 1
                    except Exception as read_err:
                        ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                else:
                    # Hidden/dunder paths were already skipped above, so every file
                    # reaching this branch was excluded purely by extension.
                    ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
            progress_bar.progress(100)
            status_placeholder.empty()
    except zipfile.BadZipFile:
        status_placeholder.empty()
        st.error("🚨 Invalid ZIP file.")
        return None, 0, 0, []
    except Exception as e:
        status_placeholder.empty()
        st.error(f"🚨 Error reading ZIP file: {e}")
        return None, 0, 0, []
    if file_count == 0:
        if not ignored_files:
            st.warning("No code files found in the archive.")
        else:
            st.warning("No code files found; some files were skipped.")
    return code_files, total_chars, file_count, ignored_files
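
# Usage note: the four-tuple unpacks as
#   code_files, total_chars, file_count, ignored_files = process_zip_file_cached(...)
# and code_files is None only when the archive itself could not be read.
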
def construct_analysis_prompt(code_files_dict, requested_analyses):
    """
    Builds the analysis prompt: a header, the code files (budget permitting),
    and a description of the expected JSON output structure.
    Returns (full_prompt, included_files list).
    """
    prompt_parts = ["Analyze the following codebase...\n\n"]
    current_token_estimate = estimate_token_count(prompt_parts[0])
    included_files = []
    code_segments = []
    prompt_status = st.empty()
    if len(code_files_dict) > 50:
        prompt_status.info("Constructing prompt...")
    for filename, content in code_files_dict.items():
        segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
        segment_token_estimate = estimate_token_count(segment)
        if current_token_estimate + segment_token_estimate <= MAX_PROMPT_TOKENS_ESTIMATE:
            code_segments.append(segment)
            current_token_estimate += segment_token_estimate
            included_files.append(filename)
        else:
            st.warning(f"⚠️ Codebase may exceed the context limit. Analyzing only the first {len(included_files)} files (~{current_token_estimate:,} estimated tokens).")
            break
    prompt_status.empty()
    if not included_files:
        st.error("🚨 No code files could be included in the prompt.")
        return None, []
    prompt_parts.append("".join(code_segments))
    # Describe the expected JSON keys for the selected analyses
    json_structure_description = "{\n"
    structure_parts = []
    if "generate_docs" in requested_analyses:
        structure_parts.append('  "documentation_suggestions": [...]')
    if "find_bugs" in requested_analyses:
        structure_parts.append('  "potential_bugs": [...]')
    if "check_style" in requested_analyses:
        structure_parts.append('  "style_issues": [...]')
    if "summarize_modules" in requested_analyses:
        structure_parts.append('  "module_summaries": [...]')
    if "suggest_refactoring" in requested_analyses:
        structure_parts.append('  "refactoring_suggestions": [...]')
    json_structure_description += ",\n".join(structure_parts) + "\n}"
    prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
    prompt_parts.append(prompt_footer)
    full_prompt = "".join(prompt_parts)
    return full_prompt, included_files
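
# Note: files that do not fit the token budget are dropped whole rather than
# truncated, so every included file appears between matching START/END markers.
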
def call_gemini_api(prompt):
    """
    Calls the Gemini API with the provided prompt.
    Returns an (insights, error_message) tuple; insights is the parsed JSON
    dict on success, and error_message is None unless something went wrong.
    """
    if not prompt:
        return None, "Prompt generation failed."
    # MOCK MODE: return a canned, empty report without touching the API
    if st.session_state.mock_api_call:
        st.info("MOCK MODE: Simulating API call...")
        time.sleep(1)
        mock_json_response = json.dumps({
            "documentation_suggestions": [],
            "potential_bugs": [],
            "style_issues": [],
            "module_summaries": [],
            "refactoring_suggestions": []
        })
        st.success("Mock response generated.")
        return json.loads(mock_json_response), None
    # REAL API CALL
    else:
        if not initialize_gemini_model():
            return None, "Gemini Model Initialization Failed."
        if model is None:
            return None, "Gemini model not selected or available."
        try:
            api_status = st.empty()
            api_status.info(f"📡 Sending request to {model.model_name} (est. prompt tokens: {estimate_token_count(prompt):,})... Please wait.")
            start_time = time.time()
            response = model.generate_content(
                prompt,
                generation_config=genai.types.GenerationConfig(temperature=0.2),
                safety_settings=[
                    {"category": c, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                    for c in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
                              "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]
                ]
            )
            end_time = time.time()
            api_status.success(f"✅ Response received from AI ({model.model_name}) in {end_time - start_time:.2f}s.")
            time.sleep(1)
            api_status.empty()
            try:
                json_response_text = response.text.strip()
                # Remove markdown code fences if present
                if json_response_text.startswith("```json"):
                    json_response_text = json_response_text[7:]
                if json_response_text.startswith("```"):
                    json_response_text = json_response_text[3:]
                if json_response_text.endswith("```"):
                    json_response_text = json_response_text[:-3]
                # Slice out the outermost {...} object
                json_start = json_response_text.find('{')
                json_end = json_response_text.rfind('}') + 1
                # json_end is 0 (not -1) when '}' is missing, because of the +1
                # above, so comparing against json_start covers that case.
                if json_start != -1 and json_end > json_start:
                    final_json_text = json_response_text[json_start:json_end]
                    insights = json.loads(final_json_text)
                    return insights, None
                else:
                    st.warning("⚠️ Could not find a valid JSON object in the response.")
                    return {"raw_response": response.text}, "AI response did not contain a clear JSON object."
            except json.JSONDecodeError as json_err:
                st.error(f"🚨 Error parsing JSON response: {json_err}")
                st.code(response.text, language='text')
                return None, f"AI response was not valid JSON: {json_err}"
            except AttributeError:
                st.error("🚨 Unexpected API response structure (AttributeError).")
                st.code(f"Response object: {response}", language='text')
                return None, "Unexpected response structure (AttributeError)."
            except Exception as e:
                st.error(f"🚨 Unexpected issue processing the response: {e}")
                try:
                    st.code(f"Response object: {response}", language='text')
                except Exception:
                    pass
                return None, f"Unexpected response structure: {e}"
        except Exception as e:
            api_status.empty()
            st.error(f"🚨 API call error: {e}")
            error_msg = f"API call failed: {e}"
            if hasattr(e, 'message'):
                if "429" in e.message:
                    error_msg = "API quota exceeded or rate limit hit."
                elif "API key not valid" in e.message:
                    error_msg = "Invalid Gemini API Key."
                elif "permission denied" in e.message.lower():
                    error_msg = f"Permission denied for model '{st.session_state.selected_model_name}'. Check API key access."
                elif "blocked" in e.message.lower():
                    error_msg = "Content blocked due to safety settings."
            elif "block_reason: SAFETY" in str(e):
                error_msg = "Content blocked due to safety settings."
            return None, error_msg
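
# Parsing strategy note: models often wrap JSON in ```json fences or add prose
# around it; stripping fences and then slicing the outermost {...} handles both.
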
def display_results(results_json, requested_analyses):
    """
    Displays the analysis report with per-category pagination and a JSON download.
    """
    st.header("📊 Analysis Report")
    if not isinstance(results_json, dict):
        st.error("Invalid results format.")
        st.json(results_json)
        return
    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return
    display_config = {
        "generate_docs": {
            "key": "documentation_suggestions",
            "title": AVAILABLE_ANALYSES["generate_docs"],
            "fields": {"file": "File", "line": "Line"}
        },
        "find_bugs": {
            "key": "potential_bugs",
            "title": AVAILABLE_ANALYSES["find_bugs"],
            "fields": {"file": "File", "line": "Line", "severity": "Severity"}
        },
        "check_style": {
            "key": "style_issues",
            "title": AVAILABLE_ANALYSES["check_style"],
            "fields": {"file": "File", "line": "Line"}
        },
        "summarize_modules": {
            "key": "module_summaries",
            "title": AVAILABLE_ANALYSES["summarize_modules"],
            "fields": {"file": "File"}
        },
        "suggest_refactoring": {
            "key": "refactoring_suggestions",
            "title": AVAILABLE_ANALYSES["suggest_refactoring"],
            "fields": {"file": "File", "line": "Line", "area": "Area"}
        },
    }
    any_results_found = False
    for analysis_key in requested_analyses:
        if analysis_key in display_config:
            config = display_config[analysis_key]
            items = results_json.get(config["key"], [])
            total_items = len(items)
            st.subheader(f"{config['title']} ({total_items} found)")
            if items:
                any_results_found = True
                # Per-category pagination counter, persisted in session state
                state_key = f"visible_{analysis_key}"
                if state_key not in st.session_state:
                    st.session_state[state_key] = RESULTS_PAGE_SIZE
                visible_count = st.session_state[state_key]
                items_to_display = items[:visible_count]
                for item in items_to_display:
                    details = [
                        f"**{field_label}:** `{item.get(field_key, 'N/A')}`" if field_key == 'file'
                        else f"**{field_label}:** {item.get(field_key, 'N/A')}"
                        for field_key, field_label in config["fields"].items()
                        if item.get(field_key, 'N/A') != 'N/A'
                    ]
                    st.markdown("- " + " - ".join(details))
                    if 'suggestion' in item:
                        st.code(item['suggestion'], language='text')
                    elif 'description' in item:
                        st.markdown(f"  > {item['description']}")
                    elif 'summary' in item:
                        st.markdown(f"  > {item['summary']}")
                if total_items > visible_count:
                    if st.button(f"Show more ({total_items - visible_count} remaining)", key=f"more_{analysis_key}"):
                        st.session_state[state_key] += RESULTS_PAGE_SIZE
                        st.rerun()
            else:
                st.markdown("_No items found for this category._")
            st.divider()
    if not any_results_found:
        st.info("No specific findings were identified.")
    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )
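
# Note: the visible_<analysis_key> counters live in session state and persist
# across reruns, so pagination position is kept while the user pages through.
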
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("🤖 Codebase Audit & Documentation Assistant")

# --- Sidebar ---
with st.sidebar:
    st.header("⚙️ Analysis Controls")
    st.session_state.mock_api_call = st.toggle(
        "🧪 Enable Mock API Mode",
        value=st.session_state.mock_api_call,
        help="Use fake data instead of calling the Gemini API."
    )
    st.divider()
    st.header("✨ Select Model")
    # --- Dynamic Model Selection ---
    if not st.session_state.mock_api_call:
        # Fetch available models (served from the hourly cache)
        st.session_state.available_models_dict = get_available_models()
        model_display_names = list(st.session_state.available_models_dict.keys())
        if model_display_names:
            # Try to find the index of the previously selected model
            current_model_display_name = None
            if st.session_state.selected_model_name:
                # Find the display name matching the stored internal name
                for disp_name, internal_name in st.session_state.available_models_dict.items():
                    if internal_name == st.session_state.selected_model_name:
                        current_model_display_name = disp_name
                        break
            try:
                selected_index = model_display_names.index(current_model_display_name) if current_model_display_name in model_display_names else 0
            except ValueError:
                selected_index = 0  # Default to the first model if the previous selection is gone
            selected_display_name = st.selectbox(
                "Choose Gemini model:",
                options=model_display_names,
                index=selected_index,
                key="model_selector",
                help="Select the Gemini model to use for analysis."
            )
            # Map the chosen display name back to the internal "models/..." name
            st.session_state.selected_model_name = st.session_state.available_models_dict.get(selected_display_name)
            st.info(f"Using REAL Gemini API ({st.session_state.selected_model_name})")
        elif 'GEMINI_API_KEY' in st.secrets:
            st.warning("No compatible models found, or listing failed. Check API key permissions.")
            st.session_state.selected_model_name = None  # Ensure no model is selected
        else:
            st.warning("Add GEMINI_API_KEY to secrets to list models.")
            st.session_state.selected_model_name = None
    else:  # Mock mode is active
        st.info("Mock API Mode ACTIVE")
        st.session_state.selected_model_name = "mock_model"  # Placeholder name for mock mode
    # --- End Dynamic Model Selection ---
    st.divider()
    st.header("🔍 Select Analyses")
    selected_analyses = [
        key for key, name in AVAILABLE_ANALYSES.items()
        if st.checkbox(name, value=True, key=f"cb_{key}")
    ]
    st.divider()
    st.header("📖 How To Use")
    st.info(
        "1. Set the API key.\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select a model (if not in Mock Mode).\n"
        "4. Select analyses.\n"
        "5. Upload a ZIP.\n"
        "6. Click 'Analyze'.\n"
        "7. Review the report."
    )
    st.info(f"Note: prompt size is limited to ~{MAX_PROMPT_TOKENS_ESTIMATE:,} estimated tokens.")
    st.divider()
    st.warning("⚠️ **Privacy:** Code is sent to the Google API when Mock Mode is OFF.")

# Update the subtitle dynamically based on the selected model
if st.session_state.selected_model_name and not st.session_state.mock_api_call:
    st.markdown(f"Upload a codebase (`.zip`) for analysis via **{st.session_state.selected_model_name}**.")
elif st.session_state.mock_api_call:
    st.markdown("Upload a codebase (`.zip`) for analysis (using **Mock Data**).")
else:
    st.markdown("Upload a codebase (`.zip`) for analysis.")
# --- Main Content Area ---
uploaded_file = st.file_uploader(
    "📁 Upload Codebase ZIP File",
    type=['zip'],
    key="file_uploader",
    on_change=lambda: st.session_state.update(
        analysis_results=None,
        error_message=None,
        analysis_requested=False
    )
)
analysis_button_placeholder = st.empty()
results_placeholder = st.container()

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    uploaded_file_bytes = uploaded_file.getvalue()
    file_id = f"{uploaded_file.name}-{uploaded_file.size}"
    code_files, total_chars, file_count, ignored_files = process_zip_file_cached(
        file_id, uploaded_file.size, uploaded_file_bytes
    )
    if code_files is not None:
        st.info(f"Found **{file_count}** code files ({total_chars:,} chars). Estimated tokens: ~{estimate_token_count(total_chars):,}")
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')
        # Disable the button if no model is selected (and not in mock mode)
        model_ready = bool(st.session_state.selected_model_name) or st.session_state.mock_api_call
        analyze_button_disabled = (not selected_analyses or file_count == 0 or not model_ready)
        analyze_button_label = "Analyze Codebase"
        if not model_ready:
            analyze_button_label = "Select Model First"
        elif analyze_button_disabled:
            analyze_button_label = "Select Analyses or Upload Valid Code"
        if analysis_button_placeholder.button(
            analyze_button_label,
            type="primary",
            disabled=analyze_button_disabled
        ):
            st.session_state.analysis_requested = True
            st.session_state.analysis_results = None
            st.session_state.error_message = None
            if not selected_analyses:
                st.warning("Please select analysis types.")
            elif file_count == 0:
                st.warning("No relevant code files found.")
            elif not model_ready:
                st.warning("Please select a Gemini model from the sidebar.")
            else:
                with results_placeholder:
                    spinner_model_name = (
                        st.session_state.selected_model_name
                        if not st.session_state.mock_api_call
                        else "Mock Mode"
                    )
                    spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."
                    with st.spinner(spinner_msg):
                        analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                        if analysis_prompt and included_files_in_prompt:
                            results_json, error_msg = call_gemini_api(analysis_prompt)
                            st.session_state.analysis_results = results_json
                            st.session_state.error_message = error_msg
                        elif not included_files_in_prompt:
                            st.session_state.error_message = "Could not proceed: no files were included."
                        else:
                            st.session_state.error_message = "Failed to generate the analysis prompt."
            st.rerun()

# Display results once an analysis has been requested
if st.session_state.analysis_requested:
    with results_placeholder:
        st.divider()
        if st.session_state.error_message:
            st.error(f"Analysis Failed: {st.session_state.error_message}")
            if isinstance(st.session_state.analysis_results, dict) and "raw_response" in st.session_state.analysis_results:
                st.subheader("Raw AI Response")
                st.code(st.session_state.analysis_results["raw_response"], language='text')
        elif st.session_state.analysis_results:
            display_results(st.session_state.analysis_results, selected_analyses)
        else:
            st.info("Analysis initiated, but no results or errors were stored.")
elif not uploaded_file:
    results_placeholder.info("Upload a ZIP file to begin.")

results_placeholder.divider()
results_placeholder.markdown("_Assistant powered by Google Gemini._")