Spaces:
Sleeping
Sleeping
File size: 22,008 Bytes
df481b9 f025bf0 bc80edf f025bf0 5477235 f025bf0 5477235 f025bf0 5477235 f025bf0 3a80282 5477235 3a80282 f025bf0 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 f025bf0 5477235 3a80282 bc80edf f025bf0 5477235 f025bf0 bc80edf f025bf0 5477235 3a80282 f025bf0 5477235 f025bf0 5477235 f025bf0 3a80282 5477235 3a80282 f025bf0 bc80edf f025bf0 5477235 f025bf0 3a80282 f025bf0 3a80282 f025bf0 5477235 f025bf0 3a80282 f025bf0 3a80282 f025bf0 3a80282 f025bf0 3a80282 f025bf0 3a80282 f025bf0 3a80282 f025bf0 5477235 f025bf0 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 f025bf0 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 f025bf0 3a80282 5477235 3a80282 f025bf0 5477235 f025bf0 3a80282 f025bf0 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 f025bf0 3a80282 f025bf0 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 f025bf0 5477235 f025bf0 3a80282 5477235 3a80282 10922c3 f025bf0 3a80282 5477235 df481b9 f025bf0 3a80282 f025bf0 3a80282 f025bf0 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 5477235 3a80282 bc80edf 3a80282 f025bf0 df481b9 f025bf0 5477235 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 |
import streamlit as st
import google.generativeai as genai
import zipfile
import io
import json
import os
from pathlib import Path
import time # Added for simulating mock delay
# --- Configuration ---
# Model identifier passed to genai.GenerativeModel and echoed in the UI text.
GEMINI_MODEL_NAME = "gemini-2.5-pro-preview-03-25"
# Budget, in *estimated* tokens, for the code packed into a single prompt;
# construct_analysis_prompt stops adding files once this would be exceeded.
MAX_PROMPT_TOKENS_ESTIMATE = 800000 # Adjust as needed
# Analysis key -> human-readable label. The labels are used for the sidebar
# checkboxes and as section titles in the rendered report.
AVAILABLE_ANALYSES = {
    "generate_docs": "Generate Missing Docstrings/Comments",
    "find_bugs": "Identify Potential Bugs & Anti-patterns",
    "check_style": "Check Style Guide Compliance (General)",
    "summarize_modules": "Summarize Complex Modules/Files",
    "suggest_refactoring": "Suggest Refactoring Opportunities"
}
# File suffixes (compared lower-cased) that process_zip_file treats as source
# code; archive members with any other suffix are reported as skipped.
CODE_EXTENSIONS = {
    '.py', '.js', '.java', '.c', '.cpp', '.h', '.cs', '.go',
    '.rb', '.php', '.swift', '.kt', '.ts', '.html', '.css', '.scss', '.sql'
}
# --- Session State Initialization ---
# Seed the mock-mode flag the first time the script runs in a session.
# False means the real Gemini API is used until the sidebar toggle says otherwise.
if 'mock_api_call' not in st.session_state:
    st.session_state['mock_api_call'] = False
# --- Gemini API Setup ---
# Module-level handle to the Gemini model; created lazily by
# initialize_gemini_model() the first time the real API is needed.
model = None


def initialize_gemini_model():
    """Ensure a Gemini model handle exists, unless mock mode is active.

    Returns:
        bool: True when mock mode is on, when a model handle already exists,
        or when a new handle was created. False is returned only if execution
        continues past ``st.stop()`` after an initialization failure.
    """
    global model

    # Mock mode never talks to the SDK, so there is nothing to configure.
    if st.session_state.mock_api_call:
        print("Running in Mock Mode. Skipping Gemini initialization.")
        return True

    # A handle created by an earlier call is reused as-is.
    if model is not None:
        print("Gemini Model already initialized.")
        return True

    try:
        if 'GEMINI_API_KEY' not in st.secrets:
            st.error("π¨ Gemini API Key not found. Add it to `.streamlit/secrets.toml`.")
            st.stop()
        api_key = st.secrets["GEMINI_API_KEY"]
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        print("Gemini Model Initialized.")
        return True
    except Exception as init_error:
        # Report the failure in the UI and halt this script run.
        st.error(f"π¨ Error initializing Gemini SDK: {init_error}")
        st.stop()
        return False
# --- Helper Functions ---
def estimate_token_count(text):
    """Roughly estimate a token count at about three characters per token.

    Args:
        text: Either the text to measure (any object supporting ``len``) or
            an already-computed character count given as an ``int``. Accepting
            a count lets callers that only kept a running character total
            reuse the same heuristic instead of failing on ``len(int)``.

    Returns:
        int: The character count floor-divided by 3.
    """
    char_count = text if isinstance(text, int) else len(text)
    return char_count // 3
def process_zip_file(uploaded_file):
    """Extract source files and their text content from an uploaded ZIP archive.

    Directories, members with a path component starting with '.', and members
    whose name contains '__' are skipped silently. Members whose suffix is not
    in CODE_EXTENSIONS are listed in ``ignored_files``.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the ZIP archive.

    Returns:
        code_files (dict | None): Mapping of archive member names to decoded
            content, or None if the archive could not be processed.
        total_chars (int): Total number of characters in included files.
        file_count (int): Count of processed code files.
        ignored_files (list): Descriptions of members skipped or unreadable.
    """
    code_files = {}
    total_chars = 0
    file_count = 0
    ignored_files = []
    try:
        with zipfile.ZipFile(io.BytesIO(uploaded_file.getvalue()), 'r') as zip_ref:
            for member in zip_ref.infolist():
                file_path = Path(member.filename)
                # Skip directories, hidden files, and files with '__' in the name
                if (
                    member.is_dir()
                    or '__' in member.filename
                    or any(part.startswith('.') for part in file_path.parts)
                ):
                    continue
                if file_path.suffix.lower() not in CODE_EXTENSIONS:
                    ignored_files.append(f"{member.filename} (Skipped Extension: {file_path.suffix})")
                    continue
                try:
                    # Read the bytes exactly once: the member stream is
                    # exhausted after read(), so a second read() for the
                    # fallback decode would yield b'' and silently produce
                    # empty content.
                    with zip_ref.open(member) as file:
                        raw_bytes = file.read()
                except Exception as read_err:
                    # Best effort per member: record the failure, keep going.
                    ignored_files.append(f"{member.filename} (Read Error: {read_err})")
                    continue
                try:
                    content = raw_bytes.decode('utf-8')
                except UnicodeDecodeError:
                    # latin-1 maps every byte value, so this cannot fail.
                    content = raw_bytes.decode('latin-1')
                code_files[member.filename] = content
                total_chars += len(content)
                file_count += 1
    except zipfile.BadZipFile:
        st.error("π¨ Invalid or corrupted ZIP file.")
        return None, 0, 0, []
    except Exception as e:
        st.error(f"π¨ Error processing ZIP file: {e}")
        return None, 0, 0, []
    return code_files, total_chars, file_count, ignored_files
def construct_analysis_prompt(code_files_dict, requested_analyses):
    """Assemble the Gemini prompt: intro, delimited file contents, JSON-format request.

    Files are added in dictionary order until the running token estimate would
    exceed MAX_PROMPT_TOKENS_ESTIMATE; a warning is shown and the remaining
    files are left out.

    Args:
        code_files_dict: Mapping of file path to file content.
        requested_analyses: Collection of analysis keys selecting which
            sections appear in the requested JSON structure.

    Returns:
        full_prompt (str | None): The complete prompt, or None when not even
            one file fits within the limit.
        included_files (list): Names of the files included in the prompt.
    """
    intro = "Analyze the following codebase provided as a collection of file paths and their content.\n\n"
    running_tokens = estimate_token_count(intro)
    included_files = []
    segments = []
    for filename, content in code_files_dict.items():
        segment = (
            f"--- START FILE: {filename} ---\n"
            f"{content}\n"
            f"--- END FILE: {filename} ---\n\n"
        )
        segment_tokens = estimate_token_count(segment)
        if running_tokens + segment_tokens > MAX_PROMPT_TOKENS_ESTIMATE:
            st.warning(f"β οΈ Codebase may exceed context window estimate (~{MAX_PROMPT_TOKENS_ESTIMATE} tokens). Analysis performed only on the first {len(included_files)} files ({running_tokens} tokens).")
            break
        segments.append(segment)
        running_tokens += segment_tokens
        included_files.append(filename)

    if not included_files:
        st.error("π¨ No code files could be included within the estimated token limit.")
        return None, []

    # Expected JSON structure, one entry per selected analysis, in fixed order.
    schema_by_analysis = (
        ("generate_docs", ' "documentation_suggestions": [{"file": "path/to/file", "line": number, "suggestion": "Suggested docstring/comment"}]'),
        ("find_bugs", ' "potential_bugs": [{"file": "path/to/file", "line": number, "description": "Description of potential bug/anti-pattern", "severity": "High/Medium/Low"}]'),
        ("check_style", ' "style_issues": [{"file": "path/to/file", "line": number, "description": "Description of style deviation"}]'),
        ("summarize_modules", ' "module_summaries": [{"file": "path/to/file", "summary": "One-paragraph summary of the file purpose/functionality"}]'),
        ("suggest_refactoring", ' "refactoring_suggestions": [{"file": "path/to/file", "line": number, "area": "e.g., function name, class name", "suggestion": "Description of refactoring suggestion"}]'),
    )
    structure_parts = [
        schema for analysis_key, schema in schema_by_analysis
        if analysis_key in requested_analyses
    ]
    json_structure_description = "{\n" + ",\n".join(structure_parts) + "\n}"

    prompt_footer = (
        "\n"
        "**Analysis Task:**\n"
        f"Perform the analyses corresponding to the keys present in the JSON structure below, based *only* on the provided code files ({', '.join(included_files)}).\n"
        "**Output Format:**\n"
        "Respond ONLY with a single, valid JSON object adhering strictly to the following structure. If no issues/suggestions are found for a category, provide an empty list `[]`. Do not include explanations outside the JSON structure.\n"
        f"{json_structure_description}\n"
        "**JSON Output Only:**\n"
    )
    full_prompt = intro + "".join(segments) + prompt_footer
    return full_prompt, included_files
def _extract_json_text(raw_text):
    """Return the outermost ``{...}`` span of raw_text, or None if there is none."""
    text = raw_text.strip()
    # Remove potential markdown code block fences
    if text.startswith("```json"):
        text = text[7:]
    if text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    # Extract JSON object boundaries
    start = text.find('{')
    end = text.rfind('}') + 1
    if start == -1 or end <= start:
        return None
    return text[start:end]


def _describe_api_error(exc):
    """Translate an exception raised by the API call into a user-facing message."""
    details = getattr(exc, 'message', None)
    if not isinstance(details, str):
        # Many exceptions have no string ``message`` attribute; use str()
        # so the substring checks below cannot raise TypeError.
        details = str(exc)
    if "429" in details:
        return "API Quota Exceeded or Rate Limit hit. Check your Google Cloud/AI Studio dashboard."
    if "API key not valid" in details:
        return "Invalid Gemini API Key. Please check `.streamlit/secrets.toml`."
    if "blocked" in details.lower() or "block_reason: SAFETY" in str(exc):
        return "Content blocked due to safety settings. Review input code or adjust safety settings if appropriate."
    return f"API call failed: {exc}"


def call_gemini_api(prompt):
    """Send the prompt to Gemini (or simulate the call in mock mode).

    Args:
        prompt: The full prompt text; a falsy value is rejected immediately.

    Returns:
        insights (dict | None): The parsed JSON response, a
            ``{"raw_response": ...}`` dict when no JSON object was found in
            the reply, or None on failure.
        error_message (str | None): Description of what went wrong, or None
            on success.
    """
    if not prompt:
        return None, "Prompt generation failed."

    # --- MOCK MODE LOGIC ---
    if st.session_state.mock_api_call:
        st.info(" MOCK MODE: Simulating API call...")
        time.sleep(2)  # Simulate network/processing delay
        # Simulated successful response
        mock_insights = {
            "documentation_suggestions": [{"file": "mock/core.py", "line": 15, "suggestion": "def process_data(data):\n \"\"\"Processes the input data using mock logic.\"\"\""}],
            "potential_bugs": [{"file": "mock/utils.py", "line": 22, "description": "Potential division by zero if denominator is not checked.", "severity": "Medium"}],
            "style_issues": [{"file": "mock/core.py", "line": 5, "description": "Variable 'varName' does not follow snake_case convention."}],
            "module_summaries": [
                {"file": "mock/core.py", "summary": "This file contains the core mock processing logic."},
                {"file": "mock/utils.py", "summary": "Utility functions for mocking."}
            ],
            "refactoring_suggestions": [{"file": "mock/utils.py", "line": 30, "area": "calculate_metrics function", "suggestion": "Function is too long (> 50 lines), consider breaking it down."}]
        }
        st.success("Mock response generated successfully.")
        return mock_insights, None

    # --- REAL API CALL LOGIC ---
    if not initialize_gemini_model():
        return None, "Gemini Model Initialization Failed."
    if model is None:
        return None, "Gemini model not available."
    try:
        st.write(f"π‘ Sending request to {GEMINI_MODEL_NAME}...")
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(temperature=0.2),
            safety_settings=[
                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            ]
        )
        st.write("✅ Response received from AI.")
        try:
            final_json_text = _extract_json_text(response.text)
            if final_json_text is None:
                st.warning("β οΈ Could not find valid JSON object boundaries ({...}) in response. Displaying raw text.")
                return {"raw_response": response.text}, "AI response did not contain clear JSON object, showing raw text."
            return json.loads(final_json_text), None
        except json.JSONDecodeError as json_err:
            st.error(f"π¨ Error parsing JSON response from AI: {json_err}")
            st.error("Raw AI Response:")
            st.code(response.text, language='text')
            return None, f"AI response was not valid JSON: {json_err}"
        except AttributeError:
            st.error("π¨ Unexpected API response structure.")
            st.code(f"Response object: {response}", language='text')
            try:
                block_reason = response.prompt_feedback.block_reason
                if block_reason:
                    return None, f"Content blocked by API. Reason: {block_reason}"
            except Exception:
                # The block reason is optional extra detail; if it cannot be
                # read, fall through to the generic message below.
                pass
            return None, "Unexpected response structure from API."
        except Exception as e:
            st.error(f"π¨ Unexpected issue processing AI response: {e}")
            try:
                st.code(f"Response object: {response}", language='text')
            except Exception:
                # Showing the response object is purely diagnostic.
                pass
            return None, f"Unexpected response structure: {e}"
    except Exception as e:
        st.error(f"π¨ An error occurred during API call: {e}")
        return None, _describe_api_error(e)
def display_results(results_json, requested_analyses):
    """Render the analysis report in the Streamlit page.

    Args:
        results_json: The insights object to show. Anything that is not a
            dict is reported as invalid; a dict carrying "raw_response" is
            shown as raw text instead of a structured report.
        requested_analyses: Analysis keys whose sections should be rendered,
            in order.
    """
    st.header("π Analysis Report")

    if not isinstance(results_json, dict):
        st.error("Invalid results format received.")
        st.json(results_json)
        return

    if "raw_response" in results_json:
        st.subheader("Raw AI Response (JSON Parsing Failed)")
        st.code(results_json["raw_response"], language='text')
        return

    # (analysis key, result key in the JSON, fields shown on the bullet line)
    section_specs = (
        ("generate_docs", "documentation_suggestions", {"file": "File", "line": "Line"}),
        ("find_bugs", "potential_bugs", {"file": "File", "line": "Line", "severity": "Severity"}),
        ("check_style", "style_issues", {"file": "File", "line": "Line"}),
        ("summarize_modules", "module_summaries", {"file": "File"}),
        ("suggest_refactoring", "refactoring_suggestions", {"file": "File", "line": "Line", "area": "Area"}),
    )
    display_config = {
        analysis: {"key": result_key, "title": AVAILABLE_ANALYSES[analysis], "fields": shown_fields}
        for analysis, result_key, shown_fields in section_specs
    }

    def display_list_items(items, fields):
        """Render one category: a bullet per item, then a divider."""
        if not items:
            st.markdown("_No items found for this category._")
        else:
            for entry in items:
                parts = []
                for key, label in fields.items():
                    shown = entry.get(key, 'N/A')
                    if shown == 'N/A':
                        continue
                    parts.append(f"**{label}:** {shown}")
                st.markdown("- " + " - ".join(parts))
                # Display multi-line outputs when applicable
                if 'suggestion' in entry:
                    st.code(entry['suggestion'], language='text')
                elif 'description' in entry:
                    st.markdown(f" > {entry['description']}")
                elif 'summary' in entry:
                    st.markdown(f" > {entry['summary']}")
        st.divider()

    any_results = False
    for analysis_key in requested_analyses:
        section = display_config.get(analysis_key)
        if section is None:
            continue
        st.subheader(section["title"])
        section_items = results_json.get(section["key"], [])
        display_list_items(section_items, section["fields"])
        any_results = any_results or bool(section_items)

    if not any_results:
        st.info("No specific findings were identified in the analysis based on your selections.")

    st.download_button(
        label="Download Full Report (JSON)",
        data=json.dumps(results_json, indent=4),
        file_name="code_audit_report.json",
        mime="application/json"
    )
# --- Streamlit App Main Interface ---
st.set_page_config(page_title="Codebase Audit Assistant", layout="wide")
st.title("π€ Codebase Audit & Documentation Assistant")
st.markdown(f"Upload your codebase (`.zip`) for analysis using **{GEMINI_MODEL_NAME}**.")

# Sidebar controls
with st.sidebar:
    st.header("βοΈ Analysis Controls")
    st.session_state.mock_api_call = st.toggle(
        "π§ͺ Enable Mock API Mode (for Testing)",
        value=st.session_state.mock_api_call,
        help="If enabled, uses fake data instead of calling the real Gemini API. Saves cost during testing."
    )
    if st.session_state.mock_api_call:
        st.info("Mock API Mode ACTIVE")
    else:
        st.info("Using REAL Gemini API")
    st.divider()

    st.header("π Select Analyses")
    # Every analysis is ticked by default; collect the keys left ticked.
    selected_analyses = []
    for key, name in AVAILABLE_ANALYSES.items():
        if st.checkbox(name, value=True, key=f"cb_{key}"):
            selected_analyses.append(key)
    st.divider()

    st.header("π How To Use")
    st.info(
        "1. Set API Key in `.streamlit/secrets.toml` (if not using Mock Mode).\n"
        "2. Toggle Mock Mode if needed.\n"
        "3. Select desired analyses.\n"
        "4. Create a **ZIP archive** of your codebase.\n"
        "5. Upload the `.zip` file.\n"
        "6. Click 'Analyze Codebase'.\n"
        "7. Review the report."
    )
    # sorted(): set iteration order is not stable between runs, which would
    # otherwise shuffle the extension list shown to the user.
    st.info(f"**Note:** Only files with common code extensions ({', '.join(sorted(CODE_EXTENSIONS))}) are processed. Analysis might be limited (~{MAX_PROMPT_TOKENS_ESTIMATE:,} est. tokens).")
    st.divider()
    st.warning("β οΈ **Privacy:** Code content is sent to the Google Gemini API if Mock Mode is OFF. Do not upload sensitive code if uncomfortable.")

# Main content area
uploaded_file = st.file_uploader("π Upload Codebase ZIP File", type=['zip'], key="file_uploader")
analysis_triggered = False
results_cache = None  # Holds (results_json, error_message) for this script run

if uploaded_file:
    st.success(f"✅ File '{uploaded_file.name}' uploaded.")
    with st.spinner("Inspecting ZIP file..."):
        code_files, total_chars, file_count, ignored_files = process_zip_file(uploaded_file)

    # code_files is None when ZIP processing failed (the error is already shown).
    if code_files is not None:
        # Estimate from the file contents: the estimator measures text, and
        # passing it the integer character total raises TypeError.
        estimated_tokens = sum(estimate_token_count(text) for text in code_files.values())
        st.info(f"Found **{file_count}** relevant code files ({total_chars:,} characters). Est. tokens: ~{estimated_tokens:,}")
        if ignored_files:
            with st.expander(f"View {len(ignored_files)} Skipped/Ignored Files"):
                st.code("\n".join(ignored_files), language='text')

        analyze_button_disabled = (not selected_analyses or file_count == 0)
        analyze_button_label = "Analyze Codebase" if not analyze_button_disabled else "Select Analyses or Upload Valid Code"
        if st.button(analyze_button_label, type="primary", disabled=analyze_button_disabled):
            analysis_triggered = True
            if not selected_analyses:
                st.warning("Please select at least one analysis type from the sidebar.")
            elif file_count == 0:
                st.warning("No relevant code files found in the ZIP archive to analyze.")
            else:
                st.divider()
                with st.spinner(f"π Preparing prompt & contacting AI ({'Mock Mode' if st.session_state.mock_api_call else GEMINI_MODEL_NAME})... This may take time."):
                    analysis_prompt, included_files_in_prompt = construct_analysis_prompt(code_files, selected_analyses)
                    if analysis_prompt and included_files_in_prompt:
                        st.write(f"Analyzing {len(included_files_in_prompt)} files...")
                        results_json, error_message = call_gemini_api(analysis_prompt)
                        results_cache = (results_json, error_message)
                    elif not included_files_in_prompt:
                        results_cache = (None, "Could not proceed: No files included in prompt (check token limits/errors).")
                    else:
                        results_cache = (None, "Failed to generate analysis prompt.")

if analysis_triggered and results_cache:
    results_json, error_message = results_cache
    st.divider()
    if error_message:
        st.error(f"Analysis Failed: {error_message}")
        # A failed parse may still carry the raw reply; show it for debugging.
        if results_json and isinstance(results_json, dict) and "raw_response" in results_json:
            st.subheader("Raw AI Response")
            st.code(results_json["raw_response"], language='text')
    elif results_json:
        display_results(results_json, selected_analyses)
    else:
        st.error("Analysis did not return results or an unknown error occurred.")
elif not uploaded_file:
    st.info("Upload a ZIP file containing your source code to begin.")

st.divider()
st.markdown("_Assistant powered by Google Gemini._")
|