Spaces:
Sleeping
Sleeping
import argparse | |
from pathlib import Path | |
import json | |
import logging | |
import sys | |
import time | |
from code_summarizer import ( | |
clone_repo, | |
summarize_repo, | |
upload_summary_to_firebase, | |
get_summaries_by_repo, | |
is_firestore_available | |
) | |
# Import device/model status separately if needed for logging | |
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED | |
# Configure root logging once for the CLI; every record is tagged "[CLI]".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - [CLI] %(message)s',
)
log = logging.getLogger(__name__)

# Directory the target repository is cloned into (relative to the CWD).
REPO_CLONE_DIR = "cloned_repo_cli"

# Location of the local JSON backup written by the pipeline.
OUTPUT_DIR = Path("outputs")
OUTPUT_FILE = OUTPUT_DIR.joinpath("summaries.json")
def run_pipeline(repo_url: str, skip_existing: bool = False, save_local: bool = True) -> None:
    """Clone a repository, summarize it, and persist the summaries.

    Steps, in order: check that the summarizer model is loaded, optionally
    return early when Firebase already holds summaries for the repo, clone
    the repo into ``REPO_CLONE_DIR``, run ``summarize_repo``, upload every
    summary to Firebase when it is available, and optionally write a JSON
    backup to ``OUTPUT_FILE``.

    Args:
        repo_url: URL of the repository to clone and summarize.
        skip_existing: If true and Firebase is available, return without
            doing any work when ``get_summaries_by_repo`` returns a truthy
            result for ``repo_url``.
        save_local: If true, dump the summaries as JSON to ``OUTPUT_FILE``.

    Raises:
        SystemExit: With status 1 when the summarizer model is not loaded
            or when cloning the repository fails.
    """
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    log.info("Pipeline starting for: %s", repo_url)

    if not SUMMARIZER_LOADED:
        log.error("Summarizer Model Not Loaded. Exiting.")
        sys.exit(1)

    firestore_ready = is_firestore_available()
    if not firestore_ready:
        log.warning("Firebase is not available. Uploads/Checks will be skipped.")

    # The existence check needs Firebase, so it is skipped when unavailable.
    if skip_existing and firestore_ready:
        log.info("Checking for existing summaries...")
        if get_summaries_by_repo(repo_url):
            log.warning("Skipping. Found existing summaries in Firebase.")
            return

    log.info("Cloning repository...")
    clone_dir_path = Path(REPO_CLONE_DIR)
    if not clone_repo(repo_url, str(clone_dir_path)):
        log.error("Repo cloning failed. Exiting.")
        sys.exit(1)

    log.info("Running summarization (device: %s)...", summarizer_device)
    summaries = summarize_repo(clone_dir_path, repo_url)
    if not summaries:
        log.warning("No functions found or summarization failed.")
        return

    total = len(summaries)
    log.info("Summarization complete. Found %d functions.", total)

    if firestore_ready:
        log.info("Uploading %d summaries to Firebase...", total)
        for done, summary in enumerate(summaries, start=1):
            upload_summary_to_firebase(summary)
            # Progress heartbeat every 100 uploads.
            if done % 100 == 0:
                log.info("  Uploaded %d/%d...", done, total)
        # The loop only completes after one upload call per summary.
        log.info("Finished uploading %d summaries.", total)
    else:
        log.info("Skipping Firebase upload.")

    if save_local:
        log.info("Saving summaries locally to %s...", OUTPUT_FILE)
        try:
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            with open(OUTPUT_FILE, "w", encoding='utf-8') as f:
                # default=str stringifies any value json cannot encode natively.
                json.dump(summaries, f, indent=2, default=str)
            log.info("Saved local backup to %s", OUTPUT_FILE)
        except Exception as e:
            # The backup is best-effort: log with traceback and carry on.
            log.exception("Failed to save local backup: %s", e)

    duration = time.perf_counter() - started
    log.info("✅ Pipeline completed in %.2f seconds.", duration)
if __name__ == "__main__":
    # Command-line entry point: parse the flags and hand off to run_pipeline.
    cli = argparse.ArgumentParser(
        description="Code Summarizer CLI",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "--url",
        required=True,
        help="HTTPS URL of the public GitHub repository.",
    )
    cli.add_argument(
        "--skip_existing",
        action="store_true",
        help="Skip if repo already summarized in Firebase.",
    )
    cli.add_argument(
        "--no_save",
        action="store_true",
        help="Disable saving local summaries.json.",
    )
    options = cli.parse_args()

    # --no_save is a negative flag; invert it for the save_local parameter.
    keep_local_copy = not options.no_save
    run_pipeline(
        repo_url=options.url,
        skip_existing=options.skip_existing,
        save_local=keep_local_copy,
    )