Spaces:
Sleeping
Sleeping
Commit
·
188e720
1
Parent(s):
4b6ece1
Add application code
Browse files- app.py +95 -0
- code_summarizer/New Text Document.txt +0 -0
- code_summarizer/__init__.py +29 -0
- code_summarizer/firebase_db.py +69 -0
- code_summarizer/language_parsers.py +76 -0
- code_summarizer/repo_downloader.py +28 -0
- code_summarizer/summarizer.py +95 -0
- interface.py +100 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from pathlib import Path
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
import sys
|
6 |
+
import time
|
7 |
+
|
8 |
+
from code_summarizer import (
|
9 |
+
clone_repo,
|
10 |
+
summarize_repo,
|
11 |
+
upload_summary_to_firebase,
|
12 |
+
get_summaries_by_repo,
|
13 |
+
is_firestore_available
|
14 |
+
)
|
15 |
+
# Import device/model status separately if needed for logging
|
16 |
+
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED
|
17 |
+
|
18 |
+
# CLI logging: timestamped records tagged with [CLI].
_LOG_FORMAT = '%(asctime)s - %(levelname)s - [CLI] %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
log = logging.getLogger(__name__)

# Directory the CLI clones into, and where the local JSON backup is written.
REPO_CLONE_DIR = "cloned_repo_cli"
OUTPUT_DIR = Path("outputs")
OUTPUT_FILE = OUTPUT_DIR.joinpath("summaries.json")
|
25 |
+
|
26 |
+
def run_pipeline(repo_url: str, skip_existing: bool = False, save_local: bool = True):
    """Clone a repository, summarize its functions, and persist the results.

    Steps, in order: abort if the summarizer model is not loaded; optionally
    return early when Firebase already holds summaries for ``repo_url``;
    clone into ``REPO_CLONE_DIR``; summarize; upload every summary when
    Firestore is available; optionally dump the summaries to ``OUTPUT_FILE``.

    Args:
        repo_url: URL of the repository to clone and summarize.
        skip_existing: When True and Firestore is available, skip the run if
            summaries for ``repo_url`` are already stored.
        save_local: When True, write the summaries to ``OUTPUT_FILE`` as JSON.

    Raises:
        SystemExit: With status 1 when the model is not loaded or the clone
            fails.
    """
    start_time = time.time()
    log.info(f"Pipeline starting for: {repo_url}")

    if not SUMMARIZER_LOADED:
        log.error("Summarizer Model Not Loaded. Exiting.")
        sys.exit(1)

    firestore_ready = is_firestore_available()
    if not firestore_ready:
        log.warning("Firebase is not available. Uploads/Checks will be skipped.")

    if skip_existing and firestore_ready:
        log.info("Checking for existing summaries...")
        if get_summaries_by_repo(repo_url):
            log.warning("Skipping. Found existing summaries in Firebase.")
            return

    log.info("Cloning repository...")
    clone_dir_path = Path(REPO_CLONE_DIR)
    if not clone_repo(repo_url, str(clone_dir_path)):
        log.error("Repo cloning failed. Exiting.")
        sys.exit(1)

    log.info(f"Running summarization (device: {summarizer_device})...")
    summaries = summarize_repo(clone_dir_path, repo_url)
    if not summaries:
        log.warning("No functions found or summarization failed.")
        return

    total = len(summaries)
    log.info(f"Summarization complete. Found {total} functions.")

    if firestore_ready:
        log.info(f"Uploading {total} summaries to Firebase...")
        # enumerate(start=1) doubles as the progress counter; one upload is
        # attempted per summary, so the final figure equals ``total``.
        for done, summary in enumerate(summaries, start=1):
            upload_summary_to_firebase(summary)
            if done % 100 == 0:
                log.info(f" Uploaded {done}/{total}...")
        log.info(f"Finished uploading {total} summaries.")
    else:
        log.info("Skipping Firebase upload.")

    if save_local:
        log.info(f"Saving summaries locally to {OUTPUT_FILE}...")
        try:
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            with open(OUTPUT_FILE, "w", encoding='utf-8') as f:
                # default=str stringifies any value json cannot encode natively.
                json.dump(summaries, f, indent=2, default=str)
            log.info(f"Saved local backup to {OUTPUT_FILE}")
        except Exception as e:
            # Best effort: a failed backup is logged, not fatal.
            log.error(f"Failed to save local backup: {e}", exc_info=True)

    duration = time.time() - start_time
    log.info(f"✅ Pipeline completed in {duration:.2f} seconds.")
|
82 |
+
|
83 |
+
|
84 |
+
if __name__ == "__main__":
    # CLI entry point: parse the flags and hand them to run_pipeline.
    cli = argparse.ArgumentParser(
        description="Code Summarizer CLI",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "--url",
        required=True,
        help="HTTPS URL of the public GitHub repository.",
    )
    cli.add_argument(
        "--skip_existing",
        action="store_true",
        help="Skip if repo already summarized in Firebase.",
    )
    cli.add_argument(
        "--no_save",
        action="store_true",
        help="Disable saving local summaries.json.",
    )
    options = cli.parse_args()

    # --no_save is a negative flag, so invert it for save_local.
    run_pipeline(
        repo_url=options.url,
        skip_existing=options.skip_existing,
        save_local=not options.no_save,
    )
|
code_summarizer/New Text Document.txt
ADDED
File without changes
|
code_summarizer/__init__.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
|
4 |
+
log = logging.getLogger(__name__)
|
5 |
+
log.addHandler(logging.NullHandler())
|
6 |
+
|
7 |
+
from .repo_downloader import clone_repo
|
8 |
+
from .language_parsers import extract_code_snippets, get_language_by_extension, SUPPORTED_EXTENSIONS
|
9 |
+
from .summarizer import summarize_repo, summarize_file, get_embedding, generate_summary
|
10 |
+
from .firebase_db import upload_summary_to_firebase, get_summaries_by_repo, is_firestore_available
|
11 |
+
|
12 |
+
# Package version; reported in the import-time log line below.
VERSION = "0.1.0"

# Public names re-exported by the package, grouped by originating submodule.
__all__ = [
    # repo_downloader
    "clone_repo",
    # language_parsers
    "extract_code_snippets",
    "get_language_by_extension",
    "SUPPORTED_EXTENSIONS",
    # summarizer
    "summarize_repo",
    "summarize_file",
    "get_embedding",
    "generate_summary",
    # firebase_db
    "upload_summary_to_firebase",
    "get_summaries_by_repo",
    "is_firestore_available",
    # package metadata
    "VERSION",
]

log.info(f"Code Summarizer Package v{VERSION} initialized.")
|
code_summarizer/firebase_db.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import firebase_admin
|
2 |
+
from firebase_admin import credentials, firestore
|
3 |
+
import os
|
4 |
+
import logging
|
5 |
+
from typing import List, Dict
|
6 |
+
|
7 |
+
log = logging.getLogger(__name__)

# Module state: both stay at these defaults unless initialization succeeds.
FIRESTORE_INITIALIZED = False
db = None

# Service-account credentials are supplied as a JSON string in the environment.
firebase_secret_json = os.getenv('FIREBASE_SERVICE_ACCOUNT_JSON')

if not firebase_secret_json:
    log.warning("Firebase Secret (FIREBASE_SERVICE_ACCOUNT_JSON) not found in environment. Firebase disabled.")
else:
    try:
        import json

        # Convert the JSON string from the env var into a dictionary
        credentials_dict = json.loads(firebase_secret_json)
        if firebase_admin._apps:
            # An app is already registered with the SDK; do not create another.
            log.info("Firebase Admin SDK already initialized.")
        else:
            cred = credentials.Certificate(credentials_dict)
            firebase_admin.initialize_app(cred)
            log.info("Firebase Admin SDK initialized from Secret.")
        db = firestore.client()
        FIRESTORE_INITIALIZED = True
    except Exception as e:
        # Any failure leaves Firebase disabled rather than aborting the import.
        log.error(f"Failed to initialize Firebase from Secret: {e}", exc_info=True)
|
31 |
+
|
32 |
+
def is_firestore_available() -> bool:
    """Return True only when initialization succeeded and a client exists."""
    if not FIRESTORE_INITIALIZED:
        return False
    return db is not None
|
34 |
+
|
35 |
+
def upload_summary_to_firebase(summary: Dict) -> bool:
    """Write one function summary as a new document in the "functions" collection.

    The caller's dict is never modified: an ``embedding`` value that is not a
    list is dropped from a shallow copy before the write.

    Args:
        summary: Mapping that must contain ``repo_url``, ``file_path``,
            ``language``, ``function_code`` and ``summary``; ``embedding`` is
            optional.

    Returns:
        True when the document was written. False when the upload was skipped
        (Firestore unavailable or required keys missing) or the write raised;
        errors are logged, not propagated.
    """
    if not is_firestore_available():
        log.debug("Firestore unavailable, skipping upload.")
        return False

    required_keys = ('repo_url', 'file_path', 'language', 'function_code', 'summary')
    if any(key not in summary for key in required_keys):
        log.warning(f"Skipped upload: Missing required keys. Has: {list(summary.keys())}")
        return False

    try:
        # Shallow copy so removing a bad embedding does not leak back into the
        # caller's data (which may still be saved or displayed afterwards).
        payload = dict(summary)
        if "embedding" in payload and not isinstance(payload["embedding"], list):
            log.warning(f"Removing invalid non-list embedding before upload for {summary.get('file_path')}")
            del payload["embedding"]

        # document() is called without an id, so each upload creates a new document.
        doc_ref = db.collection("functions").document()
        doc_ref.set(payload)
    except Exception as e:
        log.error(f"Error uploading summary for {summary.get('file_path')} to Firebase: {e}", exc_info=True)
        return False

    log.debug(f"Uploaded summary for: {summary.get('file_path')}")
    return True
|
55 |
+
|
56 |
+
def get_summaries_by_repo(repo_url: str) -> List[Dict]:
    """Fetch every stored summary whose ``repo_url`` field equals ``repo_url``.

    Returns an empty list when Firestore is unavailable or the query fails;
    failures are logged rather than raised.
    """
    if not is_firestore_available():
        log.warning("Firestore unavailable, cannot fetch summaries.")
        return []

    try:
        log.info(f"Querying Firestore for repo_url: {repo_url}")
        query = db.collection("functions").where("repo_url", "==", repo_url)
        found = []
        for doc in query.stream():
            found.append(doc.to_dict())
        log.info(f"Found {len(found)} existing summaries in Firestore for {repo_url}.")
        return found
    except Exception as e:
        log.error(f"Error fetching summaries for {repo_url} from Firebase: {e}", exc_info=True)
        return []
|
code_summarizer/language_parsers.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
from typing import List, Tuple, Dict, Optional
|
4 |
+
import re
|
5 |
+
import ast
|
6 |
+
import logging
|
7 |
+
|
8 |
+
log = logging.getLogger(__name__)
|
9 |
+
|
10 |
+
# Note: ast.get_source_segment requires Python 3.8+
# Maps a lower-cased file suffix to the language name used across the package.
SUPPORTED_EXTENSIONS: Dict[str, str] = {
    ".py": "python",
    ".js": "javascript",
    ".java": "java",
    ".cpp": "cpp",
    ".c": "c",
    ".cs": "csharp",
    ".ts": "typescript",
    ".go": "go",
}

# Regex patterns (simplified, may need adjustment per language)
# WARNING: Regex-based parsing is fragile.
# Every pattern ends a match at the first "}" found at the start of a line.

# JavaScript and TypeScript share one pattern: named functions or
# const/let/var arrow functions with a braced body.
_JS_TS_PATTERN = r"^(?:async\s+)?function\s+\w+\s*\([^)]*\)\s*\{[\s\S]*?^\}|(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s*)?\([^)]*\)\s*=>\s*\{[\s\S]*?^\}"

# C and C++ share one pattern: return type, optional pointer stars, name, args.
_C_FAMILY_PATTERN = r"^(?:[\w:]+)\s+\**\s*[\w:]+\s*\([^)]*\)\s*(?:const)?\s*\{[\s\S]*?^\}"

patterns = {
    "javascript": _JS_TS_PATTERN,
    "typescript": _JS_TS_PATTERN,
    "java": r"^(?:public|private|protected|static|\s)*\s*[\w<>\[\]]+\s+\w+\s*\([^)]*\)\s*(?:throws\s+[\w,\s]+)?\s*\{[\s\S]*?^\}",
    "cpp": _C_FAMILY_PATTERN,
    "c": _C_FAMILY_PATTERN,
    "csharp": r"^(?:public|private|protected|internal|static|virtual|async|override|\s)*\s*[\w<>\[\],?]+\s+\w+\s*\([^)]*\)\s*\{[\s\S]*?^\}",
    "go": r"^func(?:\s*\(\s*\w+\s+\*?\w+\s*\))?\s+\w+\s*\([^)]*\)\s*(?:[\w\s,()]+)?\s*\{[\s\S]*?^\}",
}
|
27 |
+
|
28 |
+
def get_language_by_extension(file_path: Path) -> Optional[str]:
    """Look up the language for ``file_path`` by its lower-cased suffix.

    Returns None when the suffix is not a key of ``SUPPORTED_EXTENSIONS``.
    """
    extension = file_path.suffix.lower()
    return SUPPORTED_EXTENSIONS.get(extension)
|
30 |
+
|
31 |
+
def extract_python_functions(file_path: Path) -> List[str]:
    """Return the source text of every function defined in a Python file.

    The file is parsed with ``ast`` and every ``def`` and ``async def`` node
    found by ``ast.walk`` is collected. Methods and nested functions are
    therefore returned as separate entries, and a nested function also appears
    inside its parent's text.

    Args:
        file_path: Path of the Python source file. It is read as UTF-8 with
            undecodable bytes ignored.

    Returns:
        The source segment of each function. The list is empty when the file
        is missing or cannot be parsed; such problems are logged, not raised.
    """
    functions = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            source = f.read()
        tree = ast.parse(source, filename=str(file_path))
        for node in ast.walk(tree):
            # AsyncFunctionDef is a separate node type; without it every
            # ``async def`` would be skipped.
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            try:
                segment = ast.get_source_segment(source, node)
            except Exception as e:
                # One unrecoverable segment must not drop the rest of the file.
                log.debug(f"Could not extract source segment for {node.name} in {file_path}: {e}")
                continue
            if segment:
                functions.append(segment)
    except (FileNotFoundError, SyntaxError, ValueError) as e:
        # ValueError covers UnicodeDecodeError and sources ast.parse rejects
        # outright (e.g. embedded null bytes on some Python versions).
        log.warning(f"Skipping file {file_path} due to parsing error: {e}")
    except Exception as e:
        log.error(f"Unexpected error parsing Python file {file_path}: {e}", exc_info=True)
    return functions
|
50 |
+
|
51 |
+
def extract_functions_by_regex(file_path: Path, pattern: str) -> List[str]:
    """Return every whole-pattern match of ``pattern`` found in a file.

    Args:
        file_path: File to scan. It is read as UTF-8 with undecodable bytes
            ignored.
        pattern: Regular expression, applied with ``re.DOTALL | re.MULTILINE``.

    Returns:
        The full text of each match, in file order. The list is empty when the
        file cannot be read or the search fails; such problems are logged, not
        raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            code = f.read()
        # finditer + group(0) always yields the complete match. re.findall
        # would return only group contents as soon as a pattern contains a
        # capturing group.
        flags = re.DOTALL | re.MULTILINE
        return [match.group(0) for match in re.finditer(pattern, code, flags)]
    except (FileNotFoundError, UnicodeDecodeError) as e:
        log.warning(f"Skipping file {file_path} due to read error: {e}")
        return []
    except Exception as e:
        log.error(f"Failed regex extraction on {file_path}: {e}", exc_info=True)
        return []
|
62 |
+
|
63 |
+
def extract_code_snippets(file_path: Path) -> Tuple[Optional[str], List[str]]:
    """Detect the language of ``file_path`` and extract its function snippets.

    Returns:
        ``(language, snippets)``. ``language`` is None for an unsupported
        suffix. Python files go through the AST extractor; other languages use
        their regex from ``patterns``, or yield no snippets if none is defined.
    """
    language = get_language_by_extension(file_path)
    if language is None:
        return None, []

    if language == "python":
        return language, extract_python_functions(file_path)

    pattern = patterns.get(language)
    if not pattern:
        log.debug(f"No regex pattern defined for language: {language} in file {file_path}")
        return language, []
    return language, extract_functions_by_regex(file_path, pattern)
|
code_summarizer/repo_downloader.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
from git import Repo, GitCommandError
|
4 |
+
import logging
|
5 |
+
|
6 |
+
log = logging.getLogger(__name__)
|
7 |
+
|
8 |
+
def clone_repo(repo_url: str, dest_folder: str = "cloned_repo") -> bool:
    """Clone ``repo_url`` into ``dest_folder``, replacing any existing copy.

    Args:
        repo_url: URL of the repository to clone.
        dest_folder: Target directory. If it already exists it is deleted
            first.

    Returns:
        True when the clone succeeded. False when the URL was rejected, the
        old directory could not be removed, or the clone failed. Failures are
        logged, not raised.
    """
    # The URL is user-supplied and is handed to git. Reject blank values and
    # anything starting with "-", which git could read as a command-line
    # option. Checking before the rmtree means a bad URL never wipes an
    # existing clone.
    # NOTE(review): other risky forms (e.g. "ext::" transports) are not
    # filtered here - confirm whether the pinned GitPython blocks them.
    candidate = repo_url.strip() if isinstance(repo_url, str) else ""
    if not candidate or candidate.startswith("-"):
        log.error(f"Refusing to clone from invalid repository URL: {repo_url!r}")
        return False

    if os.path.exists(dest_folder):
        log.info(f"Removing existing directory: {dest_folder}")
        try:
            shutil.rmtree(dest_folder)
        except OSError as e:
            log.error(f"Error removing directory {dest_folder}: {e}")
            return False

    try:
        log.info(f"Cloning repo from {repo_url} into {dest_folder}...")
        Repo.clone_from(repo_url, dest_folder)
        log.info("Repo cloned successfully.")
        return True
    except GitCommandError as e:
        log.error(f"Error cloning repo: Git command failed - {e}")
        return False
    except Exception as e:
        log.error(f"An unexpected error occurred during cloning: {e}")
        return False
|
code_summarizer/summarizer.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import RobertaTokenizer, RobertaModel, logging as hf_logging
|
3 |
+
from typing import List, Dict, Optional
|
4 |
+
|
5 |
+
from code_summarizer.language_parsers import extract_code_snippets, SUPPORTED_EXTENSIONS
|
6 |
+
from pathlib import Path
|
7 |
+
import numpy as np
|
8 |
+
import logging
|
9 |
+
|
10 |
+
log = logging.getLogger(__name__)
# Only surface errors from the transformers library's own logger.
hf_logging.set_verbosity_error()

# Use CUDA when torch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
log.info(f"Summarizer using device: {device}")

# Module state read by get_embedding and by the CLI / UI entry points.
# MODEL_LOADED only becomes True once every loading step has succeeded.
MODEL_LOADED = False
tokenizer = None
model = None

try:
    log.info("Loading CodeBERT tokenizer/model...")
    tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
    model = RobertaModel.from_pretrained("microsoft/codebert-base")
    model = model.to(device)
    model.eval()
except Exception as e:
    # Importing the module must not fail; callers check MODEL_LOADED instead.
    log.error(f"Failed to load CodeBERT model: {e}", exc_info=True)
else:
    MODEL_LOADED = True
    log.info("CodeBERT model loaded successfully.")
|
29 |
+
|
30 |
+
def get_embedding(code: str) -> Optional[List[float]]:
    """Embed a code snippet with the loaded model.

    The snippet is tokenized (truncated to 512 tokens), run through the model
    without gradients, and ``last_hidden_state`` is averaged over dim 1
    (presumably the token axis - confirm against the model docs).

    Returns:
        The embedding as a plain Python list, or None when the model is not
        loaded or anything fails along the way (the failure is logged).
    """
    model_ready = MODEL_LOADED and tokenizer is not None and model is not None
    if not model_ready:
        return None

    try:
        encoded = tokenizer(code, return_tensors="pt", truncation=True, max_length=512, padding=True)
        # Move every input tensor to the device the model lives on.
        on_device = {name: tensor.to(device) for name, tensor in encoded.items()}
        with torch.no_grad():
            outputs = model(**on_device)
        pooled = outputs.last_hidden_state.mean(dim=1).squeeze()
        return pooled.cpu().numpy().tolist()
    except Exception as e:
        log.warning(f"Failed to generate embedding: {e}. Snippet start: {code[:50]}...")
        return None
|
43 |
+
|
44 |
+
def generate_summary(snippet: str) -> str:
    """Build a one-line placeholder summary from a snippet's first code line.

    The first non-blank line that does not start with ``#``, ``//`` or ``/*``
    is taken as the header and truncated to 100 characters plus ``...``.

    Returns:
        ``"Function/method starting with `<header>`."``; ``"N/A Summary"``
        when no such line exists; ``"Summary generation failed."`` if anything
        raises (for example when ``snippet`` is not a string).
    """
    try:
        header = ""
        for raw_line in snippet.strip().split('\n'):
            line = raw_line.strip()
            # str.startswith accepts a tuple: one call covers all comment markers.
            if line and not line.startswith(('#', '//', '/*')):
                header = line
                break
        if len(header) > 100:
            header = header[:100] + "..."
        return f"Function/method starting with `{header}`." if header else "N/A Summary"
    except Exception:
        return "Summary generation failed."
|
52 |
+
|
53 |
+
def summarize_file(file_path: Path, repo_url: str) -> List[Dict]:
    """Summarize every function snippet extracted from one file.

    Returns:
        One dict per non-blank snippet with the keys ``repo_url``,
        ``file_path`` (POSIX form), ``language``, ``function_code`` and
        ``summary``, plus ``embedding`` when one could be computed. The list
        is empty when no snippets are found.
    """
    language, snippets = extract_code_snippets(file_path)
    if not snippets:
        return []

    log.debug(f"Summarizing {len(snippets)} snippets from {file_path}...")
    posix_path = str(file_path.as_posix())
    results = []
    for snippet in snippets:
        # Skip empty or whitespace-only snippets.
        if not snippet or snippet.isspace():
            continue
        embedding = get_embedding(snippet)
        entry = {
            "repo_url": repo_url,
            "file_path": posix_path,
            "language": language,
            "function_code": snippet,
            "summary": generate_summary(snippet),
        }
        # The embedding key is omitted entirely when no embedding was produced.
        if embedding is not None:
            entry["embedding"] = embedding
        results.append(entry)
    return results
|
76 |
+
|
77 |
+
def summarize_repo(repo_dir: Path, repo_url: str) -> List[Dict]:
    """Summarize all supported source files found under ``repo_dir``.

    Walks the directory recursively and runs ``summarize_file`` on every file
    whose lower-cased suffix is in ``SUPPORTED_EXTENSIONS``. A failure on one
    file is logged and the walk continues.

    Returns:
        The concatenated summary dicts of all files.
    """
    log.info(f"Starting summarization for repository: {repo_url}")
    supported = set(SUPPORTED_EXTENSIONS.keys())
    collected = []
    files_with_results = 0  # only files that yielded at least one summary

    for candidate in repo_dir.rglob("*"):
        if not candidate.is_file() or candidate.suffix.lower() not in supported:
            continue
        log.debug(f"Processing file: {candidate}")
        try:
            per_file = summarize_file(candidate, repo_url)
            if per_file:
                collected.extend(per_file)
                files_with_results += 1
        except Exception as e:
            log.error(f"Failed to process file {candidate}: {e}", exc_info=True)

    log.info(f"Summarization complete for {repo_url}. Processed {files_with_results} files, found {len(collected)} functions.")
    return collected
|
interface.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from pathlib import Path
|
3 |
+
import logging
|
4 |
+
|
5 |
+
from code_summarizer import (
|
6 |
+
clone_repo,
|
7 |
+
summarize_repo,
|
8 |
+
upload_summary_to_firebase,
|
9 |
+
is_firestore_available
|
10 |
+
)
|
11 |
+
# Import device/model status separately
|
12 |
+
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED
|
13 |
+
|
14 |
+
# Module logger for the Gradio interface.
log = logging.getLogger(__name__)

# Directory the Gradio flow clones repositories into.
REPO_CLONE_DIR = "cloned_repo_gradio"
|
17 |
+
|
18 |
+
def format_summaries_for_display(summaries: list) -> str:
    """Render a short text report of the summaries for the status box.

    Shows the total count, whether Firestore is available, and details for at
    most the first five summaries, followed by a note when more exist.

    Args:
        summaries: Summary dicts; missing fields are displayed as ``?``.

    Returns:
        The formatted report, or ``"No summaries generated."`` for an empty or
        None input.
    """
    if not summaries:
        return "No summaries generated."

    limit = 5
    # Collect the pieces and join once instead of repeated string +=.
    parts = [
        f"✅ Found {len(summaries)} functions.\n",
        f"Firestore available: {'Yes' if is_firestore_available() else 'No'}\n---\n",
    ]
    for summary in summaries[:limit]:
        parts.append(f"File: {summary.get('file_path', '?')}\nLang: {summary.get('language', '?')}\n")
        parts.append(f"Summary: {summary.get('summary', '?')}\n")
        parts.append(f"Embedding: {'Yes' if 'embedding' in summary else 'No'}\n---\n")
    if len(summaries) > limit:
        parts.append(f"... and {len(summaries) - limit} more.")
    return "".join(parts)
|
30 |
+
|
31 |
+
def summarize_from_url(repo_url: str):
    """Clone, summarize and upload a repository, yielding status text.

    Written as a generator so the UI can show progress: each yielded string
    replaces the previous status. The last value is the final status line
    followed by the formatted summaries.

    Args:
        repo_url: Repository URL; it must start with ``https://``.

    Yields:
        Progress or error messages. The generator stops after the first error
        message.
    """
    # Require the full scheme prefix; a bare "https" check would also accept
    # strings such as "httpsfoo".
    if not repo_url or not repo_url.startswith("https://"):
        yield "❌ Invalid HTTPS GitHub URL."
        return

    if not SUMMARIZER_LOADED:
        yield "❌ Summarizer Model Not Loaded. Cannot proceed."
        return

    yield "⏳ Cloning repository..."
    clone_dir_path = Path(REPO_CLONE_DIR)
    if not clone_repo(repo_url, str(clone_dir_path)):
        yield "❌ Failed to clone repo."
        return

    yield f"⏳ Summarizing code (using {summarizer_device})..."
    summaries = summarize_repo(clone_dir_path, repo_url)
    if not summaries:
        yield "⚠️ Repo cloned, but no functions found."
        return

    status = f"✅ Summarized {len(summaries)} functions."

    if is_firestore_available():
        # Only announce the upload when one is actually attempted.
        yield status + " Uploading to Firebase..."
        upload_count = 0
        for summary in summaries:
            try:
                upload_summary_to_firebase(summary)
                upload_count += 1
            except Exception as e:
                log.error(f"Gradio UI: Firebase upload error: {e}")
        status += f" Uploaded {upload_count} to Firebase."
    else:
        status += " Firebase unavailable, skipping upload."

    yield status + "\n---\n" + format_summaries_for_display(summaries)
|
68 |
+
|
69 |
+
def perform_web_search(query: str):
    """Return a placeholder message echoing ``query``; no search is performed."""
    # Placeholder - Replace with actual search implementation
    # NOTE(review): the leading "π" looks like a mis-decoded emoji; restore the
    # intended character from the original file.
    message = f"π Web search (placeholder) for: '{query}'"
    return message
|
72 |
+
|
73 |
+
def launch_interface():
    """Build the two-tab Gradio UI and launch it.

    Tab one wires the URL box and button to ``summarize_from_url``; tab two
    wires a query box and button to the placeholder ``perform_web_search``.
    """
    # NOTE(review): the "π" in the heading looks like a mis-decoded emoji;
    # restore the intended character from the original file.
    with gr.Blocks(title="Code Summarizer", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# π Code Summarizer & Search")

        with gr.Tab("Repo Summarizer"):
            url_box = gr.Textbox(
                label="GitHub Repo URL",
                placeholder="https://github.com/user/repo",
            )
            run_button = gr.Button("Summarize & Upload", variant="primary")
            status_box = gr.Textbox(label="Status / Output", lines=10, interactive=False)
            run_button.click(fn=summarize_from_url, inputs=url_box, outputs=status_box)

        with gr.Tab("Web Code Search (Placeholder)"):
            query_box = gr.Textbox(
                label="Search Query",
                placeholder="e.g., binary search tree cpp",
            )
            search_button = gr.Button("Search Web", variant="secondary")
            results_box = gr.Textbox(label="Web Search Results", lines=5, interactive=False)
            search_button.click(fn=perform_web_search, inputs=query_box, outputs=results_box)

    log.info("Launching Gradio interface...")
    demo.launch()
|
91 |
+
|
92 |
+
if __name__ == "__main__":
    # Basic logging setup for the interface if run directly
    interface_log_format = '%(asctime)s - %(levelname)s - [Interface] %(message)s'
    logging.basicConfig(level=logging.INFO, format=interface_log_format)

    # Report degraded dependencies up front; the UI is launched regardless.
    if not SUMMARIZER_LOADED:
        log.error("Summarizer model failed to load. Interface functionality will be limited.")
    if not is_firestore_available():
        log.warning("Firebase is not available. Upload/check functionality will be disabled.")

    launch_interface()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gitpython>=3.1.0,<4.0.0
|
2 |
+
torch>=1.9.0,<3.0.0
|
3 |
+
transformers>=4.10.0,<5.0.0
|
4 |
+
numpy>=1.19.0,<2.0.0
|
5 |
+
gradio>=3.15.0,<5.0.0
|
6 |
+
firebase-admin>=5.0.0,<7.0.0
|