Spaces:
Sleeping
Sleeping
Commit
·
8b7d3ee
1
Parent(s):
0569980
Fix building (app and interface merged)
Browse files- Procfile.txt +0 -1
- app.py +152 -35
- interface.py +0 -100
Procfile.txt
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
web: python interface.py
|
|
|
|
app.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
import argparse
|
2 |
-
|
3 |
import json
|
4 |
import logging
|
5 |
import sys
|
6 |
import time
|
|
|
7 |
|
8 |
from code_summarizer import (
|
9 |
clone_repo,
|
@@ -12,84 +13,200 @@ from code_summarizer import (
|
|
12 |
get_summaries_by_repo,
|
13 |
is_firestore_available
|
14 |
)
|
15 |
-
# Import device/model status separately if needed for logging
|
16 |
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED
|
17 |
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
log = logging.getLogger(__name__)
|
21 |
|
22 |
-
|
|
|
23 |
OUTPUT_DIR = Path("outputs")
|
24 |
OUTPUT_FILE = OUTPUT_DIR / "summaries.json"
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def run_pipeline(repo_url: str, skip_existing: bool = False, save_local: bool = True):
|
|
|
27 |
start_time = time.time()
|
28 |
-
log.info(f"Pipeline starting for: {repo_url}")
|
29 |
|
30 |
if not SUMMARIZER_LOADED:
|
31 |
-
log.error("Summarizer Model Not Loaded. Exiting.")
|
32 |
sys.exit(1)
|
33 |
|
34 |
firestore_ready = is_firestore_available()
|
35 |
if not firestore_ready:
|
36 |
-
log.warning("Firebase is not available. Uploads/Checks will be skipped.")
|
37 |
|
38 |
if skip_existing and firestore_ready:
|
39 |
-
log.info("Checking for existing summaries...")
|
40 |
if get_summaries_by_repo(repo_url):
|
41 |
-
log.warning("Skipping. Found existing summaries in Firebase.")
|
42 |
return
|
43 |
|
44 |
-
log.info("Cloning repository...")
|
45 |
-
clone_dir_path = Path(
|
46 |
if not clone_repo(repo_url, str(clone_dir_path)):
|
47 |
-
log.error("Repo cloning failed. Exiting.")
|
48 |
sys.exit(1)
|
49 |
|
50 |
-
log.info(f"Running summarization (device: {summarizer_device})...")
|
51 |
summaries = summarize_repo(clone_dir_path, repo_url)
|
52 |
if not summaries:
|
53 |
-
log.warning("No functions found or summarization failed.")
|
54 |
return
|
55 |
|
56 |
-
log.info(f"Summarization complete. Found {len(summaries)} functions.")
|
57 |
|
58 |
if firestore_ready:
|
59 |
-
log.info(f"Uploading {len(summaries)} summaries to Firebase...")
|
60 |
upload_count = 0
|
61 |
for i, summary in enumerate(summaries):
|
62 |
upload_summary_to_firebase(summary)
|
63 |
upload_count +=1
|
64 |
if (i + 1) % 100 == 0:
|
65 |
-
log.info(f"
|
66 |
-
log.info(f"Finished uploading {upload_count} summaries.")
|
67 |
else:
|
68 |
-
log.info("Skipping Firebase upload.")
|
69 |
|
70 |
if save_local:
|
71 |
-
log.info(f"Saving summaries locally to {OUTPUT_FILE}...")
|
72 |
try:
|
73 |
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
74 |
with open(OUTPUT_FILE, "w", encoding='utf-8') as f:
|
75 |
json.dump(summaries, f, indent=2, default=str)
|
76 |
-
log.info(f"Saved local backup to {OUTPUT_FILE}")
|
77 |
except Exception as e:
|
78 |
-
log.error(f"Failed to save local backup: {e}", exc_info=True)
|
79 |
|
80 |
duration = time.time() - start_time
|
81 |
-
log.info(f"✅ Pipeline completed in {duration:.2f} seconds.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
|
84 |
if __name__ == "__main__":
|
85 |
-
|
86 |
-
parser
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import argparse
|
2 |
+
import gradio as gr
|
3 |
import json
|
4 |
import logging
|
5 |
import sys
|
6 |
import time
|
7 |
+
from pathlib import Path
|
8 |
|
9 |
from code_summarizer import (
|
10 |
clone_repo,
|
|
|
13 |
get_summaries_by_repo,
|
14 |
is_firestore_available
|
15 |
)
|
|
|
16 |
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED
|
17 |
|
18 |
+
logging.basicConfig(
|
19 |
+
level=logging.INFO,
|
20 |
+
format='%(asctime)s - %(levelname)s - [%(name)s] %(message)s'
|
21 |
+
)
|
22 |
log = logging.getLogger(__name__)
|
23 |
|
24 |
+
REPO_CLONE_DIR_CLI = "cloned_repo_cli"
|
25 |
+
REPO_CLONE_DIR_GRADIO = "cloned_repo_gradio"
|
26 |
OUTPUT_DIR = Path("outputs")
|
27 |
OUTPUT_FILE = OUTPUT_DIR / "summaries.json"
|
28 |
|
29 |
+
def format_summaries_for_display(summaries: list) -> str:
|
30 |
+
if not summaries: return "No summaries generated."
|
31 |
+
limit = 5
|
32 |
+
output = f"Found {len(summaries)} functions.\n"
|
33 |
+
output += f"Firestore: {'Yes' if is_firestore_available() else 'No'}\n---\n"
|
34 |
+
for i, summary in enumerate(summaries[:limit]):
|
35 |
+
output += f"File: {summary.get('file_path', '?')}\nLang: {summary.get('language', '?')}\n"
|
36 |
+
output += f"Summary: {summary.get('summary', '?')}\n"
|
37 |
+
output += f"Embedding: {'Yes' if 'embedding' in summary else 'No'}\n---\n"
|
38 |
+
if len(summaries) > limit:
|
39 |
+
output += f"... and {len(summaries) - limit} more."
|
40 |
+
return output
|
41 |
+
|
42 |
+
def summarize_from_url(repo_url: str):
|
43 |
+
"""Gradio action: Clones, summarizes, uploads, yields status updates."""
|
44 |
+
log.info(f"Gradio request for URL: {repo_url}")
|
45 |
+
if not repo_url or not repo_url.startswith("https"):
|
46 |
+
yield "❌ Invalid HTTPS GitHub URL."
|
47 |
+
return
|
48 |
+
|
49 |
+
if not SUMMARIZER_LOADED:
|
50 |
+
yield "❌ Summarizer Model Not Loaded. Cannot proceed."
|
51 |
+
log.error("Gradio: Summarizer model not loaded.")
|
52 |
+
return
|
53 |
+
|
54 |
+
firestore_ready = is_firestore_available()
|
55 |
+
if not firestore_ready:
|
56 |
+
log.warning("Gradio: Firebase is not available.")
|
57 |
+
|
58 |
+
yield "⏳ Cloning repository..."
|
59 |
+
clone_dir_path = Path(REPO_CLONE_DIR_GRADIO)
|
60 |
+
if not clone_repo(repo_url, str(clone_dir_path)):
|
61 |
+
yield "❌ Failed to clone repo."
|
62 |
+
log.error(f"Gradio: Failed to clone {repo_url}")
|
63 |
+
return
|
64 |
+
|
65 |
+
yield f"⏳ Summarizing code (using {summarizer_device})..."
|
66 |
+
summaries = summarize_repo(clone_dir_path, repo_url)
|
67 |
+
if not summaries:
|
68 |
+
yield "⚠️ Repo cloned, but no functions found."
|
69 |
+
log.warning(f"Gradio: No functions found in {repo_url}")
|
70 |
+
return
|
71 |
+
|
72 |
+
status = f"✅ Summarized {len(summaries)} functions."
|
73 |
+
yield status + " Uploading to Firebase..."
|
74 |
+
|
75 |
+
upload_count = 0
|
76 |
+
if firestore_ready:
|
77 |
+
for summary in summaries:
|
78 |
+
try:
|
79 |
+
upload_summary_to_firebase(summary)
|
80 |
+
upload_count += 1
|
81 |
+
except Exception as e:
|
82 |
+
log.error(f"Gradio: Firebase upload error: {e}")
|
83 |
+
status += f" Uploaded {upload_count} to Firebase."
|
84 |
+
log.info(f"Gradio: Uploaded {upload_count} summaries for {repo_url}")
|
85 |
+
yield status + "\n---\n" + format_summaries_for_display(summaries)
|
86 |
+
else:
|
87 |
+
status += " Firebase unavailable, skipping upload."
|
88 |
+
log.warning(f"Gradio: Skipped Firebase upload for {repo_url}")
|
89 |
+
yield status + "\n---\n" + format_summaries_for_display(summaries)
|
90 |
+
|
91 |
+
def perform_web_search(query: str):
|
92 |
+
"""Gradio action: Placeholder for web search."""
|
93 |
+
log.info(f"Gradio: Web search placeholder: {query}")
|
94 |
+
# Placeholder - Replace with actual search implementation
|
95 |
+
return f"🔍 Web search (placeholder) for: '{query}'"
|
96 |
+
|
97 |
def run_pipeline(repo_url: str, skip_existing: bool = False, save_local: bool = True):
|
98 |
+
"""CLI action: Runs the full pipeline."""
|
99 |
start_time = time.time()
|
100 |
+
log.info(f"CLI: Pipeline starting for: {repo_url}")
|
101 |
|
102 |
if not SUMMARIZER_LOADED:
|
103 |
+
log.error("CLI: Summarizer Model Not Loaded. Exiting.")
|
104 |
sys.exit(1)
|
105 |
|
106 |
firestore_ready = is_firestore_available()
|
107 |
if not firestore_ready:
|
108 |
+
log.warning("CLI: Firebase is not available. Uploads/Checks will be skipped.")
|
109 |
|
110 |
if skip_existing and firestore_ready:
|
111 |
+
log.info("CLI: Checking for existing summaries...")
|
112 |
if get_summaries_by_repo(repo_url):
|
113 |
+
log.warning("CLI: Skipping. Found existing summaries in Firebase.")
|
114 |
return
|
115 |
|
116 |
+
log.info("CLI: Cloning repository...")
|
117 |
+
clone_dir_path = Path(REPO_CLONE_DIR_CLI)
|
118 |
if not clone_repo(repo_url, str(clone_dir_path)):
|
119 |
+
log.error("CLI: Repo cloning failed. Exiting.")
|
120 |
sys.exit(1)
|
121 |
|
122 |
+
log.info(f"CLI: Running summarization (device: {summarizer_device})...")
|
123 |
summaries = summarize_repo(clone_dir_path, repo_url)
|
124 |
if not summaries:
|
125 |
+
log.warning("CLI: No functions found or summarization failed.")
|
126 |
return
|
127 |
|
128 |
+
log.info(f"CLI: Summarization complete. Found {len(summaries)} functions.")
|
129 |
|
130 |
if firestore_ready:
|
131 |
+
log.info(f"CLI: Uploading {len(summaries)} summaries to Firebase...")
|
132 |
upload_count = 0
|
133 |
for i, summary in enumerate(summaries):
|
134 |
upload_summary_to_firebase(summary)
|
135 |
upload_count +=1
|
136 |
if (i + 1) % 100 == 0:
|
137 |
+
log.info(f"CLI: Uploaded {i+1}/{len(summaries)}...")
|
138 |
+
log.info(f"CLI: Finished uploading {upload_count} summaries.")
|
139 |
else:
|
140 |
+
log.info("CLI: Skipping Firebase upload.")
|
141 |
|
142 |
if save_local:
|
143 |
+
log.info(f"CLI: Saving summaries locally to {OUTPUT_FILE}...")
|
144 |
try:
|
145 |
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
146 |
with open(OUTPUT_FILE, "w", encoding='utf-8') as f:
|
147 |
json.dump(summaries, f, indent=2, default=str)
|
148 |
+
log.info(f"CLI: Saved local backup to {OUTPUT_FILE}")
|
149 |
except Exception as e:
|
150 |
+
log.error(f"CLI: Failed to save local backup: {e}", exc_info=True)
|
151 |
|
152 |
duration = time.time() - start_time
|
153 |
+
log.info(f"CLI: ✅ Pipeline completed in {duration:.2f} seconds.")
|
154 |
+
|
155 |
+
if not SUMMARIZER_LOADED:
|
156 |
+
log.error("Summarizer model failed to load. Gradio interface may be limited or fail.")
|
157 |
+
if not is_firestore_available():
|
158 |
+
log.warning("Firebase is not available. Upload/check functionality will be disabled in Gradio interface.")
|
159 |
+
|
160 |
+
with gr.Blocks(title="Code Summarizer", theme=gr.themes.Soft()) as demo:
|
161 |
+
gr.Markdown("# π Code Summarizer & Search")
|
162 |
+
|
163 |
+
with gr.Tab("Repo Summarizer"):
|
164 |
+
repo_url_input = gr.Textbox(label="GitHub Repo URL", placeholder="https://github.com/user/repo")
|
165 |
+
summarize_button = gr.Button("Summarize & Upload", variant="primary")
|
166 |
+
status_output = gr.Textbox(label="Status / Output", lines=10, interactive=False)
|
167 |
+
summarize_button.click(fn=summarize_from_url, inputs=repo_url_input, outputs=status_output)
|
168 |
+
|
169 |
+
with gr.Tab("Web Code Search (Placeholder)"):
|
170 |
+
search_query_input = gr.Textbox(label="Search Query", placeholder="e.g., binary search tree cpp")
|
171 |
+
search_button = gr.Button("Search Web", variant="secondary")
|
172 |
+
search_output_display = gr.Textbox(label="Web Search Results", lines=5, interactive=False)
|
173 |
+
search_button.click(fn=perform_web_search, inputs=search_query_input, outputs=search_output_display)
|
174 |
|
175 |
|
176 |
if __name__ == "__main__":
|
177 |
+
|
178 |
+
parser = argparse.ArgumentParser(
|
179 |
+
description="Code Summarizer CLI.",
|
180 |
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
181 |
+
)
|
182 |
+
parser.add_argument(
|
183 |
+
"--url",
|
184 |
+
required=True,
|
185 |
+
help="HTTPS URL of the public GitHub repository."
|
186 |
+
)
|
187 |
+
parser.add_argument(
|
188 |
+
"--skip_existing",
|
189 |
+
action="store_true",
|
190 |
+
help="Skip if repo already summarized in Firebase."
|
191 |
+
)
|
192 |
+
parser.add_argument(
|
193 |
+
"--no_save",
|
194 |
+
action="store_true",
|
195 |
+
help="Disable saving local summaries.json."
|
196 |
+
)
|
197 |
+
|
198 |
+
try:
|
199 |
+
args = parser.parse_args()
|
200 |
+
log.info("Running in CLI mode.")
|
201 |
+
run_pipeline(
|
202 |
+
repo_url=args.url,
|
203 |
+
skip_existing=args.skip_existing,
|
204 |
+
save_local=not args.no_save
|
205 |
+
)
|
206 |
+
except SystemExit as e:
|
207 |
+
# Exit triggered by argparse on error (e.g., missing --url)
|
208 |
+
if e.code != 0: # Don't log error for --help etc.
|
209 |
+
log.error(f"Argument parsing error (Exit Code: {e.code}). Ensure --url is provided for CLI mode.")
|
210 |
+
sys.exit(e.code)
|
211 |
+
|
212 |
+
demo.launch()
|
interface.py
DELETED
@@ -1,100 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from pathlib import Path
|
3 |
-
import logging
|
4 |
-
|
5 |
-
from code_summarizer import (
|
6 |
-
clone_repo,
|
7 |
-
summarize_repo,
|
8 |
-
upload_summary_to_firebase,
|
9 |
-
is_firestore_available
|
10 |
-
)
|
11 |
-
# Import device/model status separately
|
12 |
-
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED
|
13 |
-
|
14 |
-
log = logging.getLogger(__name__)
|
15 |
-
|
16 |
-
REPO_CLONE_DIR = "cloned_repo_gradio"
|
17 |
-
|
18 |
-
def format_summaries_for_display(summaries: list) -> str:
|
19 |
-
if not summaries: return "No summaries generated."
|
20 |
-
limit = 5
|
21 |
-
output = f"✅ Found {len(summaries)} functions.\n"
|
22 |
-
output += f"Firestore available: {'Yes' if is_firestore_available() else 'No'}\n---\n"
|
23 |
-
for i, summary in enumerate(summaries[:limit]):
|
24 |
-
output += f"File: {summary.get('file_path', '?')}\nLang: {summary.get('language', '?')}\n"
|
25 |
-
output += f"Summary: {summary.get('summary', '?')}\n"
|
26 |
-
output += f"Embedding: {'Yes' if 'embedding' in summary else 'No'}\n---\n"
|
27 |
-
if len(summaries) > limit:
|
28 |
-
output += f"... and {len(summaries) - limit} more."
|
29 |
-
return output
|
30 |
-
|
31 |
-
def summarize_from_url(repo_url: str):
|
32 |
-
if not repo_url or not repo_url.startswith("https"):
|
33 |
-
yield "❌ Invalid HTTPS GitHub URL."
|
34 |
-
return
|
35 |
-
|
36 |
-
if not SUMMARIZER_LOADED:
|
37 |
-
yield "❌ Summarizer Model Not Loaded. Cannot proceed."
|
38 |
-
return
|
39 |
-
|
40 |
-
yield "⏳ Cloning repository..."
|
41 |
-
clone_dir_path = Path(REPO_CLONE_DIR)
|
42 |
-
if not clone_repo(repo_url, str(clone_dir_path)):
|
43 |
-
yield "❌ Failed to clone repo."
|
44 |
-
return
|
45 |
-
|
46 |
-
yield f"⏳ Summarizing code (using {summarizer_device})..."
|
47 |
-
summaries = summarize_repo(clone_dir_path, repo_url)
|
48 |
-
if not summaries:
|
49 |
-
yield "⚠️ Repo cloned, but no functions found."
|
50 |
-
return
|
51 |
-
|
52 |
-
status = f"✅ Summarized {len(summaries)} functions."
|
53 |
-
yield status + " Uploading to Firebase..."
|
54 |
-
|
55 |
-
upload_count = 0
|
56 |
-
if is_firestore_available():
|
57 |
-
for summary in summaries:
|
58 |
-
try:
|
59 |
-
upload_summary_to_firebase(summary)
|
60 |
-
upload_count += 1
|
61 |
-
except Exception as e:
|
62 |
-
log.error(f"Gradio UI: Firebase upload error: {e}")
|
63 |
-
status += f" Uploaded {upload_count} to Firebase."
|
64 |
-
yield status + "\n---\n" + format_summaries_for_display(summaries)
|
65 |
-
else:
|
66 |
-
status += " Firebase unavailable, skipping upload."
|
67 |
-
yield status + "\n---\n" + format_summaries_for_display(summaries)
|
68 |
-
|
69 |
-
def perform_web_search(query: str):
|
70 |
-
# Placeholder - Replace with actual search implementation
|
71 |
-
return f"🔍 Web search (placeholder) for: '{query}'"
|
72 |
-
|
73 |
-
def launch_interface():
|
74 |
-
with gr.Blocks(title="Code Summarizer", theme=gr.themes.Soft()) as demo:
|
75 |
-
gr.Markdown("# π Code Summarizer & Search")
|
76 |
-
|
77 |
-
with gr.Tab("Repo Summarizer"):
|
78 |
-
repo_url_input = gr.Textbox(label="GitHub Repo URL", placeholder="https://github.com/user/repo")
|
79 |
-
summarize_button = gr.Button("Summarize & Upload", variant="primary")
|
80 |
-
status_output = gr.Textbox(label="Status / Output", lines=10, interactive=False)
|
81 |
-
summarize_button.click(fn=summarize_from_url, inputs=repo_url_input, outputs=status_output)
|
82 |
-
|
83 |
-
with gr.Tab("Web Code Search (Placeholder)"):
|
84 |
-
search_query_input = gr.Textbox(label="Search Query", placeholder="e.g., binary search tree cpp")
|
85 |
-
search_button = gr.Button("Search Web", variant="secondary")
|
86 |
-
search_output_display = gr.Textbox(label="Web Search Results", lines=5, interactive=False)
|
87 |
-
search_button.click(fn=perform_web_search, inputs=search_query_input, outputs=search_output_display)
|
88 |
-
|
89 |
-
log.info("Launching Gradio interface...")
|
90 |
-
demo.launch()
|
91 |
-
|
92 |
-
if __name__ == "__main__":
|
93 |
-
# Basic logging setup for the interface if run directly
|
94 |
-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [Interface] %(message)s')
|
95 |
-
if not SUMMARIZER_LOADED:
|
96 |
-
log.error("Summarizer model failed to load. Interface functionality will be limited.")
|
97 |
-
# Add this check for Firebase as well, since the interface relies on it
|
98 |
-
if not is_firestore_available():
|
99 |
-
log.warning("Firebase is not available. Upload/check functionality will be disabled.")
|
100 |
-
launch_interface()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|