|
import gradio as gr |
|
import threading |
|
import sys |
|
import io |
|
import time |
|
from dotenv import load_dotenv |
|
import os |
|
from contextlib import redirect_stdout |
|
from git import Repo |
|
from huggingface_hub import Repository, HfApi |
|
|
|
# Load variables from a local .env file (python-dotenv) into the process
# environment before HF_TOKEN is read below.
load_dotenv()

# Scripts executed by the two "re-index" actions. The paths are relative, so
# they are resolved against the current working directory.
DOC_INDEXER = "indexer_multi.py"
SPEC_INDEXER = "spec_indexer_multi.py"

# JSON index files: read for the statistics shown in the UI and committed
# after each indexing run.
DOC_INDEX_FILE = "indexed_docs.json"
SPEC_INDEX_FILE = "indexed_specifications.json"

# Git working tree used for commits: the directory the app was started from.
GIT_REPO_PATH = os.path.abspath(".")

# Hugging Face repository that receives a copy of the index files.
HF_REPO_ID = "OrganizedProgrammers/3GPPDocFinder"

# Token for the Hugging Face push; None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
|
def run_python_module(module_path):
    """
    Run a Python script as ``__main__`` and yield everything it printed.

    The script is executed in-process with ``runpy.run_path`` while
    ``sys.stdout`` is redirected to an in-memory buffer. Output is NOT
    streamed: the generator yields exactly once, after the script has
    finished (or failed), with the complete captured text.

    Args:
        module_path: Path of the script to execute.

    Yields:
        The captured standard output. When the script raised an exception or
        exited with a non-zero status, a trailing ``❌ Error: ...`` line
        describing the failure is appended so the caller's log shows it.
    """
    import runpy

    failure = None
    with io.StringIO() as buffer:
        try:
            with redirect_stdout(buffer):
                runpy.run_path(module_path, run_name="__main__")
        except SystemExit as exit_request:
            # Scripts run as __main__ commonly end with sys.exit(); that must
            # not tear down the calling application. Only a non-zero status
            # (or an exit message) is reported as a failure.
            if exit_request.code not in (None, 0):
                failure = f"\n❌ Error: script exited with status {exit_request.code}"
        except Exception as e:
            failure = f"\n❌ Error: {e}"
        captured = buffer.getvalue()

    if failure is not None:
        # Keep the message on the real stdout (server console) and also put it
        # in the returned log; previously it never reached the caller.
        print(failure)
        captured += failure
    yield captured
|
|
|
def commit_and_push_github(files, message):
    """
    Stage ``files`` in the Git repository at ``GIT_REPO_PATH``, commit them
    with ``message`` and push.

    A failing push is reported on stdout and otherwise ignored; errors raised
    while opening the repository, staging or committing propagate to the
    caller.

    Args:
        files: Paths handed to ``git add``.
        message: Commit message.
    """
    working_tree = Repo(GIT_REPO_PATH)
    git_cli = working_tree.git

    git_cli.add(files)
    working_tree.index.commit(message)

    try:
        git_cli.push()
    except Exception as push_error:
        # Best effort: the commit stays local when the push fails.
        print(f"Git push failed: {push_error}")
|
|
|
def commit_and_push_hf(files, message):
    """
    Copy ``files`` into a local clone of the Hugging Face repository and push
    them as a single commit.

    The clone lives in ``<GIT_REPO_PATH>/hf_spaces``. When that directory does
    not exist yet it is cloned from ``HF_REPO_ID``; otherwise the existing
    checkout is reused and pulled first.

    Args:
        files: Paths of the files to publish. Each path is joined onto the
            clone directory to form the copy destination.
        message: Commit message.

    Returns:
        A short status string: a notice that the push was skipped because
        ``HF_TOKEN`` is not set, or a confirmation that the push was done.
    """
    import shutil

    if not HF_TOKEN:
        return "No HF_TOKEN provided. Skipping HuggingFace push."

    clone_dir = os.path.join(GIT_REPO_PATH, "hf_spaces")
    already_cloned = os.path.exists(clone_dir)

    repo_options = {
        "local_dir": clone_dir,
        "token": HF_TOKEN,
        "skip_lfs_files": True,
    }
    if not already_cloned:
        repo_options["clone_from"] = HF_REPO_ID
    hf_repo = Repository(**repo_options)

    if already_cloned:
        # A reused checkout is pulled before new files are copied in.
        hf_repo.git_pull()

    for source_path in files:
        shutil.copy2(source_path, os.path.join(clone_dir, source_path))

    hf_repo.git_add(auto_lfs_track=True)
    hf_repo.git_commit(message)
    hf_repo.git_push()
    return "Pushed to HuggingFace."
|
|
|
def get_docs_stats():
    """
    Return the number of entries stored under the ``"docs"`` key of
    ``DOC_INDEX_FILE``, or 0 when that file does not exist.
    """
    import json

    if not os.path.exists(DOC_INDEX_FILE):
        return 0

    with open(DOC_INDEX_FILE, 'r', encoding='utf-8') as index_file:
        index = json.load(index_file)
    return len(index["docs"])
|
|
|
def get_specs_stats():
    """
    Return the number of entries stored under the ``"specs"`` key of
    ``SPEC_INDEX_FILE``, or 0 when that file does not exist.
    """
    import json

    if not os.path.exists(SPEC_INDEX_FILE):
        return 0

    with open(SPEC_INDEX_FILE, 'r', encoding='utf-8') as index_file:
        index = json.load(index_file)
    return len(index["specs"])
|
|
|
def get_scopes_stats():
    """
    Return the number of entries stored under the ``"scopes"`` key of
    ``SPEC_INDEX_FILE``, or 0 when that file does not exist.
    """
    import json

    if not os.path.exists(SPEC_INDEX_FILE):
        return 0

    with open(SPEC_INDEX_FILE, 'r', encoding="utf-8") as index_file:
        index = json.load(index_file)
    return len(index['scopes'])
|
|
|
|
|
|
|
def index_documents(progress=gr.Progress()):
    """
    Gradio handler: re-run the document indexer and publish the new index.

    Runs ``DOC_INDEXER`` through ``run_python_module``, yielding every log
    snapshot it produces, then commits ``DOC_INDEX_FILE`` through
    ``commit_and_push_github`` and ``commit_and_push_hf``.

    Args:
        progress: Gradio progress tracker, used to update the progress bar.

    Yields:
        The indexer log, then the log followed by a completion line that
        includes the status reported by the Hugging Face push.
    """
    commit_message = "Update doc index via Gradio"

    progress(0, desc="Starting document indexing…")
    log = ""
    for log in run_python_module(DOC_INDEXER):
        progress(0.7, desc="Indexing in progress...")
        yield log

    commit_and_push_github([DOC_INDEX_FILE], commit_message)
    # The Hugging Face step reports whether it pushed or was skipped (no
    # token); surface that instead of unconditionally claiming a push.
    hf_status = commit_and_push_hf([DOC_INDEX_FILE], commit_message)
    progress(1, desc="Done!")

    summary = "✅ Finished! Committed to Git."
    if hf_status:
        summary = f"{summary} {hf_status}"
    yield f"{log}\n\n{summary}"
|
|
|
def index_specifications(progress=gr.Progress()):
    """
    Gradio handler: re-run the specification indexer and publish the new index.

    Runs ``SPEC_INDEXER`` through ``run_python_module``, yielding every log
    snapshot it produces, then commits ``SPEC_INDEX_FILE`` through
    ``commit_and_push_github`` and ``commit_and_push_hf``.

    Args:
        progress: Gradio progress tracker, used to update the progress bar.

    Yields:
        The indexer log, then the log followed by a completion line that
        includes the status reported by the Hugging Face push.
    """
    commit_message = "Update spec index via Gradio"

    progress(0, desc="Starting specifications indexing…")
    log = ""
    for log in run_python_module(SPEC_INDEXER):
        progress(0.7, desc="Indexing in progress...")
        yield log

    commit_and_push_github([SPEC_INDEX_FILE], commit_message)
    # The Hugging Face step reports whether it pushed or was skipped (no
    # token); surface that instead of unconditionally claiming a push.
    hf_status = commit_and_push_hf([SPEC_INDEX_FILE], commit_message)
    progress(1, desc="Done!")

    summary = "✅ Finished! Committed to Git."
    if hf_status:
        summary = f"{summary} {hf_status}"
    yield f"{log}\n\n{summary}"
|
|
|
def refresh_stats():
    """
    Re-read the index files and return the docs, specs and scopes counts as a
    tuple of strings, in that order.
    """
    counters = (get_docs_stats, get_specs_stats, get_scopes_stats)
    return tuple(str(counter()) for counter in counters)
|
|
|
|
|
# Admin UI: three read-only counters, two re-index buttons, a shared log box
# and a button that reloads the counters from the index files.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📄 3GPP Indexers")

    with gr.Row():
        with gr.Column():
            doc_count = gr.Textbox(
                value=str(get_docs_stats()),
                label="Docs Indexed",
                interactive=False,
            )
            btn_docs = gr.Button("Re-index Documents", variant="primary")
        with gr.Column():
            spec_count = gr.Textbox(
                value=str(get_specs_stats()),
                label="Specs Indexed",
                interactive=False,
            )
            btn_specs = gr.Button("Re-index Specifications", variant="primary")
        with gr.Column():
            scope_count = gr.Textbox(
                value=str(get_scopes_stats()),
                label="Scopes Indexed",
                interactive=False,
            )

    out = gr.Textbox(label="Output/Log", lines=13)
    refresh = gr.Button("🔄 Refresh Stats")

    # Both indexing handlers are generators; their yielded text goes to the
    # shared log box.
    btn_docs.click(fn=index_documents, outputs=out)
    btn_specs.click(fn=index_specifications, outputs=out)
    refresh.click(fn=refresh_stats, outputs=[doc_count, spec_count, scope_count])


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|