import io
import os
import sys
import threading
import time
from contextlib import redirect_stdout

import gradio as gr
from dotenv import load_dotenv
from git import Repo
from huggingface_hub import HfApi, Repository

load_dotenv()

# == PATHS and SETTINGS ==
DOC_INDEXER = "indexer_multi.py"
SPEC_INDEXER = "spec_indexer_multi.py"
DOC_INDEX_FILE = "indexed_docs.json"
SPEC_INDEX_FILE = "indexed_specifications.json"
GIT_REPO_PATH = os.path.abspath(".")  # or absolute path to repo
HF_REPO_ID = "OrganizedProgrammers/3GPPDocFinder"
HF_TOKEN = os.environ.get("HF_TOKEN")  # set this as env var


# == Helpers ==
def run_python_module(module_path):
    """Run a Python script as ``__main__`` and yield its captured stdout.

    The script is executed in-process through ``runpy.run_path`` while
    ``sys.stdout`` is redirected into an in-memory buffer. Output is NOT
    streamed: one string is yielded after the script has finished or failed.

    Args:
        module_path: Path of the script to execute.

    Yields:
        The text the script printed, followed by an error line when the
        script raised an exception or exited with a non-zero status.
    """
    import runpy

    failure = ""
    with io.StringIO() as buffer:
        try:
            # redirect_stdout swaps sys.stdout for the whole process, so
            # prints from other threads during the run land in the buffer too.
            with redirect_stdout(buffer):
                runpy.run_path(module_path, run_name="__main__")
        except SystemExit as exc:
            # A script run as __main__ may finish with sys.exit(); that is a
            # BaseException and would otherwise escape to the caller.
            if exc.code not in (None, 0):
                failure = f"\n❌ Error: script exited with status {exc.code}"
        except Exception as e:
            failure = f"\n❌ Error: {e}"
        captured = buffer.getvalue()

    if failure:
        # Keep the message on the real stdout as well as in the yielded log.
        print(failure)
    yield captured + failure


def commit_and_push_github(files, message):
    """Stage *files*, commit them and push the repository at GIT_REPO_PATH.

    Args:
        files: Path or list of paths to stage.
        message: Commit message.

    Returns:
        A short status string describing the push result. A push failure is
        reported through the return value (and printed) instead of raised.
    """
    repo = Repo(GIT_REPO_PATH)
    repo.git.add(files)

    # Commit only when staging changed the index; committing unconditionally
    # would record an empty commit whenever the files are unchanged.
    if repo.is_dirty(index=True, working_tree=False, untracked_files=False):
        repo.index.commit(message)

    try:
        repo.git.push()
    except Exception as e:
        status = f"Git push failed: {e}"
        print(status)
        return status
    return "Pushed to Git remote."


def commit_and_push_hf(files, message):
    """Copy *files* into the local Hugging Face checkout, commit and push.

    The checkout lives in ``<GIT_REPO_PATH>/hf_spaces``. It is cloned from
    HF_REPO_ID when the directory does not exist yet, otherwise it is reused
    and pulled first.

    Args:
        files: Paths (relative to the working directory) to copy; each one is
            placed at the same relative path inside the checkout.
        message: Commit message.

    Returns:
        A status string; the push is skipped when HF_TOKEN is not set.
    """
    # NOTE(review): `Repository` is reported as deprecated in recent
    # huggingface_hub releases in favour of the HfApi upload methods --
    # confirm before upgrading the dependency.
    # NOTE(review): no repo_type is passed although the directory name
    # suggests a Space -- verify the clone target is the intended repo.
    import shutil

    if not HF_TOKEN:
        return "No HF_TOKEN provided. Skipping HuggingFace push."

    hf_repo_dir = os.path.join(GIT_REPO_PATH, "hf_spaces")
    if os.path.exists(hf_repo_dir):
        repo = Repository(
            local_dir=hf_repo_dir,
            token=HF_TOKEN,
            skip_lfs_files=True,
        )
        repo.git_pull()
    else:
        repo = Repository(
            local_dir=hf_repo_dir,
            clone_from=HF_REPO_ID,
            token=HF_TOKEN,
            skip_lfs_files=True,
        )

    # Copy artifact files to huggingface space
    for f in files:
        destination = os.path.join(hf_repo_dir, f)
        # A nested relative path needs its parent directory in the checkout.
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        shutil.copy2(f, destination)

    repo.git_add(auto_lfs_track=True)
    repo.git_commit(message)
    repo.git_push()
    return "Pushed to HuggingFace."
def _count_entries(index_file, key):
    """Return how many entries are stored under *key* in a JSON index file.

    Returns 0 when the file is missing, unreadable, not valid JSON, or does
    not hold a sized value under *key*, so the stats display never raises.
    """
    import json

    try:
        with open(index_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers
        # malformed JSON and decoding errors (e.g. a partially written file).
        return 0

    entries = data.get(key) if isinstance(data, dict) else None
    try:
        return len(entries)
    except TypeError:
        return 0


def get_docs_stats():
    """Return the number of entries under ``"docs"`` in DOC_INDEX_FILE."""
    return _count_entries(DOC_INDEX_FILE, "docs")


def get_specs_stats():
    """Return the number of entries under ``"specs"`` in SPEC_INDEX_FILE."""
    return _count_entries(SPEC_INDEX_FILE, "specs")


def get_scopes_stats():
    """Return the number of entries under ``"scopes"`` in SPEC_INDEX_FILE."""
    return _count_entries(SPEC_INDEX_FILE, "scopes")


# == Gradio Functions ==
def _run_indexer(script, index_file, commit_message, start_desc, progress):
    """Run an indexer script, publish its index file and yield log updates.

    Yields the captured script output, then a final log that lists what each
    publishing step reported, so a skipped or failed push is visible in the
    UI instead of being reported as a success.
    """
    progress(0, desc=start_desc)
    log = ""
    for output in run_python_module(script):
        log = output
        progress(0.7, desc="Indexing in progress...")
        yield log

    publishers = (
        ("GitHub", commit_and_push_github),
        ("HuggingFace", commit_and_push_hf),
    )
    failed = False
    report = []
    for label, publish in publishers:
        try:
            status = publish([index_file], commit_message)
        except Exception as exc:
            # One failing remote must not hide the other's result or the log.
            failed = True
            status = f"failed: {exc}"
        # A publisher may return nothing; say so rather than claim success.
        report.append(f"{label}: {status or 'finished (see server log for details)'}")

    progress(1, desc="Done!")
    if failed:
        headline = "⚠️ Indexing finished, but publishing failed."
    else:
        headline = "✅ Indexing finished."
    yield log + "\n\n" + "\n".join([headline, *report])


def index_documents(progress=gr.Progress()):
    """Re-index documents and publish DOC_INDEX_FILE, yielding log text."""
    yield from _run_indexer(
        DOC_INDEXER,
        DOC_INDEX_FILE,
        "Update doc index via Gradio",
        "Starting document indexing…",
        progress,
    )


def index_specifications(progress=gr.Progress()):
    """Re-index specifications and publish SPEC_INDEX_FILE, yielding log text."""
    yield from _run_indexer(
        SPEC_INDEXER,
        SPEC_INDEX_FILE,
        "Update spec index via Gradio",
        "Starting specifications indexing…",
        progress,
    )
def refresh_stats():
    """Return the current doc, spec and scope counts as strings.

    The three values map, in order, onto the "Docs Indexed", "Specs Indexed"
    and "Scopes Indexed" textboxes.
    """
    return str(get_docs_stats()), str(get_specs_stats()), str(get_scopes_stats())


# == UI ==
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📄 3GPP Indexers")
    with gr.Row():
        with gr.Column():
            doc_count = gr.Textbox(
                label="Docs Indexed",
                value=str(get_docs_stats()),
                interactive=False,
            )
            btn_docs = gr.Button("Re-index Documents", variant="primary")
        with gr.Column():
            spec_count = gr.Textbox(
                label="Specs Indexed",
                value=str(get_specs_stats()),
                interactive=False,
            )
            btn_specs = gr.Button("Re-index Specifications", variant="primary")
        with gr.Column():
            scope_count = gr.Textbox(
                label="Scopes Indexed",
                value=str(get_scopes_stats()),
                interactive=False,
            )
    out = gr.Textbox(label="Output/Log", lines=13)
    refresh = gr.Button("🔄 Refresh Stats")

    stat_boxes = [doc_count, spec_count, scope_count]
    # Chain a stats refresh after each indexing run so the counters do not
    # stay stale until the refresh button is pressed.
    btn_docs.click(index_documents, outputs=out).then(
        refresh_stats, outputs=stat_boxes
    )
    btn_specs.click(index_specifications, outputs=out).then(
        refresh_stats, outputs=stat_boxes
    )
    refresh.click(refresh_stats, outputs=stat_boxes)

if __name__ == "__main__":
    # NOTE(review): generator handlers and gr.Progress rely on the request
    # queue, which older Gradio releases do not enable by default -- confirm
    # against the pinned Gradio version.
    demo.queue()
    demo.launch()