Omar ID EL MOUMEN committed on
Commit
f74f129
·
1 Parent(s): 7a82f6b
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
  title: 3GPPIndexers
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 5.29.0
8
  app_file: app.py
9
- pinned: false
10
  license: gpl-3.0
11
  short_description: A Gradio app for indexing documents and specifications
12
  ---
 
1
  ---
2
  title: 3GPPIndexers
3
+ emoji: ⚙️
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.29.0
8
  app_file: app.py
9
+ pinned: true
10
  license: gpl-3.0
11
  short_description: A Gradio app for indexing documents and specifications
12
  ---
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import threading
3
+ import sys
4
+ import io
5
+ import time
6
+ from dotenv import load_dotenv
7
+ import os
8
+ from contextlib import redirect_stdout
9
+ from git import Repo
10
+ from huggingface_hub import Repository, HfApi
11
+
12
# Load variables from a .env file (python-dotenv) before any setting is read.
load_dotenv()

# == PATHS and SETTINGS ==
# Indexer scripts that the UI runs on demand.
DOC_INDEXER = "indexer_multi.py"
SPEC_INDEXER = "spec_indexer_multi.py"

# JSON index files that are read for the stats and committed after a re-index.
DOC_INDEX_FILE = "indexed_docs.json"
SPEC_INDEX_FILE = "indexed_specifications.json"

# Git working copy used for commits; defaults to the current directory
# (replace with an absolute path to the repo if needed).
GIT_REPO_PATH = os.path.abspath(os.curdir)

# Hugging Face repository that receives the index files.
HF_REPO_ID = "OrganizedProgrammers/3GPPDocFinder"

# Access token, expected in the HF_TOKEN environment variable; None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")
22
+
23
+ # == Helpers ==
24
def run_python_module(module_path):
    """
    Run a Python script as ``__main__`` and yield everything it printed.

    The script is executed in-process with ``runpy.run_path`` while stdout is
    redirected to an in-memory buffer. The captured text is yielded exactly
    once, after the script has finished; output is NOT streamed while the
    script is still running.

    Args:
        module_path: Path of the script to execute.

    Yields:
        The captured stdout as a single string. If the script raised an
        exception, or exited with a non-zero status, a trailing "❌" line
        describing the failure is appended to that string.
    """
    import runpy

    buffer = io.StringIO()
    try:
        with redirect_stdout(buffer):
            runpy.run_path(module_path, run_name="__main__")
    except SystemExit as exc:
        # Scripts often finish with sys.exit(); a clean exit is not a failure
        # and must not propagate into (and abort) the caller.
        if exc.code not in (None, 0):
            buffer.write(f"\n❌ Exited with code {exc.code}\n")
    except Exception as exc:
        # Write into the buffer instead of print(): the stdout redirection
        # has already been undone at this point, so a print() would bypass
        # the captured log and the caller would never see the error.
        buffer.write(f"\n❌ Error: {exc}\n")

    output = buffer.getvalue()
    buffer.close()
    yield output
42
+
43
def commit_and_push_github(files, message):
    """
    Stage ``files`` in the repository at GIT_REPO_PATH, commit and push.

    Args:
        files: Paths handed to ``git add``.
        message: Commit message.

    A failing push is reported on stdout and otherwise ignored; errors raised
    while opening the repository, staging or committing propagate.
    """
    local_repo = Repo(GIT_REPO_PATH)
    local_repo.git.add(files)
    local_repo.index.commit(message)

    # Pushing is best-effort: the commit stays in the local repository even
    # when the remote cannot be reached.
    try:
        local_repo.git.push()
    except Exception as push_error:
        print(f"Git push failed: {push_error}")
51
+
52
def commit_and_push_hf(files, message):
    """
    Copy ``files`` into a local clone of HF_REPO_ID, then commit and push.

    The clone lives in ``<GIT_REPO_PATH>/hf_spaces``. It is created on first
    use; on later calls the existing clone is reused and pulled first.

    Args:
        files: Paths of the files to publish. Each is copied to the same
            relative path inside the clone.
        message: Commit message.

    Returns:
        A short human-readable status string: either a notice that the push
        was skipped because HF_TOKEN is not set, or a success message.

    Errors raised by the ``Repository`` git operations or by the file copy
    are not caught here and propagate to the caller.
    """
    # Imported once per call rather than on every loop iteration.
    import shutil

    if not HF_TOKEN:
        return "No HF_TOKEN provided. Skipping HuggingFace push."

    hf_repo_dir = os.path.join(GIT_REPO_PATH, "hf_spaces")

    # NOTE(review): huggingface_hub documents `Repository` as deprecated in
    # favour of the HfApi upload methods - consider migrating.
    # NOTE(review): if HF_REPO_ID refers to a Space, `clone_from` presumably
    # also needs repo_type="space" - confirm against the target repository.
    if not os.path.exists(hf_repo_dir):
        repo = Repository(
            local_dir=hf_repo_dir,
            clone_from=HF_REPO_ID,
            token=HF_TOKEN,
            skip_lfs_files=True,
        )
    else:
        repo = Repository(
            local_dir=hf_repo_dir,
            token=HF_TOKEN,
            skip_lfs_files=True,
        )
        # Bring the existing clone up to date before adding new files.
        repo.git_pull()

    # Copy artifact files to huggingface space
    for name in files:
        shutil.copy2(name, os.path.join(hf_repo_dir, name))

    repo.git_add(auto_lfs_track=True)
    repo.git_commit(message)
    repo.git_push()
    return "Pushed to HuggingFace."
79
+
80
def get_docs_stats():
    """
    Return the number of entries stored under ``"docs"`` in DOC_INDEX_FILE.

    Returns:
        ``len(data["docs"])`` for the parsed JSON file, or 0 when the file
        does not exist, is not valid JSON, or has no sized ``"docs"`` entry.
    """
    import json

    # Open directly instead of checking os.path.exists() first: this avoids
    # a check-then-open race and needs a single filesystem access.
    try:
        with open(DOC_INDEX_FILE, "r", encoding="utf-8") as f:
            return len(json.load(f)["docs"])
    except FileNotFoundError:
        return 0
    except (ValueError, KeyError, TypeError) as exc:
        # This function runs while the UI is being built, so unusable content
        # (e.g. a Git LFS pointer left in place of the real JSON) must not
        # prevent the app from starting. Report it and show 0.
        print(f"Could not read stats from {DOC_INDEX_FILE}: {exc}")
        return 0
87
+
88
def get_specs_stats():
    """
    Return the number of entries stored under ``"specs"`` in SPEC_INDEX_FILE.

    Returns:
        ``len(data["specs"])`` for the parsed JSON file, or 0 when the file
        does not exist, is not valid JSON, or has no sized ``"specs"`` entry.
    """
    import json

    # Open directly instead of checking os.path.exists() first: this avoids
    # a check-then-open race and needs a single filesystem access.
    try:
        with open(SPEC_INDEX_FILE, "r", encoding="utf-8") as f:
            return len(json.load(f)["specs"])
    except FileNotFoundError:
        return 0
    except (ValueError, KeyError, TypeError) as exc:
        # This function runs while the UI is being built, so unusable content
        # (e.g. a Git LFS pointer left in place of the real JSON) must not
        # prevent the app from starting. Report it and show 0.
        print(f"Could not read stats from {SPEC_INDEX_FILE}: {exc}")
        return 0
95
+
96
def get_scopes_stats():
    """
    Return the number of entries stored under ``"scopes"`` in SPEC_INDEX_FILE.

    Returns:
        ``len(data["scopes"])`` for the parsed JSON file, or 0 when the file
        does not exist, is not valid JSON, or has no sized ``"scopes"`` entry.
    """
    import json

    # NOTE(review): scopes are read from the specifications index, although
    # the repository also ships a separate indexed_scopes.json - confirm that
    # SPEC_INDEX_FILE is the intended source.
    try:
        with open(SPEC_INDEX_FILE, "r", encoding="utf-8") as f:
            return len(json.load(f)["scopes"])
    except FileNotFoundError:
        return 0
    except (ValueError, KeyError, TypeError) as exc:
        # This function runs while the UI is being built, so unusable content
        # (e.g. a Git LFS pointer left in place of the real JSON) must not
        # prevent the app from starting. Report it and show 0.
        print(f"Could not read stats from {SPEC_INDEX_FILE}: {exc}")
        return 0
103
+
104
+ # == Gradio Functions ==
105
+
106
def index_documents(progress=gr.Progress()):
    """
    Re-run the document indexer, then commit and push DOC_INDEX_FILE.

    Generator used as a Gradio event handler: every yielded string replaces
    the content of the output component.

    Args:
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio is told to inject a tracker for this handler.

    Yields:
        The indexer's captured output, then the same output followed by a
        final status line.
    """
    progress(0, desc="Starting document indexing…")
    log = ""
    for output in run_python_module(DOC_INDEXER):
        log = output
        progress(0.7, desc="Indexing in progress...")
        yield log

    commit_and_push_github([DOC_INDEX_FILE], "Update doc index via Gradio")
    # Keep the returned status: the Hugging Face push is skipped when no
    # token is configured, and the final message must not claim otherwise.
    hf_status = commit_and_push_hf([DOC_INDEX_FILE], "Update doc index via Gradio")
    progress(1, desc="Done!")
    yield f"{log}\n\n✅ Finished! Committed to Git. {hf_status}"
117
+
118
def index_specifications(progress=gr.Progress()):
    """
    Re-run the specification indexer, then commit and push SPEC_INDEX_FILE.

    Generator used as a Gradio event handler: every yielded string replaces
    the content of the output component.

    Args:
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio is told to inject a tracker for this handler.

    Yields:
        The indexer's captured output, then the same output followed by a
        final status line.
    """
    progress(0, desc="Starting specifications indexing…")
    log = ""
    for output in run_python_module(SPEC_INDEXER):
        log = output
        progress(0.7, desc="Indexing in progress...")
        yield log

    commit_and_push_github([SPEC_INDEX_FILE], "Update spec index via Gradio")
    # Keep the returned status: the Hugging Face push is skipped when no
    # token is configured, and the final message must not claim otherwise.
    hf_status = commit_and_push_hf([SPEC_INDEX_FILE], "Update spec index via Gradio")
    progress(1, desc="Done!")
    yield f"{log}\n\n✅ Finished! Committed to Git. {hf_status}"
129
+
130
def refresh_stats():
    """
    Collect the current document, specification and scope counts.

    Returns:
        A 3-tuple of strings ``(docs, specs, scopes)``, in the order of the
        output components this handler is wired to.
    """
    docs_total = get_docs_stats()
    specs_total = get_specs_stats()
    scopes_total = get_scopes_stats()
    return str(docs_total), str(specs_total), str(scopes_total)
132
+
133
+ # == UI ==
134
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📄 3GPP Indexers")

    # One column per statistic; the first two also carry a re-index button.
    with gr.Row():
        with gr.Column():
            doc_count = gr.Textbox(
                label="Docs Indexed",
                value=str(get_docs_stats()),
                interactive=False,
            )
            btn_docs = gr.Button("Re-index Documents", variant="primary")
        with gr.Column():
            spec_count = gr.Textbox(
                label="Specs Indexed",
                value=str(get_specs_stats()),
                interactive=False,
            )
            btn_specs = gr.Button("Re-index Specifications", variant="primary")
        with gr.Column():
            scope_count = gr.Textbox(
                label="Scopes Indexed",
                value=str(get_scopes_stats()),
                interactive=False,
            )

    # Shared log area that both indexing handlers write into.
    out = gr.Textbox(label="Output/Log", lines=13)
    refresh = gr.Button("🔄 Refresh Stats")

    # Event wiring.
    btn_docs.click(fn=index_documents, outputs=out)
    btn_specs.click(fn=index_specifications, outputs=out)
    refresh.click(fn=refresh_stats, outputs=[doc_count, spec_count, scope_count])

if __name__ == "__main__":
    demo.launch()
indexed_docs.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dac006bf20aea05292515391d649398e0466e64a7a81c1b010a21c40cebb828e
3
+ size 59739552
indexed_scopes.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6035f66c4f02a1961da415490a96a1e52ef137acfad06b6e9d9ba247abed5ef7
3
+ size 2870060
indexed_specifications.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8758708d306204eb8531717e9119a292d3b997d17f469191a573882329f933c
3
+ size 41158533
packages.txt ADDED
File without changes
requirements.txt ADDED
File without changes