Omar ID EL MOUMEN
committed on
Commit · b215aa8 · 1 Parent(s): 380a44c

Add workshop compatibility + updated index

- app.py +60 -47
- indexed_docs.json +2 -2
app.py
CHANGED
@@ -147,9 +147,10 @@ class TsgDocFinder:
 
     def save_indexer(self):
         """Save the updated index"""
+        self.last_indexer_date = today.strftime("%d/%m/%Y-%H:%M:%S")
         with open(self.indexer_file, "w", encoding="utf-8") as f:
             today = datetime.today()
-            output = {"docs": self.indexer, "last_indexed_date":
+            output = {"docs": self.indexer, "last_indexed_date": self.last_indexer_date}
             json.dump(output, f, indent=4, ensure_ascii=False)
 
     def get_workgroup(self, doc):
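For reference, the index file that save_indexer() maintains (indexed_docs.json, the second file in this commit) holds a flat document-to-URL map plus the timestamp added here. A minimal sketch of that shape, with a made-up document ID and a hypothetical 3GPP FTP URL:

import json
from datetime import datetime

# Illustrative entry only: one TDoc ID mapped to a hypothetical FTP location.
docs = {"SP-240123": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_103/Docs/SP-240123.zip"}
stamp = datetime.today().strftime("%d/%m/%Y-%H:%M:%S")

# Same layout as save_indexer() writes out.
print(json.dumps({"docs": docs, "last_indexed_date": stamp}, indent=4, ensure_ascii=False))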
@@ -181,63 +182,75 @@ class TsgDocFinder:
             print(f"Error accessing {url}: {e}")
             return []
 
-    def search_document(self, doc_id: str, release
-        """Search for a specific document by its ID"""
+    def search_document(self, doc_id: str, release=None):
         original_id = doc_id
-
-        #
+
+        # 1. Look in the index (includes workshops if they were indexed)
         if original_id in self.indexer:
             return self.indexer[original_id]
-
         for doc in self.indexer:
             if doc.startswith(original_id):
                 return self.indexer[doc]
-
-        #
+
+        # 2. "Classic" live search (TSG/CT)
         main_tsg, workgroup, doc = self.get_workgroup(doc_id)
-        if
-        [... old lines 199-235: previous live-search implementation, removed ...]
+        if main_tsg:
+            wg_url = self.find_workgroup_url(main_tsg, workgroup)
+            if wg_url:
+                meeting_folders = self.get_docs_from_url(wg_url)
+                for folder in meeting_folders:
+                    meeting_url = f"{wg_url}/{folder}"
+                    meeting_contents = self.get_docs_from_url(meeting_url)
+                    key = "docs" if "docs" in [x.lower() for x in meeting_contents] else "tdocs" if "tdocs" in [x.lower() for x in meeting_contents] else None
+                    if key is not None:
+                        docs_url = f"{meeting_url}/{key}"
+                        files = self.get_docs_from_url(docs_url)
+                        for file in files:
+                            if doc in file.lower() or original_id in file:
+                                doc_url = f"{docs_url}/{file}"
+                                self.indexer[original_id] = doc_url
+                                return doc_url
+                        # ZIP subfolder
+                        if "zip" in [x for x in files]:
+                            zip_url = f"{docs_url}/zip"
+                            zip_files = self.get_docs_from_url(zip_url)
+                            for file in zip_files:
+                                if doc in file.lower() or original_id in file:
+                                    doc_url = f"{zip_url}/{file}"
+                                    self.indexer[original_id] = doc_url
+                                    self.save_indexer()
+                                    return doc_url
+
+        # 3. Last resort: try /ftp/workshop (live search)
+        workshop_url = f"{self.main_ftp_url}/workshop"
+        meetings = self.get_docs_from_url(workshop_url)
+        for meeting in meetings:
+            if meeting in ['./', '../']:
+                continue
+            meeting_url = f"{workshop_url}/{meeting}"
+            contents = self.get_docs_from_url(meeting_url)
+            for sub in contents:
+                if sub.lower() in ['docs', 'tdocs']:
+                    docs_url = f"{meeting_url}/{sub}"
+                    files = self.get_docs_from_url(docs_url)
+                    for file in files:
+                        if doc_id.lower() in file.lower() or original_id in file:
+                            doc_url = f"{docs_url}/{file}"
                             self.indexer[original_id] = doc_url
                             self.save_indexer()
                             return doc_url
-
+                    if "zip" in [x.lower() for x in files]:
+                        zip_url = f"{docs_url}/zip"
+                        zip_files = self.get_docs_from_url(zip_url)
+                        for file in zip_files:
+                            if doc_id.lower() in file.lower() or original_id in file:
+                                doc_url = f"{zip_url}/{file}"
+                                self.indexer[original_id] = doc_url
+                                self.save_indexer()
+                                return doc_url
+
         return f"Document {doc_id} not found"
+

 
 class SpecDocFinder:
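A quick usage sketch of the updated lookup, assuming app.py is importable as a module and that TsgDocFinder takes no constructor arguments (not shown in this diff); the document ID is illustrative:

from app import TsgDocFinder  # assumption: app.py is on the import path

# Hypothetical call: the ID is served from indexed_docs.json when already indexed,
# otherwise the finder falls back to the live TSG/CT search and finally to /ftp/workshop.
finder = TsgDocFinder()
result = finder.search_document("SP-240123")

if result.startswith("http"):
    print(f"Found: {result}")  # direct URL to the document
else:
    print(result)              # e.g. "Document SP-240123 not found"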
indexed_docs.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:dac006bf20aea05292515391d649398e0466e64a7a81c1b010a21c40cebb828e
+size 59739552
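Because indexed_docs.json is tracked with Git LFS, a fresh checkout only contains the pointer shown above until the object is fetched (e.g. with git lfs pull). A small sketch, assuming the {"docs": ..., "last_indexed_date": ...} layout written by save_indexer(), that tells the pointer apart from the real index:

import json

with open("indexed_docs.json", encoding="utf-8") as f:
    first_line = f.readline()
    if first_line.startswith("version https://git-lfs.github.com/spec/v1"):
        # Still an unresolved LFS pointer, not the actual JSON index.
        print("indexed_docs.json is an LFS pointer; run `git lfs pull` first")
    else:
        f.seek(0)
        index = json.load(f)
        print(f"{len(index['docs'])} documents indexed, last run: {index['last_indexed_date']}")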