Omar ID EL MOUMEN committed on
Commit
b215aa8
·
1 Parent(s): 380a44c

Add workshop compatibility + updated index

Browse files
Files changed (2) hide show
  1. app.py +60 -47
  2. indexed_docs.json +2 -2
app.py CHANGED
@@ -147,9 +147,10 @@ class TsgDocFinder:
147
 
148
  def save_indexer(self):
149
  """Save the updated index"""
 
150
  with open(self.indexer_file, "w", encoding="utf-8") as f:
151
  today = datetime.today()
152
- output = {"docs": self.indexer, "last_indexed_date": today.strftime("%d/%m/%Y-%H:%M:%S")}
153
  json.dump(output, f, indent=4, ensure_ascii=False)
154
 
155
  def get_workgroup(self, doc):
@@ -181,63 +182,75 @@ class TsgDocFinder:
181
  print(f"Error accessing {url}: {e}")
182
  return []
183
 
184
- def search_document(self, doc_id: str, release = None):
185
- """Search for a specific document by its ID"""
186
  original_id = doc_id
187
-
188
- # Check if already indexed
189
  if original_id in self.indexer:
190
  return self.indexer[original_id]
191
-
192
  for doc in self.indexer:
193
  if doc.startswith(original_id):
194
  return self.indexer[doc]
195
-
196
- # Parse the document ID
197
  main_tsg, workgroup, doc = self.get_workgroup(doc_id)
198
- if not main_tsg:
199
- return f"Could not parse document ID: {doc_id}"
200
-
201
- print(f"Searching for {original_id} (parsed as {doc}) in {main_tsg}/{workgroup}...")
202
-
203
- # Find the workgroup URL
204
- wg_url = self.find_workgroup_url(main_tsg, workgroup)
205
- if not wg_url:
206
- return f"Could not find workgroup for {doc_id}"
207
-
208
- # Search in the workgroup directories
209
- meeting_folders = self.get_docs_from_url(wg_url)
210
-
211
- for folder in meeting_folders:
212
- meeting_url = f"{wg_url}/{folder}"
213
- meeting_contents = self.get_docs_from_url(meeting_url)
214
- key = "docs" if "docs" in [x.lower() for x in meeting_contents] else "tdocs" if "tdocs" in [x.lower() for x in meeting_contents] else None
215
- if key is not None:
216
- docs_url = f"{meeting_url}/{key}"
217
- print(f"Checking {docs_url}...")
218
- files = self.get_docs_from_url(docs_url)
219
-
220
- # Check for the document in the main Docs folder
221
- for file in files:
222
- if doc in file.lower() or original_id in file:
223
- doc_url = f"{docs_url}/{file}"
224
- self.indexer[original_id] = doc_url
225
- return doc_url
226
-
227
- # Check in ZIP subfolder if it exists
228
- if "zip" in [x for x in files]:
229
- zip_url = f"{docs_url}/zip"
230
- print(f"Checking {zip_url}...")
231
- zip_files = self.get_docs_from_url(zip_url)
232
-
233
- for file in zip_files:
234
- if doc in file.lower() or original_id in file:
235
- doc_url = f"{zip_url}/{file}"
 
 
 
 
236
  self.indexer[original_id] = doc_url
237
  self.save_indexer()
238
  return doc_url
239
-
 
 
 
 
 
 
 
 
 
240
  return f"Document {doc_id} not found"
 
241
 
242
 
243
  class SpecDocFinder:
 
147
 
148
  def save_indexer(self):
149
  """Save the updated index"""
150
+ self.last_indexer_date = today.strftime("%d/%m/%Y-%H:%M:%S")
151
  with open(self.indexer_file, "w", encoding="utf-8") as f:
152
  today = datetime.today()
153
+ output = {"docs": self.indexer, "last_indexed_date": self.last_indexer_date}
154
  json.dump(output, f, indent=4, ensure_ascii=False)
155
 
156
  def get_workgroup(self, doc):
 
182
  print(f"Error accessing {url}: {e}")
183
  return []
184
 
185
+ def search_document(self, doc_id: str, release=None):
 
186
  original_id = doc_id
187
+
188
+ # 1. Chercher dans l'index (inclut workshops si tu as bien indexé)
189
  if original_id in self.indexer:
190
  return self.indexer[original_id]
 
191
  for doc in self.indexer:
192
  if doc.startswith(original_id):
193
  return self.indexer[doc]
194
+
195
+ # 2. Recherche live "classique" (TSG/CT)
196
  main_tsg, workgroup, doc = self.get_workgroup(doc_id)
197
+ if main_tsg:
198
+ wg_url = self.find_workgroup_url(main_tsg, workgroup)
199
+ if wg_url:
200
+ meeting_folders = self.get_docs_from_url(wg_url)
201
+ for folder in meeting_folders:
202
+ meeting_url = f"{wg_url}/{folder}"
203
+ meeting_contents = self.get_docs_from_url(meeting_url)
204
+ key = "docs" if "docs" in [x.lower() for x in meeting_contents] else "tdocs" if "tdocs" in [x.lower() for x in meeting_contents] else None
205
+ if key is not None:
206
+ docs_url = f"{meeting_url}/{key}"
207
+ files = self.get_docs_from_url(docs_url)
208
+ for file in files:
209
+ if doc in file.lower() or original_id in file:
210
+ doc_url = f"{docs_url}/{file}"
211
+ self.indexer[original_id] = doc_url
212
+ return doc_url
213
+ # ZIP subfolder
214
+ if "zip" in [x for x in files]:
215
+ zip_url = f"{docs_url}/zip"
216
+ zip_files = self.get_docs_from_url(zip_url)
217
+ for file in zip_files:
218
+ if doc in file.lower() or original_id in file:
219
+ doc_url = f"{zip_url}/{file}"
220
+ self.indexer[original_id] = doc_url
221
+ self.save_indexer()
222
+ return doc_url
223
+
224
+ # 3. Dernier recours : tenter dans /ftp/workshop (recherche live)
225
+ workshop_url = f"{self.main_ftp_url}/workshop"
226
+ meetings = self.get_docs_from_url(workshop_url)
227
+ for meeting in meetings:
228
+ if meeting in ['./', '../']:
229
+ continue
230
+ meeting_url = f"{workshop_url}/{meeting}"
231
+ contents = self.get_docs_from_url(meeting_url)
232
+ for sub in contents:
233
+ if sub.lower() in ['docs', 'tdocs']:
234
+ docs_url = f"{meeting_url}/{sub}"
235
+ files = self.get_docs_from_url(docs_url)
236
+ for file in files:
237
+ if doc_id.lower() in file.lower() or original_id in file:
238
+ doc_url = f"{docs_url}/{file}"
239
  self.indexer[original_id] = doc_url
240
  self.save_indexer()
241
  return doc_url
242
+ if "zip" in [x.lower() for x in files]:
243
+ zip_url = f"{docs_url}/zip"
244
+ zip_files = self.get_docs_from_url(zip_url)
245
+ for file in zip_files:
246
+ if doc_id.lower() in file.lower() or original_id in file:
247
+ doc_url = f"{zip_url}/{file}"
248
+ self.indexer[original_id] = doc_url
249
+ self.save_indexer()
250
+ return doc_url
251
+
252
  return f"Document {doc_id} not found"
253
+
254
 
255
 
256
  class SpecDocFinder:
indexed_docs.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:880897e372abb4fbb1d9005d2c0f96a773eab3d69d09318be136b2527215a24a
3
- size 58487700
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dac006bf20aea05292515391d649398e0466e64a7a81c1b010a21c40cebb828e
3
+ size 59739552