Omar ID EL MOUMEN
committed on
Commit · b215aa8 · 1 Parent(s): 380a44c

Add workshop compatibility + updated index

- app.py +60 -47
- indexed_docs.json +2 -2
app.py
CHANGED
@@ -147,9 +147,10 @@ class TsgDocFinder:
 
     def save_indexer(self):
         """Save the updated index"""
+        self.last_indexer_date = today.strftime("%d/%m/%Y-%H:%M:%S")
         with open(self.indexer_file, "w", encoding="utf-8") as f:
             today = datetime.today()
-            output = {"docs": self.indexer, "last_indexed_date":
+            output = {"docs": self.indexer, "last_indexed_date": self.last_indexer_date}
             json.dump(output, f, indent=4, ensure_ascii=False)
 
     def get_workgroup(self, doc):
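For reference, the index file that save_indexer() maintains (indexed_docs.json, the second file in this commit) holds a flat document-to-URL map plus the timestamp added here. A minimal sketch of that shape, with a made-up document ID and a hypothetical 3GPP FTP URL:

import json
from datetime import datetime

# Illustrative entry only: one TDoc ID mapped to a hypothetical FTP location.
docs = {"SP-240123": "https://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_103/Docs/SP-240123.zip"}
stamp = datetime.today().strftime("%d/%m/%Y-%H:%M:%S")

# Same layout as save_indexer() writes out.
print(json.dumps({"docs": docs, "last_indexed_date": stamp}, indent=4, ensure_ascii=False))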
@@ -181,63 +182,75 @@ class TsgDocFinder:
             print(f"Error accessing {url}: {e}")
             return []
 
-    def search_document(self, doc_id: str, release
-        """Search for a specific document by its ID"""
+    def search_document(self, doc_id: str, release=None):
         original_id = doc_id
-
-        #
+
+        # 1. Look in the index (includes workshops if they were indexed)
         if original_id in self.indexer:
             return self.indexer[original_id]
-
         for doc in self.indexer:
             if doc.startswith(original_id):
                 return self.indexer[doc]
-
-        #
+
+        # 2. "Classic" live search (TSG/CT)
         main_tsg, workgroup, doc = self.get_workgroup(doc_id)
-        if
-        [... old lines 199-235: previous live-search implementation, removed ...]
+        if main_tsg:
+            wg_url = self.find_workgroup_url(main_tsg, workgroup)
+            if wg_url:
+                meeting_folders = self.get_docs_from_url(wg_url)
+                for folder in meeting_folders:
+                    meeting_url = f"{wg_url}/{folder}"
+                    meeting_contents = self.get_docs_from_url(meeting_url)
+                    key = "docs" if "docs" in [x.lower() for x in meeting_contents] else "tdocs" if "tdocs" in [x.lower() for x in meeting_contents] else None
+                    if key is not None:
+                        docs_url = f"{meeting_url}/{key}"
+                        files = self.get_docs_from_url(docs_url)
+                        for file in files:
+                            if doc in file.lower() or original_id in file:
+                                doc_url = f"{docs_url}/{file}"
+                                self.indexer[original_id] = doc_url
+                                return doc_url
+                        # ZIP subfolder
+                        if "zip" in [x for x in files]:
+                            zip_url = f"{docs_url}/zip"
+                            zip_files = self.get_docs_from_url(zip_url)
+                            for file in zip_files:
+                                if doc in file.lower() or original_id in file:
+                                    doc_url = f"{zip_url}/{file}"
+                                    self.indexer[original_id] = doc_url
+                                    self.save_indexer()
+                                    return doc_url
+
+        # 3. Last resort: try /ftp/workshop (live search)
+        workshop_url = f"{self.main_ftp_url}/workshop"
+        meetings = self.get_docs_from_url(workshop_url)
+        for meeting in meetings:
+            if meeting in ['./', '../']:
+                continue
+            meeting_url = f"{workshop_url}/{meeting}"
+            contents = self.get_docs_from_url(meeting_url)
+            for sub in contents:
+                if sub.lower() in ['docs', 'tdocs']:
+                    docs_url = f"{meeting_url}/{sub}"
+                    files = self.get_docs_from_url(docs_url)
+                    for file in files:
+                        if doc_id.lower() in file.lower() or original_id in file:
+                            doc_url = f"{docs_url}/{file}"
                             self.indexer[original_id] = doc_url
                             self.save_indexer()
                             return doc_url
-
+                    if "zip" in [x.lower() for x in files]:
+                        zip_url = f"{docs_url}/zip"
+                        zip_files = self.get_docs_from_url(zip_url)
+                        for file in zip_files:
+                            if doc_id.lower() in file.lower() or original_id in file:
+                                doc_url = f"{zip_url}/{file}"
+                                self.indexer[original_id] = doc_url
+                                self.save_indexer()
+                                return doc_url
+
         return f"Document {doc_id} not found"
+

 
 class SpecDocFinder:
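A quick usage sketch of the updated lookup, assuming app.py is importable as a module and that TsgDocFinder takes no constructor arguments (not shown in this diff); the document ID is illustrative:

from app import TsgDocFinder  # assumption: app.py is on the import path

# Hypothetical call: the ID is served from indexed_docs.json when already indexed,
# otherwise the finder falls back to the live TSG/CT search and finally to /ftp/workshop.
finder = TsgDocFinder()
result = finder.search_document("SP-240123")

if result.startswith("http"):
    print(f"Found: {result}")  # direct URL to the document
else:
    print(result)              # e.g. "Document SP-240123 not found"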
indexed_docs.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:dac006bf20aea05292515391d649398e0466e64a7a81c1b010a21c40cebb828e
+size 59739552
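Because indexed_docs.json is tracked with Git LFS, a fresh checkout only contains the pointer shown above until the object is fetched (e.g. with git lfs pull). A small sketch, assuming the {"docs": ..., "last_indexed_date": ...} layout written by save_indexer(), that tells the pointer apart from the real index:

import json

with open("indexed_docs.json", encoding="utf-8") as f:
    first_line = f.readline()
    if first_line.startswith("version https://git-lfs.github.com/spec/v1"):
        # Still an unresolved LFS pointer, not the actual JSON index.
        print("indexed_docs.json is an LFS pointer; run `git lfs pull` first")
    else:
        f.seek(0)
        index = json.load(f)
        print(f"{len(index['docs'])} documents indexed, last run: {index['last_indexed_date']}")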