Spaces:

technicolor
/

InteractiveSurvey

Sleeping

technicolor committed 5 days ago

Commit

0a8d09f

1 Parent(s): 806e2f5

update

Files changed (3) hide show

src/demo/asg_retriever.py CHANGED Viewed

@@ -164,6 +164,8 @@ def process_pdf(file_path: str, survey_id: str, embedder: HuggingFaceEmbeddings,
     # Load and split the PDF
     split_start_time = time.time()
     splitters = TextSplitting().mineru_recursive_splitter(file_path, survey_id, mode)
     documents_list = [document.page_content for document in splitters]
     for i in range(len(documents_list)):

     # Load and split the PDF
     split_start_time = time.time()
     splitters = TextSplitting().mineru_recursive_splitter(file_path, survey_id, mode)
+    if not splitters:
+        raise ValueError(f"Failed to load or split PDF: {file_path}")
     documents_list = [document.page_content for document in splitters]
     for i in range(len(documents_list)):

src/demo/asg_splitter.py CHANGED Viewed

@@ -4,6 +4,9 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 class TextSplitting:
     def mineru_recursive_splitter(self, file_path, survey_id, mode):
         docs = DocumentLoading().load_pdf(file_path, survey_id, mode)
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=400,
             chunk_overlap=30,

 class TextSplitting:
     def mineru_recursive_splitter(self, file_path, survey_id, mode):
         docs = DocumentLoading().load_pdf(file_path, survey_id, mode)
+        if docs is None:
+            # 若加载失败，则返回空列表，调用方需处理
+            return []
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=400,
             chunk_overlap=30,

src/demo/views.py CHANGED Viewed

@@ -313,7 +313,9 @@ def PosRank_get_top5_ngrams(input_pd):
 def process_file(file_name, survey_id, mode):
     global embedder
-    result = process_pdf(file_name, survey_id, embedder, mode)
     collection_name = result[0]
     name = result[-1]
     return collection_name, name

 def process_file(file_name, survey_id, mode):
     global embedder
+    # 如果 file_name 不是绝对路径，则拼接 MEDIA_ROOT
+    abs_file_path = file_name if os.path.isabs(file_name) else os.path.join(settings.MEDIA_ROOT, file_name)
+    result = process_pdf(abs_file_path, survey_id, embedder, mode)
     collection_name = result[0]
     name = result[-1]
     return collection_name, name