Spaces:

Marathon23
/

MBTIpersonality_cocktail_recommandation

Sleeping

App Files Files Community

Marathon23 committed on Nov 23, 2024

Commit

e910cb2

verified ·

1 Parent(s): 28b6c57

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -30

app.py CHANGED Viewed

@@ -10,49 +10,54 @@ from langchain.vectorstores import Chroma
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.chat_models import ChatOpenAI
 import shutil  # 用於文件複製
 # 獲取 OpenAI API 密鑰
 api_key = os.getenv("OPENAI_API_KEY")
 if not api_key:
     raise ValueError("未能獲取 OPENAI_API_KEY。請在 Hugging Face Spaces 的 Secrets 中設置它。")
 openai.api_key = api_key
-print("OpenAI API 密鑰已設置。")
 # 確保向量資料庫目錄存在且有寫入權限
 VECTORDB_DIR = os.path.abspath("./data")
 os.makedirs(VECTORDB_DIR, exist_ok=True)
 os.chmod(VECTORDB_DIR, 0o755)  # 設置適當的權限
-print(f"VECTORDB_DIR set to: {VECTORDB_DIR}")
 # 定義測試 PDF 加載器的函數
 def test_pdf_loader(file_path, loader_type='PyMuPDFLoader'):
-    print(f"Testing PDF loader ({loader_type}) with file: {file_path}")
     try:
         if loader_type == 'PyMuPDFLoader':
             loader = PyMuPDFLoader(file_path)
         elif loader_type == 'PyPDFLoader':
             loader = PyPDFLoader(file_path)
         else:
-            print(f"Unknown loader type: {loader_type}")
             return
         loaded_docs = loader.load()
         if loaded_docs:
-            print(f"Successfully loaded {file_path} with {len(loaded_docs)} documents.")
-            print(f"Document content (first 500 chars): {loaded_docs[0].page_content[:500]}")
         else:
-            print(f"No documents loaded from {file_path}.")
     except Exception as e:
-        print(f"Error loading {file_path} with {loader_type}: {e}")
 # 定義載入和處理 PDF 文件的函數
 def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
     documents = []
-    print("開始載入上傳的 PDF 文件。")
     for file_path in file_paths:
-        print(f"載入 PDF 文件: {file_path}")
         if not os.path.exists(file_path):
-            print(f"文件不存在: {file_path}")
             continue
         try:
             if loader_type == 'PyMuPDFLoader':
@@ -60,28 +65,28 @@ def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
             elif loader_type == 'PyPDFLoader':
                 loader = PyPDFLoader(file_path)
             else:
-                print(f"Unknown loader type: {loader_type}")
                 continue
             loaded_docs = loader.load()
             if loaded_docs:
-                print(f"載入 {file_path} 成功，包含 {len(loaded_docs)} 個文檔。")
                 # 打印第一個文檔的部分內容以確認
-                print(f"第一個文檔內容: {loaded_docs[0].page_content[:500]}")
                 documents.extend(loaded_docs)
             else:
-                print(f"載入 {file_path} 但未找到任何文檔。")
         except Exception as e:
-            print(f"載入 {file_path} 時出現錯誤: {e}")
     if not documents:
         raise ValueError("沒有找到任何 PDF 文件或 PDF 文件無法載入。")
     else:
-        print(f"總共載入了 {len(documents)} 個文檔。")
     # 分割長文本
     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
     documents = text_splitter.split_documents(documents)
-    print(f"分割後的文檔數量: {len(documents)}")
     if not documents:
         raise ValueError("分割後的文檔列表為空。請檢查 PDF 文件內容。")
@@ -89,7 +94,7 @@ def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
     # 初始化向量資料庫
     try:
         embeddings = OpenAIEmbeddings(openai_api_key=api_key)  # 直接傳遞 API 密鑰
-        print("初始化 OpenAIEmbeddings 成功。")
     except Exception as e:
         raise ValueError(f"初始化 OpenAIEmbeddings 時出現錯誤: {e}")
@@ -99,7 +104,7 @@ def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
             embedding=embeddings,
             persist_directory=VECTORDB_DIR
         )
-        print("初始化 Chroma 向量資料庫成功。")
     except Exception as e:
         raise ValueError(f"初始化 Chroma 向量資料庫時出現錯誤: {e}")
@@ -136,36 +141,48 @@ def handle_query(user_message, chat_history, vectordb):
         return chat_history
     except Exception as e:
         return chat_history + [("系統", f"出現錯誤: {str(e)}")]
 # 定義 Gradio 的處理函數
 def process_files(files, state):
-    print("process_files called")
     if files:
         try:
-            print(f"Received {len(files)} files")
             saved_file_paths = []
             for file in files:
-                print(f"Processing file: {file.name}")
                 save_path = os.path.join(VECTORDB_DIR, file.name)
                 with open(save_path, "wb") as f:
                     f.write(file.read())
-                # 確認文件是否存在
                 if os.path.exists(save_path):
-                    print(f"File successfully saved to: {save_path}")
                 else:
-                    print(f"Failed to save file to: {save_path}")
                 saved_file_paths.append(save_path)
-                # 測試 PDF 加載器
-                test_pdf_loader(save_path, loader_type='PyMuPDFLoader')
             # 列出 VECTORDB_DIR 中的所有文件
             saved_files = os.listdir(VECTORDB_DIR)
-            print(f"Files in VECTORDB_DIR ({VECTORDB_DIR}): {saved_files}")
             vectordb = load_and_process_documents(saved_file_paths, loader_type='PyMuPDFLoader')
             state['vectordb'] = vectordb
             return "PDF 文件已成功上傳並處理。您現在可以開始提問。", state
         except Exception as e:
-            print(f"Error in process_files: {e}")
             return f"處理文件時出現錯誤: {e}", state
     else:
         return "請上傳至少一個 PDF 文件。", state

 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.chat_models import ChatOpenAI
 import shutil  # 用於文件複製
+import logging
+# 設置日誌配置
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # 獲取 OpenAI API 密鑰
 api_key = os.getenv("OPENAI_API_KEY")
 if not api_key:
     raise ValueError("未能獲取 OPENAI_API_KEY。請在 Hugging Face Spaces 的 Secrets 中設置它。")
 openai.api_key = api_key
+logger.info("OpenAI API 密鑰已設置。")
 # 確保向量資料庫目錄存在且有寫入權限
 VECTORDB_DIR = os.path.abspath("./data")
 os.makedirs(VECTORDB_DIR, exist_ok=True)
 os.chmod(VECTORDB_DIR, 0o755)  # 設置適當的權限
+logger.info(f"VECTORDB_DIR set to: {VECTORDB_DIR}")
 # 定義測試 PDF 加載器的函數
 def test_pdf_loader(file_path, loader_type='PyMuPDFLoader'):
+    logger.info(f"Testing PDF loader ({loader_type}) with file: {file_path}")
     try:
         if loader_type == 'PyMuPDFLoader':
             loader = PyMuPDFLoader(file_path)
         elif loader_type == 'PyPDFLoader':
             loader = PyPDFLoader(file_path)
         else:
+            logger.error(f"Unknown loader type: {loader_type}")
             return
         loaded_docs = loader.load()
         if loaded_docs:
+            logger.info(f"Successfully loaded {file_path} with {len(loaded_docs)} documents.")
+            logger.info(f"Document content (first 500 chars): {loaded_docs[0].page_content[:500]}")
         else:
+            logger.error(f"No documents loaded from {file_path}.")
     except Exception as e:
+        logger.error(f"Error loading {file_path} with {loader_type}: {e}")
 # 定義載入和處理 PDF 文件的函數
 def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
     documents = []
+    logger.info("開始載入上傳的 PDF 文件。")
     for file_path in file_paths:
+        logger.info(f"載入 PDF 文件: {file_path}")
         if not os.path.exists(file_path):
+            logger.error(f"文件不存在: {file_path}")
             continue
         try:
             if loader_type == 'PyMuPDFLoader':
             elif loader_type == 'PyPDFLoader':
                 loader = PyPDFLoader(file_path)
             else:
+                logger.error(f"Unknown loader type: {loader_type}")
                 continue
             loaded_docs = loader.load()
             if loaded_docs:
+                logger.info(f"載入 {file_path} 成功，包含 {len(loaded_docs)} 個文檔。")
                 # 打印第一個文檔的部分內容以確認
+                logger.info(f"第一個文檔內容: {loaded_docs[0].page_content[:500]}")
                 documents.extend(loaded_docs)
             else:
+                logger.error(f"載入 {file_path} 但未找到任何文檔。")
         except Exception as e:
+            logger.error(f"載入 {file_path} 時出現錯誤: {e}")
     if not documents:
         raise ValueError("沒有找到任何 PDF 文件或 PDF 文件無法載入。")
     else:
+        logger.info(f"總共載入了 {len(documents)} 個文檔。")
     # 分割長文本
     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
     documents = text_splitter.split_documents(documents)
+    logger.info(f"分割後的文檔數量: {len(documents)}")
     if not documents:
         raise ValueError("分割後的文檔列表為空。請檢查 PDF 文件內容。")
     # 初始化向量資料庫
     try:
         embeddings = OpenAIEmbeddings(openai_api_key=api_key)  # 直接傳遞 API 密鑰
+        logger.info("初始化 OpenAIEmbeddings 成功。")
     except Exception as e:
         raise ValueError(f"初始化 OpenAIEmbeddings 時出現錯誤: {e}")
             embedding=embeddings,
             persist_directory=VECTORDB_DIR
         )
+        logger.info("初始化 Chroma 向量資料庫成功。")
     except Exception as e:
         raise ValueError(f"初始化 Chroma 向量資料庫時出現錯誤: {e}")
         return chat_history
     except Exception as e:
+        logger.error(f"Error in handle_query: {e}")
         return chat_history + [("系統", f"出現錯誤: {str(e)}")]
 # 定義 Gradio 的處理函數
 def process_files(files, state):
+    logger.info("process_files called")
     if files:
         try:
+            logger.info(f"Received {len(files)} files")
             saved_file_paths = []
             for file in files:
+                logger.info(f"Processing file: {file.name}")
                 save_path = os.path.join(VECTORDB_DIR, file.name)
                 with open(save_path, "wb") as f:
                     f.write(file.read())
+                # 確認文件是否存在並檢查文件大小
                 if os.path.exists(save_path):
+                    file_size = os.path.getsize(save_path)
+                    if file_size > 0:
+                        logger.info(f"File successfully saved to: {save_path} (Size: {file_size} bytes)")
+                    else:
+                        logger.error(f"File saved to {save_path} is empty.")
+                        raise ValueError(f"上傳的文件 {file.name} 為空。")
                 else:
+                    logger.error(f"Failed to save file to: {save_path}")
+                    raise FileNotFoundError(f"無法保存文件到 {save_path}")
                 saved_file_paths.append(save_path)
+                # 測試 PDF 加載器，先用 PyMuPDFLoader，再用 PyPDFLoader
+                try:
+                    test_pdf_loader(save_path, loader_type='PyMuPDFLoader')
+                except Exception as e:
+                    logger.error(f"PyMuPDFLoader failed: {e}")
+                    logger.info("Attempting to load with PyPDFLoader...")
+                    test_pdf_loader(save_path, loader_type='PyPDFLoader')
             # 列出 VECTORDB_DIR 中的所有文件
             saved_files = os.listdir(VECTORDB_DIR)
+            logger.info(f"Files in VECTORDB_DIR ({VECTORDB_DIR}): {saved_files}")
             vectordb = load_and_process_documents(saved_file_paths, loader_type='PyMuPDFLoader')
             state['vectordb'] = vectordb
             return "PDF 文件已成功上傳並處理。您現在可以開始提問。", state
         except Exception as e:
+            logger.error(f"Error in process_files: {e}")
             return f"處理文件時出現錯誤: {e}", state
     else:
         return "請上傳至少一個 PDF 文件。", state