Spaces:

Roberta2024
/

Pdf_geminiAI

Sleeping

App Files Files Community

Roberta2024 committed on May 21

Commit

087ebab

verified ·

1 Parent(s): e41cbe6

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +59 -43

src/streamlit_app.py CHANGED Viewed

@@ -3,26 +3,28 @@ import PyPDF2
 import pandas as pd
 import os
 import google.generativeai as genai
-import csv
 from datetime import datetime
 import logging
 import sys
-import io
-import tempfile
 import base64
-# 設定日誌
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
-        logging.FileHandler("pdf_processing.log"),
         logging.StreamHandler(sys.stdout)
     ]
 )
 logger = logging.getLogger(__name__)
-# 設定頁面配置
 st.set_page_config(
     page_title="PDF處理與Gemini翻譯工具",
     page_icon="📄",
@@ -30,46 +32,54 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
-# 應用程式標題和介紹
 st.title("📄 PDF處理與Gemini翻譯工具")
 st.markdown("上傳PDF檔案，選擇要處理的頁面，讓Gemini解釋內容並翻譯成繁體中文。")
-# 側邊欄 - 設定區域
 with st.sidebar:
     st.header("設定")
-    # API金鑰輸入
     api_key = st.text_input(
         "Gemini API金鑰",
-        value="AIzaSyC3fbJVYQbINa8ztuOS5dFE4ud0I9jhy3o",  # 預設值，實際使用應從設定檔讀取
         type="password"
     )
-    # 上傳PDF檔案
     uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])
-    # 處理選項區塊
     with st.expander("處理選項", expanded=True):
-        # 初始化session_state
         if 'total_pages' not in st.session_state:
             st.session_state.total_pages = 0
         if 'page_content' not in st.session_state:
             st.session_state.page_content = {}
-        # 頁面選擇（只在上傳檔案後顯示）
         if uploaded_file is not None:
-            # 讀取PDF並獲取頁數
             try:
-                pdf_reader = PyPDF2.PdfReader(uploaded_file)
                 st.session_state.total_pages = len(pdf_reader.pages)
-                # 加載PDF內容到session_state（如果尚未加載）
                 if len(st.session_state.page_content) == 0:
                     with st.spinner("正在加載PDF..."):
                         for i in range(st.session_state.total_pages):
                             st.session_state.page_content[i+1] = pdf_reader.pages[i].extract_text()
-                # 頁面選擇滑桿
                 page_to_process = st.slider(
                     "選擇要處理的頁面",
                     min_value=1,
@@ -79,31 +89,31 @@ with st.sidebar:
                 st.info(f"PDF共有 {st.session_state.total_pages} 頁")
             except Exception as e:
                 st.error(f"無法讀取PDF: {str(e)}")
         else:
             st.info("請先上傳PDF檔案")
             page_to_process = 1
-    # 進階選項
     with st.expander("進階選項"):
-        # 給Gemini的指示詞
         instruction = st.text_area(
             "給Gemini的指示詞",
             value="請詳細解釋以下內容的主要要點和重要信息",
             height=100
         )
-        # 輸出檔名
         output_filename = st.text_input(
             "輸出CSV檔名",
             value="gemini_translated_results.csv"
         )
-# 主要功能函數
 def setup_gemini_api(api_key):
     """設置Gemini API"""
     try:
-        os.environ["GOOGLE_API_KEY"] = api_key
         genai.configure(api_key=api_key)
         return genai.GenerativeModel("gemini-1.5-flash")
     except Exception as e:
@@ -145,12 +155,12 @@ def get_csv_download_link(df, filename="data.csv"):
     href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">下載 CSV 檔案</a>'
     return href
-# 主要內容區域
 if uploaded_file is not None:
-    # 顯示頁面內容預覽
     st.header("頁面內容預覽")
-    # 從session_state獲取選定頁面的內容
     if page_to_process in st.session_state.page_content:
         page_text = st.session_state.page_content[page_to_process]
         st.text_area(
@@ -162,40 +172,40 @@ if uploaded_file is not None:
     else:
         st.warning("無法獲取選定頁面的內容")
-    # 處理按鈕
     process_button = st.button("處理並翻譯", type="primary", use_container_width=True)
-    # 當按下處理按鈕
     if process_button:
         if not api_key:
             st.error("請輸入Gemini API金鑰!")
         else:
-            # 設置進度顯示
             progress_placeholder = st.empty()
             results_placeholder = st.empty()
             with st.spinner("正在處理中..."):
                 progress_bar = progress_placeholder.progress(0)
-                # 設置API
                 model = setup_gemini_api(api_key)
                 if model:
                     progress_bar.progress(20)
-                    # 獲取選定頁面的內容
                     page_text = st.session_state.page_content[page_to_process]
-                    # 使用Gemini處理
                     progress_placeholder.text("正在使用Gemini解釋內容...")
                     explanation = process_with_gemini(model, page_text, instruction)
                     progress_bar.progress(60)
-                    # 翻譯成繁體中文
                     progress_placeholder.text("正在翻譯成繁體中文...")
                     translation = translate_with_gemini(model, explanation)
                     progress_bar.progress(90)
-                    # 創建結果DataFrame
                     results_data = {
                         "時間戳記": [datetime.now().isoformat()],
                         "原始內容": [page_text[:5000] + "..." if len(page_text) > 5000 else page_text],
@@ -204,17 +214,23 @@ if uploaded_file is not None:
                     }
                     results_df = pd.DataFrame(results_data)
-                    # 保存為CSV（臨時）
-                    results_df.to_csv(output_filename, index=False, encoding="utf-8-sig")
-                    # 完成
                     progress_bar.progress(100)
                     progress_placeholder.empty()
-                    # 顯示結果
                     st.success("處理完成!")
-                    # 創建選項卡顯示結果
                     tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
                     with tab1:
@@ -231,17 +247,17 @@ if uploaded_file is not None:
                         st.markdown(get_csv_download_link(results_df, output_filename), unsafe_allow_html=True)
                         st.info(f"CSV檔案已準備好下載。檔名: {output_filename}")
 else:
-    # 未上傳檔案時顯示的內容
     st.info("👈 請從側邊欄上傳PDF檔案開始")
-    # 顯示使用說明
     with st.expander("使用說明", expanded=True):
         st.markdown("""
         ### 如何使用這個工具:
         1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
         2. **選擇頁面** - 使用滑桿選擇要處理的頁面
-        3. **設定API金鑰** - 輸入您的Gemini API金鑰（預設已填入，可修改）
         4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
         5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
         6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
@@ -255,6 +271,6 @@ else:
         - 結果以CSV格式保存
         """)
-# 頁尾
 st.markdown("---")
 st.markdown("📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持")

 import pandas as pd
 import os
 import google.generativeai as genai
 from datetime import datetime
 import logging
 import sys
 import base64
+import tempfile
+# Create logs directory in a writable location
+log_dir = "/tmp/logs"
+os.makedirs(log_dir, exist_ok=True)
+# Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
+        logging.FileHandler(os.path.join(log_dir, "pdf_processing.log")),
         logging.StreamHandler(sys.stdout)
     ]
 )
 logger = logging.getLogger(__name__)
+# Page configuration
 st.set_page_config(
     page_title="PDF處理與Gemini翻譯工具",
     page_icon="📄",
     initial_sidebar_state="expanded"
 )
+# App title and introduction
 st.title("📄 PDF處理與Gemini翻譯工具")
 st.markdown("上傳PDF檔案，選擇要處理的頁面，讓Gemini解釋內容並翻譯成繁體中文。")
+# Sidebar - Settings area
 with st.sidebar:
     st.header("設定")
+    # API key input - Using st.secrets is more secure but requires setup
     api_key = st.text_input(
         "Gemini API金鑰",
+        value="",  # Remove hardcoded API key
         type="password"
     )
+    # Upload PDF file
     uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])
+    # Processing options block
     with st.expander("處理選項", expanded=True):
+        # Initialize session_state
         if 'total_pages' not in st.session_state:
             st.session_state.total_pages = 0
         if 'page_content' not in st.session_state:
             st.session_state.page_content = {}
+        # Page selection (only shown after file upload)
         if uploaded_file is not None:
+            # Read PDF and get page count
             try:
+                # Create a temporary file to avoid potential security issues with direct file uploads
+                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+                    tmp_file.write(uploaded_file.getvalue())
+                    tmp_path = tmp_file.name
+                pdf_reader = PyPDF2.PdfReader(tmp_path)
                 st.session_state.total_pages = len(pdf_reader.pages)
+                # Load PDF content to session_state (if not already loaded)
                 if len(st.session_state.page_content) == 0:
                     with st.spinner("正在加載PDF..."):
                         for i in range(st.session_state.total_pages):
                             st.session_state.page_content[i+1] = pdf_reader.pages[i].extract_text()
+                # Remove the temporary file
+                os.unlink(tmp_path)
+                # Page selection slider
                 page_to_process = st.slider(
                     "選擇要處理的頁面",
                     min_value=1,
                 st.info(f"PDF共有 {st.session_state.total_pages} 頁")
             except Exception as e:
+                logger.error(f"無法讀取PDF: {str(e)}")
                 st.error(f"無法讀取PDF: {str(e)}")
         else:
             st.info("請先上傳PDF檔案")
             page_to_process = 1
+    # Advanced options
     with st.expander("進階選項"):
+        # Instructions for Gemini
         instruction = st.text_area(
             "給Gemini的指示詞",
             value="請詳細解釋以下內容的主要要點和重要信息",
             height=100
         )
+        # Output filename
         output_filename = st.text_input(
             "輸出CSV檔名",
             value="gemini_translated_results.csv"
         )
+# Main function definitions
 def setup_gemini_api(api_key):
     """設置Gemini API"""
     try:
         genai.configure(api_key=api_key)
         return genai.GenerativeModel("gemini-1.5-flash")
     except Exception as e:
     href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">下載 CSV 檔案</a>'
     return href
+# Main content area
 if uploaded_file is not None:
+    # Display page content preview
     st.header("頁面內容預覽")
+    # Get selected page content from session_state
     if page_to_process in st.session_state.page_content:
         page_text = st.session_state.page_content[page_to_process]
         st.text_area(
     else:
         st.warning("無法獲取選定頁面的內容")
+    # Process button
     process_button = st.button("處理並翻譯", type="primary", use_container_width=True)
+    # When process button is clicked
     if process_button:
         if not api_key:
             st.error("請輸入Gemini API金鑰!")
         else:
+            # Set up progress display
             progress_placeholder = st.empty()
             results_placeholder = st.empty()
             with st.spinner("正在處理中..."):
                 progress_bar = progress_placeholder.progress(0)
+                # Set up API
                 model = setup_gemini_api(api_key)
                 if model:
                     progress_bar.progress(20)
+                    # Get selected page content
                     page_text = st.session_state.page_content[page_to_process]
+                    # Process with Gemini
                     progress_placeholder.text("正在使用Gemini解釋內容...")
                     explanation = process_with_gemini(model, page_text, instruction)
                     progress_bar.progress(60)
+                    # Translate to Traditional Chinese
                     progress_placeholder.text("正在翻譯成繁體中文...")
                     translation = translate_with_gemini(model, explanation)
                     progress_bar.progress(90)
+                    # Create results DataFrame
                     results_data = {
                         "時間戳記": [datetime.now().isoformat()],
                         "原始內容": [page_text[:5000] + "..." if len(page_text) > 5000 else page_text],
                     }
                     results_df = pd.DataFrame(results_data)
+                    # Save as CSV to a writable location
+                    csv_path = os.path.join("/tmp", output_filename)
+                    try:
+                        results_df.to_csv(csv_path, index=False, encoding="utf-8-sig")
+                        logger.info(f"CSV saved to {csv_path}")
+                    except Exception as e:
+                        logger.error(f"Failed to save CSV: {e}")
+                        st.error(f"無法保存CSV: {str(e)}")
+                    # Complete
                     progress_bar.progress(100)
                     progress_placeholder.empty()
+                    # Display results
                     st.success("處理完成!")
+                    # Create tabs to display results
                     tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
                     with tab1:
                         st.markdown(get_csv_download_link(results_df, output_filename), unsafe_allow_html=True)
                         st.info(f"CSV檔案已準備好下載。檔名: {output_filename}")
 else:
+    # Content to display when no file is uploaded
     st.info("👈 請從側邊欄上傳PDF檔案開始")
+    # Display usage instructions
     with st.expander("使用說明", expanded=True):
         st.markdown("""
         ### 如何使用這個工具:
         1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
         2. **選擇頁面** - 使用滑桿選擇要處理的頁面
+        3. **設定API金鑰** - 輸入您的Gemini API金鑰
         4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
         5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
         6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
         - 結果以CSV格式保存
         """)
+# Footer
 st.markdown("---")
 st.markdown("📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持")