Spaces:

Roberta2024
/

Pdf_geminiAI

Sleeping

App Files Files Community

Roberta2024 committed on May 21

Commit

de02f53

verified ·

1 Parent(s): 16fae2b

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +258 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,260 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import PyPDF2
+import pandas as pd
+import os
+import google.generativeai as genai
+import csv
+from datetime import datetime
+import logging
+import sys
+import io
+import tempfile
+import base64
+# 設定日誌
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler("pdf_processing.log"),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+logger = logging.getLogger(__name__)
+# 設定頁面配置
+st.set_page_config(
+    page_title="PDF處理與Gemini翻譯工具",
+    page_icon="📄",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# 應用程式標題和介紹
+st.title("📄 PDF處理與Gemini翻譯工具")
+st.markdown("上傳PDF檔案，選擇要處理的頁面，讓Gemini解釋內容並翻譯成繁體中文。")
+# 側邊欄 - 設定區域
+with st.sidebar:
+    st.header("設定")
+    # API金鑰輸入
+    api_key = st.text_input(
+        "Gemini API金鑰",
+        value="AIzaSyC3fbJVYQbINa8ztuOS5dFE4ud0I9jhy3o",  # 預設值，實際使用應從設定檔讀取
+        type="password"
+    )
+    # 上傳PDF檔案
+    uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])
+    # 處理選項區塊
+    with st.expander("處理選項", expanded=True):
+        # 初始化session_state
+        if 'total_pages' not in st.session_state:
+            st.session_state.total_pages = 0
+        if 'page_content' not in st.session_state:
+            st.session_state.page_content = {}
+        # 頁面選擇（只在上傳檔案後顯示）
+        if uploaded_file is not None:
+            # 讀取PDF並獲取頁數
+            try:
+                pdf_reader = PyPDF2.PdfReader(uploaded_file)
+                st.session_state.total_pages = len(pdf_reader.pages)
+                # 加載PDF內容到session_state（如果尚未加載）
+                if len(st.session_state.page_content) == 0:
+                    with st.spinner("正在加載PDF..."):
+                        for i in range(st.session_state.total_pages):
+                            st.session_state.page_content[i+1] = pdf_reader.pages[i].extract_text()
+                # 頁面選擇滑桿
+                page_to_process = st.slider(
+                    "選擇要處理的頁面",
+                    min_value=1,
+                    max_value=st.session_state.total_pages,
+                    value=1
+                )
+                st.info(f"PDF共有 {st.session_state.total_pages} 頁")
+            except Exception as e:
+                st.error(f"無法讀取PDF: {str(e)}")
+        else:
+            st.info("請先上傳PDF檔案")
+            page_to_process = 1
+    # 進階選項
+    with st.expander("進階選項"):
+        # 給Gemini的指示詞
+        instruction = st.text_area(
+            "給Gemini的指示詞",
+            value="請詳細解釋以下內容的主要要點和重要信息",
+            height=100
+        )
+        # 輸出檔名
+        output_filename = st.text_input(
+            "輸出CSV檔名",
+            value="gemini_translated_results.csv"
+        )
+# 主要功能函數
+def setup_gemini_api(api_key):
+    """設置Gemini API"""
+    try:
+        os.environ["GOOGLE_API_KEY"] = api_key
+        genai.configure(api_key=api_key)
+        return genai.GenerativeModel("gemini-1.5-flash")
+    except Exception as e:
+        logger.error(f"Gemini API設置失敗: {e}")
+        st.error(f"API設置失敗: {str(e)}")
+        return None
+def process_with_gemini(model, text, instruction):
+    """使用Gemini處理文本"""
+    try:
+        prompt = f"{instruction}:\n\n{text}"
+        response = model.generate_content(prompt)
+        return response.text.strip()
+    except Exception as e:
+        logger.error(f"Gemini處理失敗: {e}")
+        return f"處理失敗: {str(e)}"
+def translate_with_gemini(model, text):
+    """使用Gemini將文本翻譯成繁體中文"""
+    try:
+        prompt = f"""
+        請將以下文本翻譯成繁體中文，保持專業和準確性：
+        {text}
+        只需要返回翻譯後的文本，不要加入其他解釋或備註。
+        """
+        response = model.generate_content(prompt)
+        return response.text.strip()
+    except Exception as e:
+        logger.error(f"Gemini翻譯失敗: {e}")
+        return f"翻譯失敗: {str(e)}"
+def get_csv_download_link(df, filename="data.csv"):
+    """生成CSV檔案下載連結"""
+    csv = df.to_csv(index=False)
+    b64 = base64.b64encode(csv.encode()).decode()
+    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">下載 CSV 檔案</a>'
+    return href
+# 主要內容區域
+if uploaded_file is not None:
+    # 顯示頁面內容預覽
+    st.header("頁面內容預覽")
+    # 從session_state獲取選定頁面的內容
+    if page_to_process in st.session_state.page_content:
+        page_text = st.session_state.page_content[page_to_process]
+        st.text_area(
+            f"第 {page_to_process} 頁內容",
+            value=page_text,
+            height=150,
+            disabled=True
+        )
+    else:
+        st.warning("無法獲取選定頁面的內容")
+    # 處理按鈕
+    process_button = st.button("處理並翻譯", type="primary", use_container_width=True)
+    # 當按下處理按鈕
+    if process_button:
+        if not api_key:
+            st.error("請輸入Gemini API金鑰!")
+        else:
+            # 設置進度顯示
+            progress_placeholder = st.empty()
+            results_placeholder = st.empty()
+            with st.spinner("正在處理中..."):
+                progress_bar = progress_placeholder.progress(0)
+                # 設置API
+                model = setup_gemini_api(api_key)
+                if model:
+                    progress_bar.progress(20)
+                    # 獲取選定頁面的內容
+                    page_text = st.session_state.page_content[page_to_process]
+                    # 使用Gemini處理
+                    progress_placeholder.text("正在使用Gemini解釋內容...")
+                    explanation = process_with_gemini(model, page_text, instruction)
+                    progress_bar.progress(60)
+                    # 翻譯成繁體中文
+                    progress_placeholder.text("正在翻譯成繁體中文...")
+                    translation = translate_with_gemini(model, explanation)
+                    progress_bar.progress(90)
+                    # 創建結果DataFrame
+                    results_data = {
+                        "時間戳記": [datetime.now().isoformat()],
+                        "原始內容": [page_text[:5000] + "..." if len(page_text) > 5000 else page_text],
+                        "Gemini解釋": [explanation],
+                        "繁體中文翻譯": [translation]
+                    }
+                    results_df = pd.DataFrame(results_data)
+                    # 保存為CSV（臨時）
+                    results_df.to_csv(output_filename, index=False, encoding="utf-8-sig")
+                    # 完成
+                    progress_bar.progress(100)
+                    progress_placeholder.empty()
+                    # 顯示結果
+                    st.success("處理完成!")
+                    # 創建選項卡顯示結果
+                    tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
+                    with tab1:
+                        st.subheader("Gemini解釋結果")
+                        st.write(explanation)
+                    with tab2:
+                        st.subheader("繁體中文翻譯")
+                        st.write(translation)
+                    with tab3:
+                        st.subheader("CSV資料預覽")
+                        st.dataframe(results_df)
+                        st.markdown(get_csv_download_link(results_df, output_filename), unsafe_allow_html=True)
+                        st.info(f"CSV檔案已準備好下載。檔名: {output_filename}")
+else:
+    # 未上傳檔案時顯示的內容
+    st.info("👈 請從側邊欄上傳PDF檔案開始")
+    # 顯示使用說明
+    with st.expander("使用說明", expanded=True):
+        st.markdown("""
+        ### 如何使用這個工具:
+        1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
+        2. **選擇頁面** - 使用滑桿選擇要處理的頁面
+        3. **設定API金鑰** - 輸入您的Gemini API金鑰（預設已填入，可修改）
+        4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
+        5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
+        6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
+        7. **下載結果** - 下載CSV格式的結果檔案
+        ### 功能特點:
+        - 逐頁預覽PDF內容
+        - 使用Gemini AI解釋文本
+        - 自動翻譯成繁體中文
+        - 結果以CSV格式儲存
+        """)
+# 頁尾
+st.markdown("---")
+st.markdown("📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持")