Roberta2024 committed on
Commit
087ebab
·
verified ·
1 Parent(s): e41cbe6

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +59 -43
src/streamlit_app.py CHANGED
@@ -3,26 +3,28 @@ import PyPDF2
3
  import pandas as pd
4
  import os
5
  import google.generativeai as genai
6
- import csv
7
  from datetime import datetime
8
  import logging
9
  import sys
10
- import io
11
- import tempfile
12
  import base64
 
 
 
 
 
13
 
14
- # 設定日誌
15
  logging.basicConfig(
16
  level=logging.INFO,
17
  format='%(asctime)s - %(levelname)s - %(message)s',
18
  handlers=[
19
- logging.FileHandler("pdf_processing.log"),
20
  logging.StreamHandler(sys.stdout)
21
  ]
22
  )
23
  logger = logging.getLogger(__name__)
24
 
25
- # 設定頁面配置
26
  st.set_page_config(
27
  page_title="PDF處理與Gemini翻譯工具",
28
  page_icon="📄",
@@ -30,46 +32,54 @@ st.set_page_config(
30
  initial_sidebar_state="expanded"
31
  )
32
 
33
- # 應用程式標題和介紹
34
  st.title("📄 PDF處理與Gemini翻譯工具")
35
  st.markdown("上傳PDF檔案,選擇要處理的頁面,讓Gemini解釋內容並翻譯成繁體中文。")
36
 
37
- # 側邊欄 - 設定區域
38
  with st.sidebar:
39
  st.header("設定")
40
 
41
- # API金鑰輸入
42
  api_key = st.text_input(
43
  "Gemini API金鑰",
44
- value="AIzaSyC3fbJVYQbINa8ztuOS5dFE4ud0I9jhy3o", # 預設值,實際使用應從設定檔讀取
45
  type="password"
46
  )
47
 
48
- # 上傳PDF檔案
49
  uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])
50
 
51
- # 處理選項區塊
52
  with st.expander("處理選項", expanded=True):
53
- # 初始化session_state
54
  if 'total_pages' not in st.session_state:
55
  st.session_state.total_pages = 0
56
  if 'page_content' not in st.session_state:
57
  st.session_state.page_content = {}
58
 
59
- # 頁面選擇(只在上傳檔案後顯示)
60
  if uploaded_file is not None:
61
- # 讀取PDF並獲取頁數
62
  try:
63
- pdf_reader = PyPDF2.PdfReader(uploaded_file)
 
 
 
 
 
64
  st.session_state.total_pages = len(pdf_reader.pages)
65
 
66
- # 加載PDF內容到session_state(如果尚未加載)
67
  if len(st.session_state.page_content) == 0:
68
  with st.spinner("正在加載PDF..."):
69
  for i in range(st.session_state.total_pages):
70
  st.session_state.page_content[i+1] = pdf_reader.pages[i].extract_text()
71
 
72
- # 頁面選擇滑桿
 
 
 
73
  page_to_process = st.slider(
74
  "選擇要處理的頁面",
75
  min_value=1,
@@ -79,31 +89,31 @@ with st.sidebar:
79
 
80
  st.info(f"PDF共有 {st.session_state.total_pages} 頁")
81
  except Exception as e:
 
82
  st.error(f"無法讀取PDF: {str(e)}")
83
  else:
84
  st.info("請先上傳PDF檔案")
85
  page_to_process = 1
86
 
87
- # 進階選項
88
  with st.expander("進階選項"):
89
- # Gemini的指示詞
90
  instruction = st.text_area(
91
  "給Gemini的指示詞",
92
  value="請詳細解釋以下內容的主要要點和重要信息",
93
  height=100
94
  )
95
 
96
- # 輸出檔名
97
  output_filename = st.text_input(
98
  "輸出CSV檔名",
99
  value="gemini_translated_results.csv"
100
  )
101
 
102
- # 主要功能函數
103
  def setup_gemini_api(api_key):
104
  """設置Gemini API"""
105
  try:
106
- os.environ["GOOGLE_API_KEY"] = api_key
107
  genai.configure(api_key=api_key)
108
  return genai.GenerativeModel("gemini-1.5-flash")
109
  except Exception as e:
@@ -145,12 +155,12 @@ def get_csv_download_link(df, filename="data.csv"):
145
  href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">下載 CSV 檔案</a>'
146
  return href
147
 
148
- # 主要內容區域
149
  if uploaded_file is not None:
150
- # 顯示頁面內容預覽
151
  st.header("頁面內容預覽")
152
 
153
- # session_state獲取選定頁面的內容
154
  if page_to_process in st.session_state.page_content:
155
  page_text = st.session_state.page_content[page_to_process]
156
  st.text_area(
@@ -162,40 +172,40 @@ if uploaded_file is not None:
162
  else:
163
  st.warning("無法獲取選定頁面的內容")
164
 
165
- # 處理按鈕
166
  process_button = st.button("處理並翻譯", type="primary", use_container_width=True)
167
 
168
- # 當按下處理按鈕
169
  if process_button:
170
  if not api_key:
171
  st.error("請輸入Gemini API金鑰!")
172
  else:
173
- # 設置進度顯示
174
  progress_placeholder = st.empty()
175
  results_placeholder = st.empty()
176
 
177
  with st.spinner("正在處理中..."):
178
  progress_bar = progress_placeholder.progress(0)
179
 
180
- # 設置API
181
  model = setup_gemini_api(api_key)
182
  if model:
183
  progress_bar.progress(20)
184
 
185
- # 獲取選定頁面的內容
186
  page_text = st.session_state.page_content[page_to_process]
187
 
188
- # 使用Gemini處理
189
  progress_placeholder.text("正在使用Gemini解釋內容...")
190
  explanation = process_with_gemini(model, page_text, instruction)
191
  progress_bar.progress(60)
192
 
193
- # 翻譯成繁體中文
194
  progress_placeholder.text("正在翻譯成繁體中文...")
195
  translation = translate_with_gemini(model, explanation)
196
  progress_bar.progress(90)
197
 
198
- # 創建結果DataFrame
199
  results_data = {
200
  "時間戳記": [datetime.now().isoformat()],
201
  "原始內容": [page_text[:5000] + "..." if len(page_text) > 5000 else page_text],
@@ -204,17 +214,23 @@ if uploaded_file is not None:
204
  }
205
  results_df = pd.DataFrame(results_data)
206
 
207
- # 保存為CSV(臨時)
208
- results_df.to_csv(output_filename, index=False, encoding="utf-8-sig")
 
 
 
 
 
 
209
 
210
- # 完成
211
  progress_bar.progress(100)
212
  progress_placeholder.empty()
213
 
214
- # 顯示結果
215
  st.success("處理完成!")
216
 
217
- # 創建選項卡顯示結果
218
  tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
219
 
220
  with tab1:
@@ -231,17 +247,17 @@ if uploaded_file is not None:
231
  st.markdown(get_csv_download_link(results_df, output_filename), unsafe_allow_html=True)
232
  st.info(f"CSV檔案已準備好下載。檔名: {output_filename}")
233
  else:
234
- # 未上傳檔案時顯示的內容
235
  st.info("👈 請從側邊欄上傳PDF檔案開始")
236
 
237
- # 顯示使用說明
238
  with st.expander("使用說明", expanded=True):
239
  st.markdown("""
240
  ### 如何使用這個工具:
241
 
242
  1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
243
  2. **選擇頁面** - 使用滑桿選擇要處理的頁面
244
- 3. **設定API金鑰** - 輸入您的Gemini API金鑰(預設已填入,可修改)
245
  4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
246
  5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
247
  6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
@@ -255,6 +271,6 @@ else:
255
  - 結果以CSV格式保存
256
  """)
257
 
258
- # 頁尾
259
  st.markdown("---")
260
  st.markdown("📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持")
 
3
  import pandas as pd
4
  import os
5
  import google.generativeai as genai
 
6
  from datetime import datetime
7
  import logging
8
  import sys
 
 
9
  import base64
10
+ import tempfile
11
+
12
+ # Create logs directory in a writable location
13
+ log_dir = "/tmp/logs"
14
+ os.makedirs(log_dir, exist_ok=True)
15
 
16
+ # Configure logging
17
  logging.basicConfig(
18
  level=logging.INFO,
19
  format='%(asctime)s - %(levelname)s - %(message)s',
20
  handlers=[
21
+ logging.FileHandler(os.path.join(log_dir, "pdf_processing.log")),
22
  logging.StreamHandler(sys.stdout)
23
  ]
24
  )
25
  logger = logging.getLogger(__name__)
26
 
27
+ # Page configuration
28
  st.set_page_config(
29
  page_title="PDF處理與Gemini翻譯工具",
30
  page_icon="📄",
 
32
  initial_sidebar_state="expanded"
33
  )
34
 
35
+ # App title and introduction
36
  st.title("📄 PDF處理與Gemini翻譯工具")
37
  st.markdown("上傳PDF檔案,選擇要處理的頁面,讓Gemini解釋內容並翻譯成繁體中文。")
38
 
39
+ # Sidebar - Settings area
40
  with st.sidebar:
41
  st.header("設定")
42
 
43
+ # API key input - Using st.secrets is more secure but requires setup
44
  api_key = st.text_input(
45
  "Gemini API金鑰",
46
+ value="", # Remove hardcoded API key
47
  type="password"
48
  )
49
 
50
+ # Upload PDF file
51
  uploaded_file = st.file_uploader("上傳PDF檔案", type=["pdf"])
52
 
53
+ # Processing options block
54
  with st.expander("處理選項", expanded=True):
55
+ # Initialize session_state
56
  if 'total_pages' not in st.session_state:
57
  st.session_state.total_pages = 0
58
  if 'page_content' not in st.session_state:
59
  st.session_state.page_content = {}
60
 
61
+ # Page selection (only shown after file upload)
62
  if uploaded_file is not None:
63
+ # Read PDF and get page count
64
  try:
65
+ # Create a temporary file to avoid potential security issues with direct file uploads
66
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
67
+ tmp_file.write(uploaded_file.getvalue())
68
+ tmp_path = tmp_file.name
69
+
70
+ pdf_reader = PyPDF2.PdfReader(tmp_path)
71
  st.session_state.total_pages = len(pdf_reader.pages)
72
 
73
+ # Load PDF content to session_state (if not already loaded)
74
  if len(st.session_state.page_content) == 0:
75
  with st.spinner("正在加載PDF..."):
76
  for i in range(st.session_state.total_pages):
77
  st.session_state.page_content[i+1] = pdf_reader.pages[i].extract_text()
78
 
79
+ # Remove the temporary file
80
+ os.unlink(tmp_path)
81
+
82
+ # Page selection slider
83
  page_to_process = st.slider(
84
  "選擇要處理的頁面",
85
  min_value=1,
 
89
 
90
  st.info(f"PDF共有 {st.session_state.total_pages} 頁")
91
  except Exception as e:
92
+ logger.error(f"無法讀取PDF: {str(e)}")
93
  st.error(f"無法讀取PDF: {str(e)}")
94
  else:
95
  st.info("請先上傳PDF檔案")
96
  page_to_process = 1
97
 
98
+ # Advanced options
99
  with st.expander("進階選項"):
100
+ # Instructions for Gemini
101
  instruction = st.text_area(
102
  "給Gemini的指示詞",
103
  value="請詳細解釋以下內容的主要要點和重要信息",
104
  height=100
105
  )
106
 
107
+ # Output filename
108
  output_filename = st.text_input(
109
  "輸出CSV檔名",
110
  value="gemini_translated_results.csv"
111
  )
112
 
113
+ # Main function definitions
114
  def setup_gemini_api(api_key):
115
  """設置Gemini API"""
116
  try:
 
117
  genai.configure(api_key=api_key)
118
  return genai.GenerativeModel("gemini-1.5-flash")
119
  except Exception as e:
 
155
  href = f'<a href="data:file/csv;base64,{b64}" download="{filename}">下載 CSV 檔案</a>'
156
  return href
157
 
158
+ # Main content area
159
  if uploaded_file is not None:
160
+ # Display page content preview
161
  st.header("頁面內容預覽")
162
 
163
+ # Get selected page content from session_state
164
  if page_to_process in st.session_state.page_content:
165
  page_text = st.session_state.page_content[page_to_process]
166
  st.text_area(
 
172
  else:
173
  st.warning("無法獲取選定頁面的內容")
174
 
175
+ # Process button
176
  process_button = st.button("處理並翻譯", type="primary", use_container_width=True)
177
 
178
+ # When process button is clicked
179
  if process_button:
180
  if not api_key:
181
  st.error("請輸入Gemini API金鑰!")
182
  else:
183
+ # Set up progress display
184
  progress_placeholder = st.empty()
185
  results_placeholder = st.empty()
186
 
187
  with st.spinner("正在處理中..."):
188
  progress_bar = progress_placeholder.progress(0)
189
 
190
+ # Set up API
191
  model = setup_gemini_api(api_key)
192
  if model:
193
  progress_bar.progress(20)
194
 
195
+ # Get selected page content
196
  page_text = st.session_state.page_content[page_to_process]
197
 
198
+ # Process with Gemini
199
  progress_placeholder.text("正在使用Gemini解釋內容...")
200
  explanation = process_with_gemini(model, page_text, instruction)
201
  progress_bar.progress(60)
202
 
203
+ # Translate to Traditional Chinese
204
  progress_placeholder.text("正在翻譯成繁體中文...")
205
  translation = translate_with_gemini(model, explanation)
206
  progress_bar.progress(90)
207
 
208
+ # Create results DataFrame
209
  results_data = {
210
  "時間戳記": [datetime.now().isoformat()],
211
  "原始內容": [page_text[:5000] + "..." if len(page_text) > 5000 else page_text],
 
214
  }
215
  results_df = pd.DataFrame(results_data)
216
 
217
+ # Save as CSV to a writable location
218
+ csv_path = os.path.join("/tmp", output_filename)
219
+ try:
220
+ results_df.to_csv(csv_path, index=False, encoding="utf-8-sig")
221
+ logger.info(f"CSV saved to {csv_path}")
222
+ except Exception as e:
223
+ logger.error(f"Failed to save CSV: {e}")
224
+ st.error(f"無法保存CSV: {str(e)}")
225
 
226
+ # Complete
227
  progress_bar.progress(100)
228
  progress_placeholder.empty()
229
 
230
+ # Display results
231
  st.success("處理完成!")
232
 
233
+ # Create tabs to display results
234
  tab1, tab2, tab3 = st.tabs(["Gemini解釋", "繁體中文翻譯", "CSV資料"])
235
 
236
  with tab1:
 
247
  st.markdown(get_csv_download_link(results_df, output_filename), unsafe_allow_html=True)
248
  st.info(f"CSV檔案已準備好下載。檔名: {output_filename}")
249
  else:
250
+ # Content to display when no file is uploaded
251
  st.info("👈 請從側邊欄上傳PDF檔案開始")
252
 
253
+ # Display usage instructions
254
  with st.expander("使用說明", expanded=True):
255
  st.markdown("""
256
  ### 如何使用這個工具:
257
 
258
  1. **上傳PDF檔案** - 從側邊欄選擇並上傳PDF檔案
259
  2. **選擇頁面** - 使用滑桿選擇要處理的頁面
260
+ 3. **設定API金鑰** - 輸入您的Gemini API金鑰
261
  4. **自訂指示詞** - 可選擇修改給Gemini的指示詞
262
  5. **處理與翻譯** - 點擊"處理並翻譯"按鈕
263
  6. **查看結果** - 在選項卡中查看Gemini的解釋和繁體中文翻譯
 
271
  - 結果以CSV格式保存
272
  """)
273
 
274
+ # Footer
275
  st.markdown("---")
276
  st.markdown("📄 PDF處理與Gemini翻譯工具 | 由Streamlit和Google Gemini AI提供技術支持")