def generate_transcript(youtube_link):
Browse files- .gitignore +2 -0
- app.py +119 -38
- config/config.py +0 -1
- initializer.py +75 -0
.gitignore
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
local_config.json
|
2 |
*.pyc
|
|
|
|
|
|
1 |
local_config.json
|
2 |
*.pyc
|
3 |
+
*.pem
|
4 |
+
*.key
|
app.py
CHANGED
@@ -1,14 +1,20 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import shutil
|
4 |
import tempfile
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
import vertexai
|
11 |
-
from vertexai.generative_models import GenerativeModel, Part
|
12 |
|
13 |
def mock_question_answer(question, history):
|
14 |
# 假資料模擬回答
|
@@ -32,40 +38,102 @@ def add_to_file_list(file, file_list):
|
|
32 |
temp_path = os.path.join(temp_dir, os.path.basename(file.name))
|
33 |
shutil.copy(file.name, temp_path) # 將文件存儲到臨時目錄
|
34 |
file_list.append(temp_path)
|
35 |
-
display_list = [os.path.basename(path) for path in file_list]
|
36 |
-
return gr.update(choices=display_list), None
|
37 |
-
|
38 |
-
def process_selected_files(selected_files, file_list):
    """Report which stored files were selected in the UI.

    A path from ``file_list`` is kept when its base name appears in
    ``selected_files``. The RAG processing itself is only simulated here.

    Returns:
        A status string listing the matching paths, comma-separated.
    """
    chosen_paths = []
    for stored_path in file_list:
        if os.path.basename(stored_path) in selected_files:
            chosen_paths.append(stored_path)
    # Mock data standing in for real RAG processing.
    joined_paths = ", ".join(chosen_paths)
    return f"已處理的文件: {joined_paths}"
|
42 |
-
|
43 |
-
def toggle_visibility(toggle_value):
    """Build a Gradio update that sets a component's ``visible`` flag.

    Args:
        toggle_value: Value forwarded unchanged as ``visible``.
    """
    visibility_update = gr.update(visible=toggle_value)
    return visibility_update
|
45 |
-
|
46 |
-
def get_youtube_playlist():
    """Return a hard-coded sample playlist (mock data).

    Returns:
        A list of dicts, each with an ``"id"`` and a ``"title"`` key.
    """
    # Mock data simulating a YouTube playlist.
    sample_videos = (
        ("yPmgHBRUdns", "【觀念】比與比值"),
        ("CgLdZpnr_h8", "【觀念】相等的比"),
        ("-7HVxER-rb0", "【觀念】比例式的運算性質"),
    )
    return [{"id": video_id, "title": title} for video_id, title in sample_videos]
|
53 |
-
|
54 |
-
def format_youtube_choices(youtube_data):
    """Turn playlist entries into ``"<id> - <title>"`` labels.

    Args:
        youtube_data: Iterable of mappings providing ``"id"`` and ``"title"``.

    Returns:
        A list of label strings, one per entry, in input order.
    """
    labels = []
    for entry in youtube_data:
        video_id = entry["id"]
        title = entry["title"]
        labels.append(f"{video_id} - {title}")
    return labels
|
56 |
-
|
57 |
-
def process_selected_videos(selected_videos):
    """Report the selected videos as one comma-separated status string.

    Args:
        selected_videos: Iterable of strings identifying the chosen videos.
    """
    # Mock data simulating the processing of the selected videos.
    video_summary = ", ".join(selected_videos)
    return f"已選擇的影片: {video_summary}"
|
60 |
|
61 |
def add_youtube_to_list(youtube_link, file_list):
|
62 |
-
|
63 |
-
|
64 |
display_list = [os.path.basename(path) if os.path.basename(path) else path for path in file_list]
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
def process_all_files(file_list):
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
with gr.Blocks() as demo:
|
71 |
gr.Markdown("# AI Notes Assistant")
|
@@ -96,7 +164,6 @@ with gr.Blocks() as demo:
|
|
96 |
process_files_button = gr.Button("處理檔案")
|
97 |
rag_result = gr.Textbox(label="處理結果", interactive=False)
|
98 |
|
99 |
-
process_files_button.click(process_all_files, inputs=[file_list], outputs=[rag_result])
|
100 |
file_list.change(lambda x: gr.update(choices = [os.path.basename(path) if os.path.basename(path) else path for path in x]), inputs=file_list, outputs=file_display)
|
101 |
|
102 |
with gr.Column(visible=True) as chat_column:
|
@@ -110,6 +177,13 @@ with gr.Blocks() as demo:
|
|
110 |
with gr.Tab("摘要生成"):
|
111 |
summary_button = gr.Button("生成摘要")
|
112 |
summary = gr.Textbox(label="摘要", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
with gr.Tab("其他功能"):
|
114 |
gr.Markdown("此處可以添加更多功能卡片")
|
115 |
|
@@ -117,8 +191,15 @@ with gr.Blocks() as demo:
|
|
117 |
chat_toggle.change(toggle_visibility, inputs=chat_toggle, outputs=chat_column)
|
118 |
feature_toggle.change(toggle_visibility, inputs=feature_toggle, outputs=feature_column)
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
history = gr.State([])
|
121 |
-
ask_button.click(mock_question_answer, inputs=[question, history], outputs=[chatbot,
|
122 |
-
summary_button.click(mock_summary,
|
123 |
|
124 |
-
demo.launch()
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
import gradio as gr
|
4 |
import os
|
5 |
import shutil
|
6 |
import tempfile
|
7 |
+
from google import genai
|
8 |
+
from google.genai import types
|
9 |
+
|
10 |
+
from initializer import initialize_clients
|
11 |
|
12 |
+
# 初始化 Google Cloud Storage 服務和 GENAI 客戶端
|
13 |
+
GCS_SERVICE, GENAI_CLIENT = initialize_clients()
|
14 |
+
GCS_CLIENT = GCS_SERVICE.client
|
15 |
|
16 |
+
def toggle_visibility(toggle_value):
    """Return a Gradio update whose ``visible`` flag is ``toggle_value``.

    Args:
        toggle_value: Value forwarded unchanged as ``visible``.
    """
    component_update = gr.update(visible=toggle_value)
    return component_update
|
|
|
|
|
18 |
|
19 |
def mock_question_answer(question, history):
|
20 |
# 假資料模擬回答
|
|
|
38 |
temp_path = os.path.join(temp_dir, os.path.basename(file.name))
|
39 |
shutil.copy(file.name, temp_path) # 將文件存儲到臨時目錄
|
40 |
file_list.append(temp_path)
|
41 |
+
display_list = [os.path.basename(path) if os.path.basename(path) else path for path in file_list]
|
42 |
+
return gr.update(choices=display_list), None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def add_youtube_to_list(youtube_link, file_list):
    """Append a YouTube link to the file list and rebuild the display choices.

    Args:
        youtube_link: Link text; falsy values (empty string, ``None``) are not added.
        file_list: List of stored paths/links; it is extended in place.

    Returns:
        A 2-tuple: a Gradio update carrying the refreshed choices, and an
        empty string (presumably used to clear the link input box - confirm
        against the event wiring).
    """
    if youtube_link:
        file_list.append(youtube_link)

    display_list = []
    for entry in file_list:
        # Fall back to the full entry when it has no base name (e.g. it ends with "/").
        display_list.append(os.path.basename(entry) or entry)

    choices_update = gr.update(choices=display_list)
    return choices_update, ""
|
49 |
+
|
50 |
+
def generate_transcript(youtube_link):
    """Generate a timestamped transcript of a YouTube video with Gemini.

    The link is sent to the model as a video part together with a prompt
    asking for a timestamped zh-TW transcript. The reply is streamed and
    the text of every streamed part is concatenated.

    Args:
        youtube_link: URL of the video; passed unchanged as the part's file URI.

    Returns:
        The concatenated transcript text (an empty string if the stream
        carries no text parts).

    Raises:
        Exception: Any error raised while building the request or while
            streaming is logged to stdout and re-raised unchanged.
    """
    print(f"\n開始生成 YouTube 逐字稿: {youtube_link}")
    try:
        print("初始化 Gemini 模型設定...")
        video = types.Part.from_uri(
            file_uri=youtube_link,
            mime_type="video/*",
        )

        model = "gemini-2.0-flash-exp"
        contents = [
            types.Content(
                role="user",
                parts=[
                    video,
                    # Passed by keyword: current google-genai releases declare
                    # `text` as keyword-only, and older ones accept the keyword too.
                    types.Part.from_text(text="""請給我帶時間軸的逐字稿,請統一用 zhTW語言"""),
                ],
            )
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            max_output_tokens=8192,
            response_modalities=["TEXT"],
            safety_settings=[
                types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
                types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"),
                types.SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"),
                types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
            ],
        )

        print("開始串流生成逐字稿...")
        transcript_pieces = []
        for chunk in GENAI_CLIENT.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            # Extract only text content from candidates. Each level may be
            # missing or None (e.g. a chunk without content), so fall back to
            # an empty sequence instead of iterating None.
            for candidate in getattr(chunk, "candidates", None) or []:
                content = getattr(candidate, "content", None)
                for part in getattr(content, "parts", None) or []:
                    text = getattr(part, "text", None)
                    if text:
                        transcript_pieces.append(text)
                        # Progress marker: one dot per received text part.
                        print(".", end="", flush=True)

        print("\n逐字稿生成完成!")
        return "".join(transcript_pieces)
    except Exception as e:
        print(f"\n生成逐字稿時發生錯誤: {str(e)}")
        raise
|
104 |
|
105 |
def _is_youtube_link(entry):
    """Return True when ``entry`` is a URL hosted on youtube.com or youtu.be."""
    from urllib.parse import urlparse

    text = str(entry).strip()
    # Scheme-less links such as "youtube.com/watch?v=..." need a leading "//"
    # before urlparse will recognise the host part.
    candidate = text if "://" in text else f"//{text}"
    try:
        host = (urlparse(candidate).hostname or "").lower()
    except ValueError:
        return False
    return host in ("youtube.com", "youtu.be") or host.endswith((".youtube.com", ".youtu.be"))


def process_all_files(file_list):
    """Process every entry of the file list and collect status and transcripts.

    Entries whose URL host is YouTube are transcribed with
    ``generate_transcript``; everything else is treated as a regular file,
    for which no real processing exists yet. The host is checked rather than
    searching for "youtube.com" anywhere in the text, so a local path that
    merely contains that text is handled as a file.

    Args:
        file_list: Iterable of local paths and/or YouTube links. ``None`` is
            treated as an empty list.

    Returns:
        A 2-tuple ``(result_text, transcript_text)``: one status line per
        entry, and the transcripts of all successfully processed videos.
        A failed video is reported in the status text and does not stop the
        remaining entries from being processed.
    """
    entries = list(file_list or [])
    total = len(entries)

    print("\n=== 開始處理檔案 ===")
    print(f"待處理檔案數量: {total}")

    status_lines = []
    transcript_sections = []

    for index, file in enumerate(entries, 1):
        print(f"\n處理第 {index}/{total} 個檔案: {file}")

        if _is_youtube_link(file):
            print("檢測到 YouTube 連結,開始生成逐字稿...")
            try:
                transcript = generate_transcript(file)
            except Exception as e:
                # Best effort: report the failure and carry on with the next entry.
                print(f"✗ YouTube 逐字稿生成失敗: {str(e)}")
                status_lines.append(f"🔴 YouTube 影片處理失敗: {file}\n")
            else:
                print("✓ YouTube 逐字稿生成成功")
                status_lines.append(f"🟢 YouTube 影片處理完成: {file}\n")
                transcript_sections.append(f"\n=== {file} 的逐字稿 ===\n{transcript}\n")
        else:
            print(f"處理一般檔案: {file}")
            try:
                # Handling logic for other file types can be added here.
                print("✓ 檔案處理成功")
                status_lines.append(f"🟢 檔案處理完成: {file}\n")
            except Exception as e:
                print(f"✗ 檔案處理失敗: {str(e)}")
                status_lines.append(f"🔴 檔案處理失敗: {file}\n")

    print("\n=== 檔案處理完成 ===")
    return "".join(status_lines), "".join(transcript_sections)
|
137 |
|
138 |
with gr.Blocks() as demo:
|
139 |
gr.Markdown("# AI Notes Assistant")
|
|
|
164 |
process_files_button = gr.Button("處理檔案")
|
165 |
rag_result = gr.Textbox(label="處理結果", interactive=False)
|
166 |
|
|
|
167 |
file_list.change(lambda x: gr.update(choices = [os.path.basename(path) if os.path.basename(path) else path for path in x]), inputs=file_list, outputs=file_display)
|
168 |
|
169 |
with gr.Column(visible=True) as chat_column:
|
|
|
177 |
with gr.Tab("摘要生成"):
|
178 |
summary_button = gr.Button("生成摘要")
|
179 |
summary = gr.Textbox(label="摘要", interactive=False)
|
180 |
+
with gr.Tab("逐字稿"):
|
181 |
+
transcript_display = gr.Textbox(
|
182 |
+
label="YouTube 逐字稿",
|
183 |
+
interactive=False,
|
184 |
+
lines=15,
|
185 |
+
placeholder="處理 YouTube 影片後,逐字稿將顯示在這裡..."
|
186 |
+
)
|
187 |
with gr.Tab("其他功能"):
|
188 |
gr.Markdown("此處可以添加更多功能卡片")
|
189 |
|
|
|
191 |
chat_toggle.change(toggle_visibility, inputs=chat_toggle, outputs=chat_column)
|
192 |
feature_toggle.change(toggle_visibility, inputs=feature_toggle, outputs=feature_column)
|
193 |
|
194 |
+
# 更新處理檔案按鈕的事件處理
|
195 |
+
process_files_button.click(
|
196 |
+
process_all_files,
|
197 |
+
inputs=[file_list],
|
198 |
+
outputs=[rag_result, transcript_display]
|
199 |
+
)
|
200 |
+
|
201 |
history = gr.State([])
|
202 |
+
ask_button.click(mock_question_answer, inputs=[question, history], outputs=[chatbot, question])
|
203 |
+
summary_button.click(mock_summary, outputs=summary)
|
204 |
|
205 |
+
demo.launch(share=True)
|
config/config.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
-
import gradio as gr
|
4 |
|
5 |
# KEY CONFIG
|
6 |
is_env_local = os.getenv("IS_ENV_LOCAL", "false") == "true"
|
|
|
1 |
import os
|
2 |
import json
|
|
|
3 |
|
4 |
# KEY CONFIG
|
5 |
is_env_local = os.getenv("IS_ENV_LOCAL", "false") == "true"
|
initializer.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from google.oauth2 import service_account
|
4 |
+
from google.cloud import storage
|
5 |
+
from google import genai
|
6 |
+
from storage_service import GoogleCloudStorage
|
7 |
+
|
8 |
+
def initialize_google_credentials():
    """Load the Google service-account credentials as a JSON string.

    When the ``IS_ENV_LOCAL`` environment variable equals ``"true"``, the
    credentials are read from the ``GOOGLE_APPLICATION_CREDENTIALS_JSON`` key
    of ``local_config.json`` located next to this module. Otherwise they are
    taken from the ``GOOGLE_APPLICATION_CREDENTIALS_JSON`` environment
    variable.

    Returns:
        The credentials as a JSON string; the environment value unchanged
        (``None`` when unset); or ``None`` when the local config file is
        missing or any error occurs while loading it.
    """
    is_env_local = os.getenv("IS_ENV_LOCAL", "false") == "true"
    print(f"Environment is local: {is_env_local}")

    try:
        if is_env_local:
            config_path = os.path.join(os.path.dirname(__file__), "local_config.json")
            print(f"Trying to load config from: {config_path}")
            if not os.path.exists(config_path):
                print(f"Warning: {config_path} does not exist")
                return None

            # JSON text is UTF-8; do not depend on the platform default encoding.
            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
            raw_credentials = config["GOOGLE_APPLICATION_CREDENTIALS_JSON"]
            if isinstance(raw_credentials, str):
                # Already serialized: dumping it again would wrap it in quotes
                # and callers could no longer json.loads() it into a dict.
                google_credentials_key = raw_credentials
            else:
                google_credentials_key = json.dumps(raw_credentials)
        else:
            google_credentials_key = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
            if not google_credentials_key:
                print("Warning: GOOGLE_APPLICATION_CREDENTIALS_JSON environment variable not set")

        return google_credentials_key
    except Exception as e:
        # Deliberately best effort: callers treat None as "no credentials".
        print(f"Error initializing credentials: {str(e)}")
        return None
|
32 |
+
|
33 |
+
def initialize_gcs_service(google_credentials_key):
    """Create the Google Cloud Storage service wrapper.

    Args:
        google_credentials_key: Credentials passed to ``GoogleCloudStorage``;
            a falsy value means no credentials are available.

    Returns:
        A ``GoogleCloudStorage`` instance, or ``None`` (after printing a
        warning) when no credentials were provided.
    """
    if google_credentials_key:
        return GoogleCloudStorage(google_credentials_key)

    print("Warning: No credentials provided, GCS service will not be initialized")
    return None
|
38 |
+
|
39 |
+
def initialize_genai_client(google_credentials_key):
    """Create the Vertex AI GenAI client.

    With credentials, the JSON string is parsed into service-account
    credentials scoped to ``cloud-platform`` and passed to the client.
    Without credentials, or when anything in that process raises, a client
    relying on default authentication is created instead.

    Args:
        google_credentials_key: Service-account info as a JSON string, or a
            falsy value to use default authentication.

    Returns:
        A ``genai.Client`` for project ``junyiacademy`` in ``us-central1``.
    """
    def _build_client(**extra_options):
        # All code paths share the same Vertex AI project and location.
        return genai.Client(
            vertexai=True,
            project='junyiacademy',
            location='us-central1',
            **extra_options,
        )

    try:
        if google_credentials_key:
            service_account_info = json.loads(google_credentials_key)
            cloud_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
            credentials = service_account.Credentials.from_service_account_info(
                service_account_info, scopes=cloud_scopes
            )
            return _build_client(credentials=credentials)

        print("Warning: No credentials provided, using default authentication")
        return _build_client()
    except Exception as e:
        print(f"Error initializing GenAI client: {str(e)}")
        print("Falling back to default authentication")
        return _build_client()
|
69 |
+
|
70 |
+
def initialize_clients():
    """Load the credentials once and build both service clients from them.

    Returns:
        A 2-tuple ``(gcs_service, genai_client)``.
    """
    credentials_json = initialize_google_credentials()
    return (
        initialize_gcs_service(credentials_json),
        initialize_genai_client(credentials_json),
    )
|