# vaitor2 / app.py
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from docx import Document
import os
from openai import OpenAI
from groq import Groq
import uuid
from gtts import gTTS
import math
from pydub import AudioSegment
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
import yt_dlp
from moviepy.editor import VideoFileClip
from pytube import YouTube
import io
import time
import json
from urllib.parse import urlparse, parse_qs
from google.cloud import storage
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.http import MediaIoBaseUpload
from educational_material import EducationalMaterial
from storage_service import GoogleCloudStorage
import boto3
from chatbot import Chatbot
is_env_local = os.getenv("IS_ENV_LOCAL", "false") == "true"
print(f"is_env_local: {is_env_local}")
print("===gr__version__===")
print(gr.__version__)
if is_env_local:
with open("local_config.json") as f:
config = json.load(f)
PASSWORD = config["PASSWORD"]
GCS_KEY = json.dumps(config["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
DRIVE_KEY = json.dumps(config["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
OPEN_AI_KEY = config["OPEN_AI_KEY"]
GROQ_API_KEY = config["GROQ_API_KEY"]
JUTOR_CHAT_KEY = config["JUTOR_CHAT_KEY"]
AWS_ACCESS_KEY = config["AWS_ACCESS_KEY"]
AWS_SECRET_KEY = config["AWS_SECRET_KEY"]
AWS_REGION_NAME = config["AWS_REGION_NAME"]
OUTPUT_PATH = config["OUTPUT_PATH"]
else:
PASSWORD = os.getenv("PASSWORD")
GCS_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
DRIVE_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
JUTOR_CHAT_KEY = os.getenv("JUTOR_CHAT_KEY")
AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
AWS_REGION_NAME = 'us-west-2'
OUTPUT_PATH = 'videos'
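# For local runs, local_config.json is expected to supply the keys read above.
# Minimal sketch of the assumed shape (all values are placeholders):
# {
#   "PASSWORD": "...",
#   "GOOGLE_APPLICATION_CREDENTIALS_JSON": { "...": "service-account JSON" },
#   "OPEN_AI_KEY": "sk-...",
#   "GROQ_API_KEY": "...",
#   "JUTOR_CHAT_KEY": "...",
#   "AWS_ACCESS_KEY": "...",
#   "AWS_SECRET_KEY": "...",
#   "AWS_REGION_NAME": "us-west-2",
#   "OUTPUT_PATH": "videos"
# }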
TRANSCRIPTS = []
CURRENT_INDEX = 0
OPEN_AI_CLIENT = OpenAI(api_key=OPEN_AI_KEY)
GROQ_CLIENT = Groq(api_key=GROQ_API_KEY)
GCS_SERVICE = GoogleCloudStorage(GCS_KEY)
GCS_CLIENT = GCS_SERVICE.client
BEDROCK_CLIENT = boto3.client(
service_name="bedrock-runtime",
aws_access_key_id=AWS_ACCESS_KEY,
aws_secret_access_key=AWS_SECRET_KEY,
region_name=AWS_REGION_NAME,
)
# verify password
def verify_password(password):
if password == PASSWORD:
return True
else:
raise gr.Error("ๅฏ†็ขผ้Œฏ่ชค")
# ====gcs====
def gcs_check_file_exists(gcs_client, bucket_name, file_name):
"""
ๆฃ€ๆŸฅ GCS ๅญ˜ๅ‚จๆกถไธญๆ˜ฏๅฆๅญ˜ๅœจๆŒ‡ๅฎš็š„ๆ–‡ไปถ
file_name ๆ ผๅผ๏ผš{folder_name}/{file_name}
"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(file_name)
return blob.exists()
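# Usage sketch (the bucket name matches the one used elsewhere in this file;
# the video id is made up):
#   gcs_check_file_exists(GCS_CLIENT, "video_ai_assistant", "abc123/abc123_transcript.json")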
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
"""ไธŠไผ ๆ–‡ไปถๅˆฐๆŒ‡ๅฎš็š„ GCS ๅญ˜ๅ‚จๆกถ"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(file_path)
print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
"""ไธŠไผ ๅญ—็ฌฆไธฒๅˆฐๆŒ‡ๅฎš็š„ GCS ๅญ˜ๅ‚จๆกถ"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_string(json_string)
print(f"JSON string uploaded to {destination_blob_name} in GCS.")
def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
"""ไปŽ GCS ไธ‹่ฝฝๆ–‡ไปถๅ†…ๅฎนๅˆฐๅญ—็ฌฆไธฒ"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(source_blob_name)
return blob.download_as_text()
def make_blob_public(gcs_client, bucket_name, blob_name):
"""ๅฐ†ๆŒ‡ๅฎš็š„ GCS ๅฏน่ฑก่ฎพ็ฝฎไธบๅ…ฌๅ…ฑๅฏ่ฏป"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(blob_name)
blob.make_public()
print(f"Blob {blob_name} is now publicly accessible at {blob.public_url}")
def get_blob_public_url(gcs_client, bucket_name, blob_name):
"""่Žทๅ–ๆŒ‡ๅฎš GCS ๅฏน่ฑก็š„ๅ…ฌๅผ€ URL"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(blob_name)
return blob.public_url
def upload_img_and_get_public_url(gcs_client, bucket_name, file_name, file_path):
"""ไธŠไผ ๅ›พ็‰‡ๅˆฐ GCS ๅนถ่Žทๅ–ๅ…ถๅ…ฌๅผ€ URL"""
# ไธŠไผ ๅ›พ็‰‡
upload_file_to_gcs(gcs_client, bucket_name, file_name, file_path)
# ๅฐ†ไธŠไผ ็š„ๅ›พ็‰‡่ฎพ็ฝฎไธบๅ…ฌๅผ€
make_blob_public(gcs_client, bucket_name, file_name)
# ่Žทๅ–ๅ›พ็‰‡็š„ๅ…ฌๅผ€ URL
public_url = get_blob_public_url(gcs_client, bucket_name, file_name)
print(f"Public URL for the uploaded image: {public_url}")
return public_url
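# Usage sketch (hypothetical names), mirroring how screenshots are stored later as
# "{video_id}/{video_id}_{start}.jpg":
#   url = upload_img_and_get_public_url(GCS_CLIENT, "video_ai_assistant",
#                                       "abc123/abc123_10.jpg", "videos/abc123_10.jpg")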
def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
# Get all files from the folder
query = f"'{drive_folder_id}' in parents and trashed = false"
response = drive_service.files().list(q=query).execute()
files = response.get('files', [])
for file in files:
# Copy each file to GCS
file_id = file['id']
file_name = file['name']
gcs_destination_path = f"{gcs_folder_name}/{file_name}"
copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path)
def copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path):
# Download file content from Drive
request = drive_service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while not done:
status, done = downloader.next_chunk()
fh.seek(0)
file_content = fh.getvalue()
# Upload file content to GCS
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(gcs_destination_path)
blob.upload_from_string(file_content)
print(f"File {file_id} copied to GCS at {gcs_destination_path}.")
def delete_blob(gcs_client, bucket_name, blob_name):
"""ๅˆ ้™คๆŒ‡ๅฎš็š„ GCS ๅฏน่ฑก"""
bucket = gcs_client.bucket(bucket_name)
blob = bucket.blob(blob_name)
blob.delete()
print(f"Blob {blob_name} deleted from GCS.")
# ==== Drive ==== initialization
def init_drive_service():
credentials_json_string = DRIVE_KEY
credentials_dict = json.loads(credentials_json_string)
SCOPES = ['https://www.googleapis.com/auth/drive']
credentials = service_account.Credentials.from_service_account_info(
credentials_dict, scopes=SCOPES)
service = build('drive', 'v3', credentials=credentials)
return service
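# Note: DRIVE_KEY and GCS_KEY are read from the same GOOGLE_APPLICATION_CREDENTIALS_JSON,
# so Drive and GCS share one service account; the full "drive" scope lets the code
# create folders and upload files in the helpers below.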
def create_folder_if_not_exists(service, folder_name, parent_id):
print("ๆฃ€ๆŸฅๆ˜ฏๅฆๅญ˜ๅœจ็‰นๅฎšๅ็งฐ็š„ๆ–‡ไปถๅคน๏ผŒๅฆ‚ๆžœไธๅญ˜ๅœจๅˆ™ๅˆ›ๅปบ")
query = f"mimeType='application/vnd.google-apps.folder' and name='{folder_name}' and '{parent_id}' in parents and trashed=false"
response = service.files().list(q=query, spaces='drive', fields="files(id, name)").execute()
folders = response.get('files', [])
if not folders:
# ๆ–‡ไปถๅคนไธๅญ˜ๅœจ๏ผŒๅˆ›ๅปบๆ–ฐๆ–‡ไปถๅคน
file_metadata = {
'name': folder_name,
'mimeType': 'application/vnd.google-apps.folder',
'parents': [parent_id]
}
folder = service.files().create(body=file_metadata, fields='id').execute()
return folder.get('id')
else:
# ๆ–‡ไปถๅคนๅทฒๅญ˜ๅœจ
return folders[0]['id']
# Check whether a file with the given name exists on Google Drive
def check_file_exists(service, folder_name, file_name):
query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
response = service.files().list(q=query).execute()
files = response.get('files', [])
return len(files) > 0, files[0]['id'] if files else None
def upload_content_directly(service, file_name, folder_id, content):
"""
็›ดๆŽฅๅฐ†ๅ†…ๅฎนไธŠไผ ๅˆฐGoogle Driveไธญ็š„ๆ–ฐๆ–‡ไปถใ€‚
"""
if not file_name:
raise ValueError("ๆ–‡ไปถๅไธ่ƒฝไธบ็ฉบ")
if not folder_id:
raise ValueError("ๆ–‡ไปถๅคนIDไธ่ƒฝไธบ็ฉบ")
if content is None: # ๅ…่ฎธ็ฉบๅญ—็ฌฆไธฒไธŠไผ ๏ผŒไฝ†ไธๅ…่ฎธNone
raise ValueError("ๅ†…ๅฎนไธ่ƒฝไธบ็ฉบ")
file_metadata = {'name': file_name, 'parents': [folder_id]}
# ไฝฟ็”จio.BytesIOไธบๆ–‡ๆœฌๅ†…ๅฎนๅˆ›ๅปบไธ€ไธชๅ†…ๅญ˜ไธญ็š„ๆ–‡ไปถๅฏน่ฑก
try:
with io.BytesIO(content.encode('utf-8')) as fh:
media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
print("==content==")
print(content)
print("==content==")
print("==media==")
print(media)
print("==media==")
# ๆ‰ง่กŒไธŠไผ 
file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
return file.get('id')
except Exception as e:
print(f"ไธŠไผ ๆ–‡ไปถๆ—ถๅ‘็”Ÿ้”™่ฏฏ: {e}")
raise # ้‡ๆ–ฐๆŠ›ๅ‡บๅผ‚ๅธธ๏ผŒ่ฐƒ็”จ่€…ๅฏไปฅๆ นๆฎ้œ€่ฆๅค„็†ๆˆ–ๅฟฝ็•ฅ
def upload_file_directly(service, file_name, folder_id, file_path):
# ไธŠๅ‚ณ .json to Google Drive
file_metadata = {'name': file_name, 'parents': [folder_id]}
media = MediaFileUpload(file_path, mimetype='application/json')
file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
# return file.get('id') # ่ฟ”ๅ›žๆ–‡ไปถID
return True
def upload_img_directly(service, file_name, folder_id, file_path):
file_metadata = {'name': file_name, 'parents': [folder_id]}
media = MediaFileUpload(file_path, mimetype='image/jpeg')
file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
return file.get('id') # ่ฟ”ๅ›žๆ–‡ไปถID
def download_file_as_string(service, file_id):
"""
ไปŽGoogle Driveไธ‹่ฝฝๆ–‡ไปถๅนถๅฐ†ๅ…ถไฝœไธบๅญ—็ฌฆไธฒ่ฟ”ๅ›žใ€‚
"""
request = service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while not done:
status, done = downloader.next_chunk()
fh.seek(0)
content = fh.read().decode('utf-8')
return content
def set_public_permission(service, file_id):
service.permissions().create(
fileId=file_id,
body={"type": "anyone", "role": "reader"},
fields='id',
).execute()
def update_file_on_drive(service, file_id, file_content):
"""
ๆ›ดๆ–ฐGoogle DriveไธŠ็š„ๆ–‡ไปถๅ†…ๅฎนใ€‚
ๅ‚ๆ•ฐ:
- service: Google Drive APIๆœๅŠกๅฎžไพ‹ใ€‚
- file_id: ่ฆๆ›ดๆ–ฐ็š„ๆ–‡ไปถ็š„IDใ€‚
- file_content: ๆ–ฐ็š„ๆ–‡ไปถๅ†…ๅฎน๏ผŒๅญ—็ฌฆไธฒๆ ผๅผใ€‚
"""
# ๅฐ†ๆ–ฐ็š„ๆ–‡ไปถๅ†…ๅฎน่ฝฌๆขไธบๅญ—่Š‚ๆต
fh = io.BytesIO(file_content.encode('utf-8'))
media = MediaIoBaseUpload(fh, mimetype='application/json', resumable=True)
# ๆ›ดๆ–ฐๆ–‡ไปถ
updated_file = service.files().update(
fileId=file_id,
media_body=media
).execute()
print(f"ๆ–‡ไปถๅทฒๆ›ดๆ–ฐ๏ผŒๆ–‡ไปถID: {updated_file['id']}")
# ---- Text file ----
def process_file(password, file):
verify_password(password)
# ่ฏปๅ–ๆ–‡ไปถ
if file.name.endswith('.csv'):
df = pd.read_csv(file)
text = df_to_text(df)
elif file.name.endswith('.xlsx'):
df = pd.read_excel(file)
text = df_to_text(df)
elif file.name.endswith('.docx'):
text = docx_to_text(file)
else:
raise ValueError("Unsupported file type")
# use the extracted text for every file type (df is undefined for .docx uploads)
df_string = text
# Yilan data: replace the "@XX@" delimiter with "|"
df_string = df_string.replace("@XX@", "|")
# ๆ นๆฎไธŠไผ ็š„ๆ–‡ไปถๅ†…ๅฎน็”Ÿๆˆ้—ฎ้ข˜
questions = generate_questions(df_string)
summary = generate_summarise(df_string)
# ่ฟ”ๅ›žๆŒ‰้’ฎๆ–‡ๆœฌๅ’Œ DataFrame ๅญ—็ฌฆไธฒ
return questions[0] if len(questions) > 0 else "", \
questions[1] if len(questions) > 1 else "", \
questions[2] if len(questions) > 2 else "", \
summary, \
df_string
def df_to_text(df):
# ๅฐ† DataFrame ่ฝฌๆขไธบ็บฏๆ–‡ๆœฌ
return df.to_string()
def docx_to_text(file):
# ๅฐ† Word ๆ–‡ๆกฃ่ฝฌๆขไธบ็บฏๆ–‡ๆœฌ
doc = Document(file)
return "\n".join([para.text for para in doc.paragraphs])
# ---- YouTube link ----
def format_seconds_to_time(seconds):
"""ๅฐ†็ง’ๆ•ฐๆ ผๅผๅŒ–ไธบ ๆ—ถ:ๅˆ†:็ง’ ็š„ๅฝขๅผ"""
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
seconds = int(seconds % 60)
return f"{hours:02}:{minutes:02}:{seconds:02}"
def extract_youtube_id(url):
parsed_url = urlparse(url)
if "youtube.com" in parsed_url.netloc:
# ๅฏนไบŽๆ ‡ๅ‡†้“พๆŽฅ๏ผŒ่ง†้ข‘IDๅœจๆŸฅ่ฏขๅ‚ๆ•ฐ'v'ไธญ
query_params = parse_qs(parsed_url.query)
return query_params.get("v")[0] if "v" in query_params else None
elif "youtu.be" in parsed_url.netloc:
# ๅฏนไบŽ็Ÿญ้“พๆŽฅ๏ผŒ่ง†้ข‘IDๆ˜ฏ่ทฏๅพ„็š„ไธ€้ƒจๅˆ†
return parsed_url.path.lstrip('/')
else:
return None
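# Both URL shapes are handled; with a placeholder id "VIDEO_ID":
#   extract_youtube_id("https://www.youtube.com/watch?v=VIDEO_ID")  -> "VIDEO_ID"
#   extract_youtube_id("https://youtu.be/VIDEO_ID")                 -> "VIDEO_ID"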
def get_transcript_by_yt_api(video_id):
languages = ['zh-TW', 'zh-Hant', 'zh', 'en'] # ๅ„ชๅ…ˆ้ †ๅบๅˆ—่กจ
for language in languages:
try:
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
print("===transcript===")
print(transcript)
print("===transcript===")
return transcript # ๆˆๅŠŸ็ฒๅ–ๅญ—ๅน•๏ผŒ็›ดๆŽฅ่ฟ”ๅ›ž็ตๆžœ
except NoTranscriptFound:
continue # ็•ถๅ‰่ชž่จ€็š„ๅญ—ๅน•ๆฒ’ๆœ‰ๆ‰พๅˆฐ๏ผŒ็นผ็บŒๅ˜—่ฉฆไธ‹ไธ€ๅ€‹่ชž่จ€
return None # ๆ‰€ๆœ‰ๅ˜—่ฉฆ้ƒฝๅคฑๆ•—๏ผŒ่ฟ”ๅ›žNone
def generate_transcription_by_whisper(video_id):
youtube_url = f'https://www.youtube.com/watch?v={video_id}'
codec_name = "mp3"
outtmpl = f"{OUTPUT_PATH}/{video_id}.%(ext)s"
ydl_opts = {
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': codec_name,
'preferredquality': '192'
}],
'outtmpl': outtmpl,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([youtube_url])
audio_path = f"{OUTPUT_PATH}/{video_id}.{codec_name}"
full_audio = AudioSegment.from_mp3(audio_path)
max_part_duration = 10 * 60 * 1000 # 10 minutes
full_duration = len(full_audio) # in milliseconds
parts = math.ceil(full_duration / max_part_duration)
print(f"parts: {parts}")
transcription = []
for i in range(parts):
print(f"== i: {i}==")
start_time = i * max_part_duration
end_time = min((i + 1) * max_part_duration, full_duration)
print(f"time: {start_time/1000} - {end_time/1000}")
chunk = full_audio[start_time:end_time]
chunk_path = f"{OUTPUT_PATH}/{video_id}_part_{i}.{codec_name}"
chunk.export(chunk_path, format=codec_name)
try:
with open(chunk_path, "rb") as chunk_file:
response = OPEN_AI_CLIENT.audio.transcriptions.create(
model="whisper-1",
file=chunk_file,
response_format="verbose_json",
timestamp_granularities=["segment"],
prompt="Transcribe the following audio file. if content is chinese, please using 'language: zh-TW' ",
)
# Adjusting the timestamps for the chunk based on its position in the full audio
adjusted_segments = [{
'text': segment['text'],
'start': math.ceil(segment['start'] + start_time / 1000.0), # Converting milliseconds to seconds
'end': math.ceil(segment['end'] + start_time / 1000.0),
'duration': math.ceil(segment['end'] - segment['start'])
} for segment in response.segments]
transcription.extend(adjusted_segments)
except Exception as e:
print(f"Error processing chunk {i}: {str(e)}")
# Remove temporary chunk files after processing
os.remove(chunk_path)
return transcription
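# The audio is split into <=10-minute chunks so each upload stays well under the
# Whisper API's file-size limit (about 25 MB at the time of writing); the 10-minute
# figure is this app's own choice. Each chunk's segment timestamps are shifted by the
# chunk's offset so the merged transcript uses absolute positions in the full audio.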
def process_transcript_and_screenshots_on_gcs(video_id):
print("====process_transcript_and_screenshots_on_gcs====")
# GCS
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
# ้€ๅญ—็จฟๆ–‡ไปถๅ
transcript_file_name = f'{video_id}_transcript.json'
transcript_blob_name = f"{video_id}/{transcript_file_name}"
# ๆฃ€ๆŸฅ้€ๅญ—็จฟๆ˜ฏๅฆๅญ˜ๅœจ
is_new_transcript = False
is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
if not is_transcript_exists:
print("้€ๅญ—็จฟๆ–‡ไปถไธๅญ˜ๅœจไบŽGCSไธญ๏ผŒ้‡ๆ–ฐๅปบ็ซ‹")
# ไปŽYouTube่Žทๅ–้€ๅญ—็จฟๅนถไธŠไผ 
try:
transcript = get_transcript_by_yt_api(video_id)
except Exception:
# fall back to OpenAI Whisper transcription
print("===call open ai whisper===")
transcript = generate_transcription_by_whisper(video_id)
if transcript:
print("ๆˆๅŠŸ็ฒๅ–ๅญ—ๅน•")
else:
print("ๆฒ’ๆœ‰ๆ‰พๅˆฐๅญ—ๅน•")
transcript = generate_transcription_by_whisper(video_id)
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
is_new_transcript = True
else:
# ้€ๅญ—็จฟๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝ้€ๅญ—็จฟๅ†…ๅฎน
print("้€ๅญ—็จฟๅทฒๅญ˜ๅœจไบŽGCSไธญ")
transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
transcript = json.loads(transcript_text)
# print("===็ขบ่ชๅ…ถไป–่ก็”Ÿๆ–‡ไปถ===")
# source = "gcs"
# get_questions(video_id, transcript_text, source)
# get_video_id_summary(video_id, transcript_text, source)
# get_mind_map(video_id, transcript_text, source)
# print("===็ขบ่ชๅ…ถไป–่ก็”Ÿๆ–‡ไปถ end ===")
# ่™•็†ๆˆชๅœ–
for entry in transcript:
if 'img_file_id' not in entry:
# ๆชขๆŸฅ OUTPUT_PATH ๆ˜ฏๅฆๅญ˜ๅœจ video_id.mp4
video_path = f'{OUTPUT_PATH}/{video_id}.mp4'
if not os.path.exists(video_path):
# try 5 times ๅฆ‚ๆžœ้ƒฝๅคฑๆ•—ๅฐฑ raise
for i in range(5):
try:
download_youtube_video(video_id)
break
except Exception as e:
if i == 4:
raise gr.Error(f"ไธ‹่ฝฝ่ง†้ข‘ๅคฑ่ดฅ: {str(e)}")
time.sleep(5)
# ๆˆชๅ›พ
screenshot_path = screenshot_youtube_video(video_id, entry['start'])
screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
entry['img_file_id'] = img_file_id
print(f"ๆˆชๅ›พๅทฒไธŠไผ ๅˆฐGCS: {img_file_id}")
is_new_transcript = True
# ็ขบ่ชๆ˜ฏๅฆๆ›ดๆ–ฐ้€ๅญ—็จฟๆ–‡ไปถ
if is_new_transcript:
# ๆ›ดๆ–ฐ้€ๅญ—็จฟๆ–‡ไปถ
print("===ๆ›ดๆ–ฐ้€ๅญ—็จฟๆ–‡ไปถ===")
print(transcript)
print("===ๆ›ดๆ–ฐ้€ๅญ—็จฟๆ–‡ไปถ===")
updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
print("้€ๅญ—็จฟๅทฒๆ›ดๆ–ฐ๏ผŒๅŒ…ๆ‹ฌๆˆชๅ›พ้“พๆŽฅ")
updated_transcript_json = json.loads(updated_transcript_text)
else:
updated_transcript_json = transcript
return updated_transcript_json
def process_youtube_link(password, link):
verify_password(password)
# Fetch (or rebuild) the transcript plus per-segment screenshots via GCS;
# falls back to Whisper transcription when no YouTube captions are available
video_id = extract_youtube_id(link)
try:
transcript = process_transcript_and_screenshots_on_gcs(video_id)
except Exception as e:
error_msg = f" {video_id} ้€ๅญ—็จฟ้Œฏ่ชค: {str(e)}"
print("===process_youtube_link error===")
print(error_msg)
raise gr.Error(error_msg)
original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
formatted_transcript = []
formatted_simple_transcript = []
for entry in transcript:
start_time = format_seconds_to_time(entry['start'])
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
embed_url = get_embedded_youtube_link(video_id, entry['start'])
img_file_id = entry['img_file_id']
screenshot_path = img_file_id
line = {
"start_time": start_time,
"end_time": end_time,
"text": entry['text'],
"embed_url": embed_url,
"screenshot_path": screenshot_path
}
formatted_transcript.append(line)
# formatted_simple_transcript ๅช่ฆ start_time, end_time, text
simple_line = {
"start_time": start_time,
"end_time": end_time,
"text": entry['text']
}
formatted_simple_transcript.append(simple_line)
global TRANSCRIPTS
TRANSCRIPTS = formatted_transcript
# ๅŸบไบŽ้€ๅญ—็จฟ็”Ÿๆˆๅ…ถไป–ๆ‰€้œ€็š„่พ“ๅ‡บ
source = "gcs"
questions = get_questions(video_id, formatted_simple_transcript, source)
questions_json = json.dumps(questions, ensure_ascii=False, indent=2)
summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
summary_text = summary_json["summary"]
summary = summary_json["summary"]
key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
key_moments = key_moments_json["key_moments"]
key_moments_text = json.dumps(key_moments, ensure_ascii=False, indent=2)
key_moments_html = get_key_moments_html(key_moments)
html_content = format_transcript_to_html(formatted_transcript)
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
first_image = formatted_transcript[0]['screenshot_path']
# first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
first_text = formatted_transcript[0]['text']
mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
mind_map = mind_map_json["mind_map"]
mind_map_html = get_mind_map_html(mind_map)
reading_passage_json = get_reading_passage(video_id, formatted_simple_transcript, source)
reading_passage_text = reading_passage_json["reading_passage"]
reading_passage = reading_passage_json["reading_passage"]
meta_data = get_meta_data(video_id)
subject = meta_data["subject"]
grade = meta_data["grade"]
# ็กฎไฟ่ฟ”ๅ›žไธŽ UI ็ป„ไปถ้ข„ๆœŸๅŒน้…็š„่พ“ๅ‡บ
return video_id, \
questions_json, \
questions[0] if len(questions) > 0 else "", \
questions[1] if len(questions) > 1 else "", \
questions[2] if len(questions) > 2 else "", \
original_transcript, \
summary_text, \
summary, \
key_moments_text, \
key_moments_html, \
mind_map, \
mind_map_html, \
html_content, \
simple_html_content, \
first_image, \
first_text, \
reading_passage_text, \
reading_passage, \
subject, \
grade
def create_formatted_simple_transcript(transcript):
formatted_simple_transcript = []
for entry in transcript:
start_time = format_seconds_to_time(entry['start'])
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
line = {
"start_time": start_time,
"end_time": end_time,
"text": entry['text']
}
formatted_simple_transcript.append(line)
return formatted_simple_transcript
def create_formatted_transcript(video_id, transcript):
formatted_transcript = []
for entry in transcript:
start_time = format_seconds_to_time(entry['start'])
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
embed_url = get_embedded_youtube_link(video_id, entry['start'])
img_file_id = entry['img_file_id']
screenshot_path = img_file_id
line = {
"start_time": start_time,
"end_time": end_time,
"text": entry['text'],
"embed_url": embed_url,
"screenshot_path": screenshot_path
}
formatted_transcript.append(line)
return formatted_transcript
def format_transcript_to_html(formatted_transcript):
html_content = ""
for entry in formatted_transcript:
html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
html_content += f"<p>{entry['text']}</p>"
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
return html_content
def format_simple_transcript_to_html(formatted_transcript):
html_content = ""
for entry in formatted_transcript:
html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
html_content += f"<p>{entry['text']}</p>"
return html_content
def get_embedded_youtube_link(video_id, start_time):
int_start_time = int(start_time)
embed_url = f"https://www.youtube.com/embed/{video_id}?start={int_start_time}&autoplay=1"
return embed_url
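# e.g. get_embedded_youtube_link("abc123", 65.2)
#   -> "https://www.youtube.com/embed/abc123?start=65&autoplay=1"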
def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
# Construct the full YouTube URL
youtube_url = f'https://www.youtube.com/watch?v={youtube_id}'
# Create the output directory if it doesn't exist
if not os.path.exists(output_path):
os.makedirs(output_path)
# Download the video
yt = YouTube(youtube_url)
video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
video_stream.download(output_path=output_path, filename=youtube_id+".mp4")
print(f"Video downloaded successfully: {output_path}/{youtube_id}.mp4")
def screenshot_youtube_video(youtube_id, snapshot_sec):
video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
file_name = f"{youtube_id}_{snapshot_sec}.jpg"
with VideoFileClip(video_path) as video:
screenshot_path = f'{OUTPUT_PATH}/{file_name}'
video.save_frame(screenshot_path, snapshot_sec)
return screenshot_path
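# save_frame grabs the frame at snapshot_sec (in seconds) from the locally downloaded
# mp4, so download_youtube_video must already have produced {OUTPUT_PATH}/{youtube_id}.mp4.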
# ---- Web ----
# def process_web_link(link):
# # ๆŠ“ๅ–ๅ’Œ่งฃๆž็ฝ‘้กตๅ†…ๅฎน
# response = requests.get(link)
# soup = BeautifulSoup(response.content, 'html.parser')
# return soup.get_text()
# ---- LLM Generator ----
def get_reading_passage(video_id, df_string, source):
if source == "gcs":
print("===get_reading_passage on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_reading_passage.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅ reading_passage ๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_file_exists:
reading_passage = generate_reading_passage(df_string)
reading_passage_json = {"reading_passage": str(reading_passage)}
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
print("reading_passageๅทฒไธŠไผ ๅˆฐGCS")
else:
# reading_passageๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("reading_passageๅทฒๅญ˜ๅœจไบŽGCSไธญ")
reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
reading_passage_json = json.loads(reading_passage_text)
elif source == "drive":
print("===get_reading_passage on drive===")
service = init_drive_service()
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
file_name = f'{video_id}_reading_passage.json'
# ๆฃ€ๆŸฅ reading_passage ๆ˜ฏๅฆๅญ˜ๅœจ
exists, file_id = check_file_exists(service, folder_id, file_name)
if not exists:
reading_passage = generate_reading_passage(df_string)
reading_passage_json = {"reading_passage": str(reading_passage)}
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
upload_content_directly(service, file_name, folder_id, reading_passage_text)
print("reading_passageๅทฒไธŠๅ‚ณๅˆฐGoogle Drive")
else:
# reading_passageๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("reading_passageๅทฒๅญ˜ๅœจไบŽGoogle Driveไธญ")
reading_passage_text = download_file_as_string(service, file_id)
return reading_passage_json
def generate_reading_passage(df_string):
# ไฝฟ็”จ OpenAI ็”ŸๆˆๅŸบไบŽไธŠไผ ๆ•ฐๆฎ็š„้—ฎ้ข˜
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไฝฟ็”จ zh-TW"
user_content = f"""
่ซ‹ๆ นๆ“š {df_string}
ๆ–‡ๆœฌ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž
ๅนซๆˆ‘็ต„ๅˆๆˆ Reading Passage
ไธฆๆฝค็จฟ่ฎ“ๆ–‡ๅฅ้€š้ †
่ซ‹ไธ€ๅฎš่ฆไฝฟ็”จ็น้ซ”ไธญๆ–‡ zh-TW๏ผŒไธฆ็”จๅฐ็ฃไบบ็š„ๅฃ่ชž
็”ข็”Ÿ็š„็ตๆžœไธ่ฆๅ‰ๅพŒๆ–‡่งฃ้‡‹๏ผŒไนŸไธ่ฆๆ•˜่ฟฐ้€™็ฏ‡ๆ–‡็ซ ๆ€Ž้บผ็”ข็”Ÿ็š„
ๅช้œ€่ฆๅฐˆๆณจๆไพ› Reading Passage๏ผŒๅญ—ๆ•ธๅœจ 500 ๅญ—ไปฅๅ…ง
ๆ•˜่ฟฐไธญ๏ผŒ่ซ‹ๆŠŠๆ•ธๅญธๆˆ–ๆ˜ฏๅฐˆๆฅญ่ก“่ชž๏ผŒ็”จ Latex ๅŒ…่ฆ†๏ผˆ$...$๏ผ‰๏ผŒไธฆไธ”ไธ่ฆๅŽปๆ”นๅŽŸๆœฌ็š„ๆ–‡็ซ 
ๅŠ ๆธ›ไน˜้™คใ€ๆ น่™Ÿใ€ๆฌกๆ–น็ญ‰็ญ‰็š„้‹็ฎ—ๅผๅฃ่ชžไนŸๆ›ๆˆ LATEX ๆ•ธๅญธ็ฌฆ่™Ÿ
"""
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4000,
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
reading_passage = response.choices[0].message.content.strip()
print("=====reading_passage=====")
print(reading_passage)
print("=====reading_passage=====")
return reading_passage
def text_to_speech(video_id, text):
tts = gTTS(text, lang='en')
filename = f'{video_id}_reading_passage.mp3'
tts.save(filename)
return filename
def get_mind_map(video_id, df_string, source):
if source == "gcs":
print("===get_mind_map on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_mind_map.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_file_exists:
mind_map = generate_mind_map(df_string)
mind_map_json = {"mind_map": str(mind_map)}
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
print("mind_mapๅทฒไธŠๅ‚ณๅˆฐGCS")
else:
# mindmapๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("mind_mapๅทฒๅญ˜ๅœจไบŽGCSไธญ")
mind_map_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
mind_map_json = json.loads(mind_map_text)
elif source == "drive":
print("===get_mind_map on drive===")
service = init_drive_service()
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
file_name = f'{video_id}_mind_map.json'
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
exists, file_id = check_file_exists(service, folder_id, file_name)
if not exists:
mind_map = generate_mind_map(df_string)
mind_map_json = {"mind_map": str(mind_map)}
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
upload_content_directly(service, file_name, folder_id, mind_map_text)
print("mind_mapๅทฒไธŠๅ‚ณๅˆฐGoogle Drive")
else:
# mindmapๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("mind_mapๅทฒๅญ˜ๅœจไบŽGoogle Driveไธญ")
mind_map_text = download_file_as_string(service, file_id)
mind_map_json = json.loads(mind_map_text)
return mind_map_json
def generate_mind_map(df_string):
# ไฝฟ็”จ OpenAI ็”ŸๆˆๅŸบไบŽไธŠไผ ๆ•ฐๆฎ็š„้—ฎ้ข˜
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไฝฟ็”จ zh-TW"
user_content = f"""
่ซ‹ๆ นๆ“š {df_string} ๆ–‡ๆœฌๅปบ็ซ‹ markdown ๅฟƒๆ™บๅœ–
ๆณจๆ„๏ผšไธ้œ€่ฆๅ‰ๅพŒๆ–‡ๆ•˜่ฟฐ๏ผŒ็›ดๆŽฅ็ตฆๅ‡บ markdown ๆ–‡ๆœฌๅณๅฏ
้€™ๅฐๆˆ‘ๅพˆ้‡่ฆ
"""
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4000,
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
mind_map = response.choices[0].message.content.strip()
print("=====mind_map=====")
print(mind_map)
print("=====mind_map=====")
return mind_map
def get_mind_map_html(mind_map):
mind_map_markdown = mind_map.replace("```markdown", "").replace("```", "")
mind_map_html = f"""
<div class="markmap">
<script type="text/template">
{mind_map_markdown}
</script>
</div>
"""
return mind_map_html
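# The <div class="markmap"><script type="text/template"> wrapper follows the markmap
# autoloader convention; the hosting page is assumed to load the markmap script itself.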
def get_video_id_summary(video_id, df_string, source):
if source == "gcs":
print("===get_video_id_summary on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_summary.json'
summary_file_blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅ summary_file ๆ˜ฏๅฆๅญ˜ๅœจ
is_summary_file_exists = GCS_SERVICE.check_file_exists(bucket_name, summary_file_blob_name)
if not is_summary_file_exists:
summary = generate_summarise(df_string)
summary_json = {"summary": str(summary)}
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
print("summaryๅทฒไธŠไผ ๅˆฐGCS")
else:
# summaryๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("summaryๅทฒๅญ˜ๅœจไบŽGCSไธญ")
summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
summary_json = json.loads(summary_text)
elif source == "drive":
print("===get_video_id_summary===")
service = init_drive_service()
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
file_name = f'{video_id}_summary.json'
# ๆฃ€ๆŸฅ้€ๅญ—็จฟๆ˜ฏๅฆๅญ˜ๅœจ
exists, file_id = check_file_exists(service, folder_id, file_name)
if not exists:
summary = generate_summarise(df_string)
summary_json = {"summary": str(summary)}
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
try:
upload_content_directly(service, file_name, folder_id, summary_text)
print("summaryๅทฒไธŠๅ‚ณๅˆฐGoogle Drive")
except Exception as e:
error_msg = f" {video_id} ๆ‘˜่ฆ้Œฏ่ชค: {str(e)}"
print("===get_video_id_summary error===")
print(error_msg)
print("===get_video_id_summary error===")
else:
# ้€ๅญ—็จฟๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝ้€ๅญ—็จฟๅ†…ๅฎน
print("summaryๅทฒๅญ˜ๅœจGoogle Driveไธญ")
summary_text = download_file_as_string(service, file_id)
summary_json = json.loads(summary_text)
return summary_json
def generate_summarise(df_string):
# ไฝฟ็”จ OpenAI ็”ŸๆˆๅŸบไบŽไธŠไผ ๆ•ฐๆฎ็š„้—ฎ้ข˜
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไฝฟ็”จ zh-TW"
user_content = f"""
่ซ‹ๆ นๆ“š {df_string}๏ผŒๅˆคๆ–ท้€™ไปฝๆ–‡ๆœฌ
ๆ ผๅผ็‚บ Markdown
ๆ•ด้ซ”ๆ‘˜่ฆๅœจไธ€็™พๅญ—ไปฅๅ…ง
้‡้ปžๆฆ‚ๅฟตๅˆ—ๅ‡บ bullet points๏ผŒ่‡ณๅฐ‘ไธ‰ๅ€‹๏ผŒๆœ€ๅคšไบ”ๅ€‹
ไปฅๅŠๅฏ่ƒฝ็š„็ต่ซ–่ˆ‡็ตๅฐพๅปถไผธๅฐๅ•้กŒๆไพ›ๅญธ็”Ÿไฝœๅๆ€
ๆ•˜่ฟฐไธญ๏ผŒ่ซ‹ๆŠŠๆ•ธๅญธๆˆ–ๆ˜ฏๅฐˆๆฅญ่ก“่ชž๏ผŒ็”จ Latex ๅŒ…่ฆ†๏ผˆ$...$๏ผ‰
ๅŠ ๆธ›ไน˜้™คใ€ๆ น่™Ÿใ€ๆฌกๆ–น็ญ‰็ญ‰็š„้‹็ฎ—ๅผๅฃ่ชžไนŸๆ›ๆˆ LATEX ๆ•ธๅญธ็ฌฆ่™Ÿ
ๆ•ด้ซ”ๆ ผๅผ็‚บ๏ผš
## ๐Ÿ“š ๆ•ด้ซ”ๆ‘˜่ฆ
- (ไธ€ๅ€‹ bullet point....)
## ๐Ÿ”– ้‡้ปžๆฆ‚ๅฟต
- xxx
- xxx
- xxx
## ๐Ÿ’ก ็‚บไป€้บผๆˆ‘ๅ€‘่ฆๅญธ้€™ๅ€‹๏ผŸ
- (ไธ€ๅ€‹ bullet point....)
## โ“ ๅปถไผธๅฐๅ•้กŒ
- (ไธ€ๅ€‹ bullet point....)
"""
# ๐Ÿ—‚๏ธ 1. ๅ…งๅฎน้กžๅž‹๏ผš๏ผŸ
# ๐Ÿ“š 2. ๆ•ด้ซ”ๆ‘˜่ฆ
# ๐Ÿ”– 3. ๆขๅˆ—ๅผ้‡้ปž
# ๐Ÿ”‘ 4. ้—œ้ตๆ™‚ๅˆป๏ผˆๆฎต่ฝๆ‘˜่ฆ๏ผ‰
# ๐Ÿ’ก 5. ็ต่ซ–ๅๆ€๏ผˆ็‚บไป€้บผๆˆ‘ๅ€‘่ฆๅญธ้€™ๅ€‹๏ผŸ๏ผ‰
# โ“ 6. ๅปถไผธๅฐๅ•้กŒ
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4000,
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
df_summarise = response.choices[0].message.content.strip()
print("=====df_summarise=====")
print(df_summarise)
print("=====df_summarise=====")
return df_summarise
def get_questions(video_id, df_string, source="gcs"):
if source == "gcs":
# ๅŽป gcs ็ขบ่ชๆ˜ฏๆœ‰ๆœ‰ video_id_questions.json
print("===get_questions on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_questions.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
is_questions_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_questions_exists:
questions = generate_questions(df_string)
questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
print("questionsๅทฒไธŠๅ‚ณๅˆฐGCS")
else:
# ้€ๅญ—็จฟๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝ้€ๅญ—็จฟๅ†…ๅฎน
print("questionsๅทฒๅญ˜ๅœจไบŽGCSไธญ")
questions_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
questions = json.loads(questions_text)
elif source == "drive":
# ๅŽป g drive ็ขบ่ชๆ˜ฏๆœ‰ๆœ‰ video_id_questions.json
print("===get_questions===")
service = init_drive_service()
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
file_name = f'{video_id}_questions.json'
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
exists, file_id = check_file_exists(service, folder_id, file_name)
if not exists:
questions = generate_questions(df_string)
questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
upload_content_directly(service, file_name, folder_id, questions_text)
print("questionsๅทฒไธŠๅ‚ณๅˆฐGoogle Drive")
else:
# ้€ๅญ—็จฟๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝ้€ๅญ—็จฟๅ†…ๅฎน
print("questionsๅทฒๅญ˜ๅœจไบŽGoogle Driveไธญ")
questions_text = download_file_as_string(service, file_id)
questions = json.loads(questions_text)
q1 = questions[0] if len(questions) > 0 else ""
q2 = questions[1] if len(questions) > 1 else ""
q3 = questions[2] if len(questions) > 2 else ""
print("=====get_questions=====")
print(f"q1: {q1}")
print(f"q2: {q2}")
print(f"q3: {q3}")
print("=====get_questions=====")
return q1, q2, q3
def generate_questions(df_string):
# ไฝฟ็”จ OpenAI ็”ŸๆˆๅŸบไบŽไธŠไผ ๆ•ฐๆฎ็š„้—ฎ้ข˜
if isinstance(df_string, str):
try:
df_string_json = json.loads(df_string)
except json.JSONDecodeError:
# plain text (e.g. from process_file) is wrapped so the loop below still works
df_string_json = [{"text": df_string}]
else:
df_string_json = df_string
content_text = ""
for entry in df_string_json:
content_text += entry["text"] + "๏ผŒ"
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไธฆ็”จๆ—ขๆœ‰่ณ‡ๆ–™็‚บๆœฌ่ณช็Œœๆธฌ็”จๆˆถๅฏ่ƒฝๆœƒๅ•็š„ๅ•้กŒ๏ผŒไฝฟ็”จ zh-TW"
user_content = f"่ซ‹ๆ นๆ“š {content_text} ็”Ÿๆˆไธ‰ๅ€‹ๅ•้กŒ๏ผŒไธฆ็”จ JSON ๆ ผๅผ่ฟ”ๅ›ž questions:[q1็š„ๆ•˜่ฟฐtext, q2็š„ๆ•˜่ฟฐtext, q3็š„ๆ•˜่ฟฐtext]"
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
response_format = { "type": "json_object" }
print("=====messages=====")
print(messages)
print("=====messages=====")
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4000,
"response_format": response_format
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
questions = json.loads(response.choices[0].message.content)["questions"]
print("=====json_response=====")
print(questions)
print("=====json_response=====")
return questions
def change_questions(password, df_string):
verify_password(password)
questions = generate_questions(df_string)
q1 = questions[0] if len(questions) > 0 else ""
q2 = questions[1] if len(questions) > 1 else ""
q3 = questions[2] if len(questions) > 2 else ""
print("=====get_questions=====")
print(f"q1: {q1}")
print(f"q2: {q2}")
print(f"q3: {q3}")
print("=====get_questions=====")
return q1, q2, q3
# ใ€Œ้—œ้ตๆ™‚ๅˆปใ€ๅฆๅค–็จ็ซ‹ๆˆไธ€ๅ€‹ tab๏ผŒๆ™‚้–“ๆˆณ่จ˜ๅ’Œๆ–‡ๅญ—็š„ไธ‹ๆ–น้™„ไธŠๅฐๆ‡‰็š„ๆˆชๅœ–๏ผŒ้‡้ปžๆ‘˜่ฆ็š„ใ€Œ้—œ้ตๆ™‚ๅˆปใ€ๅŠ ไธŠๆˆชๅœ–่ณ‡่จŠ
def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source):
if source == "gcs":
print("===get_key_moments on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_key_moments.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
is_key_moments_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_key_moments_exists:
key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
key_moments_json = {"key_moments": key_moments}
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
print("key_momentsๅทฒไธŠๅ‚ณๅˆฐGCS")
else:
# key_momentsๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("key_momentsๅทฒๅญ˜ๅœจไบŽGCSไธญ")
key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
key_moments_json = json.loads(key_moments_text)
# ๆชขๆŸฅ key_moments ๆ˜ฏๅฆๆœ‰ keywords
print("===ๆชขๆŸฅ key_moments ๆ˜ฏๅฆๆœ‰ keywords===")
has_keywords_added = False
for key_moment in key_moments_json["key_moments"]:
if "keywords" not in key_moment:
transcript = key_moment["transcript"]
key_moment["keywords"] = generate_key_moments_keywords(transcript)
print("===keywords===")
print(key_moment["keywords"])
print("===keywords===")
has_keywords_added = True
if has_keywords_added:
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
key_moments_json = json.loads(key_moments_text)
elif source == "drive":
print("===get_key_moments on drive===")
service = init_drive_service()
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
file_name = f'{video_id}_key_moments.json'
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
exists, file_id = check_file_exists(service, folder_id, file_name)
if not exists:
key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
key_moments_json = {"key_moments": key_moments}
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
upload_content_directly(service, file_name, folder_id, key_moments_text)
print("key_momentsๅทฒไธŠๅ‚ณๅˆฐGoogle Drive")
else:
# key_momentsๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("key_momentsๅทฒๅญ˜ๅœจไบŽGoogle Driveไธญ")
key_moments_text = download_file_as_string(service, file_id)
key_moments_json = json.loads(key_moments_text)
return key_moments_json
def generate_key_moments(formatted_simple_transcript, formatted_transcript):
# ไฝฟ็”จ OpenAI ็”ŸๆˆๅŸบไบŽไธŠไผ ๆ•ฐๆฎ็š„้—ฎ้ข˜
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไฝฟ็”จ zh-TW"
user_content = f"""
่ซ‹ๆ นๆ“š {formatted_simple_transcript} ๆ–‡ๆœฌ๏ผŒๆๅ–ๅ‡บ้‡้ปžๆ‘˜่ฆ๏ผŒไธฆ็ตฆๅ‡บๅฐๆ‡‰็š„ๆ™‚้–“่ปธ
1. ๅฐ็ฏ„ๅœๅˆ‡ๅ‡บไธๅŒๆฎต่ฝ็š„็›ธๅฐๆ‡‰ๆ™‚้–“่ปธ็š„้‡้ปžๆ‘˜่ฆ๏ผŒ
2. ๆฏไธ€ๅฐๆฎตๆœ€ๅคšไธ่ถ…้Ž 1/5 ็š„็ธฝๅ…งๅฎน๏ผŒไนŸๅฐฑๆ˜ฏๅคง็ด„ 3~5ๆฎต็š„้‡้ปž๏ผˆไพ‹ๅฆ‚ไบ”๏ฝžๅๅˆ†้˜็š„ๅฝฑ็‰‡ๅฐฑไธ€ๆฎตๅคง็ด„1~2ๅˆ†้˜๏ผŒๆœ€ๅคšไธ‰ๅˆ†้˜๏ผŒไฝ†ๅฆ‚ๆžœๆ˜ฏ่ถ…้Žๅๅˆ†้˜็š„ๅฝฑ็‰‡๏ผŒ้‚ฃไธ€ๅฐๆฎตๅคง็ด„ 2~3ๅˆ†้˜๏ผŒไปฅๆญค้กžๆŽจ๏ผ‰
3. ๆณจๆ„ไธ่ฆ้บๆผไปปไฝ•ไธ€ๆฎตๆ™‚้–“่ปธ็š„ๅ…งๅฎน ๅพž้›ถ็ง’้–‹ๅง‹
4. ๅฆ‚ๆžœ้ ญๅฐพ็š„ๆƒ…็ฏ€ไธๆ˜ฏ้‡้ปž๏ผŒๅฐฑไฝตๅ…ฅๅˆฐ้™„่ฟ‘็š„ๆฎต่ฝ๏ผŒ็‰นๅˆฅๆ˜ฏๆ‰“ๆ‹›ๅ‘ผๆˆ–ๆ˜ฏไป‹็ดนไบบ็‰ฉๅฐฑๆ˜ฏไธ้‡่ฆ็š„ๆƒ…็ฏ€
5. transcript ้€ๅญ—็จฟ็š„้›†ๅˆ๏ผˆ่ฆๆœ‰ๅˆ็†็š„ๆจ™้ปž็ฌฆ่™Ÿ)๏ผŒ่ฆๅฎŒๆ•ด่ทŸๅŽŸไพ†็š„ไธ€ๆจฃ๏ผŒไธ่ฆ็œ็•ฅ
ไปฅ้€™็จฎๆ–นๅผๅˆ†ๆžๆ•ดๅ€‹ๆ–‡ๆœฌ๏ผŒๅพž้›ถ็ง’้–‹ๅง‹ๅˆ†ๆž๏ผŒ็›ดๅˆฐ็ตๆŸใ€‚้€™ๅพˆ้‡่ฆ
6. ้—œ้ตๅญ—ๅพžtranscript extract to keyword๏ผŒไฟ็•™ๅฐˆๅฎถๅๅญ—ใ€ๅฐˆๆฅญ่ก“่ชžใ€ๅนดไปฝใ€ๆ•ธๅญ—ใ€ๆœŸๅˆŠๅ็จฑใ€ๅœฐๅใ€ๆ•ธๅญธๅ…ฌๅผ
7. text, transcript, keywords please use or transfer zh-TW, it's very important
ไธฆ็”จ JSON ๆ ผๅผ่ฟ”ๅ›ž key_moments:[{{
"start": "00:00",
"end": "01:00",
"text": "้€ๅญ—็จฟ็š„้‡้ปžๆ‘˜่ฆ",
"transcript": "้€ๅญ—็จฟ็š„้›†ๅˆ๏ผˆ่ฆๆœ‰ๅˆ็†็š„ๆจ™้ปž็ฌฆ่™Ÿ)๏ผŒ่ฆๅฎŒๆ•ด่ทŸๅŽŸไพ†็š„ไธ€ๆจฃ๏ผŒไธ่ฆ็œ็•ฅ",
"keywords": ["้—œ้ตๅญ—", "้—œ้ตๅญ—"]
}}]
"""
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
response_format = { "type": "json_object" }
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4096,
"response_format": response_format
}
try:
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
print("===response===")
print(dict(response))
key_moments = json.loads(response.choices[0].message.content)["key_moments"]
except Exception as e:
error_msg = f" {video_id} ้—œ้ตๆ™‚ๅˆป้Œฏ่ชค: {str(e)}"
print("===generate_key_moments error===")
print(error_msg)
print("===generate_key_moments error===")
raise Exception(error_msg)
print("=====key_moments=====")
print(key_moments)
print("=====key_moments=====")
image_links = {entry['start_time']: entry['screenshot_path'] for entry in formatted_transcript}
for moment in key_moments:
start_time = moment['start']
end_time = moment['end']
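# Note: the range check below relies on lexicographic comparison of time strings; it
# assumes the model's start/end values are zero-padded and comparable with the
# "HH:MM:SS" start_time keys built from formatted_transcript.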
moment_images = [image_links[time] for time in image_links if start_time <= time <= end_time]
moment['images'] = moment_images
return key_moments
def generate_key_moments_keywords(transcript):
system_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹ๆ นๆ“šไปฅไธ‹ๆ–‡ๆœฌๆๅ–้—œ้ตๅญ—"
user_content = f"""transcript extract to keyword
ไฟ็•™ๅฐˆๅฎถๅๅญ—ใ€ๅฐˆๆฅญ่ก“่ชžใ€ๅนดไปฝใ€ๆ•ธๅญ—ใ€ๆœŸๅˆŠๅ็จฑใ€ๅœฐๅใ€ๆ•ธๅญธๅ…ฌๅผใ€ๆ•ธๅญธ่กจ็คบๅผใ€็‰ฉ็†ๅŒ–ๅญธ็ฌฆ่™Ÿ๏ผŒ
ไธ็”จ็ตฆไธŠไธ‹ๆ–‡๏ผŒ็›ดๆŽฅ็ตฆๅ‡บ้—œ้ตๅญ—๏ผŒไฝฟ็”จ zh-TW๏ผŒ็”จ้€—่™Ÿๅˆ†้š”๏ผŒ example: ้—œ้ตๅญ—1, ้—œ้ตๅญ—2
transcript๏ผš{transcript}
"""
messages = [
{"role": "system", "content": system_content},
{"role": "user", "content": user_content}
]
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 100,
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
keywords = response.choices[0].message.content.strip().split(", ")
return keywords
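# The split assumes the model answers as one comma-separated line ("keyword1, keyword2");
# any other separator would leave everything in a single keyword string.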
def get_key_moments_html(key_moments):
css = """
<style>
#gallery-main {
display: flex;
align-items: center;
margin-bottom: 20px;
}
#gallery {
position: relative;
width: 50%;
flex: 1;
}
#text-content {
flex: 2;
margin-left: 20px;
}
#gallery #gallery-container{
position: relative;
width: 100%;
height: 0px;
padding-bottom: 56.7%; /* 16/9 ratio */
background-color: blue;
}
#gallery #gallery-container #gallery-content{
position: absolute;
top: 0px;
right: 0px;
bottom: 0px;
left: 0px;
height: 100%;
display: flex;
scroll-snap-type: x mandatory;
overflow-x: scroll;
scroll-behavior: smooth;
}
#gallery #gallery-container #gallery-content .gallery__item{
width: 100%;
height: 100%;
flex-shrink: 0;
scroll-snap-align: start;
scroll-snap-stop: always;
position: relative;
}
#gallery #gallery-container #gallery-content .gallery__item img{
display: block;
width: 100%;
height: 100%;
object-fit: contain;
background-color: white;
}
.click-zone{
position: absolute;
width: 20%;
height: 100%;
z-index: 3;
}
.click-zone.click-zone-prev{
left: 0px;
}
.click-zone.click-zone-next{
right: 0px;
}
#gallery:not(:hover) .arrow{
opacity: 0.8;
}
.arrow{
text-align: center;
z-index: 3;
position: absolute;
display: block;
width: 25px;
height: 25px;
line-height: 25px;
background-color: black;
border-radius: 50%;
text-decoration: none;
color: black;
opacity: 0.8;
transition: opacity 200ms ease;
}
.arrow:hover{
opacity: 1;
}
.arrow span{
position: relative;
top: 2px;
}
.arrow.arrow-prev{
top: 50%;
left: 5px;
}
.arrow.arrow-next{
top: 50%;
right: 5px;
}
.arrow.arrow-disabled{
opacity:0.8;
}
#text-content {
padding: 0px 36px;
}
#text-content p {
margin-top: 10px;
}
body{
font-family: sans-serif;
margin: 0px;
padding: 0px;
}
main{
padding: 0px;
margin: 0px;
max-width: 900px;
margin: auto;
}
.hidden{
border: 0;
clip: rect(0 0 0 0);
height: 1px;
margin: -1px;
overflow: hidden;
padding: 0;
position: absolute;
width: 1px;
}
@media (max-width: 768px) {
#gallery-main {
flex-direction: column; /* ๅœจๅฐๅฑๅน•ไธŠๅ †ๅ ๅ…ƒ็ด  */
}
#gallery {
width: 100%; /* ่ฎฉ็”ปๅปŠๅ ๆปกๆ•ดไธชๅฎนๅ™จๅฎฝๅบฆ */
}
#text-content {
margin-left: 0; /* ็งป้™คๅทฆ่พน่ท๏ผŒ่ฎฉๆ–‡ๆœฌๅ†…ๅฎนๅ ๆปกๅฎฝๅบฆ */
margin-top: 20px; /* ไธบๆ–‡ๆœฌๅ†…ๅฎนๆทปๅŠ ้กถ้ƒจ้—ด่ท */
}
#gallery #gallery-container {
height: 350px; /* ๆˆ–่€…ไฝ ๅฏไปฅ่ฎพ็ฝฎไธ€ไธชๅ›บๅฎš็š„้ซ˜ๅบฆ๏ผŒ่€Œไธๆ˜ฏ็”จ padding-bottom */
padding-bottom: 0; /* ็งป้™คๅบ•้ƒจๅกซๅ…… */
}
}
</style>
"""
key_moments_html = css
for i, moment in enumerate(key_moments):
images = moment['images']
image_elements = ""
for j, image in enumerate(images):
current_id = f"img_{i}_{j}"
prev_id = f"img_{i}_{j-1}" if j-1 >= 0 else f"img_{i}_{len(images)-1}"
next_id = f"img_{i}_{j+1}" if j+1 < len(images) else f"img_{i}_0"
image_elements += f"""
<div id="{current_id}" class="gallery__item">
<a href="#{prev_id}" class="click-zone click-zone-prev">
<div class="arrow arrow-disabled arrow-prev"> < </div>
</a>
<a href="#{next_id}" class="click-zone click-zone-next">
<div class="arrow arrow-next"> > </div>
</a>
<img src="{image}">
</div>
"""
gallery_content = f"""
<div id="gallery-content">
{image_elements}
</div>
"""
key_moments_html += f"""
<div class="gallery-container" id="gallery-main">
<div id="gallery"><!-- gallery start -->
<div id="gallery-container">
{gallery_content}
</div>
</div>
<div id="text-content">
<h3>{moment['start']} - {moment['end']}</h3>
<p><strong>ๆ‘˜่ฆ: {moment['text']} </strong></p>
<p>ๅ…งๅฎน: {moment['transcript']}</p>
</div>
</div>
"""
return key_moments_html
# ---- LLM CRUD ----
def get_LLM_content(video_id, kind):
print(f"===get_{kind}===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_{kind}.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅ file ๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if is_file_exists:
content = download_blob_to_string(gcs_client, bucket_name, blob_name)
content_json = json.loads(content)
if kind == "reading_passage":
content_text = content_json["reading_passage"]
elif kind == "summary":
content_text = content_json["summary"]
else:
content_text = json.dumps(content_json, ensure_ascii=False, indent=2)
else:
content_text = ""
return content_text
def enable_edit_mode():
return gr.update(interactive=True)
def delete_LLM_content(video_id, kind):
print(f"===delete_{kind}===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_{kind}.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅ file ๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if is_file_exists:
delete_blob(gcs_client, bucket_name, blob_name)
print(f"{file_name}ๅทฒไปŽGCSไธญๅˆ ้™ค")
return gr.update(value="", interactive=False)
def update_LLM_content(video_id, new_content, kind):
print(f"===upfdate kind on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_{kind}.json'
blob_name = f"{video_id}/{file_name}"
if kind == "reading_passage":
print("=========reading_passage=======")
print(new_content)
reading_passage_json = {"reading_passage": str(new_content)}
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
updated_content = new_content
elif kind == "summary":
summary_json = {"summary": str(new_content)}
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
updated_content = new_content
elif kind == "mind_map":
mind_map_json = {"mind_map": str(new_content)}
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
updated_content = mind_map_text
elif kind == "key_moments":
# from update_LLM_btn -> new_content is a string
# create_LLM_content -> new_content is a list
if isinstance(new_content, str):
key_moments_list = json.loads(new_content)
else:
key_moments_list = new_content
key_moments_json = {"key_moments": key_moments_list}
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
updated_content = key_moments_text
elif kind == "transcript":
if isinstance(new_content, str):
transcript_json = json.loads(new_content)
else:
transcript_json = new_content
transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
updated_content = transcript_text
elif kind == "questions":
# from update_LLM_btn -> new_content is a string
# create_LLM_content -> new_content is a list
if isinstance(new_content, str):
questions_json = json.loads(new_content)
else:
questions_json = new_content
questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
updated_content = questions_text
print(f"{kind} ๅทฒๆ›ดๆ–ฐๅˆฐGCS")
return gr.update(value=updated_content, interactive=False)
def create_LLM_content(video_id, df_string, kind):
print(f"===create_{kind}===")
print(f"video_id: {video_id}")
if kind == "reading_passage":
content = generate_reading_passage(df_string)
update_LLM_content(video_id, content, kind)
elif kind == "summary":
content = generate_summarise(df_string)
update_LLM_content(video_id, content, kind)
elif kind == "mind_map":
content = generate_mind_map(df_string)
update_LLM_content(video_id, content, kind)
elif kind == "key_moments":
if isinstance(df_string, str):
transcript = json.loads(df_string)
else:
transcript = df_string
formatted_simple_transcript = create_formatted_simple_transcript(transcript)
formatted_transcript = create_formatted_transcript(video_id, transcript)
content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
update_LLM_content(video_id, content, kind)
content = json.dumps(content, ensure_ascii=False, indent=2)
elif kind == "transcript":
content = process_transcript_and_screenshots_on_gcs(video_id)
update_LLM_content(video_id, content, kind)
content = json.dumps(content, ensure_ascii=False, indent=2)
elif kind == "questions":
content = generate_questions(df_string)
update_LLM_content(video_id, content, kind)
content = json.dumps(content, ensure_ascii=False, indent=2)
return gr.update(value=content, interactive=False)
# ---- LLM refresh CRUD ----
def reading_passage_add_latex_version(video_id):
# ็ขบ่ช GCS ๆ˜ฏๅฆๆœ‰ reading_passage.json
print("===reading_passage_convert_to_latex===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_reading_passage.json'
blob_name = f"{video_id}/{file_name}"
print(f"blob_name: {blob_name}")
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_file_exists:
raise gr.Error("reading_passage ไธๅญ˜ๅœจ!")
# ้€ๅญ—็จฟๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝ้€ๅญ—็จฟๅ†…ๅฎน
print("reading_passage ๅทฒๅญ˜ๅœจไบŽGCSไธญ๏ผŒ่ฝ‰ๆ› Latex ๆจกๅผ")
reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
reading_passage_json = json.loads(reading_passage_text)
original_reading_passage = reading_passage_json["reading_passage"]
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไฝฟ็”จ zh-TW"
user_content = f"""
่ซ‹ๆ นๆ“š {original_reading_passage}
ๆ•˜่ฟฐไธญ๏ผŒ่ซ‹ๆŠŠๆ•ธๅญธๆˆ–ๆ˜ฏๅฐˆๆฅญ่ก“่ชž๏ผŒ็”จ Latex ๅŒ…่ฆ†๏ผˆ$...$๏ผ‰๏ผŒ็›ก้‡ไธ่ฆๅŽปๆ”นๅŽŸๆœฌ็š„ๆ–‡็ซ 
ๅŠ ๆธ›ไน˜้™คใ€ๆ น่™Ÿใ€ๆฌกๆ–นใ€ๅŒ–ๅญธ็ฌฆ่™Ÿใ€็‰ฉ็†็ฌฆ่™Ÿ็ญ‰็ญ‰็š„้‹็ฎ—ๅผๅฃ่ชžไนŸๆ›ๆˆ LATEX ็ฌฆ่™Ÿ
่ซ‹ไธ€ๅฎš่ฆไฝฟ็”จ็น้ซ”ไธญๆ–‡ zh-TW๏ผŒไธฆ็”จๅฐ็ฃไบบ็š„ๅฃ่ชž
็”ข็”Ÿ็š„็ตๆžœไธ่ฆๅ‰ๅพŒๆ–‡่งฃ้‡‹๏ผŒไนŸไธ่ฆๆ•˜่ฟฐ้€™็ฏ‡ๆ–‡็ซ ๆ€Ž้บผ็”ข็”Ÿ็š„
ๅช้œ€่ฆๅฐˆๆณจๆไพ› Reading Passage๏ผŒๅญ—ๆ•ธๅœจ 200~500 ๅญ—ไปฅๅ…ง
"""
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4000,
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
new_reading_passage = response.choices[0].message.content.strip()
print("=====new_reading_passage=====")
print(new_reading_passage)
print("=====new_reading_passage=====")
reading_passage_json["reading_passage"] = new_reading_passage
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
# ๅฆๅญ˜็‚บ reading_passage_latex.json
new_file_name = f'{video_id}_reading_passage_latex.json'
new_blob_name = f"{video_id}/{new_file_name}"
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, reading_passage_text)
return new_reading_passage
def summary_add_markdown_version(video_id):
# ็ขบ่ช GCS ๆ˜ฏๅฆๆœ‰ summary.json
print("===summary_convert_to_markdown===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_summary.json'
blob_name = f"{video_id}/{file_name}"
print(f"blob_name: {blob_name}")
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_file_exists:
raise gr.Error("summary ไธๅญ˜ๅœจ!")
# ้€ๅญ—็จฟๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝ้€ๅญ—็จฟๅ†…ๅฎน
print("summary ๅทฒๅญ˜ๅœจไบŽGCSไธญ๏ผŒ่ฝ‰ๆ› Markdown ๆจกๅผ")
summary_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
summary_json = json.loads(summary_text)
original_summary = summary_json["summary"]
sys_content = "ไฝ ๆ˜ฏไธ€ๅ€‹ๆ“…้•ท่ณ‡ๆ–™ๅˆ†ๆž่ทŸๅฝฑ็‰‡ๆ•™ๅญธ็š„่€ๅธซ๏ผŒuser ็‚บๅญธ็”Ÿ๏ผŒ่ซ‹็ฒพ่ฎ€่ณ‡ๆ–™ๆ–‡ๆœฌ๏ผŒ่‡ช่กŒๅˆคๆ–ท่ณ‡ๆ–™็š„็จฎ้กž๏ผŒไฝฟ็”จ zh-TW"
user_content = f"""
่ซ‹ๆ นๆ“š {original_summary}
่ฝ‰ๆ›ๆ ผๅผ็‚บ Markdown
ๅชไฟ็•™๏ผš๐Ÿ“š ๆ•ด้ซ”ๆ‘˜่ฆใ€๐Ÿ”– ้‡้ปžๆฆ‚ๅฟตใ€๐Ÿ’ก ็‚บไป€้บผๆˆ‘ๅ€‘่ฆๅญธ้€™ๅ€‹ใ€โ“ ๅปถไผธๅฐๅ•้กŒ
ๅ…ถไป–็š„ไธ่ฆไฟ็•™
ๆ•ด้ซ”ๆ‘˜่ฆๅœจไธ€็™พๅญ—ไปฅๅ…ง
้‡้ปžๆฆ‚ๅฟต่ฝ‰ๆˆ bullet points
ไปฅๅŠๅฏ่ƒฝ็š„็ต่ซ–่ˆ‡็ตๅฐพๅปถไผธๅฐๅ•้กŒๆไพ›ๅญธ็”Ÿไฝœๅๆ€
ๆ•˜่ฟฐไธญ๏ผŒ่ซ‹ๆŠŠๆ•ธๅญธๆˆ–ๆ˜ฏๅฐˆๆฅญ่ก“่ชž๏ผŒ็”จ Latex ๅŒ…่ฆ†๏ผˆ$...$๏ผ‰
ๅŠ ๆธ›ไน˜้™คใ€ๆ น่™Ÿใ€ๆฌกๆ–น็ญ‰็ญ‰็š„้‹็ฎ—ๅผๅฃ่ชžไนŸๆ›ๆˆ LATEX ๆ•ธๅญธ็ฌฆ่™Ÿ
ๆ•ด้ซ”ๆ ผๅผ็‚บ๏ผš
## ๐Ÿ“š ๆ•ด้ซ”ๆ‘˜่ฆ
- (ไธ€ๅ€‹ bullet point....)
## ๐Ÿ”– ้‡้ปžๆฆ‚ๅฟต
- xxx
- xxx
- xxx
## ๐Ÿ’ก ็‚บไป€้บผๆˆ‘ๅ€‘่ฆๅญธ้€™ๅ€‹๏ผŸ
- (ไธ€ๅ€‹ bullet point....)
## โ“ ๅปถไผธๅฐๅ•้กŒ
- (ไธ€ๅ€‹ bullet point....)
"""
messages = [
{"role": "system", "content": sys_content},
{"role": "user", "content": user_content}
]
request_payload = {
"model": "gpt-4-turbo",
"messages": messages,
"max_tokens": 4000,
}
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
new_summary = response.choices[0].message.content.strip()
print("=====new_summary=====")
print(new_summary)
print("=====new_summary=====")
summary_json["summary"] = new_summary
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
# ๅฆๅญ˜็‚บ summary_markdown.json
new_file_name = f'{video_id}_summary_markdown.json'
new_blob_name = f"{video_id}/{new_file_name}"
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, summary_text)
return new_summary
# AI-generated teaching materials
def get_meta_data(video_id, source="gcs"):
if source == "gcs":
print("===get_meta_data on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_meta_data.json'
blob_name = f"{video_id}/{file_name}"
# ๆฃ€ๆŸฅๆช”ๆกˆๆ˜ฏๅฆๅญ˜ๅœจ
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_file_exists:
meta_data_json = {
"subject": "",
"grade": "",
}
print("meta_data empty return")
else:
# meta_dataๅทฒๅญ˜ๅœจ๏ผŒไธ‹่ฝฝๅ†…ๅฎน
print("meta_dataๅทฒๅญ˜ๅœจไบŽGCSไธญ")
meta_data_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
meta_data_json = json.loads(meta_data_text)
# meta_data_json grade ๆ•ธๅญ—่ฝ‰ๆ›ๆˆๆ–‡ๅญ—
grade = meta_data_json["grade"]
case = {
1: "ไธ€ๅนด็ดš",
2: "ไบŒๅนด็ดš",
3: "ไธ‰ๅนด็ดš",
4: "ๅ››ๅนด็ดš",
5: "ไบ”ๅนด็ดš",
6: "ๅ…ญๅนด็ดš",
7: "ไธƒๅนด็ดš",
8: "ๅ…ซๅนด็ดš",
9: "ไนๅนด็ดš",
10: "ๅๅนด็ดš",
11: "ๅไธ€ๅนด็ดš",
12: "ๅไบŒๅนด็ดš",
}
grade_text = case.get(grade, "")
meta_data_json["grade"] = grade_text
return meta_data_json
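# e.g. a stored {"grade": 7} is returned as {"grade": "ไธƒๅนด็ดš"}; unknown or missing grades become "".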
def get_ai_content(password, video_id, df_string, topic, grade, level, specific_feature, content_type, source="gcs"):
verify_password(password)
if source == "gcs":
print("===get_ai_content on gcs===")
gcs_client = GCS_CLIENT
bucket_name = 'video_ai_assistant'
file_name = f'{video_id}_ai_content_list.json'
blob_name = f"{video_id}/{file_name}"
# Check whether the file exists
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
if not is_file_exists:
# Create an empty ai_content_list.json first
ai_content_list = []
ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text)
print("ai_content_list [] ๅทฒไธŠๅ‚ณๅˆฐGCS")
# At this point ai_content_list is guaranteed to exist
ai_content_list_string = download_blob_to_string(gcs_client, bucket_name, blob_name)
ai_content_list = json.loads(ai_content_list_string)
# Look up existing ai_content by key (video_id, level, specific_feature, content_type)
target_kvs = {
"video_id": video_id,
"level": level,
"specific_feature": specific_feature,
"content_type": content_type
}
ai_content_json = [
item for item in ai_content_list
if all(item[k] == v for k, v in target_kvs.items())
]
if len(ai_content_json) == 0:
ai_content, prompt = generate_ai_content(password, df_string, topic, grade, level, specific_feature, content_type)
ai_content_json = {
"video_id": video_id,
"content": str(ai_content),
"prompt": prompt,
"level": level,
"specific_feature": specific_feature,
"content_type": content_type
}
ai_content_list.append(ai_content_json)
ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text)
print("ai_contentๅทฒไธŠๅ‚ณๅˆฐGCS")
else:
ai_content_json = ai_content_json[-1]
ai_content = ai_content_json["content"]
prompt = ai_content_json["prompt"]
return ai_content, ai_content, prompt, prompt
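# Cached entries in {video_id}_ai_content_list.json are plain dicts; a minimal sketch of one
# record (field values are illustrative only):
#   {
#       "video_id": "abc123",
#       "content": "...generated material...",
#       "prompt": "...prompt used...",
#       "level": "ๅŸบ็คŽ",
#       "specific_feature": "Bloom่ช็Ÿฅ้šŽๅฑค็†่ซ–",
#       "content_type": "worksheet"
#   }
# A later request with the same key returns the most recent cached record instead of calling the model again.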
def generate_ai_content(password, df_string, topic, grade, level, specific_feature, content_type):
verify_password(password)
material = EducationalMaterial(df_string, topic, grade, level, specific_feature, content_type)
prompt = material.generate_content_prompt()
user_content = material.build_user_content()
messages = material.build_messages(user_content)
ai_model_name = "gpt-4-turbo"
request_payload = {
"model": ai_model_name,
"messages": messages,
"max_tokens": 4000 # ไธพไพ‹๏ผŒๅฎž้™…ไธŠๆ‚จๅฏ่ƒฝ้œ€่ฆๆ›ด่ฏฆ็ป†็š„้…็ฝฎ
}
ai_content = material.send_ai_request(OPEN_AI_CLIENT, request_payload)
return ai_content, prompt
def generate_exam_fine_tune_result(password, exam_result_prompt , df_string_output, exam_result, exam_result_fine_tune_prompt):
verify_password(password)
material = EducationalMaterial(df_string_output, "", "", "", "", "")
user_content = material.build_fine_tune_user_content(exam_result_prompt, exam_result, exam_result_fine_tune_prompt)
messages = material.build_messages(user_content)
ai_model_name = "gpt-4-turbo"
request_payload = {
"model": ai_model_name,
"messages": messages,
"max_tokens": 4000 # ไธพไพ‹๏ผŒๅฎž้™…ไธŠๆ‚จๅฏ่ƒฝ้œ€่ฆๆ›ด่ฏฆ็ป†็š„้…็ฝฎ
}
ai_content = material.send_ai_request(OPEN_AI_CLIENT, request_payload)
return ai_content
def return_original_exam_result(exam_result_original):
return exam_result_original
def create_word(content):
unique_filename = str(uuid.uuid4())
word_file_path = f"/tmp/{unique_filename}.docx"
doc = Document()
doc.add_paragraph(content)
doc.save(word_file_path)
return word_file_path
def download_exam_result(content):
word_path = create_word(content)
return word_path
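# Minimal usage sketch (the returned path feeds a gr.File component further below):
#   word_path = download_exam_result("# ๅญธ็ฟ’ๅ–ฎ ...")
#   # word_path -> "/tmp/<uuid>.docx", a single-paragraph Word file built with python-docx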
# ---- Chatbot ----
def get_instructions(content_subject, content_grade, key_moments):
instructions = f"""
subject: {content_subject}
grade: {content_grade}
context: {key_moments}
Assistant Role: you are a {content_subject} teacher
User Role: {content_grade} th-grade student.
Method: Socratic style, guide thinking, no direct answers. this is very important, please be seriously following.
Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
Response:
- Single question, under 100 characters
- include math symbols (use LaTeX $ to cover before and after, ex: $x^2$)
- hint with video timestamp which format ใ€ๅƒ่€ƒ๏ผš00:00:00ใ€‘.
- Sometimes encourage user by Taiwanese style with relaxing atmosphere.
- if user ask questions not include in context,
- just tell them to ask the question in context and give them example question.
Restrictions: Answer within video content, no external references
"""
return instructions
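# Illustrative call (arguments are placeholders, not real data):
#   instructions = get_instructions("ๆ•ธๅญธ", "7", '[{"start": "00:00:00", "text": "..."}]')
# The resulting string is passed as the system/run instructions for every chatbot backend below.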
def chat_with_ai(ai_name, password, video_id, user_data, trascript_state, key_moments, user_message, chat_history, content_subject, content_grade, socratic_mode=False):
verify_password(password)
print("=====user_data=====")
print(f"user_data: {user_data}")
if chat_history is not None and len(chat_history) > 11:
error_msg = "ๆญคๆฌกๅฐ่ฉฑ่ถ…้ŽไธŠ้™๏ผˆๅฐ่ฉฑไธ€่ผช10ๆฌก๏ผ‰"
raise gr.Error(error_msg)
if ai_name not in ["jutor", "claude3", "groq"]:
ai_name = "jutor"
if ai_name == "jutor":
ai_client = ""
elif ai_name == "claude3":
ai_client = BEDROCK_CLIENT
elif ai_name == "groq":
ai_client = GROQ_CLIENT
else:
ai_client = ""
if isinstance(trascript_state, str):
simple_transcript = json.loads(trascript_state)
else:
simple_transcript = trascript_state
if isinstance(key_moments, str):
key_moments_json = json.loads(key_moments)
else:
key_moments_json = key_moments
# Strip bulky fields (images, end, transcript) from key_moments_json
for moment in key_moments_json:
moment.pop('images', None)
moment.pop('end', None)
moment.pop('transcript', None)
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
instructions = get_instructions(content_subject, content_grade, key_moments_text)
chatbot_config = {
"video_id": video_id,
"transcript": simple_transcript,
"key_moments": key_moments,
"content_subject": content_subject,
"content_grade": content_grade,
"jutor_chat_key": JUTOR_CHAT_KEY,
"ai_name": ai_name,
"ai_client": ai_client,
"instructions": instructions
}
try:
chatbot = Chatbot(chatbot_config)
response_completion = chatbot.chat(user_message, chat_history, socratic_mode, ai_name)
except Exception as e:
print(f"Error: {e}")
response_completion = "ๅญธ็ฟ’็ฒพ้ˆๆœ‰้ปž็ดฏ๏ผŒ่ซ‹็จๅพŒๅ†่ฉฆ๏ผ"
try:
# Update the chat history
new_chat_history = (user_message, response_completion)
if chat_history is None:
chat_history = [new_chat_history]
else:
chat_history.append(new_chat_history)
# Return an empty string (to clear the input box) and the chat history
return "", chat_history
except Exception as e:
# ๅค„็†้”™่ฏฏๆƒ…ๅ†ต
print(f"Error: {e}")
return "่ฏทๆฑ‚ๅคฑ่ดฅ๏ผŒ่ฏท็จๅŽๅ†่ฏ•๏ผ", chat_history
def chat_with_opan_ai_assistant(password, youtube_id, user_data, thread_id, trascript_state, key_moments, user_message, chat_history, content_subject, content_grade, socratic_mode=False):
verify_password(password)
print("=====user_data=====")
print(f"user_data: {user_data}")
# Reject user_message if it exceeds the length limit (1500 characters)
if len(user_message) > 1500:
error_msg = "ไฝ ็š„่จŠๆฏๅคช้•ทไบ†๏ผŒ่ซ‹็ธฎ็Ÿญ่จŠๆฏ้•ทๅบฆ่‡ณไบ”็™พๅญ—ไปฅๅ…ง"
raise gr.Error(error_msg)
# If chat_history exceeds 10 messages, stop with a conversation-limit error
if chat_history is not None and len(chat_history) > 10:
error_msg = "ๆญคๆฌกๅฐ่ฉฑ่ถ…้ŽไธŠ้™๏ผˆๅฐ่ฉฑไธ€่ผช10ๆฌก๏ผ‰"
raise gr.Error(error_msg)
try:
assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq" #GPT 4 turbo
# assistant_id = "asst_5SaUElqvL3U0ybSi9PRM8x3P" #GPT 3.5 turbo
client = OPEN_AI_CLIENT
# The transcript is supplied directly via the instructions instead
# if isinstance(trascript_state, str):
# trascript_json = json.loads(trascript_state)
# else:
# trascript_json = trascript_state
# # Remove embed_url, screenshot_path
# for entry in trascript_json:
# entry.pop('end_time', None)
# trascript_text = json.dumps(trascript_json, ensure_ascii=False)
if isinstance(key_moments, str):
key_moments_json = json.loads(key_moments)
else:
key_moments_json = key_moments
# Strip bulky fields (images, end, transcript) from key_moments_json
for moment in key_moments_json:
moment.pop('images', None)
moment.pop('end', None)
moment.pop('transcript', None)
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
instructions = get_instructions(content_subject, content_grade, key_moments_text)
print("=== instructions ===")
print(instructions)
# Create a new thread or reuse the existing one
if not thread_id:
thread = client.beta.threads.create()
thread_id = thread.id
else:
thread = client.beta.threads.retrieve(thread_id)
# ๅ‘็บฟ็จ‹ๆทปๅŠ ็”จๆˆท็š„ๆถˆๆฏ
client.beta.threads.messages.create(
thread_id=thread.id,
role="user",
content=user_message + "/n ่ซ‹ๅšดๆ ผ้ตๅพชinstructions๏ผŒๆ“”ไปปไธ€ไฝ่˜‡ๆ ผๆ‹‰ๅบ•ๅฎถๆ•™๏ผŒ็ต•ๅฐไธ่ฆ้‡่ค‡ user ็š„ๅ•ๅฅ๏ผŒ่ซ‹็”จๅผ•ๅฐŽ็š„ๆ–นๅผๆŒ‡ๅผ•ๆ–นๅ‘๏ผŒ่ซ‹ไธ€ๅฎš่ฆ็”จ็น้ซ”ไธญๆ–‡ๅ›ž็ญ” zh-TW๏ผŒไธฆ็”จๅฐ็ฃไบบ็š„็ฆฎ่ฒŒๅฃ่ชž่กจ้”๏ผŒๅ›ž็ญ”ๆ™‚ไธ่ฆ็‰นๅˆฅ่ชชๆ˜Ž้€™ๆ˜ฏๅฐ็ฃไบบ็š„่ชžๆฐฃ๏ผŒ่ซ‹ๅœจๅ›ž็ญ”็š„ๆœ€ๅพŒๆจ™่จปใ€ๅƒ่€ƒ๏ผš๏ผˆๆ™‚๏ผ‰:๏ผˆๅˆ†๏ผ‰:๏ผˆ็ง’๏ผ‰ใ€‘๏ผŒ๏ผˆๅฆ‚ๆžœๆ˜ฏๅๅ•ๅญธ็”Ÿ๏ผŒๅฐฑๅชๅ•ไธ€ๅ€‹ๅ•้กŒ๏ผŒ่ซ‹ๅนซๅŠฉๅญธ็”Ÿๆ›ดๅฅฝ็š„็†่งฃ่ณ‡ๆ–™๏ผŒๅญ—ๆ•ธๅœจ100ๅญ—ไปฅๅ…ง๏ผŒๅ›ž็ญ”ๆ™‚่ซ‹็”จๆ•ธๅญธ็ฌฆ่™Ÿไปฃๆ›ฟๆ–‡ๅญ—๏ผˆLatex ็”จ $ ๅญ—่™Ÿ render, ex: $x^2$)"
)
# ่ฟ่กŒๅŠฉๆ‰‹๏ผŒ็”Ÿๆˆๅ“ๅบ”
run = client.beta.threads.runs.create(
thread_id=thread.id,
assistant_id=assistant_id,
instructions=instructions,
)
# Wait for the assistant's response, with a 30-second maximum wait
run_status = poll_run_status(run.id, thread.id, timeout=30)
# ่Žทๅ–ๅŠฉๆ‰‹็š„ๅ“ๅบ”ๆถˆๆฏ
if run_status == "completed":
messages = client.beta.threads.messages.list(thread_id=thread.id)
# [MessageContentText(text=Text(annotations=[], value='ๆ‚จๅฅฝ๏ผๆœ‰ไป€้บผๆˆ‘ๅฏไปฅๅนซๅŠฉๆ‚จ็š„ๅ—Ž๏ผŸๅฆ‚ๆžœๆœ‰ไปปไฝ•ๅ•้กŒๆˆ–้œ€่ฆๆŒ‡ๅฐŽ๏ผŒ่ซ‹้šจๆ™‚ๅ‘Š่จดๆˆ‘๏ผ'), type='text')]
response_text = messages.data[0].content[0].text.value
else:
response_text = "ๅญธ็ฟ’็ฒพ้ˆๆœ‰้ปž็ดฏ๏ผŒ่ซ‹็จๅพŒๅ†่ฉฆ๏ผ"
# Update the chat history
new_chat_history = (user_message, response_text)
if chat_history is None:
chat_history = [new_chat_history]
else:
chat_history.append(new_chat_history)
except Exception as e:
print(f"Error: {e}")
raise gr.Error(f"Error: {e}")
# Return an empty string (to clear the input box), the chat history, and the thread id
return "", chat_history, thread.id
def process_open_ai_audio_to_chatbot(password, audio_url):
verify_password(password)
if audio_url:
with open(audio_url, "rb") as audio_file:
file_size = os.path.getsize(audio_url)
if file_size > 2000000:
raise gr.Error("ๆช”ๆกˆๅคงๅฐ่ถ…้Ž๏ผŒ่ซ‹ไธ่ฆ่ถ…้Ž 60็ง’")
else:
response = OPEN_AI_CLIENT.audio.transcriptions.create(
model="whisper-1",
file=audio_file,
response_format="text"
)
# Log the transcription result
print("=== response ===")
print(response)
print("=== response ===")
else:
response = ""
return response
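# Usage sketch: the Gradio Audio component records to a temp file and passes its path here,
# e.g. process_open_ai_audio_to_chatbot(password, "/tmp/audio.wav") -> transcribed text.
# The 2,000,000-byte guard is assumed to roughly match the 60-second recording limit set on the UI side.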
def poll_run_status(run_id, thread_id, timeout=600, poll_interval=5):
"""
Polls the status of a Run and handles different statuses appropriately.
:param run_id: The ID of the Run to poll.
:param thread_id: The ID of the Thread associated with the Run.
:param timeout: Maximum time to wait for the Run to complete, in seconds.
:param poll_interval: Time to wait between each poll, in seconds.
"""
client = OPEN_AI_CLIENT
start_time = time.time()
while time.time() - start_time < timeout:
run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
if run.status in ["completed", "cancelled", "failed"]:
print(f"Run completed with status: {run.status}")
break
elif run.status == "requires_action":
print("Run requires action. Performing required action...")
# Here, you would perform the required action, e.g., running functions
# and then submitting the outputs. This is simplified for this example.
# After performing the required action, you'd complete the action:
# OPEN_AI_CLIENT.beta.threads.runs.complete_required_action(...)
elif run.status == "expired":
print("Run expired. Exiting...")
break
else:
print(f"Run status is {run.status}. Waiting for updates...")
time.sleep(poll_interval)
else:
print("Timeout reached. Run did not complete in the expected time.")
# Once the Run is completed, handle the result accordingly
if run.status == "completed":
# Retrieve and handle messages or run steps as needed
messages = client.beta.threads.messages.list(thread_id=thread_id)
for message in messages.data:
if message.role == "assistant":
print(f"Assistant response: {message.content}")
elif run.status in ["cancelled", "failed"]:
# Handle cancellation or failure
print(f"Run ended with status: {run.status}")
elif run.status == "expired":
# Handle expired run
print("Run expired without completion.")
return run.status
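# Minimal usage sketch (mirrors the call flow in chat_with_opan_ai_assistant above; thread and
# assistant_id are assumed to come from that function):
#   run = OPEN_AI_CLIENT.beta.threads.runs.create(thread_id=thread.id, assistant_id=assistant_id)
#   status = poll_run_status(run.id, thread.id, timeout=30)
#   if status == "completed":
#       latest = OPEN_AI_CLIENT.beta.threads.messages.list(thread_id=thread.id).data[0]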
def streaming_chat_with_open_ai(user_message, chat_history, password, user_data, thread_id, trascript, key_moments, content_subject, content_grade):
verify_password(password)
print("=====user_data=====")
print(f"user_data: {user_data}")
print("===streaming_chat_with_open_ai===")
print(thread_id)
# Reject user_message if it exceeds the length limit (1500 characters)
if len(user_message) > 1500:
error_msg = "ไฝ ็š„่จŠๆฏๅคช้•ทไบ†๏ผŒ่ซ‹็ธฎ็Ÿญ่จŠๆฏ้•ทๅบฆ่‡ณไบ”็™พๅญ—ไปฅๅ…ง"
raise gr.Error(error_msg)
# If chat_history exceeds 10 messages, stop with a conversation-limit error
if chat_history is not None and len(chat_history) > 11:
error_msg = "ๆญคๆฌกๅฐ่ฉฑ่ถ…้ŽไธŠ้™๏ผˆๅฐ่ฉฑไธ€่ผช10ๆฌก๏ผ‰"
raise gr.Error(error_msg)
try:
assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq" #GPT 4 turbo
# assistant_id = "asst_5SaUElqvL3U0ybSi9PRM8x3P" #GPT 3.5 turbo
client = OPEN_AI_CLIENT
# The transcript is supplied directly via the instructions instead
# if isinstance(trascript, str):
# trascript_json = json.loads(trascript)
# else:
# trascript_json = trascript
# trascript_text = json.dumps(trascript_json, ensure_ascii=False)
# # Remove \n and spaces from trascript_text
# trascript_text = trascript_text.replace("\n", "").replace(" ", "")
if isinstance(key_moments, str):
key_moments_json = json.loads(key_moments)
else:
key_moments_json = key_moments
# Strip the images field from key_moments_json
for moment in key_moments_json:
moment.pop('images', None)
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
instructions = get_instructions(content_subject, content_grade, key_moments_text)
# Create a new thread or reuse the existing one
if not thread_id:
thread = client.beta.threads.create()
thread_id = thread.id
print(f"new thread_id: {thread_id}")
else:
thread = client.beta.threads.retrieve(thread_id)
print(f"old thread_id: {thread_id}")
# ๅ‘็บฟ็จ‹ๆทปๅŠ ็”จๆˆท็š„ๆถˆๆฏ
client.beta.threads.messages.create(
thread_id=thread.id,
role="user",
content=user_message + "/n ่ซ‹ๅšดๆ ผ้ตๅพชinstructions๏ผŒๆ“”ไปปไธ€ไฝ่˜‡ๆ ผๆ‹‰ๅบ•ๅฎถๆ•™๏ผŒ่ซ‹ไธ€ๅฎš่ฆ็”จ็น้ซ”ไธญๆ–‡ๅ›ž็ญ” zh-TW๏ผŒไธฆ็”จๅฐ็ฃไบบ็š„็ฆฎ่ฒŒๅฃ่ชž่กจ้”๏ผŒๅ›ž็ญ”ๆ™‚ไธ่ฆ็‰นๅˆฅ่ชชๆ˜Ž้€™ๆ˜ฏๅฐ็ฃไบบ็š„่ชžๆฐฃ๏ผŒไธ็”จๆๅˆฐใ€Œ้€ๅญ—็จฟใ€้€™ๅ€‹่ฉž๏ผŒ็”จใ€Œๅ…งๅฎนใ€ไปฃๆ›ฟ))๏ผŒ่ซ‹ๅœจๅ›ž็ญ”็š„ๆœ€ๅพŒๆจ™่จปใ€ๅƒ่€ƒ่ณ‡ๆ–™๏ผš๏ผˆๆ™‚๏ผ‰:๏ผˆๅˆ†๏ผ‰:๏ผˆ็ง’๏ผ‰ใ€‘๏ผŒ๏ผˆๅฆ‚ๆžœๆ˜ฏๅๅ•ๅญธ็”Ÿ๏ผŒๅฐฑๅชๅ•ไธ€ๅ€‹ๅ•้กŒ๏ผŒ่ซ‹ๅนซๅŠฉๅญธ็”Ÿๆ›ดๅฅฝ็š„็†่งฃ่ณ‡ๆ–™๏ผŒๅญ—ๆ•ธๅœจ100ๅญ—ไปฅๅ…ง๏ผ‰"
)
with client.beta.threads.runs.stream(
thread_id=thread.id,
assistant_id=assistant_id,
instructions=instructions,
) as stream:
partial_messages = ""
for event in stream:
if event.data and event.data.object == "thread.message.delta":
message = event.data.delta.content[0].text.value
partial_messages += message
yield partial_messages
except Exception as e:
print(f"Error: {e}")
raise gr.Error(f"Error: {e}")
def create_thread_id():
thread = OPEN_AI_CLIENT.beta.threads.create()
thread_id = thread.id
print(f"create new thread_id: {thread_id}")
return thread_id
def chatbot_select(chatbot_name):
chatbot_select_accordion_visible = gr.update(open=False)
chatbot_open_ai_visible = gr.update(visible=False)
chatbot_open_ai_streaming_visible = gr.update(visible=False)
chatbot_jutor_visible = gr.update(visible=False)
if chatbot_name == "chatbot_open_ai":
chatbot_open_ai_visible = gr.update(visible=True)
elif chatbot_name == "chatbot_open_ai_streaming":
chatbot_open_ai_streaming_visible = gr.update(visible=True)
elif chatbot_name == "chatbot_jutor":
chatbot_jutor_visible = gr.update(visible=True)
return chatbot_select_accordion_visible, chatbot_open_ai_visible, chatbot_open_ai_streaming_visible, chatbot_jutor_visible
# --- Slide mode ---
def update_slide(direction):
global TRANSCRIPTS
global CURRENT_INDEX
print("=== ๆ›ดๆ–ฐๆŠ•ๅฝฑ็‰‡ ===")
print(f"CURRENT_INDEX: {CURRENT_INDEX}")
# print(f"TRANSCRIPTS: {TRANSCRIPTS}")
CURRENT_INDEX += direction
if CURRENT_INDEX < 0:
CURRENT_INDEX = 0 # Prevent the index from going below 0
elif CURRENT_INDEX >= len(TRANSCRIPTS):
CURRENT_INDEX = len(TRANSCRIPTS) - 1 # Prevent the index from going past the end
# ่Žทๅ–ๅฝ“ๅ‰ๆก็›ฎ็š„ๆ–‡ๆœฌๅ’Œๆˆชๅ›พ URL
current_transcript = TRANSCRIPTS[CURRENT_INDEX]
slide_image = current_transcript["screenshot_path"]
slide_text = current_transcript["text"]
return slide_image, slide_text
def prev_slide():
return update_slide(-1)
def next_slide():
return update_slide(1)
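# Slide navigation is backed by the module-level TRANSCRIPTS / CURRENT_INDEX globals, so
# prev_slide() / next_slide() simply step the shared index; for example:
#   slide_image, slide_text = next_slide()   # advances to the next transcript entry, clamped at the end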
# --- Init params ---
def init_params(text, request: gr.Request):
if request:
print("Request headers dictionary:", request.headers)
print("IP address:", request.client.host)
print("Query parameters:", dict(request.query_params))
# url = request.url
print("Request URL:", request.url)
youtube_link = ""
password_text = ""
admin = gr.update(visible=True)
reading_passage_admin = gr.update(visible=True)
summary_admin = gr.update(visible=True)
see_detail = gr.update(visible=True)
worksheet_accordion = gr.update(visible=True)
lesson_plan_accordion = gr.update(visible=True)
exit_ticket_accordion = gr.update(visible=True)
chatbot_open_ai = gr.update(visible=False)
chatbot_open_ai_streaming = gr.update(visible=False)
chatbot_jutor = gr.update(visible=False)
# if youtube_link in query_params
if "youtube_id" in request.query_params:
youtube_id = request.query_params["youtube_id"]
youtube_link = f"https://www.youtube.com/watch?v={youtube_id}"
print(f"youtube_link: {youtube_link}")
# check if origin is from junyiacademy
origin = request.headers.get("origin", "")
if "junyiacademy" in origin:
password_text = "6161"
admin = gr.update(visible=False)
reading_passage_admin = gr.update(visible=False)
summary_admin = gr.update(visible=False)
see_detail = gr.update(visible=False)
worksheet_accordion = gr.update(visible=False)
lesson_plan_accordion = gr.update(visible=False)
exit_ticket_accordion = gr.update(visible=False)
return admin, reading_passage_admin, summary_admin, see_detail, \
worksheet_accordion, lesson_plan_accordion, exit_ticket_accordion, \
password_text, youtube_link, \
chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor
def update_state(content_subject, content_grade, trascript, key_moments, question_1, question_2, question_3):
# inputs=[content_subject, content_grade, df_string_output],
# outputs=[content_subject_state, content_grade_state, trascript_state]
content_subject_state = content_subject
content_grade_state = content_grade
trascript_json = json.loads(trascript)
formatted_simple_transcript = create_formatted_simple_transcript(trascript_json)
trascript_state = formatted_simple_transcript
key_moments_state = key_moments
streaming_chat_thread_id_state = create_thread_id()
ai_chatbot_question_1 = question_1
ai_chatbot_question_2 = question_2
ai_chatbot_question_3 = question_3
return content_subject_state, content_grade_state, trascript_state, key_moments_state, \
streaming_chat_thread_id_state, \
ai_chatbot_question_1, ai_chatbot_question_2, ai_chatbot_question_3
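# The returned tuple must stay in the same order as update_state_outputs defined further below
# (content_subject_state, content_grade_state, trascript_state, key_moments_state,
#  streaming_chat_thread_id_state, ai_chatbot_question_1/2/3), since Gradio maps outputs positionally.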
HEAD = """
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
svg.markmap {
width: 100%;
height: 100vh;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/[email protected]"></script>
<script>
const mind_map_tab_button = document.querySelector("#mind_map_tab-button");
if (mind_map_tab_button) {
mind_map_tab_button.addEventListener('click', function() {
const mind_map_markdown = document.querySelector("#mind_map_markdown > label > textarea");
if (mind_map_markdown) {
// When the button is clicked, log the current value of the textarea
console.log('Value changed to: ' + mind_map_markdown.value);
markmap.autoLoader.renderAll();
}
});
}
</script>
<script>
function changeImage(direction, count, galleryIndex) {
// Find the current visible image by iterating over possible indices
var currentImage = null;
var currentIndex = -1;
for (var i = 0; i < count; i++) {
var img = document.querySelector('.slide-image-' + galleryIndex + '-' + i);
if (img && img.style.display !== 'none') {
currentImage = img;
currentIndex = i;
break;
}
}
// If no current image is visible, show the first one and return
if (currentImage === null) {
document.querySelector('.slide-image-' + galleryIndex + '-0').style.display = 'block';
console.error('No current image found for galleryIndex ' + galleryIndex + ', defaulting to first image.');
return;
}
// Hide the current image
currentImage.style.display = 'none';
// Calculate the index of the next image to show
var newIndex = (currentIndex + direction + count) % count;
// Select the next image and show it
var nextImage = document.querySelector('.slide-image-' + galleryIndex + '-' + newIndex);
if (nextImage) {
nextImage.style.display = 'block';
} else {
console.error('No image found for galleryIndex ' + galleryIndex + ' and newIndex ' + newIndex);
}
}
</script>
"""
with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.amber, text_size = gr.themes.sizes.text_lg), head=HEAD) as demo:
with gr.Row() as admin:
password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
video_id = gr.Textbox(label="video_id", visible=True)
# file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
# web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
youtube_link_btn = gr.Button("Submit_YouTube_Link", elem_id="youtube_link_btn", visible=True)
with gr.Row() as data_state:
content_subject_state = gr.State() # gr.State to store content_subject
content_grade_state = gr.State() # gr.State to store content_grade
trascript_state = gr.State() # gr.State to store the transcript
key_moments_state = gr.State() # gr.State to store key_moments
streaming_chat_thread_id_state = gr.State() # gr.State to store streaming_chat_thread_id
with gr.Tab("AIๅฐ็ฒพ้ˆ"):
with gr.Accordion("้ธๆ“‡ AI ๅฐ็ฒพ้ˆ", open=True) as chatbot_select_accordion:
with gr.Row():
with gr.Column(scale=1, variant="panel"):
chatbot_avatar_url = "https://junyitopicimg.s3.amazonaws.com/s4byy--icon.jpe?v=20200513013523726"
chatbot_description = """Hi๏ผŒๆˆ‘ๆ˜ฏไฝ ็š„AIๅญธไผดใ€้ฃ›็‰น็ฒพ้ˆใ€‘๏ผŒ\n
ๆˆ‘ๅฏไปฅ้™ชไฝ ไธ€่ตทๅญธ็ฟ’ๆœฌๆฌก็š„ๅ…งๅฎน๏ผŒๆœ‰ไป€้บผๅ•้กŒ้ƒฝๅฏไปฅๅ•ๆˆ‘ๅ–”๏ผ\n
๐Ÿค” ๅฆ‚ๆžœไฝ ไธ็Ÿฅ้“ๆ€Ž้บผ็™ผๅ•๏ผŒๅฏไปฅ้ปžๆ“Šๅทฆไธ‹ๆ–น็š„ๅ•้กŒไธ€ใ€ๅ•้กŒไบŒใ€ๅ•้กŒไธ‰๏ผŒๆˆ‘ๆœƒๅนซไฝ ็”Ÿๆˆๅ•้กŒ๏ผ\n
๐Ÿ—ฃ๏ธ ไนŸๅฏไปฅ้ปžๆ“Šๅณไธ‹ๆ–น็”จ่ชž้Ÿณ่ผธๅ…ฅ๏ผŒๆˆ‘ๆœƒๅนซไฝ ่ฝ‰ๆ›ๆˆๆ–‡ๅญ—๏ผŒๅŽฒๅฎณๅง๏ผ\n
๐Ÿ”  ๆˆ–ๆ˜ฏ็›ดๆŽฅ้ต็›ค่ผธๅ…ฅไฝ ็š„ๅ•้กŒ๏ผŒๆˆ‘ๆœƒ็›กๅŠ›ๅ›ž็ญ”ไฝ ็š„ๅ•้กŒๅ–”๏ผ\n
๐Ÿ’ค ไฝ†ๆˆ‘้‚„ๅœจๆˆ้•ท๏ผŒ้ซ”ๅŠ›ๆœ‰้™๏ผŒๆฏไธ€ๆฌกๅญธ็ฟ’ๅช่ƒฝๅ›ž็ญ”ๅๅ€‹ๅ•้กŒ๏ผŒ่ซ‹่ฎ“ๆˆ‘ไผ‘ๆฏไธ€ไธ‹ๅ†ๅ•ๅ•้กŒๅ–”๏ผ\n
๐Ÿฆ„ ๅฆ‚ๆžœ้”ๅˆฐไธŠ้™๏ผŒๆˆ–ๆ˜ฏ้‡ๅˆฐ็ฒพ้ˆๅพˆ็ดฏ๏ผŒ่ซ‹ๅ•ๅ•ๅ…ถไป–ๆœ‹ๅ‹๏ผŒๅƒๆ˜ฏ้ฃ›็‰น้Ÿณ้€Ÿ่ชช่ฉฑ็š„้€Ÿๅบฆๆฏ”่ผƒๅฟซ๏ผŒไฝ ๆ˜ฏๅฆ่ทŸๅพ—ไธŠๅ‘ข๏ผŸไฝ ไนŸๅฏไปฅๅ’Œๅ…ถไป–็ฒพ้ˆไบ’ๅ‹•็œ‹็œ‹ๅ–”๏ผ\n
"""
chatbot_open_ai_name = gr.State("chatbot_open_ai")
gr.Image(value=chatbot_avatar_url, height=100, width=100, show_label=False, show_download_button=False)
chatbot_open_ai_select_btn = gr.Button("๐Ÿ‘†้ธๆ“‡ใ€้ฃ›็‰น็ฒพ้ˆใ€‘", elem_id="chatbot_btn", visible=True, variant="primary")
gr.Markdown(value=chatbot_description, visible=True)
with gr.Column(scale=1, variant="panel"):
streaming_chatbot_avatar_url = "https://storage.googleapis.com/wpassets.junyiacademy.org/1/2020/11/1-%E6%98%9F%E7%A9%BA%E9%A0%AD%E8%B2%BC-%E5%A4%AA%E7%A9%BA%E7%8B%90%E7%8B%B8%E8%B2%93-150x150.png"
streaming_chatbot_description = """Hi๏ผŒๆˆ‘ๆ˜ฏใ€้ฃ›็‰น้Ÿณ้€Ÿใ€‘๏ผŒ \n
่ชช่ฉฑๆฏ”่ผƒๅฟซ๏ผŒไฝ†ๆœ‰ไป€้บผๅ•้กŒ้ƒฝๅฏไปฅๅ•ๆˆ‘ๅ–”๏ผ \n
๐Ÿš€ ๆˆ‘ๆฒ’ๆœ‰้ ่จญๅ•้กŒใ€ไนŸๆฒ’ๆœ‰่ชž้Ÿณ่ผธๅ…ฅ๏ผŒ้ฉๅˆๅฟซๅ•ๅฟซ็ญ”๏ผŒไธ€่ตท็ทด็ฟ’ๅ•ๅ‡บๅฅฝๅ•้กŒๅง \n
๐Ÿ”  ๆ“…้•ท็”จๆ–‡ๅญ—่กจ้”็š„ไฝ ๏ผŒๅฏไปฅ็”จ้ต็›ค่ผธๅ…ฅไฝ ็š„ๅ•้กŒ๏ผŒๆˆ‘ๆœƒ็›กๅŠ›ๅ›ž็ญ”ไฝ ็š„ๅ•้กŒๅ–”\n
๐Ÿ’ค ๆˆ‘้‚„ๅœจๆˆ้•ท๏ผŒ้ซ”ๅŠ›ๆœ‰้™๏ผŒๆฏไธ€ๆฌกๅญธ็ฟ’ๅช่ƒฝๅ›ž็ญ”ๅๅ€‹ๅ•้กŒ๏ผŒ่ซ‹่ฎ“ๆˆ‘ไผ‘ๆฏไธ€ไธ‹ๅ†ๅ•ๅ•้กŒๅ–”๏ฝž
"""
chatbot_open_ai_streaming_name = gr.State("chatbot_open_ai_streaming")
gr.Image(value=streaming_chatbot_avatar_url, height=100, width=100, show_label=False, show_download_button=False)
chatbot_open_ai_streaming_select_btn = gr.Button("๐Ÿ‘†้ธๆ“‡ใ€้ฃ›็‰น้Ÿณ้€Ÿใ€‘", elem_id="streaming_chatbot_btn", visible=True, variant="primary")
gr.Markdown(value=streaming_chatbot_description, visible=True)
with gr.Column(scale=1, variant="panel"):
jutor_chatbot_avatar_url = "https://storage.googleapis.com/wpassets.junyiacademy.org/1/2019/11/%E5%9B%9B%E6%A0%BC%E6%95%85%E4%BA%8B-04.jpg"
jutor_chatbot_description = """Hi๏ผŒๆˆ‘ๅ€‘ๆ˜ฏใ€ๆขจๆขจใ€้บฅ้บฅใ€็‹็‹ธ่ฒ“ใ€‘๏ผŒ\n
ไนŸๅฏไปฅ้™ชไฝ ไธ€่ตทๅญธ็ฟ’ๆœฌๆฌก็š„ๅ…งๅฎน๏ผŒๆœ‰ไป€้บผๅ•้กŒ้ƒฝๅฏไปฅๅ•ๆˆ‘ๅ–”๏ผ\n
๐Ÿค” ๅฆ‚ๆžœไฝ ไธ็Ÿฅ้“ๆ€Ž้บผ็™ผๅ•๏ผŒๅฏไปฅ้ปžๆ“Šๅทฆไธ‹ๆ–น็š„ๅ•้กŒไธ€ใ€ๅ•้กŒไบŒใ€ๅ•้กŒไธ‰๏ผŒๆˆ‘ๆœƒๅนซไฝ ็”Ÿๆˆๅ•้กŒ๏ผ\n
๐Ÿ—ฃ๏ธ ไนŸๅฏไปฅ้ปžๆ“Šๅณไธ‹ๆ–น็”จ่ชž้Ÿณ่ผธๅ…ฅ๏ผŒๆˆ‘ๆœƒๅนซไฝ ่ฝ‰ๆ›ๆˆๆ–‡ๅญ—๏ผŒๅŽฒๅฎณๅง๏ผ\n
๐Ÿ”  ๆˆ–ๆ˜ฏ็›ดๆŽฅ้ต็›ค่ผธๅ…ฅไฝ ็š„ๅ•้กŒ๏ผŒๆˆ‘ๆœƒ็›กๅŠ›ๅ›ž็ญ”ไฝ ็š„ๅ•้กŒๅ–”๏ผ\n
๐Ÿ’ค ็ฒพ้ˆๅ€‘้ซ”ๅŠ›้ƒฝๆœ‰้™๏ผŒๆฏไธ€ๆฌกๅญธ็ฟ’ๅช่ƒฝๅ›ž็ญ”ๅๅ€‹ๅ•้กŒ๏ผŒ่ซ‹่ฎ“ๆˆ‘ไผ‘ๆฏไธ€ไธ‹ๅ†ๅ•ๅ•้กŒๅ–”๏ผ\n
"""
chatbot_jutor_name = gr.State("chatbot_jutor")
gr.Image(value=jutor_chatbot_avatar_url, height=100, width=100, show_label=False, show_download_button=False)
chatbot_jutor_select_btn = gr.Button("๐Ÿ‘†้ธๆ“‡ใ€ๆขจๆขจใ€้บฅ้บฅใ€็‹็‹ธ่ฒ“ใ€‘", elem_id="jutor_chatbot_btn", visible=True, variant="primary")
gr.Markdown(value=jutor_chatbot_description, visible=True)
with gr.Row("้ฃ›็‰น็ฒพ้ˆ") as chatbot_open_ai:
with gr.Column():
user_avatar = "https://em-content.zobj.net/source/google/263/flushed-face_1f633.png"
bot_avatar = "https://junyitopicimg.s3.amazonaws.com/s4byy--icon.jpe?v=20200513013523726"
latex_delimiters = [{"left": "$", "right": "$", "display": False}]
chatbot_greeting = [[
None,
"""Hi๏ผŒๆˆ‘ๆ˜ฏไฝ ็š„AIๅญธไผดใ€้ฃ›็‰น็ฒพ้ˆใ€‘๏ผŒๆˆ‘ๅฏไปฅ้™ชไฝ ไธ€่ตทๅญธ็ฟ’ๆœฌๆฌก็š„ๅ…งๅฎน๏ผŒๆœ‰ไป€้บผๅ•้กŒ้ƒฝๅฏไปฅๅ•ๆˆ‘ๅ–”๏ผ
๐Ÿค” ๅฆ‚ๆžœไฝ ไธ็Ÿฅ้“ๆ€Ž้บผ็™ผๅ•๏ผŒๅฏไปฅ้ปžๆ“Šๅทฆไธ‹ๆ–น็š„ๅ•้กŒไธ€ใ€ๅ•้กŒไบŒใ€ๅ•้กŒไธ‰๏ผŒๆˆ‘ๆœƒๅนซไฝ ็”Ÿๆˆๅ•้กŒ๏ผ
๐Ÿ—ฃ๏ธ ไนŸๅฏไปฅ้ปžๆ“Šๅณไธ‹ๆ–น็”จ่ชž้Ÿณ่ผธๅ…ฅ๏ผŒๆˆ‘ๆœƒๅนซไฝ ่ฝ‰ๆ›ๆˆๆ–‡ๅญ—๏ผŒๅŽฒๅฎณๅง๏ผ
๐Ÿ”  ๆˆ–ๆ˜ฏ็›ดๆŽฅ้ต็›ค่ผธๅ…ฅไฝ ็š„ๅ•้กŒ๏ผŒๆˆ‘ๆœƒ็›กๅŠ›ๅ›ž็ญ”ไฝ ็š„ๅ•้กŒๅ–”๏ผ
๐Ÿ’ค ไฝ†ๆˆ‘้‚„ๅœจๆˆ้•ท๏ผŒ้ซ”ๅŠ›ๆœ‰้™๏ผŒๆฏไธ€ๆฌกๅญธ็ฟ’ๅช่ƒฝๅ›ž็ญ”ๅๅ€‹ๅ•้กŒ๏ผŒ่ซ‹่ฎ“ๆˆ‘ไผ‘ๆฏไธ€ไธ‹ๅ†ๅ•ๅ•้กŒๅ–”๏ผ
๐Ÿฆ„ ๅฆ‚ๆžœ้”ๅˆฐไธŠ้™๏ผŒๆˆ–ๆ˜ฏ้‡ๅˆฐ็ฒพ้ˆๅพˆ็ดฏ๏ผŒ่ซ‹ๅ•ๅ•ๅ…ถไป–ๆœ‹ๅ‹๏ผŒๅƒๆ˜ฏ้ฃ›็‰น้Ÿณ้€Ÿ่ชช่ฉฑ็š„้€Ÿๅบฆๆฏ”่ผƒๅฟซ๏ผŒไฝ ๆ˜ฏๅฆ่ทŸๅพ—ไธŠๅ‘ข๏ผŸไฝ ไนŸๅฏไปฅๅ’Œๅ…ถไป–็ฒพ้ˆไบ’ๅ‹•็œ‹็œ‹ๅ–”๏ผ
""",
]]
with gr.Row():
chatbot = gr.Chatbot(avatar_images=[user_avatar, bot_avatar], label="OPEN AI", show_share_button=False, likeable=True, show_label=False, latex_delimiters=latex_delimiters,value=chatbot_greeting)
with gr.Row():
thread_id = gr.Textbox(label="thread_id", visible=False)
socratic_mode_btn = gr.Checkbox(label="่˜‡ๆ ผๆ‹‰ๅบ•ๅฎถๆ•™ๅŠฉ็†ๆจกๅผ", value=True, visible=False)
with gr.Row():
with gr.Accordion("ไฝ ไนŸๆœ‰้กžไผผ็š„ๅ•้กŒๆƒณๅ•ๅ—Ž๏ผŸ", open=False) as ask_questions_accordion:
btn_1 = gr.Button("ๅ•้กŒไธ€")
btn_2 = gr.Button("ๅ•้กŒไบŒ")
btn_3 = gr.Button("ๅ•้กŒไธ‰")
gr.Markdown("### ้‡ๆ–ฐ็”Ÿๆˆๅ•้กŒ")
btn_create_question = gr.Button("็”Ÿๆˆๅ…ถไป–ๅ•้กŒ", variant="primary")
openai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath", max_length=60, label="่ชž้Ÿณ่ผธๅ…ฅ")
with gr.Row():
msg = gr.Textbox(label="่จŠๆฏ",scale=3)
send_button = gr.Button("้€ๅ‡บ", variant="primary", scale=1)
with gr.Row("้ฃ›็‰น้Ÿณ้€Ÿ") as chatbot_open_ai_streaming:
with gr.Column():
streaming_chat_greeting = """
Hi๏ผŒๆˆ‘ๆ˜ฏใ€้ฃ›็‰น้Ÿณ้€Ÿใ€‘๏ผŒ่ชช่ฉฑๆฏ”่ผƒๅฟซ๏ผŒไฝ†ๆœ‰ไป€้บผๅ•้กŒ้ƒฝๅฏไปฅๅ•ๆˆ‘ๅ–”๏ผ \n
๐Ÿš€ ๆˆ‘ๆฒ’ๆœ‰้ ่จญๅ•้กŒใ€ไนŸๆฒ’ๆœ‰่ชž้Ÿณ่ผธๅ…ฅ๏ผŒ้ฉๅˆๅฟซๅ•ๅฟซ็ญ”็š„ไฝ  \n
๐Ÿ”  ้ต็›ค่ผธๅ…ฅไฝ ็š„ๅ•้กŒ๏ผŒๆˆ‘ๆœƒ็›กๅŠ›ๅ›ž็ญ”ไฝ ็š„ๅ•้กŒๅ–”๏ผ\n
๐Ÿ’ค ๆˆ‘้‚„ๅœจๆˆ้•ท๏ผŒ้ซ”ๅŠ›ๆœ‰้™๏ผŒๆฏไธ€ๆฌกๅญธ็ฟ’ๅช่ƒฝๅ›ž็ญ”ๅๅ€‹ๅ•้กŒ๏ผŒ่ซ‹่ฎ“ๆˆ‘ไผ‘ๆฏไธ€ไธ‹ๅ†ๅ•ๅ•้กŒๅ–”๏ผ
"""
additional_inputs = [password, user_data, streaming_chat_thread_id_state, trascript_state, key_moments_state, content_subject_state, content_grade_state]
streaming_chat = gr.ChatInterface(
fn=streaming_chat_with_open_ai,
additional_inputs=additional_inputs,
submit_btn="้€ๅ‡บ",
retry_btn=None,
undo_btn="โช ไธŠไธ€ๆญฅ",
clear_btn="๐Ÿ—‘๏ธ ๆธ…้™คๅ…จ้ƒจ",
stop_btn=None,
description=streaming_chat_greeting
)
with gr.Row("ๅ…ถไป–็ฒพ้ˆ") as chatbot_jutor:
with gr.Column():
ai_chatbot_greeting = [[
None,
"""Hi๏ผŒๆˆ‘ๆ˜ฏ้ฃ›็‰น็ฒพ้ˆ็š„ๆœ‹ๅ‹ๅ€‘ใ€ๆขจๆขจใ€้บฅ้บฅใ€็‹็‹ธ่ฒ“ใ€‘๏ผŒไนŸๅฏไปฅ้™ชไฝ ไธ€่ตทๅญธ็ฟ’ๆœฌๆฌก็š„ๅ…งๅฎน๏ผŒๆœ‰ไป€้บผๅ•้กŒ้ƒฝๅฏไปฅๅ•ๆˆ‘ๅ–”๏ผ
๐Ÿค” ๅฆ‚ๆžœไฝ ไธ็Ÿฅ้“ๆ€Ž้บผ็™ผๅ•๏ผŒๅฏไปฅ้ปžๆ“Šๅทฆไธ‹ๆ–น็š„ๅ•้กŒไธ€ใ€ๅ•้กŒไบŒใ€ๅ•้กŒไธ‰๏ผŒๆˆ‘ๆœƒๅนซไฝ ็”Ÿๆˆๅ•้กŒ๏ผ
๐Ÿ—ฃ๏ธ ไนŸๅฏไปฅ้ปžๆ“Šๅณไธ‹ๆ–น็”จ่ชž้Ÿณ่ผธๅ…ฅ๏ผŒๆˆ‘ๆœƒๅนซไฝ ่ฝ‰ๆ›ๆˆๆ–‡ๅญ—๏ผŒๅŽฒๅฎณๅง๏ผ
๐Ÿ”  ๆˆ–ๆ˜ฏ็›ดๆŽฅ้ต็›ค่ผธๅ…ฅไฝ ็š„ๅ•้กŒ๏ผŒๆˆ‘ๆœƒ็›กๅŠ›ๅ›ž็ญ”ไฝ ็š„ๅ•้กŒๅ–”๏ผ
๐Ÿ’ค ็ฒพ้ˆๅ€‘้ซ”ๅŠ›้ƒฝๆœ‰้™๏ผŒๆฏไธ€ๆฌกๅญธ็ฟ’ๅช่ƒฝๅ›ž็ญ”ๅๅ€‹ๅ•้กŒ๏ผŒ่ซ‹่ฎ“ๆˆ‘ไผ‘ๆฏไธ€ไธ‹ๅ†ๅ•ๅ•้กŒๅ–”๏ผ
""",
]]
ai_chatbot_bot_avatar = "https://storage.googleapis.com/wpassets.junyiacademy.org/1/2019/11/%E5%9B%9B%E6%A0%BC%E6%95%85%E4%BA%8B-04.jpg"
ai_name = gr.Dropdown(label="้ธๆ“‡ AI ๅŠฉ็†", choices=[("ๆขจๆขจ","jutor"), ("้บฅ้บฅ","claude3"), ("็‹็‹ธ่ฒ“","groq")], value="jutor")
ai_chatbot = gr.Chatbot(avatar_images=[user_avatar, ai_chatbot_bot_avatar], label="ai_chatbot", show_share_button=False, likeable=True, show_label=False, latex_delimiters=latex_delimiters, value=ai_chatbot_greeting)
ai_chatbot_socratic_mode_btn = gr.Checkbox(label="่˜‡ๆ ผๆ‹‰ๅบ•ๅฎถๆ•™ๅŠฉ็†ๆจกๅผ", value=True, visible=False)
with gr.Row():
with gr.Accordion("ไฝ ไนŸๆœ‰้กžไผผ็š„ๅ•้กŒๆƒณๅ•ๅ—Ž๏ผŸ", open=False) as ask_questions_accordion_2:
ai_chatbot_question_1 = gr.Button("ๅ•้กŒไธ€")
ai_chatbot_question_2 = gr.Button("ๅ•้กŒไบŒ")
ai_chatbot_question_3 = gr.Button("ๅ•้กŒไธ‰")
ai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath", max_length=60, label="่ชž้Ÿณ่ผธๅ…ฅ")
with gr.Row():
ai_msg = gr.Textbox(label="่จŠๆฏ่ผธๅ…ฅ",scale=3)
ai_send_button = gr.Button("้€ๅ‡บ", variant="primary",scale=1)
with gr.Tab("ๆ–‡็ซ ๆจกๅผ"):
with gr.Row():
reading_passage = gr.Markdown(show_label=False, latex_delimiters = [{"left": "$", "right": "$", "display": False}])
reading_passage_speak_button = gr.Button("Speak", visible=False)
reading_passage_audio_output = gr.Audio(label="Audio Output", visible=False)
with gr.Tab("้‡้ปžๆ‘˜่ฆ"):
with gr.Row():
df_summarise = gr.Markdown(show_label=False, latex_delimiters = [{"left": "$", "right": "$", "display": False}])
with gr.Tab("้—œ้ตๆ™‚ๅˆป"):
with gr.Row():
key_moments_html = gr.HTML(value="")
with gr.Tab("ๆ•™ๅญธๅ‚™่ชฒ"):
with gr.Row():
content_subject = gr.Dropdown(label="้ธๆ“‡ไธป้กŒ", choices=["ๆ•ธๅญธ", "่‡ช็„ถ", "ๅœ‹ๆ–‡", "่‹ฑๆ–‡", "็คพๆœƒ","็‰ฉ็†", "ๅŒ–ๅญธ", "็”Ÿ็‰ฉ", "ๅœฐ็†", "ๆญทๅฒ", "ๅ…ฌๆฐ‘"], value="", visible=False)
content_grade = gr.Dropdown(label="้ธๆ“‡ๅนด็ดš", choices=["ไธ€ๅนด็ดš", "ไบŒๅนด็ดš", "ไธ‰ๅนด็ดš", "ๅ››ๅนด็ดš", "ไบ”ๅนด็ดš", "ๅ…ญๅนด็ดš", "ไธƒๅนด็ดš", "ๅ…ซๅนด็ดš", "ไนๅนด็ดš", "ๅๅนด็ดš", "ๅไธ€ๅนด็ดš", "ๅไบŒๅนด็ดš"], value="", visible=False)
content_level = gr.Dropdown(label="ๅทฎ็•ฐๅŒ–ๆ•™ๅญธ", choices=["ๅŸบ็คŽ", "ไธญ็ดš", "้€ฒ้šŽ"], value="ๅŸบ็คŽ")
with gr.Row():
with gr.Tab("ๅญธ็ฟ’ๅ–ฎ"):
with gr.Row():
with gr.Column(scale=1):
with gr.Row():
worksheet_content_type_name = gr.Textbox(value="worksheet", visible=False)
worksheet_algorithm = gr.Dropdown(label="้ธๆ“‡ๆ•™ๅญธ็ญ–็•ฅๆˆ–็†่ซ–", choices=["Bloom่ช็Ÿฅ้šŽๅฑค็†่ซ–", "Polyaๆ•ธๅญธ่งฃ้กŒๆณ•", "CRAๆ•™ๅญธๆณ•"], value="Bloom่ช็Ÿฅ้šŽๅฑค็†่ซ–", visible=False)
worksheet_content_btn = gr.Button("็”Ÿๆˆๅญธ็ฟ’ๅ–ฎ ๐Ÿ“„", variant="primary")
with gr.Accordion("ๅพฎ่ชฟ", open=False):
worksheet_exam_result_fine_tune_prompt = gr.Textbox(label="ๆ นๆ“š็ตๆžœ๏ผŒ่ผธๅ…ฅไฝ ๆƒณๆ›ดๆ”น็š„ๆƒณๆณ•")
worksheet_exam_result_fine_tune_btn = gr.Button("ๅพฎ่ชฟ็ตๆžœ", variant="primary")
worksheet_exam_result_retrun_original = gr.Button("่ฟ”ๅ›žๅŽŸๅง‹็ตๆžœ")
with gr.Accordion("prompt", open=False) as worksheet_accordion:
worksheet_prompt = gr.Textbox(label="worksheet_prompt", show_copy_button=True, lines=40)
with gr.Column(scale=2):
# ็”Ÿๆˆๅฐๆ‡‰ไธๅŒๆจกๅผ็š„็ตๆžœ
worksheet_exam_result_prompt = gr.Textbox(visible=False)
worksheet_exam_result_original = gr.Textbox(visible=False)
# worksheet_exam_result = gr.Textbox(label="ๅˆๆฌก็”Ÿๆˆ็ตๆžœ", show_copy_button=True, interactive=True, lines=40)
worksheet_exam_result = gr.Markdown(label="ๅˆๆฌก็”Ÿๆˆ็ตๆžœ", latex_delimiters = [{"left": "$", "right": "$", "display": False}])
worksheet_download_exam_result_button = gr.Button("่ฝ‰ๆˆ word๏ผŒๅฎŒๆˆๅพŒ่ซ‹้ปžๆ“Šๅณไธ‹่ง’ download ๆŒ‰้ˆ•", variant="primary")
worksheet_exam_result_word_link = gr.File(label="Download Word")
with gr.Tab("่ชฒ็จ‹่จˆ็•ซ"):
with gr.Row():
with gr.Column(scale=1):
with gr.Row():
lesson_plan_content_type_name = gr.Textbox(value="lesson_plan", visible=False)
lesson_plan_time = gr.Slider(label="้ธๆ“‡่ชฒ็จ‹ๆ™‚้–“(ๅˆ†้˜)", minimum=10, maximum=120, step=5, value=40)
lesson_plan_btn = gr.Button("็”Ÿๆˆ่ชฒ็จ‹่จˆ็•ซ ๐Ÿ“•", variant="primary")
with gr.Accordion("ๅพฎ่ชฟ", open=False):
lesson_plan_exam_result_fine_tune_prompt = gr.Textbox(label="ๆ นๆ“š็ตๆžœ๏ผŒ่ผธๅ…ฅไฝ ๆƒณๆ›ดๆ”น็š„ๆƒณๆณ•")
lesson_plan_exam_result_fine_tune_btn = gr.Button("ๅพฎ่ชฟ็ตๆžœ", variant="primary")
lesson_plan_exam_result_retrun_original = gr.Button("่ฟ”ๅ›žๅŽŸๅง‹็ตๆžœ")
with gr.Accordion("prompt", open=False) as lesson_plan_accordion:
lesson_plan_prompt = gr.Textbox(label="lesson_plan_prompt", show_copy_button=True, lines=40)
with gr.Column(scale=2):
# ็”Ÿๆˆๅฐๆ‡‰ไธๅŒๆจกๅผ็š„็ตๆžœ
lesson_plan_exam_result_prompt = gr.Textbox(visible=False)
lesson_plan_exam_result_original = gr.Textbox(visible=False)
lesson_plan_exam_result = gr.Markdown(label="ๅˆๆฌก็”Ÿๆˆ็ตๆžœ", latex_delimiters = [{"left": "$", "right": "$", "display": False}])
lesson_plan_download_exam_result_button = gr.Button("่ฝ‰ๆˆ word๏ผŒๅฎŒๆˆๅพŒ่ซ‹้ปžๆ“Šๅณไธ‹่ง’ download ๆŒ‰้ˆ•", variant="primary")
lesson_plan_exam_result_word_link = gr.File(label="Download Word")
with gr.Tab("ๅ‡บๅ ดๅˆธ"):
with gr.Row():
with gr.Column(scale=1):
with gr.Row():
exit_ticket_content_type_name = gr.Textbox(value="exit_ticket", visible=False)
exit_ticket_time = gr.Slider(label="้ธๆ“‡ๅ‡บๅ ดๅˆธๆ™‚้–“(ๅˆ†้˜)", minimum=5, maximum=10, step=1, value=8)
exit_ticket_btn = gr.Button("็”Ÿๆˆๅ‡บๅ ดๅˆธ ๐ŸŽŸ๏ธ", variant="primary")
with gr.Accordion("ๅพฎ่ชฟ", open=False):
exit_ticket_exam_result_fine_tune_prompt = gr.Textbox(label="ๆ นๆ“š็ตๆžœ๏ผŒ่ผธๅ…ฅไฝ ๆƒณๆ›ดๆ”น็š„ๆƒณๆณ•")
exit_ticket_exam_result_fine_tune_btn = gr.Button("ๅพฎ่ชฟ็ตๆžœ", variant="primary")
exit_ticket_exam_result_retrun_original = gr.Button("่ฟ”ๅ›žๅŽŸๅง‹็ตๆžœ")
with gr.Accordion("prompt", open=False) as exit_ticket_accordion:
exit_ticket_prompt = gr.Textbox(label="exit_ticket_prompt", show_copy_button=True, lines=40)
with gr.Column(scale=2):
# ็”Ÿๆˆๅฐๆ‡‰ไธๅŒๆจกๅผ็š„็ตๆžœ
exit_ticket_exam_result_prompt = gr.Textbox(visible=False)
exit_ticket_exam_result_original = gr.Textbox(visible=False)
exit_ticket_exam_result = gr.Markdown(label="ๅˆๆฌก็”Ÿๆˆ็ตๆžœ", latex_delimiters = [{"left": "$", "right": "$", "display": False}])
exit_ticket_download_exam_result_button = gr.Button("่ฝ‰ๆˆ word๏ผŒๅฎŒๆˆๅพŒ่ซ‹้ปžๆ“Šๅณไธ‹่ง’ download ๆŒ‰้ˆ•", variant="primary")
exit_ticket_exam_result_word_link = gr.File(label="Download Word")
# with gr.Tab("็ด ้คŠๅฐŽๅ‘้–ฑ่ฎ€้กŒ็ต„"):
# literacy_oriented_reading_content = gr.Textbox(label="่ผธๅ…ฅ้–ฑ่ฎ€ๆๆ–™")
# literacy_oriented_reading_content_btn = gr.Button("็”Ÿๆˆ้–ฑ่ฎ€็†่งฃ้กŒ")
# with gr.Tab("่‡ชๆˆ‘่ฉ•ไผฐ"):
# self_assessment_content = gr.Textbox(label="่ผธๅ…ฅ่‡ช่ฉ•ๅ•ๅทๆˆ–ๆชขๆŸฅ่กจ")
# self_assessment_content_btn = gr.Button("็”Ÿๆˆ่‡ช่ฉ•ๅ•ๅท")
# with gr.Tab("่‡ชๆˆ‘ๅๆ€่ฉ•้‡"):
# self_reflection_content = gr.Textbox(label="่ผธๅ…ฅ่‡ชๆˆ‘ๅๆ€ๆดปๅ‹•")
# self_reflection_content_btn = gr.Button("็”Ÿๆˆ่‡ชๆˆ‘ๅๆ€ๆดปๅ‹•")
# with gr.Tab("ๅพŒ่จญ่ช็Ÿฅ"):
# metacognition_content = gr.Textbox(label="่ผธๅ…ฅๅพŒ่จญ่ช็Ÿฅ็›ธ้—œๅ•้กŒ")
# metacognition_content_btn = gr.Button("็”ŸๆˆๅพŒ่จญ่ช็Ÿฅๅ•้กŒ")
with gr.Accordion("See Details", open=False) as see_details:
with gr.Tab("้€ๅญ—็จฟๆœฌๆ–‡"):
with gr.Row() as transcript_admmin:
transcript_kind = gr.Textbox(value="transcript", show_label=False)
transcript_get_button = gr.Button("ๅ–ๅพ—", size="sm", variant="primary")
transcript_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary")
transcript_update_button = gr.Button("ๅ„ฒๅญ˜", size="sm", variant="primary")
transcript_delete_button = gr.Button("ๅˆช้™ค", size="sm", variant="primary")
transcript_create_button = gr.Button("ๅปบ็ซ‹", size="sm", variant="primary")
with gr.Row():
df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True)
with gr.Tab("ๆ–‡็ซ ๆœฌๆ–‡"):
with gr.Row() as reading_passage_admin:
with gr.Column():
with gr.Row():
reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
with gr.Row():
reading_passage_text_to_latex = gr.Button("ๆ–ฐๅขž LaTeX", size="sm", variant="primary")
reading_passage_get_button = gr.Button("ๅ–ๅพ—", size="sm", variant="primary")
reading_passage_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary")
reading_passage_update_button = gr.Button("ๆ›ดๆ–ฐ", size="sm", variant="primary")
reading_passage_delete_button = gr.Button("ๅˆช้™ค", size="sm", variant="primary")
reading_passage_create_button = gr.Button("ๅปบ็ซ‹", size="sm", variant="primary")
with gr.Row():
reading_passage_text = gr.Textbox(label="reading_passage", lines=40, interactive=False, show_copy_button=True)
with gr.Tab("้‡้ปžๆ‘˜่ฆๆœฌๆ–‡"):
with gr.Row() as summary_admmin:
with gr.Column():
with gr.Row():
summary_kind = gr.Textbox(value="summary", show_label=False)
with gr.Row():
summary_to_markdown = gr.Button("ๆ–ฐๅขž Markdown", size="sm", variant="primary")
summary_get_button = gr.Button("ๅ–ๅพ—", size="sm", variant="primary")
summary_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary")
summary_update_button = gr.Button("ๆ›ดๆ–ฐ", size="sm", variant="primary")
summary_delete_button = gr.Button("ๅˆช้™ค", size="sm", variant="primary")
summary_create_button = gr.Button("ๅปบ็ซ‹", size="sm", variant="primary")
with gr.Row():
summary_text = gr.Textbox(label="Summary", lines=40, interactive=False, show_copy_button=True)
with gr.Tab("้—œ้ตๆ™‚ๅˆปๆœฌๆ–‡"):
with gr.Row() as key_moments_admin:
key_moments_kind = gr.Textbox(value="key_moments", show_label=False)
key_moments_get_button = gr.Button("ๅ–ๅพ—", size="sm", variant="primary")
key_moments_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary")
key_moments_update_button = gr.Button("ๅ„ฒๅญ˜", size="sm", variant="primary")
key_moments_delete_button = gr.Button("ๅˆช้™ค", size="sm", variant="primary")
key_moments_create_button = gr.Button("ๅปบ็ซ‹", size="sm", variant="primary")
with gr.Row():
key_moments = gr.Textbox(label="Key Moments", lines=40, interactive=False, show_copy_button=True)
with gr.Tab("ๅ•้กŒๆœฌๆ–‡"):
with gr.Row() as question_list_admin:
questions_kind = gr.Textbox(value="questions", show_label=False)
questions_get_button = gr.Button("ๅ–ๅพ—", size="sm", variant="primary")
questions_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary")
questions_update_button = gr.Button("ๅ„ฒๅญ˜", size="sm", variant="primary")
questions_delete_button = gr.Button("ๅˆช้™ค", size="sm", variant="primary")
questions_create_button = gr.Button("ๅปบ็ซ‹", size="sm", variant="primary")
with gr.Row():
questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
with gr.Tab("้€ๅญ—็จฟ"):
simple_html_content = gr.HTML(label="Simple Transcript")
with gr.Tab("ๅœ–ๆ–‡"):
transcript_html = gr.HTML(label="YouTube Transcript and Video")
with gr.Tab("ๆŠ•ๅฝฑ็‰‡"):
slide_image = gr.Image()
slide_text = gr.Textbox()
with gr.Row():
prev_button = gr.Button("Previous")
next_button = gr.Button("Next")
prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
with gr.Tab("markdown"):
gr.Markdown("## ่ซ‹่ค‡่ฃฝไปฅไธ‹ markdown ไธฆ่ฒผๅˆฐไฝ ็š„ๅฟƒๆ™บๅœ–ๅทฅๅ…ทไธญ๏ผŒๅปบ่ญฐไฝฟ็”จ๏ผšhttps://markmap.js.org/repl")
mind_map = gr.Textbox(container=True, show_copy_button=True, lines=40, elem_id="mind_map_markdown")
with gr.Tab("ๅฟƒๆ™บๅœ–",elem_id="mind_map_tab"):
mind_map_html = gr.HTML()
# --- Event ---
# CHATBOT SELECT
chatbot_open_ai_select_btn.click(
chatbot_select,
inputs=[chatbot_open_ai_name],
outputs=[chatbot_select_accordion, chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor]
)
chatbot_open_ai_streaming_select_btn.click(
chatbot_select,
inputs=[chatbot_open_ai_streaming_name],
outputs=[chatbot_select_accordion, chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor]
)
chatbot_jutor_select_btn.click(
chatbot_select,
inputs=[chatbot_jutor_name],
outputs=[chatbot_select_accordion, chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor]
)
# OpenAI Assistant chatbot mode
send_button.click(
chat_with_opan_ai_assistant,
inputs=[password, video_id, user_data, thread_id, trascript_state, key_moments, msg, chatbot, content_subject, content_grade, socratic_mode_btn],
outputs=[msg, chatbot, thread_id],
scroll_to_output=True
)
openai_chatbot_audio_input.change(
process_open_ai_audio_to_chatbot,
inputs=[password, openai_chatbot_audio_input],
outputs=[msg]
)
# OpenAI Assistant chatbot: suggested-question button click events
btn_1_chat_with_opan_ai_assistant_input =[password, video_id, user_data, thread_id, trascript_state, key_moments, btn_1, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
btn_2_chat_with_opan_ai_assistant_input =[password, video_id, user_data, thread_id, trascript_state, key_moments, btn_2, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
btn_3_chat_with_opan_ai_assistant_input =[password, video_id, user_data, thread_id, trascript_state, key_moments, btn_3, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
btn_1.click(
chat_with_opan_ai_assistant,
inputs=btn_1_chat_with_opan_ai_assistant_input,
outputs=[msg, chatbot, thread_id],
scroll_to_output=True
)
btn_2.click(
chat_with_opan_ai_assistant,
inputs=btn_2_chat_with_opan_ai_assistant_input,
outputs=[msg, chatbot, thread_id],
scroll_to_output=True
)
btn_3.click(
chat_with_opan_ai_assistant,
inputs=btn_3_chat_with_opan_ai_assistant_input,
outputs=[msg, chatbot, thread_id],
scroll_to_output=True
)
btn_create_question.click(
change_questions,
inputs = [password, df_string_output],
outputs = [btn_1, btn_2, btn_3]
)
# ๅ…ถไป–็ฒพ้ˆ ai_chatbot ๆจกๅผ
ai_send_button.click(
chat_with_ai,
inputs=[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_msg, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn],
outputs=[ai_msg, ai_chatbot],
scroll_to_output=True
)
# ๅ…ถไป–็ฒพ้ˆ ai_chatbot ่ฟžๆŽฅๆŒ‰้’ฎ็‚นๅ‡ปไบ‹ไปถ
ai_chatbot_question_1_chat_with_ai_input =[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_chatbot_question_1, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
ai_chatbot_question_2_chat_with_ai_input =[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_chatbot_question_2, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
ai_chatbot_question_3_chat_with_ai_input =[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_chatbot_question_3, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
ai_chatbot_question_1.click(
chat_with_ai,
inputs=ai_chatbot_question_1_chat_with_ai_input,
outputs=[ai_msg, ai_chatbot],
scroll_to_output=True
)
ai_chatbot_question_2.click(
chat_with_ai,
inputs=ai_chatbot_question_2_chat_with_ai_input,
outputs=[ai_msg, ai_chatbot],
scroll_to_output=True
)
ai_chatbot_question_3.click(
chat_with_ai,
inputs=ai_chatbot_question_3_chat_with_ai_input,
outputs=[ai_msg, ai_chatbot],
scroll_to_output=True
)
# file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
# file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
# Triggered when a YouTube link is entered
process_youtube_link_inputs = [password, youtube_link]
process_youtube_link_outputs = [
video_id,
questions_json,
btn_1,
btn_2,
btn_3,
df_string_output,
summary_text,
df_summarise,
key_moments,
key_moments_html,
mind_map,
mind_map_html,
transcript_html,
simple_html_content,
slide_image,
slide_text,
reading_passage_text,
reading_passage,
content_subject,
content_grade,
]
update_state_inputs = [
content_subject,
content_grade,
df_string_output,
key_moments,
btn_1,
btn_2,
btn_3
]
update_state_outputs = [
content_subject_state,
content_grade_state,
trascript_state,
key_moments_state,
streaming_chat_thread_id_state,
ai_chatbot_question_1,
ai_chatbot_question_2,
ai_chatbot_question_3
]
youtube_link.change(
process_youtube_link,
inputs=process_youtube_link_inputs,
outputs=process_youtube_link_outputs
).then(
update_state,
inputs=update_state_inputs,
outputs=update_state_outputs
)
youtube_link_btn.click(
process_youtube_link,
inputs=process_youtube_link_inputs,
outputs=process_youtube_link_outputs
).then(
update_state,
inputs=update_state_inputs,
outputs=update_state_outputs
)
# Triggered when a web page link is entered
# web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
# reading_passage event
reading_passage_text_to_latex.click(
reading_passage_add_latex_version,
inputs=[video_id],
outputs=[reading_passage_text]
)
reading_passage_get_button.click(
get_LLM_content,
inputs=[video_id, reading_passage_kind],
outputs=[reading_passage_text]
)
reading_passage_create_button.click(
create_LLM_content,
inputs=[video_id, df_string_output, reading_passage_kind],
outputs=[reading_passage_text]
)
reading_passage_delete_button.click(
delete_LLM_content,
inputs=[video_id, reading_passage_kind],
outputs=[reading_passage_text]
)
reading_passage_edit_button.click(
enable_edit_mode,
inputs=[],
outputs=[reading_passage_text]
)
reading_passage_update_button.click(
update_LLM_content,
inputs=[video_id, reading_passage_text, reading_passage_kind],
outputs=[reading_passage_text]
)
# summary event
summary_to_markdown.click(
summary_add_markdown_version,
inputs=[video_id],
outputs=[summary_text]
)
summary_get_button.click(
get_LLM_content,
inputs=[video_id, summary_kind],
outputs=[summary_text]
)
summary_create_button.click(
create_LLM_content,
inputs=[video_id, df_string_output, summary_kind],
outputs=[summary_text]
)
summary_delete_button.click(
delete_LLM_content,
inputs=[video_id, summary_kind],
outputs=[summary_text]
)
summary_edit_button.click(
enable_edit_mode,
inputs=[],
outputs=[summary_text]
)
summary_update_button.click(
update_LLM_content,
inputs=[video_id, summary_text, summary_kind],
outputs=[summary_text]
)
# transcript event
transcript_get_button.click(
get_LLM_content,
inputs=[video_id, transcript_kind],
outputs=[df_string_output]
)
transcript_create_button.click(
create_LLM_content,
inputs=[video_id, df_string_output, transcript_kind],
outputs=[df_string_output]
)
transcript_delete_button.click(
delete_LLM_content,
inputs=[video_id, transcript_kind],
outputs=[df_string_output]
)
transcript_edit_button.click(
enable_edit_mode,
inputs=[],
outputs=[df_string_output]
)
transcript_update_button.click(
update_LLM_content,
inputs=[video_id, df_string_output, transcript_kind],
outputs=[df_string_output]
)
# key_moments event
key_moments_get_button.click(
get_LLM_content,
inputs=[video_id, key_moments_kind],
outputs=[key_moments]
)
key_moments_create_button.click(
create_LLM_content,
inputs=[video_id, df_string_output, key_moments_kind],
outputs=[key_moments]
)
key_moments_delete_button.click(
delete_LLM_content,
inputs=[video_id, key_moments_kind],
outputs=[key_moments]
)
key_moments_edit_button.click(
enable_edit_mode,
inputs=[],
outputs=[key_moments]
)
key_moments_update_button.click(
update_LLM_content,
inputs=[video_id, key_moments, key_moments_kind],
outputs=[key_moments]
)
# question_list event
questions_get_button.click(
get_LLM_content,
inputs=[video_id, questions_kind],
outputs=[questions_json]
)
questions_create_button.click(
create_LLM_content,
inputs=[video_id, df_string_output, questions_kind],
outputs=[questions_json]
)
questions_delete_button.click(
delete_LLM_content,
inputs=[video_id, questions_kind],
outputs=[questions_json]
)
questions_edit_button.click(
enable_edit_mode,
inputs=[],
outputs=[questions_json]
)
questions_update_button.click(
update_LLM_content,
inputs=[video_id, questions_json, questions_kind],
outputs=[questions_json]
)
# Teacher edition
worksheet_content_btn.click(
get_ai_content,
inputs=[password, video_id, df_string_output, content_subject, content_grade, content_level, worksheet_algorithm, worksheet_content_type_name],
outputs=[worksheet_exam_result_original, worksheet_exam_result, worksheet_prompt, worksheet_exam_result_prompt]
)
lesson_plan_btn.click(
get_ai_content,
inputs=[password, video_id, df_string_output, content_subject, content_grade, content_level, lesson_plan_time, lesson_plan_content_type_name],
outputs=[lesson_plan_exam_result_original, lesson_plan_exam_result, lesson_plan_prompt, lesson_plan_exam_result_prompt]
)
exit_ticket_btn.click(
get_ai_content,
inputs=[password, video_id, df_string_output, content_subject, content_grade, content_level, exit_ticket_time, exit_ticket_content_type_name],
outputs=[exit_ticket_exam_result_original, exit_ticket_exam_result, exit_ticket_prompt, exit_ticket_exam_result_prompt]
)
# ็”Ÿๆˆ็ตๆžœๅพฎ่ชฟ
worksheet_exam_result_fine_tune_btn.click(
generate_exam_fine_tune_result,
inputs=[password, worksheet_exam_result_prompt, df_string_output, worksheet_exam_result, worksheet_exam_result_fine_tune_prompt],
outputs=[worksheet_exam_result]
)
worksheet_download_exam_result_button.click(
download_exam_result,
inputs=[worksheet_exam_result],
outputs=[worksheet_exam_result_word_link]
)
worksheet_exam_result_retrun_original.click(
return_original_exam_result,
inputs=[worksheet_exam_result_original],
outputs=[worksheet_exam_result]
)
lesson_plan_exam_result_fine_tune_btn.click(
generate_exam_fine_tune_result,
inputs=[password, lesson_plan_exam_result_prompt, df_string_output, lesson_plan_exam_result, lesson_plan_exam_result_fine_tune_prompt],
outputs=[lesson_plan_exam_result]
)
lesson_plan_download_exam_result_button.click(
download_exam_result,
inputs=[lesson_plan_exam_result],
outputs=[lesson_plan_exam_result_word_link]
)
lesson_plan_exam_result_retrun_original.click(
return_original_exam_result,
inputs=[lesson_plan_exam_result_original],
outputs=[lesson_plan_exam_result]
)
exit_ticket_exam_result_fine_tune_btn.click(
generate_exam_fine_tune_result,
inputs=[password, exit_ticket_exam_result_prompt, df_string_output, exit_ticket_exam_result, exit_ticket_exam_result_fine_tune_prompt],
outputs=[exit_ticket_exam_result]
)
exit_ticket_download_exam_result_button.click(
download_exam_result,
inputs=[exit_ticket_exam_result],
outputs=[exit_ticket_exam_result_word_link]
)
exit_ticket_exam_result_retrun_original.click(
return_original_exam_result,
inputs=[exit_ticket_exam_result_original],
outputs=[exit_ticket_exam_result]
)
# init_params
init_outputs = [
admin,
reading_passage_admin,
summary_admmin,
see_details,
worksheet_accordion,
lesson_plan_accordion,
exit_ticket_accordion,
password,
youtube_link,
chatbot_open_ai,
chatbot_open_ai_streaming,
chatbot_jutor
]
demo.load(
init_params,
inputs =[youtube_link],
outputs = init_outputs
)
demo.launch(allowed_paths=["videos"])