import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from docx import Document
import os
from openai import OpenAI
from groq import Groq
import uuid
from gtts import gTTS
import math
from pydub import AudioSegment
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
import yt_dlp
from moviepy.editor import VideoFileClip
from pytube import YouTube
import io
import time
import json
from urllib.parse import urlparse, parse_qs
from google.cloud import storage
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.http import MediaIoBaseUpload
from educational_material import EducationalMaterial
from storage_service import GoogleCloudStorage
import boto3
from chatbot import Chatbot
is_env_local = os.getenv("IS_ENV_LOCAL", "false") == "true"
print(f"is_env_local: {is_env_local}")
print("===gr__version__===")
print(gr.__version__)

if is_env_local:
    with open("local_config.json") as f:
        config = json.load(f)
    PASSWORD = config["PASSWORD"]
    GCS_KEY = json.dumps(config["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
    DRIVE_KEY = json.dumps(config["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
    OPEN_AI_KEY = config["OPEN_AI_KEY"]
    GROQ_API_KEY = config["GROQ_API_KEY"]
    JUTOR_CHAT_KEY = config["JUTOR_CHAT_KEY"]
    AWS_ACCESS_KEY = config["AWS_ACCESS_KEY"]
    AWS_SECRET_KEY = config["AWS_SECRET_KEY"]
    AWS_REGION_NAME = config["AWS_REGION_NAME"]
    OUTPUT_PATH = config["OUTPUT_PATH"]
else:
    PASSWORD = os.getenv("PASSWORD")
    GCS_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
    DRIVE_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
    OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
    JUTOR_CHAT_KEY = os.getenv("JUTOR_CHAT_KEY")
    AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
    AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
    AWS_REGION_NAME = 'us-west-2'
    OUTPUT_PATH = 'videos'
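# Note: when IS_ENV_LOCAL is not "true", this app expects PASSWORD,
# GOOGLE_APPLICATION_CREDENTIALS_JSON, OPEN_AI_KEY, GROQ_API_KEY,
# JUTOR_CHAT_KEY, AWS_ACCESS_KEY and AWS_SECRET_KEY to be provided as
# environment variables (e.g. as Hugging Face Space secrets); the AWS
# region and output path then fall back to the hard-coded defaults above.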
TRANSCRIPTS = []
CURRENT_INDEX = 0

OPEN_AI_CLIENT = OpenAI(api_key=OPEN_AI_KEY)
GROQ_CLIENT = Groq(api_key=GROQ_API_KEY)
GCS_SERVICE = GoogleCloudStorage(GCS_KEY)
GCS_CLIENT = GCS_SERVICE.client
BEDROCK_CLIENT = boto3.client(
    service_name="bedrock-runtime",
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY,
    region_name=AWS_REGION_NAME,
)
# Verify the password
def verify_password(password):
    if password == PASSWORD:
        return True
    else:
        raise gr.Error("ๅฏ็ขผ้ฏ่ชค")
# ==== GCS helpers ====
def gcs_check_file_exists(gcs_client, bucket_name, file_name):
    """
    Check whether a given file exists in the GCS bucket.
    file_name format: {folder_name}/{file_name}
    """
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(file_name)
    return blob.exists()

def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
    """Upload a local file to the given GCS bucket."""
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(file_path)
    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")

def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
    """Upload a string to the given GCS bucket."""
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_string(json_string)
    print(f"JSON string uploaded to {destination_blob_name} in GCS.")

def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
    """Download a file's content from GCS as a string."""
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    return blob.download_as_text()

def make_blob_public(gcs_client, bucket_name, blob_name):
    """Make the given GCS object publicly readable."""
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.make_public()
    print(f"Blob {blob_name} is now publicly accessible at {blob.public_url}")

def get_blob_public_url(gcs_client, bucket_name, blob_name):
    """Get the public URL of the given GCS object."""
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    return blob.public_url

def upload_img_and_get_public_url(gcs_client, bucket_name, file_name, file_path):
    """Upload an image to GCS and return its public URL."""
    # Upload the image
    upload_file_to_gcs(gcs_client, bucket_name, file_name, file_path)
    # Make the uploaded image public
    make_blob_public(gcs_client, bucket_name, file_name)
    # Get the image's public URL
    public_url = get_blob_public_url(gcs_client, bucket_name, file_name)
    print(f"Public URL for the uploaded image: {public_url}")
    return public_url

def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
    # Get all files from the folder
    query = f"'{drive_folder_id}' in parents and trashed = false"
    response = drive_service.files().list(q=query).execute()
    files = response.get('files', [])
    for file in files:
        # Copy each file to GCS
        file_id = file['id']
        file_name = file['name']
        gcs_destination_path = f"{gcs_folder_name}/{file_name}"
        copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path)

def copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path):
    # Download file content from Drive
    request = drive_service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
    fh.seek(0)
    file_content = fh.getvalue()
    # Upload file content to GCS
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(gcs_destination_path)
    blob.upload_from_string(file_content)
    print(f"File {file_id} copied to GCS at {gcs_destination_path}.")

def delete_blob(gcs_client, bucket_name, blob_name):
    """Delete the given GCS object."""
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.delete()
    print(f"Blob {blob_name} deleted from GCS.")
# ==== Google Drive helpers (initialization) ====
def init_drive_service():
    credentials_json_string = DRIVE_KEY
    credentials_dict = json.loads(credentials_json_string)
    SCOPES = ['https://www.googleapis.com/auth/drive']
    credentials = service_account.Credentials.from_service_account_info(
        credentials_dict, scopes=SCOPES)
    service = build('drive', 'v3', credentials=credentials)
    return service

def create_folder_if_not_exists(service, folder_name, parent_id):
    print("Checking whether a folder with this name exists; creating it if not")
    query = f"mimeType='application/vnd.google-apps.folder' and name='{folder_name}' and '{parent_id}' in parents and trashed=false"
    response = service.files().list(q=query, spaces='drive', fields="files(id, name)").execute()
    folders = response.get('files', [])
    if not folders:
        # The folder does not exist; create a new one
        file_metadata = {
            'name': folder_name,
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_id]
        }
        folder = service.files().create(body=file_metadata, fields='id').execute()
        return folder.get('id')
    else:
        # The folder already exists
        return folders[0]['id']
# Check whether a file exists on Google Drive
def check_file_exists(service, folder_name, file_name):
    query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
    response = service.files().list(q=query).execute()
    files = response.get('files', [])
    return len(files) > 0, files[0]['id'] if files else None
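# Note: despite its name, the folder_name parameter above is interpolated
# into an "in parents" clause, so callers must pass a Drive folder *ID*
# (as the call sites in this file do), not a human-readable folder name.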
def upload_content_directly(service, file_name, folder_id, content):
    """
    Upload content directly to a new file on Google Drive.
    """
    if not file_name:
        raise ValueError("File name must not be empty")
    if not folder_id:
        raise ValueError("Folder ID must not be empty")
    if content is None:  # Allow uploading an empty string, but not None
        raise ValueError("Content must not be None")
    file_metadata = {'name': file_name, 'parents': [folder_id]}
    # Use io.BytesIO to create an in-memory file object for the text content
    try:
        with io.BytesIO(content.encode('utf-8')) as fh:
            media = MediaIoBaseUpload(fh, mimetype='text/plain', resumable=True)
            print("==content==")
            print(content)
            print("==content==")
            print("==media==")
            print(media)
            print("==media==")
            # Perform the upload
            file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
            return file.get('id')
    except Exception as e:
        print(f"Error while uploading the file: {e}")
        raise  # Re-raise so the caller can handle or ignore as needed

def upload_file_directly(service, file_name, folder_id, file_path):
    # Upload a .json file to Google Drive
    file_metadata = {'name': file_name, 'parents': [folder_id]}
    media = MediaFileUpload(file_path, mimetype='application/json')
    file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
    # return file.get('id')  # Return the file ID
    return True

def upload_img_directly(service, file_name, folder_id, file_path):
    file_metadata = {'name': file_name, 'parents': [folder_id]}
    media = MediaFileUpload(file_path, mimetype='image/jpeg')
    file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
    return file.get('id')  # Return the file ID

def download_file_as_string(service, file_id):
    """
    Download a file from Google Drive and return its content as a string.
    """
    request = service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
    fh.seek(0)
    content = fh.read().decode('utf-8')
    return content

def set_public_permission(service, file_id):
    service.permissions().create(
        fileId=file_id,
        body={"type": "anyone", "role": "reader"},
        fields='id',
    ).execute()

def update_file_on_drive(service, file_id, file_content):
    """
    Update the content of a file on Google Drive.
    Args:
    - service: Google Drive API service instance.
    - file_id: ID of the file to update.
    - file_content: the new file content, as a string.
    """
    # Convert the new file content into a byte stream
    fh = io.BytesIO(file_content.encode('utf-8'))
    media = MediaIoBaseUpload(fh, mimetype='application/json', resumable=True)
    # Update the file
    updated_file = service.files().update(
        fileId=file_id,
        media_body=media
    ).execute()
    print(f"File updated, file ID: {updated_file['id']}")
# ---- Text file ----
def process_file(password, file):
    verify_password(password)
    # Read the file
    if file.name.endswith('.csv'):
        df = pd.read_csv(file)
        text = df_to_text(df)
    elif file.name.endswith('.xlsx'):
        df = pd.read_excel(file)
        text = df_to_text(df)
    elif file.name.endswith('.docx'):
        text = docx_to_text(file)
    else:
        raise ValueError("Unsupported file type")
    # Use the extracted text (calling df.to_string() here would fail for
    # .docx input, where no DataFrame exists)
    df_string = text
    # ๅฎ่ญ: replace the @XX@ marker with |
    df_string = df_string.replace("@XX@", "|")
    # Generate questions based on the uploaded file's content
    questions = generate_questions(df_string)
    summary = generate_summarise(df_string)
    # Return the question texts and the content string
    return questions[0] if len(questions) > 0 else "", \
        questions[1] if len(questions) > 1 else "", \
        questions[2] if len(questions) > 2 else "", \
        summary, \
        df_string

def df_to_text(df):
    # Convert the DataFrame to plain text
    return df.to_string()

def docx_to_text(file):
    # Convert a Word document to plain text
    doc = Document(file)
    return "\n".join([para.text for para in doc.paragraphs])
# ---- YouTube link ----
def format_seconds_to_time(seconds):
    """Format a number of seconds as hh:mm:ss."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = int(seconds % 60)
    return f"{hours:02}:{minutes:02}:{seconds:02}"
def extract_youtube_id(url):
    parsed_url = urlparse(url)
    if "youtube.com" in parsed_url.netloc:
        # For standard links, the video ID is in the 'v' query parameter
        query_params = parse_qs(parsed_url.query)
        return query_params.get("v")[0] if "v" in query_params else None
    elif "youtu.be" in parsed_url.netloc:
        # For short links, the video ID is part of the path
        return parsed_url.path.lstrip('/')
    else:
        return None
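# Usage sketch for extract_youtube_id (the video ID below is a made-up
# placeholder, not a real video):
#   extract_youtube_id("https://www.youtube.com/watch?v=abc123XYZ_0")  # -> "abc123XYZ_0"
#   extract_youtube_id("https://youtu.be/abc123XYZ_0")                 # -> "abc123XYZ_0"
#   extract_youtube_id("https://example.com/clip")                     # -> None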
def get_transcript_by_yt_api(video_id):
    languages = ['zh-TW', 'zh-Hant', 'zh', 'en']  # Preferred-language order
    for language in languages:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
            print("===transcript===")
            print(transcript)
            print("===transcript===")
            return transcript  # Transcript fetched successfully; return it
        except NoTranscriptFound:
            continue  # No transcript for this language; try the next one
    return None  # All attempts failed; return None
def generate_transcription_by_whisper(video_id):
    youtube_url = f'https://www.youtube.com/watch?v={video_id}'
    codec_name = "mp3"
    outtmpl = f"{OUTPUT_PATH}/{video_id}.%(ext)s"
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': codec_name,
            'preferredquality': '192'
        }],
        'outtmpl': outtmpl,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])

    audio_path = f"{OUTPUT_PATH}/{video_id}.{codec_name}"
    full_audio = AudioSegment.from_mp3(audio_path)
    max_part_duration = 10 * 60 * 1000  # 10 minutes
    full_duration = len(full_audio)  # in milliseconds
    parts = math.ceil(full_duration / max_part_duration)
    print(f"parts: {parts}")
    transcription = []
    for i in range(parts):
        print(f"== i: {i}==")
        start_time = i * max_part_duration
        end_time = min((i + 1) * max_part_duration, full_duration)
        print(f"time: {start_time/1000} - {end_time/1000}")
        chunk = full_audio[start_time:end_time]
        chunk_path = f"{OUTPUT_PATH}/{video_id}_part_{i}.{codec_name}"
        chunk.export(chunk_path, format=codec_name)
        try:
            with open(chunk_path, "rb") as chunk_file:
                response = OPEN_AI_CLIENT.audio.transcriptions.create(
                    model="whisper-1",
                    file=chunk_file,
                    response_format="verbose_json",
                    timestamp_granularities=["segment"],
                    prompt="Transcribe the following audio file. if content is chinese, please using 'language: zh-TW' ",
                )
            # Adjust the chunk's timestamps based on its position in the full audio
            adjusted_segments = [{
                'text': segment['text'],
                'start': math.ceil(segment['start'] + start_time / 1000.0),  # Converting milliseconds to seconds
                'end': math.ceil(segment['end'] + start_time / 1000.0),
                'duration': math.ceil(segment['end'] - segment['start'])
            } for segment in response.segments]
            transcription.extend(adjusted_segments)
        except Exception as e:
            print(f"Error processing chunk {i}: {str(e)}")
        # Remove the temporary chunk file after processing
        os.remove(chunk_path)
    return transcription
def process_transcript_and_screenshots_on_gcs(video_id):
    print("====process_transcript_and_screenshots_on_gcs====")
    # GCS
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    # Transcript file name
    transcript_file_name = f'{video_id}_transcript.json'
    transcript_blob_name = f"{video_id}/{transcript_file_name}"
    # Check whether the transcript exists
    is_new_transcript = False
    is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
    if not is_transcript_exists:
        print("Transcript file does not exist in GCS; rebuilding it")
        # Fetch the transcript from YouTube and upload it
        try:
            transcript = get_transcript_by_yt_api(video_id)
        except Exception:
            # Fall back to OpenAI Whisper
            print("===call open ai whisper===")
            transcript = generate_transcription_by_whisper(video_id)
        if transcript:
            print("Transcript fetched successfully")
        else:
            print("No transcript found")
            transcript = generate_transcription_by_whisper(video_id)
        transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
        is_new_transcript = True
    else:
        # The transcript already exists; download its content
        print("Transcript already exists in GCS")
        transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
        transcript = json.loads(transcript_text)
        # print("===check the other derived files===")
        # source = "gcs"
        # get_questions(video_id, transcript_text, source)
        # get_video_id_summary(video_id, transcript_text, source)
        # get_mind_map(video_id, transcript_text, source)
        # print("===check the other derived files end===")

    # Handle screenshots
    for entry in transcript:
        if 'img_file_id' not in entry:
            # Check whether OUTPUT_PATH already contains video_id.mp4
            video_path = f'{OUTPUT_PATH}/{video_id}.mp4'
            if not os.path.exists(video_path):
                # Try 5 times; raise if all attempts fail
                for i in range(5):
                    try:
                        download_youtube_video(video_id)
                        break
                    except Exception as e:
                        if i == 4:
                            raise gr.Error(f"ไธ่ฝฝ่ง้ขๅคฑ่ดฅ: {str(e)}")
                        time.sleep(5)
            # Take a screenshot
            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
            screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
            entry['img_file_id'] = img_file_id
            print(f"Screenshot uploaded to GCS: {img_file_id}")
            is_new_transcript = True

    # Check whether the transcript file needs updating
    if is_new_transcript:
        # Update the transcript file
        print("===updating transcript file===")
        print(transcript)
        print("===updating transcript file===")
        updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
        print("Transcript updated, including screenshot links")
        updated_transcript_json = json.loads(updated_transcript_text)
    else:
        updated_transcript_json = transcript
    return updated_transcript_json
def process_youtube_link(password, link):
    verify_password(password)
    # Fetch (or build) the transcript for this YouTube video, then format it
    video_id = extract_youtube_id(link)
    try:
        transcript = process_transcript_and_screenshots_on_gcs(video_id)
    except Exception as e:
        error_msg = f" {video_id} ้ๅญ็จฟ้ฏ่ชค: {str(e)}"
        print("===process_youtube_link error===")
        print(error_msg)
        raise gr.Error(error_msg)
    original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
    formatted_transcript = []
    formatted_simple_transcript = []
    for entry in transcript:
        start_time = format_seconds_to_time(entry['start'])
        end_time = format_seconds_to_time(entry['start'] + entry['duration'])
        embed_url = get_embedded_youtube_link(video_id, entry['start'])
        img_file_id = entry['img_file_id']
        screenshot_path = img_file_id
        line = {
            "start_time": start_time,
            "end_time": end_time,
            "text": entry['text'],
            "embed_url": embed_url,
            "screenshot_path": screenshot_path
        }
        formatted_transcript.append(line)
        # formatted_simple_transcript only needs start_time, end_time, text
        simple_line = {
            "start_time": start_time,
            "end_time": end_time,
            "text": entry['text']
        }
        formatted_simple_transcript.append(simple_line)
    global TRANSCRIPTS
    TRANSCRIPTS = formatted_transcript

    # Generate the other derived outputs from the transcript
    source = "gcs"
    questions = get_questions(video_id, formatted_simple_transcript, source)
    questions_json = json.dumps(questions, ensure_ascii=False, indent=2)
    summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
    summary_text = summary_json["summary"]
    summary = summary_json["summary"]
    key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
    key_moments = key_moments_json["key_moments"]
    key_moments_text = json.dumps(key_moments, ensure_ascii=False, indent=2)
    key_moments_html = get_key_moments_html(key_moments)
    html_content = format_transcript_to_html(formatted_transcript)
    simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
    first_image = formatted_transcript[0]['screenshot_path']
    first_text = formatted_transcript[0]['text']
    mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
    mind_map = mind_map_json["mind_map"]
    mind_map_html = get_mind_map_html(mind_map)
    reading_passage_json = get_reading_passage(video_id, formatted_simple_transcript, source)
    reading_passage_text = reading_passage_json["reading_passage"]
    reading_passage = reading_passage_json["reading_passage"]
    meta_data = get_meta_data(video_id)
    subject = meta_data["subject"]
    grade = meta_data["grade"]

    # Make sure the return values match the outputs expected by the UI components
    return video_id, \
        questions_json, \
        questions[0] if len(questions) > 0 else "", \
        questions[1] if len(questions) > 1 else "", \
        questions[2] if len(questions) > 2 else "", \
        original_transcript, \
        summary_text, \
        summary, \
        key_moments_text, \
        key_moments_html, \
        mind_map, \
        mind_map_html, \
        html_content, \
        simple_html_content, \
        first_image, \
        first_text, \
        reading_passage_text, \
        reading_passage, \
        subject, \
        grade
def create_formatted_simple_transcript(transcript):
    formatted_simple_transcript = []
    for entry in transcript:
        start_time = format_seconds_to_time(entry['start'])
        end_time = format_seconds_to_time(entry['start'] + entry['duration'])
        line = {
            "start_time": start_time,
            "end_time": end_time,
            "text": entry['text']
        }
        formatted_simple_transcript.append(line)
    return formatted_simple_transcript

def create_formatted_transcript(video_id, transcript):
    formatted_transcript = []
    for entry in transcript:
        start_time = format_seconds_to_time(entry['start'])
        end_time = format_seconds_to_time(entry['start'] + entry['duration'])
        embed_url = get_embedded_youtube_link(video_id, entry['start'])
        img_file_id = entry['img_file_id']
        screenshot_path = img_file_id
        line = {
            "start_time": start_time,
            "end_time": end_time,
            "text": entry['text'],
            "embed_url": embed_url,
            "screenshot_path": screenshot_path
        }
        formatted_transcript.append(line)
    return formatted_transcript

def format_transcript_to_html(formatted_transcript):
    html_content = ""
    for entry in formatted_transcript:
        html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
        html_content += f"<p>{entry['text']}</p>"
        html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
    return html_content

def format_simple_transcript_to_html(formatted_transcript):
    html_content = ""
    for entry in formatted_transcript:
        html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
        html_content += f"<p>{entry['text']}</p>"
    return html_content

def get_embedded_youtube_link(video_id, start_time):
    int_start_time = int(start_time)
    embed_url = f"https://www.youtube.com/embed/{video_id}?start={int_start_time}&autoplay=1"
    return embed_url

def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
    # Construct the full YouTube URL
    youtube_url = f'https://www.youtube.com/watch?v={youtube_id}'
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Download the video
    yt = YouTube(youtube_url)
    video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
    video_stream.download(output_path=output_path, filename=youtube_id + ".mp4")
    print(f"Video downloaded successfully: {output_path}/{youtube_id}.mp4")

def screenshot_youtube_video(youtube_id, snapshot_sec):
    video_path = f'{OUTPUT_PATH}/{youtube_id}.mp4'
    file_name = f"{youtube_id}_{snapshot_sec}.jpg"
    with VideoFileClip(video_path) as video:
        screenshot_path = f'{OUTPUT_PATH}/{file_name}'
        video.save_frame(screenshot_path, snapshot_sec)
    return screenshot_path
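# Usage sketch for the download + screenshot flow above (the video ID is a
# made-up placeholder; download_youtube_video must have produced
# videos/<id>.mp4 before screenshot_youtube_video can read it):
#   download_youtube_video("abc123XYZ_0")
#   frame_path = screenshot_youtube_video("abc123XYZ_0", 42)  # frame at 42s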
# ---- Web ----
# def process_web_link(link):
#     # Fetch and parse the web page content
#     response = requests.get(link)
#     soup = BeautifulSoup(response.content, 'html.parser')
#     return soup.get_text()
# ---- LLM Generator ----
def get_reading_passage(video_id, df_string, source):
    if source == "gcs":
        print("===get_reading_passage on gcs===")
        gcs_client = GCS_CLIENT
        bucket_name = 'video_ai_assistant'
        file_name = f'{video_id}_reading_passage.json'
        blob_name = f"{video_id}/{file_name}"
        # Check whether the reading passage exists
        is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
        if not is_file_exists:
            reading_passage = generate_reading_passage(df_string)
            reading_passage_json = {"reading_passage": str(reading_passage)}
            reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
            print("reading_passage uploaded to GCS")
        else:
            # The reading passage already exists; download its content
            print("reading_passage already exists in GCS")
            reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
            reading_passage_json = json.loads(reading_passage_text)
    elif source == "drive":
        print("===get_reading_passage on drive===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
        file_name = f'{video_id}_reading_passage.json'
        # Check whether the reading passage exists
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            reading_passage = generate_reading_passage(df_string)
            reading_passage_json = {"reading_passage": str(reading_passage)}
            reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
            upload_content_directly(service, file_name, folder_id, reading_passage_text)
            print("reading_passage uploaded to Google Drive")
        else:
            # The reading passage already exists; download its content
            # (and parse it, otherwise reading_passage_json would be unset here)
            print("reading_passage already exists in Google Drive")
            reading_passage_text = download_file_as_string(service, file_id)
            reading_passage_json = json.loads(reading_passage_text)
    return reading_passage_json

def generate_reading_passage(df_string):
    # Use OpenAI to generate a reading passage based on the uploaded data
    sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไฝฟ็จ zh-TW"
    user_content = f"""
        ่ซๆ นๆ {df_string}
        ๆๆฌ่ช่กๅคๆท่ณๆ็็จฎ้ก
        ๅนซๆ็ตๅๆ Reading Passage
        ไธฆๆฝค็จฟ่ฎๆๅฅ้้
        ่ซไธๅฎ่ฆไฝฟ็จ็น้ซไธญๆ zh-TW๏ผไธฆ็จๅฐ็ฃไบบ็ๅฃ่ช
        ็ข็็็ตๆไธ่ฆๅๅพๆ่งฃ้๏ผไนไธ่ฆๆ่ฟฐ้็ฏๆ็ซ ๆ้บผ็ข็็
        ๅช้่ฆๅฐๆณจๆไพ Reading Passage๏ผๅญๆธๅจ 500 ๅญไปฅๅ ง
        ๆ่ฟฐไธญ๏ผ่ซๆๆธๅญธๆๆฏๅฐๆฅญ่ก่ช๏ผ็จ Latex ๅ ่ฆ๏ผ$...$๏ผ๏ผไธฆไธไธ่ฆๅปๆนๅๆฌ็ๆ็ซ
        ๅ ๆธไน้คใๆ น่ใๆฌกๆน็ญ็ญ็้็ฎๅผๅฃ่ชไนๆๆ LATEX ๆธๅญธ็ฌฆ่
    """
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content}
    ]
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }
    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    reading_passage = response.choices[0].message.content.strip()
    print("=====reading_passage=====")
    print(reading_passage)
    print("=====reading_passage=====")
    return reading_passage

def text_to_speech(video_id, text):
    tts = gTTS(text, lang='en')
    filename = f'{video_id}_reading_passage.mp3'
    tts.save(filename)
    return filename
def get_mind_map(video_id, df_string, source):
    if source == "gcs":
        print("===get_mind_map on gcs===")
        gcs_client = GCS_CLIENT
        bucket_name = 'video_ai_assistant'
        file_name = f'{video_id}_mind_map.json'
        blob_name = f"{video_id}/{file_name}"
        # Check whether the file exists
        is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
        if not is_file_exists:
            mind_map = generate_mind_map(df_string)
            mind_map_json = {"mind_map": str(mind_map)}
            mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
            print("mind_map uploaded to GCS")
        else:
            # The mind map already exists; download its content
            print("mind_map already exists in GCS")
            mind_map_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
            mind_map_json = json.loads(mind_map_text)
    elif source == "drive":
        print("===get_mind_map on drive===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
        file_name = f'{video_id}_mind_map.json'
        # Check whether the file exists
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            mind_map = generate_mind_map(df_string)
            mind_map_json = {"mind_map": str(mind_map)}
            mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
            upload_content_directly(service, file_name, folder_id, mind_map_text)
            print("mind_map uploaded to Google Drive")
        else:
            # The mind map already exists; download its content
            print("mind_map already exists in Google Drive")
            mind_map_text = download_file_as_string(service, file_id)
            mind_map_json = json.loads(mind_map_text)
    return mind_map_json

def generate_mind_map(df_string):
    # Use OpenAI to generate a mind map based on the uploaded data
    sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไฝฟ็จ zh-TW"
    user_content = f"""
        ่ซๆ นๆ {df_string} ๆๆฌๅปบ็ซ markdown ๅฟๆบๅ
        ๆณจๆ๏ผไธ้่ฆๅๅพๆๆ่ฟฐ๏ผ็ดๆฅ็ตฆๅบ markdown ๆๆฌๅณๅฏ
        ้ๅฐๆๅพ้่ฆ
    """
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content}
    ]
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }
    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    mind_map = response.choices[0].message.content.strip()
    print("=====mind_map=====")
    print(mind_map)
    print("=====mind_map=====")
    return mind_map

def get_mind_map_html(mind_map):
    mind_map_markdown = mind_map.replace("```markdown", "").replace("```", "")
    mind_map_html = f"""
    <div class="markmap">
        <script type="text/template">
            {mind_map_markdown}
        </script>
    </div>
    """
    return mind_map_html
def get_video_id_summary(video_id, df_string, source):
    if source == "gcs":
        print("===get_video_id_summary on gcs===")
        gcs_client = GCS_CLIENT
        bucket_name = 'video_ai_assistant'
        file_name = f'{video_id}_summary.json'
        summary_file_blob_name = f"{video_id}/{file_name}"
        # Check whether the summary file exists
        is_summary_file_exists = GCS_SERVICE.check_file_exists(bucket_name, summary_file_blob_name)
        if not is_summary_file_exists:
            summary = generate_summarise(df_string)
            summary_json = {"summary": str(summary)}
            summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
            print("summary uploaded to GCS")
        else:
            # The summary already exists; download its content
            print("summary already exists in GCS")
            summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
            summary_json = json.loads(summary_text)
    elif source == "drive":
        print("===get_video_id_summary===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
        file_name = f'{video_id}_summary.json'
        # Check whether the summary exists
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            summary = generate_summarise(df_string)
            summary_json = {"summary": str(summary)}
            summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
            try:
                upload_content_directly(service, file_name, folder_id, summary_text)
                print("summary uploaded to Google Drive")
            except Exception as e:
                error_msg = f" {video_id} ๆ่ฆ้ฏ่ชค: {str(e)}"
                print("===get_video_id_summary error===")
                print(error_msg)
                print("===get_video_id_summary error===")
        else:
            # The summary already exists; download its content
            print("summary already exists in Google Drive")
            summary_text = download_file_as_string(service, file_id)
            summary_json = json.loads(summary_text)
    return summary_json

def generate_summarise(df_string):
    # Use OpenAI to generate a summary based on the uploaded data
    sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไฝฟ็จ zh-TW"
    user_content = f"""
        ่ซๆ นๆ {df_string}๏ผๅคๆท้ไปฝๆๆฌ
        ๆ ผๅผ็บ Markdown
        ๆด้ซๆ่ฆๅจไธ็พๅญไปฅๅ ง
        ้้ปๆฆๅฟตๅๅบ bullet points๏ผ่ณๅฐไธๅ๏ผๆๅคไบๅ
        ไปฅๅๅฏ่ฝ็็ต่ซ่็ตๅฐพๅปถไผธๅฐๅ้กๆไพๅญธ็ไฝๅๆ
        ๆ่ฟฐไธญ๏ผ่ซๆๆธๅญธๆๆฏๅฐๆฅญ่ก่ช๏ผ็จ Latex ๅ ่ฆ๏ผ$...$๏ผ
        ๅ ๆธไน้คใๆ น่ใๆฌกๆน็ญ็ญ็้็ฎๅผๅฃ่ชไนๆๆ LATEX ๆธๅญธ็ฌฆ่
        ๆด้ซๆ ผๅผ็บ๏ผ
        ## ๐ ๆด้ซๆ่ฆ
        - (ไธๅ bullet point....)
        ## ๐ ้้ปๆฆๅฟต
        - xxx
        - xxx
        - xxx
        ## ๐ก ็บไป้บผๆๅ่ฆๅญธ้ๅ๏ผ
        - (ไธๅ bullet point....)
        ## โ ๅปถไผธๅฐๅ้ก
        - (ไธๅ bullet point....)
    """
    # ๐๏ธ 1. ๅ งๅฎน้กๅ๏ผ๏ผ
    # ๐ 2. ๆด้ซๆ่ฆ
    # ๐ 3. ๆขๅๅผ้้ป
    # ๐ 4. ้้ตๆๅป๏ผๆฎต่ฝๆ่ฆ๏ผ
    # ๐ก 5. ็ต่ซๅๆ๏ผ็บไป้บผๆๅ่ฆๅญธ้ๅ๏ผ๏ผ
    # โ 6. ๅปถไผธๅฐๅ้ก
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content}
    ]
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }
    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    df_summarise = response.choices[0].message.content.strip()
    print("=====df_summarise=====")
    print(df_summarise)
    print("=====df_summarise=====")
    return df_summarise
def get_questions(video_id, df_string, source="gcs"): | |
if source == "gcs": | |
# ๅป gcs ็ขบ่ชๆฏๆๆ video_id_questions.json | |
print("===get_questions on gcs===") | |
gcs_client = GCS_CLIENT | |
bucket_name = 'video_ai_assistant' | |
file_name = f'{video_id}_questions.json' | |
blob_name = f"{video_id}/{file_name}" | |
# ๆฃๆฅๆชๆกๆฏๅฆๅญๅจ | |
is_questions_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name) | |
if not is_questions_exists: | |
questions = generate_questions(df_string) | |
questions_text = json.dumps(questions, ensure_ascii=False, indent=2) | |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text) | |
print("questionsๅทฒไธๅณๅฐGCS") | |
else: | |
# ้ๅญ็จฟๅทฒๅญๅจ๏ผไธ่ฝฝ้ๅญ็จฟๅ ๅฎน | |
print("questionsๅทฒๅญๅจไบGCSไธญ") | |
questions_text = download_blob_to_string(gcs_client, bucket_name, blob_name) | |
questions = json.loads(questions_text) | |
elif source == "drive": | |
# ๅป g drive ็ขบ่ชๆฏๆๆ video_id_questions.json | |
print("===get_questions===") | |
service = init_drive_service() | |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL' | |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id) | |
file_name = f'{video_id}_questions.json' | |
# ๆฃๆฅๆชๆกๆฏๅฆๅญๅจ | |
exists, file_id = check_file_exists(service, folder_id, file_name) | |
if not exists: | |
questions = generate_questions(df_string) | |
questions_text = json.dumps(questions, ensure_ascii=False, indent=2) | |
upload_content_directly(service, file_name, folder_id, questions_text) | |
print("questionsๅทฒไธๅณๅฐGoogle Drive") | |
else: | |
# ้ๅญ็จฟๅทฒๅญๅจ๏ผไธ่ฝฝ้ๅญ็จฟๅ ๅฎน | |
print("questionsๅทฒๅญๅจไบGoogle Driveไธญ") | |
questions_text = download_file_as_string(service, file_id) | |
questions = json.loads(questions_text) | |
q1 = questions[0] if len(questions) > 0 else "" | |
q2 = questions[1] if len(questions) > 1 else "" | |
q3 = questions[2] if len(questions) > 2 else "" | |
print("=====get_questions=====") | |
print(f"q1: {q1}") | |
print(f"q2: {q2}") | |
print(f"q3: {q3}") | |
print("=====get_questions=====") | |
return q1, q2, q3 | |
def generate_questions(df_string): | |
# ไฝฟ็จ OpenAI ็ๆๅบไบไธไผ ๆฐๆฎ็้ฎ้ข | |
if isinstance(df_string, str): | |
df_string_json = json.loads(df_string) | |
else: | |
df_string_json = df_string | |
content_text = "" | |
for entry in df_string_json: | |
content_text += entry["text"] + "๏ผ" | |
sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไธฆ็จๆขๆ่ณๆ็บๆฌ่ณช็ๆธฌ็จๆถๅฏ่ฝๆๅ็ๅ้ก๏ผไฝฟ็จ zh-TW" | |
user_content = f"่ซๆ นๆ {content_text} ็ๆไธๅๅ้ก๏ผไธฆ็จ JSON ๆ ผๅผ่ฟๅ questions:[q1็ๆ่ฟฐtext, q2็ๆ่ฟฐtext, q3็ๆ่ฟฐtext]" | |
messages = [ | |
{"role": "system", "content": sys_content}, | |
{"role": "user", "content": user_content} | |
] | |
response_format = { "type": "json_object" } | |
print("=====messages=====") | |
print(messages) | |
print("=====messages=====") | |
request_payload = { | |
"model": "gpt-4-turbo", | |
"messages": messages, | |
"max_tokens": 4000, | |
"response_format": response_format | |
} | |
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload) | |
questions = json.loads(response.choices[0].message.content)["questions"] | |
print("=====json_response=====") | |
print(questions) | |
print("=====json_response=====") | |
return questions | |
def change_questions(password, df_string): | |
verify_password(password) | |
questions = generate_questions(df_string) | |
q1 = questions[0] if len(questions) > 0 else "" | |
q2 = questions[1] if len(questions) > 1 else "" | |
q3 = questions[2] if len(questions) > 2 else "" | |
print("=====get_questions=====") | |
print(f"q1: {q1}") | |
print(f"q2: {q2}") | |
print(f"q3: {q3}") | |
print("=====get_questions=====") | |
return q1, q2, q3 | |
# "Key moments" gets its own tab: the idea is to pair the timeline and text
# with the corresponding screenshots, so each key-moment summary carries
# screenshot information.
def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source):
    if source == "gcs":
        print("===get_key_moments on gcs===")
        gcs_client = GCS_CLIENT
        bucket_name = 'video_ai_assistant'
        file_name = f'{video_id}_key_moments.json'
        blob_name = f"{video_id}/{file_name}"
        # Check whether the file exists
        is_key_moments_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
        if not is_key_moments_exists:
            key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
            key_moments_json = {"key_moments": key_moments}
            key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
            print("key_moments uploaded to GCS")
        else:
            # The key moments already exist; download their content
            print("key_moments already exist in GCS")
            key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
            key_moments_json = json.loads(key_moments_text)
            # Check whether each key moment has keywords
            print("===checking whether key_moments have keywords===")
            has_keywords_added = False
            for key_moment in key_moments_json["key_moments"]:
                if "keywords" not in key_moment:
                    transcript = key_moment["transcript"]
                    key_moment["keywords"] = generate_key_moments_keywords(transcript)
                    print("===keywords===")
                    print(key_moment["keywords"])
                    print("===keywords===")
                    has_keywords_added = True
            if has_keywords_added:
                key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
                upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
            key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
            key_moments_json = json.loads(key_moments_text)
    elif source == "drive":
        print("===get_key_moments on drive===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
        file_name = f'{video_id}_key_moments.json'
        # Check whether the file exists
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
            key_moments_json = {"key_moments": key_moments}
            key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
            upload_content_directly(service, file_name, folder_id, key_moments_text)
            print("key_moments uploaded to Google Drive")
        else:
            # The key moments already exist; download their content
            print("key_moments already exist in Google Drive")
            key_moments_text = download_file_as_string(service, file_id)
            key_moments_json = json.loads(key_moments_text)
    return key_moments_json
def generate_key_moments(formatted_simple_transcript, formatted_transcript):
    # Use OpenAI to generate key moments based on the uploaded data
    sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไฝฟ็จ zh-TW"
    user_content = f"""
        ่ซๆ นๆ {formatted_simple_transcript} ๆๆฌ๏ผๆๅๅบ้้ปๆ่ฆ๏ผไธฆ็ตฆๅบๅฐๆ็ๆ้่ปธ
        1. ๅฐ็ฏๅๅๅบไธๅๆฎต่ฝ็็ธๅฐๆๆ้่ปธ็้้ปๆ่ฆ๏ผ
        2. ๆฏไธๅฐๆฎตๆๅคไธ่ถ ้ 1/5 ็็ธฝๅ งๅฎน๏ผไนๅฐฑๆฏๅคง็ด 3~5ๆฎต็้้ป๏ผไพๅฆไบ๏ฝๅๅ้็ๅฝฑ็ๅฐฑไธๆฎตๅคง็ด1~2ๅ้๏ผๆๅคไธๅ้๏ผไฝๅฆๆๆฏ่ถ ้ๅๅ้็ๅฝฑ็๏ผ้ฃไธๅฐๆฎตๅคง็ด 2~3ๅ้๏ผไปฅๆญค้กๆจ๏ผ
        3. ๆณจๆไธ่ฆ้บๆผไปปไฝไธๆฎตๆ้่ปธ็ๅ งๅฎน ๅพ้ถ็ง้ๅง
        4. ๅฆๆ้ ญๅฐพ็ๆ ็ฏไธๆฏ้้ป๏ผๅฐฑไฝตๅ ฅๅฐ้่ฟ็ๆฎต่ฝ๏ผ็นๅฅๆฏๆๆๅผๆๆฏไป็ดนไบบ็ฉๅฐฑๆฏไธ้่ฆ็ๆ ็ฏ
        5. transcript ้ๅญ็จฟ็้ๅ๏ผ่ฆๆๅ็็ๆจ้ป็ฌฆ่)๏ผ่ฆๅฎๆด่ทๅไพ็ไธๆจฃ๏ผไธ่ฆ็็ฅ
        ไปฅ้็จฎๆนๅผๅๆๆดๅๆๆฌ๏ผๅพ้ถ็ง้ๅงๅๆ๏ผ็ดๅฐ็ตๆใ้ๅพ้่ฆ
        6. ้้ตๅญๅพtranscript extract to keyword๏ผไฟ็ๅฐๅฎถๅๅญใๅฐๆฅญ่ก่ชใๅนดไปฝใๆธๅญใๆๅๅ็จฑใๅฐๅใๆธๅญธๅ ฌๅผ
        7. text, transcript, keywords please use or transfer zh-TW, it's very important
        ไธฆ็จ JSON ๆ ผๅผ่ฟๅ key_moments:[{{
            "start": "00:00",
            "end": "01:00",
            "text": "้ๅญ็จฟ็้้ปๆ่ฆ",
            "transcript": "้ๅญ็จฟ็้ๅ๏ผ่ฆๆๅ็็ๆจ้ป็ฌฆ่)๏ผ่ฆๅฎๆด่ทๅไพ็ไธๆจฃ๏ผไธ่ฆ็็ฅ",
            "keywords": ["้้ตๅญ", "้้ตๅญ"]
        }}]
    """
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content}
    ]
    response_format = { "type": "json_object" }
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4096,
        "response_format": response_format
    }
    try:
        response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
        print("===response===")
        print(dict(response))
        key_moments = json.loads(response.choices[0].message.content)["key_moments"]
    except Exception as e:
        # video_id is not in scope in this function, so the error message
        # only carries the exception itself
        error_msg = f"้้ตๆๅป้ฏ่ชค: {str(e)}"
        print("===generate_key_moments error===")
        print(error_msg)
        print("===generate_key_moments error===")
        raise Exception(error_msg)
    print("=====key_moments=====")
    print(key_moments)
    print("=====key_moments=====")
    image_links = {entry['start_time']: entry['screenshot_path'] for entry in formatted_transcript}
    for moment in key_moments:
        start_time = moment['start']
        end_time = moment['end']
        moment_images = [image_links[time] for time in image_links if start_time <= time <= end_time]
        moment['images'] = moment_images
    return key_moments
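# Note: the image matching above compares the "HH:MM:SS" start_time strings
# from the formatted transcript against the "start"/"end" strings the model
# returns. The comparison is lexicographic, so it only behaves correctly
# when both sides use the same zero-padded timestamp format.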
def generate_key_moments_keywords(transcript):
    system_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซๆ นๆไปฅไธๆๆฌๆๅ้้ตๅญ"
    user_content = f"""transcript extract to keyword
        ไฟ็ๅฐๅฎถๅๅญใๅฐๆฅญ่ก่ชใๅนดไปฝใๆธๅญใๆๅๅ็จฑใๅฐๅใๆธๅญธๅ ฌๅผใๆธๅญธ่กจ็คบๅผใ็ฉ็ๅๅญธ็ฌฆ่๏ผ
        ไธ็จ็ตฆไธไธๆ๏ผ็ดๆฅ็ตฆๅบ้้ตๅญ๏ผไฝฟ็จ zh-TW๏ผ็จ้่ๅ้๏ผ example: ้้ตๅญ1, ้้ตๅญ2
        transcript๏ผ{transcript}
    """
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 100,
    }
    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    keywords = response.choices[0].message.content.strip().split(", ")
    return keywords
def get_key_moments_html(key_moments):
    css = """
    <style>
        #gallery-main {
            display: flex;
            align-items: center;
            margin-bottom: 20px;
        }
        #gallery {
            position: relative;
            width: 50%;
            flex: 1;
        }
        #text-content {
            flex: 2;
            margin-left: 20px;
        }
        #gallery #gallery-container {
            position: relative;
            width: 100%;
            height: 0px;
            padding-bottom: 56.7%; /* 16/9 ratio */
            background-color: blue;
        }
        #gallery #gallery-container #gallery-content {
            position: absolute;
            top: 0px;
            right: 0px;
            bottom: 0px;
            left: 0px;
            height: 100%;
            display: flex;
            scroll-snap-type: x mandatory;
            overflow-x: scroll;
            scroll-behavior: smooth;
        }
        #gallery #gallery-container #gallery-content .gallery__item {
            width: 100%;
            height: 100%;
            flex-shrink: 0;
            scroll-snap-align: start;
            scroll-snap-stop: always;
            position: relative;
        }
        #gallery #gallery-container #gallery-content .gallery__item img {
            display: block;
            width: 100%;
            height: 100%;
            object-fit: contain;
            background-color: white;
        }
        .click-zone {
            position: absolute;
            width: 20%;
            height: 100%;
            z-index: 3;
        }
        .click-zone.click-zone-prev {
            left: 0px;
        }
        .click-zone.click-zone-next {
            right: 0px;
        }
        #gallery:not(:hover) .arrow {
            opacity: 0.8;
        }
        .arrow {
            text-align: center;
            z-index: 3;
            position: absolute;
            display: block;
            width: 25px;
            height: 25px;
            line-height: 25px;
            background-color: black;
            border-radius: 50%;
            text-decoration: none;
            color: black;
            opacity: 0.8;
            transition: opacity 200ms ease;
        }
        .arrow:hover {
            opacity: 1;
        }
        .arrow span {
            position: relative;
            top: 2px;
        }
        .arrow.arrow-prev {
            top: 50%;
            left: 5px;
        }
        .arrow.arrow-next {
            top: 50%;
            right: 5px;
        }
        .arrow.arrow-disabled {
            opacity: 0.8;
        }
        #text-content {
            padding: 0px 36px;
        }
        #text-content p {
            margin-top: 10px;
        }
        body {
            font-family: sans-serif;
            margin: 0px;
            padding: 0px;
        }
        main {
            padding: 0px;
            margin: 0px;
            max-width: 900px;
            margin: auto;
        }
        .hidden {
            border: 0;
            clip: rect(0 0 0 0);
            height: 1px;
            margin: -1px;
            overflow: hidden;
            padding: 0;
            position: absolute;
            width: 1px;
        }
        @media (max-width: 768px) {
            #gallery-main {
                flex-direction: column; /* stack vertically on small screens */
            }
            #gallery {
                width: 100%; /* let the gallery fill the container width */
            }
            #text-content {
                margin-left: 0; /* remove the left margin so the text spans the full width */
                margin-top: 20px; /* add spacing above the text content */
            }
            #gallery #gallery-container {
                height: 350px; /* alternatively, set a fixed height instead of using padding-bottom */
                padding-bottom: 0; /* remove the bottom padding */
            }
        }
    </style>
    """
    key_moments_html = css
    for i, moment in enumerate(key_moments):
        images = moment['images']
        image_elements = ""
        for j, image in enumerate(images):
            current_id = f"img_{i}_{j}"
            prev_id = f"img_{i}_{j-1}" if j - 1 >= 0 else f"img_{i}_{len(images)-1}"
            next_id = f"img_{i}_{j+1}" if j + 1 < len(images) else f"img_{i}_0"
            image_elements += f"""
            <div id="{current_id}" class="gallery__item">
                <a href="#{prev_id}" class="click-zone click-zone-prev">
                    <div class="arrow arrow-disabled arrow-prev"> < </div>
                </a>
                <a href="#{next_id}" class="click-zone click-zone-next">
                    <div class="arrow arrow-next"> > </div>
                </a>
                <img src="{image}">
            </div>
            """
        gallery_content = f"""
        <div id="gallery-content">
            {image_elements}
        </div>
        """
        key_moments_html += f"""
        <div class="gallery-container" id="gallery-main">
            <div id="gallery"><!-- gallery start -->
                <div id="gallery-container">
                    {gallery_content}
                </div>
            </div>
            <div id="text-content">
                <h3>{moment['start']} - {moment['end']}</h3>
                <p><strong>ๆ่ฆ: {moment['text']} </strong></p>
                <p>ๅ งๅฎน: {moment['transcript']}</p>
            </div>
        </div>
        """
    return key_moments_html
# ---- LLM CRUD ----
def get_LLM_content(video_id, kind):
    print(f"===get_{kind}===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_{kind}.json'
    blob_name = f"{video_id}/{file_name}"
    # Check whether the file exists
    is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if is_file_exists:
        content = download_blob_to_string(gcs_client, bucket_name, blob_name)
        content_json = json.loads(content)
        if kind == "reading_passage":
            content_text = content_json["reading_passage"]
        elif kind == "summary":
            content_text = content_json["summary"]
        else:
            content_text = json.dumps(content_json, ensure_ascii=False, indent=2)
    else:
        content_text = ""
    return content_text

def enable_edit_mode():
    return gr.update(interactive=True)

def delete_LLM_content(video_id, kind):
    print(f"===delete_{kind}===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_{kind}.json'
    blob_name = f"{video_id}/{file_name}"
    # Check whether the file exists
    is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if is_file_exists:
        delete_blob(gcs_client, bucket_name, blob_name)
        print(f"{file_name} deleted from GCS")
    return gr.update(value="", interactive=False)

def update_LLM_content(video_id, new_content, kind):
    print(f"===update {kind} on gcs===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_{kind}.json'
    blob_name = f"{video_id}/{file_name}"
    if kind == "reading_passage":
        print("=========reading_passage=======")
        print(new_content)
        reading_passage_json = {"reading_passage": str(new_content)}
        reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
        updated_content = new_content
    elif kind == "summary":
        summary_json = {"summary": str(new_content)}
        summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
        updated_content = new_content
    elif kind == "mind_map":
        mind_map_json = {"mind_map": str(new_content)}
        mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
        updated_content = mind_map_text
    elif kind == "key_moments":
        # from update_LLM_btn -> new_content is a string
        # create_LLM_content -> new_content is a list
        if isinstance(new_content, str):
            key_moments_list = json.loads(new_content)
        else:
            key_moments_list = new_content
        key_moments_json = {"key_moments": key_moments_list}
        key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
        updated_content = key_moments_text
    elif kind == "transcript":
        if isinstance(new_content, str):
            transcript_json = json.loads(new_content)
        else:
            transcript_json = new_content
        transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
        updated_content = transcript_text
    elif kind == "questions":
        # from update_LLM_btn -> new_content is a string
        # create_LLM_content -> new_content is a list
        if isinstance(new_content, str):
            questions_json = json.loads(new_content)
        else:
            questions_json = new_content
        questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
        updated_content = questions_text
    else:
        # Guard against an unknown kind, which would otherwise leave
        # updated_content undefined below
        raise ValueError(f"Unknown kind: {kind}")
    print(f"{kind} updated on GCS")
    return gr.update(value=updated_content, interactive=False)
def create_LLM_content(video_id, df_string, kind):
    print(f"===create_{kind}===")
    print(f"video_id: {video_id}")
    if kind == "reading_passage":
        content = generate_reading_passage(df_string)
        update_LLM_content(video_id, content, kind)
    elif kind == "summary":
        content = generate_summarise(df_string)
        update_LLM_content(video_id, content, kind)
    elif kind == "mind_map":
        content = generate_mind_map(df_string)
        update_LLM_content(video_id, content, kind)
    elif kind == "key_moments":
        if isinstance(df_string, str):
            transcript = json.loads(df_string)
        else:
            transcript = df_string
        formatted_simple_transcript = create_formatted_simple_transcript(transcript)
        formatted_transcript = create_formatted_transcript(video_id, transcript)
        content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
        update_LLM_content(video_id, content, kind)
        content = json.dumps(content, ensure_ascii=False, indent=2)
    elif kind == "transcript":
        content = process_transcript_and_screenshots_on_gcs(video_id)
        update_LLM_content(video_id, content, kind)
        content = json.dumps(content, ensure_ascii=False, indent=2)
    elif kind == "questions":
        content = generate_questions(df_string)
        update_LLM_content(video_id, content, kind)
        content = json.dumps(content, ensure_ascii=False, indent=2)
    return gr.update(value=content, interactive=False)
# ---- LLM refresh CRUD ---- | |
def reading_passage_add_latex_version(video_id): | |
# ็ขบ่ช GCS ๆฏๅฆๆ reading_passage.json | |
print("===reading_passage_convert_to_latex===") | |
gcs_client = GCS_CLIENT | |
bucket_name = 'video_ai_assistant' | |
file_name = f'{video_id}_reading_passage.json' | |
blob_name = f"{video_id}/{file_name}" | |
print(f"blob_name: {blob_name}") | |
# ๆฃๆฅๆชๆกๆฏๅฆๅญๅจ | |
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name) | |
if not is_file_exists: | |
raise gr.Error("reading_passage ไธๅญๅจ!") | |
# ้ๅญ็จฟๅทฒๅญๅจ๏ผไธ่ฝฝ้ๅญ็จฟๅ ๅฎน | |
print("reading_passage ๅทฒๅญๅจไบGCSไธญ๏ผ่ฝๆ Latex ๆจกๅผ") | |
reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name) | |
reading_passage_json = json.loads(reading_passage_text) | |
original_reading_passage = reading_passage_json["reading_passage"] | |
sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไฝฟ็จ zh-TW" | |
user_content = f""" | |
่ซๆ นๆ {original_reading_passage} | |
ๆ่ฟฐไธญ๏ผ่ซๆๆธๅญธๆๆฏๅฐๆฅญ่ก่ช๏ผ็จ Latex ๅ ่ฆ๏ผ$...$๏ผ๏ผ็ก้ไธ่ฆๅปๆนๅๆฌ็ๆ็ซ | |
ๅ ๆธไน้คใๆ น่ใๆฌกๆนใๅๅญธ็ฌฆ่ใ็ฉ็็ฌฆ่็ญ็ญ็้็ฎๅผๅฃ่ชไนๆๆ LATEX ็ฌฆ่ | |
่ซไธๅฎ่ฆไฝฟ็จ็น้ซไธญๆ zh-TW๏ผไธฆ็จๅฐ็ฃไบบ็ๅฃ่ช | |
็ข็็็ตๆไธ่ฆๅๅพๆ่งฃ้๏ผไนไธ่ฆๆ่ฟฐ้็ฏๆ็ซ ๆ้บผ็ข็็ | |
ๅช้่ฆๅฐๆณจๆไพ Reading Passage๏ผๅญๆธๅจ 200~500 ๅญไปฅๅ ง | |
""" | |
messages = [ | |
{"role": "system", "content": sys_content}, | |
{"role": "user", "content": user_content} | |
] | |
request_payload = { | |
"model": "gpt-4-turbo", | |
"messages": messages, | |
"max_tokens": 4000, | |
} | |
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload) | |
new_reading_passage = response.choices[0].message.content.strip() | |
print("=====new_reading_passage=====") | |
print(new_reading_passage) | |
print("=====new_reading_passage=====") | |
reading_passage_json["reading_passage"] = new_reading_passage | |
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2) | |
# ๅฆๅญ็บ reading_passage_latex.json | |
new_file_name = f'{video_id}_reading_passage_latex.json' | |
new_blob_name = f"{video_id}/{new_file_name}" | |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, reading_passage_text) | |
return new_reading_passage | |
def summary_add_markdown_version(video_id): | |
# ็ขบ่ช GCS ๆฏๅฆๆ summary.json | |
print("===summary_convert_to_markdown===") | |
gcs_client = GCS_CLIENT | |
bucket_name = 'video_ai_assistant' | |
file_name = f'{video_id}_summary.json' | |
blob_name = f"{video_id}/{file_name}" | |
print(f"blob_name: {blob_name}") | |
# ๆฃๆฅๆชๆกๆฏๅฆๅญๅจ | |
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name) | |
if not is_file_exists: | |
raise gr.Error("summary ไธๅญๅจ!") | |
# ้ๅญ็จฟๅทฒๅญๅจ๏ผไธ่ฝฝ้ๅญ็จฟๅ ๅฎน | |
print("summary ๅทฒๅญๅจไบGCSไธญ๏ผ่ฝๆ Markdown ๆจกๅผ") | |
summary_text = download_blob_to_string(gcs_client, bucket_name, blob_name) | |
summary_json = json.loads(summary_text) | |
original_summary = summary_json["summary"] | |
sys_content = "ไฝ ๆฏไธๅๆ ้ท่ณๆๅๆ่ทๅฝฑ็ๆๅญธ็่ๅธซ๏ผuser ็บๅญธ็๏ผ่ซ็ฒพ่ฎ่ณๆๆๆฌ๏ผ่ช่กๅคๆท่ณๆ็็จฎ้ก๏ผไฝฟ็จ zh-TW" | |
user_content = f""" | |
่ซๆ นๆ {original_summary} | |
่ฝๆๆ ผๅผ็บ Markdown | |
ๅชไฟ็๏ผ๐ ๆด้ซๆ่ฆใ๐ ้้ปๆฆๅฟตใ๐ก ็บไป้บผๆๅ่ฆๅญธ้ๅใโ ๅปถไผธๅฐๅ้ก | |
ๅ ถไป็ไธ่ฆไฟ็ | |
ๆด้ซๆ่ฆๅจไธ็พๅญไปฅๅ ง | |
้้ปๆฆๅฟต่ฝๆ bullet points | |
ไปฅๅๅฏ่ฝ็็ต่ซ่็ตๅฐพๅปถไผธๅฐๅ้กๆไพๅญธ็ไฝๅๆ | |
ๆ่ฟฐไธญ๏ผ่ซๆๆธๅญธๆๆฏๅฐๆฅญ่ก่ช๏ผ็จ Latex ๅ ่ฆ๏ผ$...$๏ผ | |
ๅ ๆธไน้คใๆ น่ใๆฌกๆน็ญ็ญ็้็ฎๅผๅฃ่ชไนๆๆ LATEX ๆธๅญธ็ฌฆ่ | |
ๆด้ซๆ ผๅผ็บ๏ผ | |
## ๐ ๆด้ซๆ่ฆ | |
- (ไธๅ bullet point....) | |
## ๐ ้้ปๆฆๅฟต | |
- xxx | |
- xxx | |
- xxx | |
## ๐ก ็บไป้บผๆๅ่ฆๅญธ้ๅ๏ผ | |
- (ไธๅ bullet point....) | |
## โ ๅปถไผธๅฐๅ้ก | |
- (ไธๅ bullet point....) | |
""" | |
messages = [ | |
{"role": "system", "content": sys_content}, | |
{"role": "user", "content": user_content} | |
] | |
request_payload = { | |
"model": "gpt-4-turbo", | |
"messages": messages, | |
"max_tokens": 4000, | |
} | |
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload) | |
new_summary = response.choices[0].message.content.strip() | |
print("=====new_summary=====") | |
print(new_summary) | |
print("=====new_summary=====") | |
summary_json["summary"] = new_summary | |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2) | |
    # Save a copy as summary_markdown.json
new_file_name = f'{video_id}_summary_markdown.json' | |
new_blob_name = f"{video_id}/{new_file_name}" | |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, summary_text) | |
return new_summary | |
# AI-generated teaching materials
def get_meta_data(video_id, source="gcs"): | |
if source == "gcs": | |
print("===get_meta_data on gcs===") | |
gcs_client = GCS_CLIENT | |
bucket_name = 'video_ai_assistant' | |
file_name = f'{video_id}_meta_data.json' | |
blob_name = f"{video_id}/{file_name}" | |
        # Check whether the file exists
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name) | |
if not is_file_exists: | |
meta_data_json = { | |
"subject": "", | |
"grade": "", | |
} | |
print("meta_data empty return") | |
else: | |
            # meta_data already exists; download its content
print("meta_dataๅทฒๅญๅจไบGCSไธญ") | |
meta_data_text = download_blob_to_string(gcs_client, bucket_name, blob_name) | |
meta_data_json = json.loads(meta_data_text) | |
    # Convert the numeric grade in meta_data_json into grade text
grade = meta_data_json["grade"] | |
case = { | |
1: "ไธๅนด็ด", | |
2: "ไบๅนด็ด", | |
3: "ไธๅนด็ด", | |
4: "ๅๅนด็ด", | |
5: "ไบๅนด็ด", | |
6: "ๅ ญๅนด็ด", | |
7: "ไธๅนด็ด", | |
8: "ๅ ซๅนด็ด", | |
9: "ไนๅนด็ด", | |
10: "ๅๅนด็ด", | |
11: "ๅไธๅนด็ด", | |
12: "ๅไบๅนด็ด", | |
} | |
grade_text = case.get(grade, "") | |
meta_data_json["grade"] = grade_text | |
return meta_data_json | |
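# A sketch of the returned shape, following the schema used above (the video id
# "abc123" is a hypothetical example):
#   get_meta_data("abc123")  ->  {"subject": "ๆธๅญธ", "grade": "ไธๅนด็ด"}
# and {"subject": "", "grade": ""} when no meta-data file exists yet.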
def get_ai_content(password, video_id, df_string, topic, grade, level, specific_feature, content_type, source="gcs"): | |
verify_password(password) | |
if source == "gcs": | |
print("===get_ai_content on gcs===") | |
gcs_client = GCS_CLIENT | |
bucket_name = 'video_ai_assistant' | |
file_name = f'{video_id}_ai_content_list.json' | |
blob_name = f"{video_id}/{file_name}" | |
        # Check whether the file exists
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name) | |
if not is_file_exists: | |
            # Create an empty ai_content_list.json first
ai_content_list = [] | |
ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2) | |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text) | |
print("ai_content_list [] ๅทฒไธๅณๅฐGCS") | |
        # At this point ai_content_list exists on GCS
ai_content_list_string = download_blob_to_string(gcs_client, bucket_name, blob_name) | |
ai_content_list = json.loads(ai_content_list_string) | |
        # Look up cached ai_content by key (video_id, level, specific_feature, content_type)
target_kvs = { | |
"video_id": video_id, | |
"level": level, | |
"specific_feature": specific_feature, | |
"content_type": content_type | |
} | |
ai_content_json = [ | |
item for item in ai_content_list | |
if all(item[k] == v for k, v in target_kvs.items()) | |
] | |
if len(ai_content_json) == 0: | |
ai_content, prompt = generate_ai_content(password, df_string, topic, grade, level, specific_feature, content_type) | |
ai_content_json = { | |
"video_id": video_id, | |
"content": str(ai_content), | |
"prompt": prompt, | |
"level": level, | |
"specific_feature": specific_feature, | |
"content_type": content_type | |
} | |
ai_content_list.append(ai_content_json) | |
ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2) | |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text) | |
print("ai_contentๅทฒไธๅณๅฐGCS") | |
else: | |
ai_content_json = ai_content_json[-1] | |
ai_content = ai_content_json["content"] | |
prompt = ai_content_json["prompt"] | |
return ai_content, ai_content, prompt, prompt | |
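# get_ai_content is effectively a GCS-backed cache: it returns the latest
# cached entry matching (video_id, level, specific_feature, content_type) from
# {video_id}_ai_content_list.json, and only calls generate_ai_content on a
# cache miss, appending the new entry for next time.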
def generate_ai_content(password, df_string, topic, grade, level, specific_feature, content_type): | |
verify_password(password) | |
material = EducationalMaterial(df_string, topic, grade, level, specific_feature, content_type) | |
prompt = material.generate_content_prompt() | |
user_content = material.build_user_content() | |
messages = material.build_messages(user_content) | |
ai_model_name = "gpt-4-turbo" | |
request_payload = { | |
"model": ai_model_name, | |
"messages": messages, | |
"max_tokens": 4000 # ไธพไพ๏ผๅฎ้ ไธๆจๅฏ่ฝ้่ฆๆด่ฏฆ็ป็้ ็ฝฎ | |
} | |
ai_content = material.send_ai_request(OPEN_AI_CLIENT, request_payload) | |
return ai_content, prompt | |
def generate_exam_fine_tune_result(password, exam_result_prompt, df_string_output, exam_result, exam_result_fine_tune_prompt):
verify_password(password) | |
material = EducationalMaterial(df_string_output, "", "", "", "", "") | |
user_content = material.build_fine_tune_user_content(exam_result_prompt, exam_result, exam_result_fine_tune_prompt) | |
messages = material.build_messages(user_content) | |
ai_model_name = "gpt-4-turbo" | |
request_payload = { | |
"model": ai_model_name, | |
"messages": messages, | |
"max_tokens": 4000 # ไธพไพ๏ผๅฎ้ ไธๆจๅฏ่ฝ้่ฆๆด่ฏฆ็ป็้ ็ฝฎ | |
} | |
ai_content = material.send_ai_request(OPEN_AI_CLIENT, request_payload) | |
return ai_content | |
def return_original_exam_result(exam_result_original): | |
return exam_result_original | |
def create_word(content): | |
unique_filename = str(uuid.uuid4()) | |
word_file_path = f"/tmp/{unique_filename}.docx" | |
doc = Document() | |
doc.add_paragraph(content) | |
doc.save(word_file_path) | |
return word_file_path | |
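# Example: create_word("Hello") writes a single-paragraph .docx and returns a
# path like /tmp/<uuid4>.docx, which gr.File can then serve as a download.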
def download_exam_result(content): | |
word_path = create_word(content) | |
return word_path | |
# ---- Chatbot ---- | |
def get_instructions(content_subject, content_grade, key_moments): | |
instructions = f""" | |
subject: {content_subject} | |
grade: {content_grade} | |
context: {key_moments} | |
    Assistant Role: you are a {content_subject} teacher
    User Role: a {content_grade}th-grade student.
    Method: Socratic style; guide thinking, no direct answers. This is very important, please follow it strictly.
    Language: Traditional Chinese zh-TW (this is very important), suitable for a {content_grade}th-grade level.
    Response:
    - a single question, under 100 characters
    - wrap math in LaTeX, delimited by $ on both sides (e.g. $x^2$)
    - hint with a video timestamp in the format ใๅ่๏ผ00:00:00ใ.
    - sometimes encourage the user in a relaxed, Taiwanese style.
    - if the user asks a question outside the context,
      tell them to ask within the context and give them an example question.
Restrictions: Answer within video content, no external references | |
""" | |
return instructions | |
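# Example (hypothetical arguments): get_instructions("ๆธๅญธ", "7", key_moments_text)
# renders the Socratic-style zh-TW system prompt shared by chat_with_ai and the
# OpenAI-assistant chatbots below.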
def chat_with_ai(ai_name, password, video_id, user_data, trascript_state, key_moments, user_message, chat_history, content_subject, content_grade, socratic_mode=False): | |
verify_password(password) | |
print("=====user_data=====") | |
print(f"user_data: {user_data}") | |
if chat_history is not None and len(chat_history) > 11: | |
error_msg = "ๆญคๆฌกๅฐ่ฉฑ่ถ ้ไธ้๏ผๅฐ่ฉฑไธ่ผช10ๆฌก๏ผ" | |
raise gr.Error(error_msg) | |
    if ai_name not in ["jutor", "claude3", "groq"]:
ai_name = "jutor" | |
if ai_name == "jutor": | |
ai_client = "" | |
elif ai_name == "claude3": | |
ai_client = BEDROCK_CLIENT | |
elif ai_name == "groq": | |
ai_client = GROQ_CLIENT | |
else: | |
ai_client = "" | |
if isinstance(trascript_state, str): | |
simple_transcript = json.loads(trascript_state) | |
else: | |
simple_transcript = trascript_state | |
if isinstance(key_moments, str): | |
key_moments_json = json.loads(key_moments) | |
else: | |
key_moments_json = key_moments | |
    # Strip bulky fields (images, end, transcript) from key_moments_json
for moment in key_moments_json: | |
moment.pop('images', None) | |
moment.pop('end', None) | |
moment.pop('transcript', None) | |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False) | |
instructions = get_instructions(content_subject, content_grade, key_moments_text) | |
chatbot_config = { | |
"video_id": video_id, | |
"transcript": simple_transcript, | |
"key_moments": key_moments, | |
"content_subject": content_subject, | |
"content_grade": content_grade, | |
"jutor_chat_key": JUTOR_CHAT_KEY, | |
"ai_name": ai_name, | |
"ai_client": ai_client, | |
"instructions": instructions | |
} | |
try: | |
chatbot = Chatbot(chatbot_config) | |
response_completion = chatbot.chat(user_message, chat_history, socratic_mode, ai_name) | |
except Exception as e: | |
print(f"Error: {e}") | |
response_completion = "ๅญธ็ฟ็ฒพ้ๆ้ป็ดฏ๏ผ่ซ็จๅพๅ่ฉฆ๏ผ" | |
try: | |
        # Update the chat history
new_chat_history = (user_message, response_completion) | |
if chat_history is None: | |
chat_history = [new_chat_history] | |
else: | |
chat_history.append(new_chat_history) | |
        # Return an empty string (to clear the input box) and the chat history
return "", chat_history | |
except Exception as e: | |
        # Handle the error case
print(f"Error: {e}") | |
return "่ฏทๆฑๅคฑ่ดฅ๏ผ่ฏท็จๅๅ่ฏ๏ผ", chat_history | |
def chat_with_opan_ai_assistant(password, youtube_id, user_data, thread_id, trascript_state, key_moments, user_message, chat_history, content_subject, content_grade, socratic_mode=False): | |
verify_password(password) | |
print("=====user_data=====") | |
print(f"user_data: {user_data}") | |
    # First check that user_message is no longer than 1500 characters
    if len(user_message) > 1500:
        error_msg = "ไฝ ็่จๆฏๅคช้ทไบ๏ผ่ซ็ธฎ็ญ่จๆฏ้ทๅบฆ่ณๅไบ็พๅญไปฅๅ ง"
raise gr.Error(error_msg) | |
    # If chat_history already has more than 10 messages, abort with a limit error
if chat_history is not None and len(chat_history) > 10: | |
error_msg = "ๆญคๆฌกๅฐ่ฉฑ่ถ ้ไธ้๏ผๅฐ่ฉฑไธ่ผช10ๆฌก๏ผ" | |
raise gr.Error(error_msg) | |
try: | |
assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq" #GPT 4 turbo | |
# assistant_id = "asst_5SaUElqvL3U0ybSi9PRM8x3P" #GPT 3.5 turbo | |
client = OPEN_AI_CLIENT | |
        # Embed the transcript data directly in the instructions (disabled)
# if isinstance(trascript_state, str): | |
# trascript_json = json.loads(trascript_state) | |
# else: | |
# trascript_json = trascript_state | |
        # # Remove embed_url and screenshot_path
# for entry in trascript_json: | |
# entry.pop('end_time', None) | |
# trascript_text = json.dumps(trascript_json, ensure_ascii=False) | |
if isinstance(key_moments, str): | |
key_moments_json = json.loads(key_moments) | |
else: | |
key_moments_json = key_moments | |
# key_moments_json remove images | |
for moment in key_moments_json: | |
moment.pop('images', None) | |
moment.pop('end', None) | |
moment.pop('transcript', None) | |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False) | |
instructions = get_instructions(content_subject, content_grade, key_moments_text) | |
print("=== instructions ===") | |
print(instructions) | |
        # Create the thread (or reuse an existing one)
if not thread_id: | |
thread = client.beta.threads.create() | |
thread_id = thread.id | |
else: | |
thread = client.beta.threads.retrieve(thread_id) | |
        # Add the user's message to the thread
client.beta.threads.messages.create( | |
thread_id=thread.id, | |
role="user", | |
content=user_message + "/n ่ซๅดๆ ผ้ตๅพชinstructions๏ผๆไปปไธไฝ่ๆ ผๆๅบๅฎถๆ๏ผ็ตๅฐไธ่ฆ้่ค user ็ๅๅฅ๏ผ่ซ็จๅผๅฐ็ๆนๅผๆๅผๆนๅ๏ผ่ซไธๅฎ่ฆ็จ็น้ซไธญๆๅ็ญ zh-TW๏ผไธฆ็จๅฐ็ฃไบบ็็ฆฎ่ฒๅฃ่ช่กจ้๏ผๅ็ญๆไธ่ฆ็นๅฅ่ชชๆ้ๆฏๅฐ็ฃไบบ็่ชๆฐฃ๏ผ่ซๅจๅ็ญ็ๆๅพๆจ่จปใๅ่๏ผ๏ผๆ๏ผ:๏ผๅ๏ผ:๏ผ็ง๏ผใ๏ผ๏ผๅฆๆๆฏๅๅๅญธ็๏ผๅฐฑๅชๅไธๅๅ้ก๏ผ่ซๅนซๅฉๅญธ็ๆดๅฅฝ็็่งฃ่ณๆ๏ผๅญๆธๅจ100ๅญไปฅๅ ง๏ผๅ็ญๆ่ซ็จๆธๅญธ็ฌฆ่ไปฃๆฟๆๅญ๏ผLatex ็จ $ ๅญ่ render, ex: $x^2$)" | |
) | |
        # Run the assistant to generate a response
run = client.beta.threads.runs.create( | |
thread_id=thread.id, | |
assistant_id=assistant_id, | |
instructions=instructions, | |
) | |
        # Wait for the assistant's response, with a maximum wait of 30 seconds
run_status = poll_run_status(run.id, thread.id, timeout=30) | |
        # Fetch the assistant's response message
if run_status == "completed": | |
messages = client.beta.threads.messages.list(thread_id=thread.id) | |
# [MessageContentText(text=Text(annotations=[], value='ๆจๅฅฝ๏ผๆไป้บผๆๅฏไปฅๅนซๅฉๆจ็ๅ๏ผๅฆๆๆไปปไฝๅ้กๆ้่ฆๆๅฐ๏ผ่ซ้จๆๅ่จดๆ๏ผ'), type='text')] | |
response_text = messages.data[0].content[0].text.value | |
else: | |
response_text = "ๅญธ็ฟ็ฒพ้ๆ้ป็ดฏ๏ผ่ซ็จๅพๅ่ฉฆ๏ผ" | |
        # Update the chat history
new_chat_history = (user_message, response_text) | |
if chat_history is None: | |
chat_history = [new_chat_history] | |
else: | |
chat_history.append(new_chat_history) | |
except Exception as e: | |
print(f"Error: {e}") | |
raise gr.Error(f"Error: {e}") | |
    # Return an empty string (to clear the input box), the chat history, and the thread id
return "", chat_history, thread.id | |
def process_open_ai_audio_to_chatbot(password, audio_url): | |
verify_password(password) | |
if audio_url: | |
with open(audio_url, "rb") as audio_file: | |
file_size = os.path.getsize(audio_url) | |
if file_size > 2000000: | |
raise gr.Error("ๆชๆกๅคงๅฐ่ถ ้๏ผ่ซไธ่ฆ่ถ ้ 60็ง") | |
else: | |
response = OPEN_AI_CLIENT.audio.transcriptions.create( | |
model="whisper-1", | |
file=audio_file, | |
response_format="text" | |
) | |
                # response is plain text here (response_format="text")
print("=== response ===") | |
print(response) | |
print("=== response ===") | |
else: | |
response = "" | |
return response | |
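# With response_format="text", whisper-1 returns the transcription as a plain
# string, which the openai_chatbot_audio_input.change event below feeds
# straight into the message textbox.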
def poll_run_status(run_id, thread_id, timeout=600, poll_interval=5): | |
""" | |
Polls the status of a Run and handles different statuses appropriately. | |
:param run_id: The ID of the Run to poll. | |
:param thread_id: The ID of the Thread associated with the Run. | |
:param timeout: Maximum time to wait for the Run to complete, in seconds. | |
:param poll_interval: Time to wait between each poll, in seconds. | |
""" | |
client = OPEN_AI_CLIENT | |
start_time = time.time() | |
while time.time() - start_time < timeout: | |
run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id) | |
if run.status in ["completed", "cancelled", "failed"]: | |
print(f"Run completed with status: {run.status}") | |
break | |
elif run.status == "requires_action": | |
print("Run requires action. Performing required action...") | |
# Here, you would perform the required action, e.g., running functions | |
# and then submitting the outputs. This is simplified for this example. | |
# After performing the required action, you'd complete the action: | |
# OPEN_AI_CLIENT.beta.threads.runs.complete_required_action(...) | |
elif run.status == "expired": | |
print("Run expired. Exiting...") | |
break | |
else: | |
print(f"Run status is {run.status}. Waiting for updates...") | |
time.sleep(poll_interval) | |
else: | |
print("Timeout reached. Run did not complete in the expected time.") | |
# Once the Run is completed, handle the result accordingly | |
if run.status == "completed": | |
# Retrieve and handle messages or run steps as needed | |
messages = client.beta.threads.messages.list(thread_id=thread_id) | |
for message in messages.data: | |
if message.role == "assistant": | |
print(f"Assistant response: {message.content}") | |
elif run.status in ["cancelled", "failed"]: | |
# Handle cancellation or failure | |
print(f"Run ended with status: {run.status}") | |
elif run.status == "expired": | |
# Handle expired run | |
print("Run expired without completion.") | |
return run.status | |
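# Usage sketch (mirroring chat_with_opan_ai_assistant above):
#   status = poll_run_status(run.id, thread.id, timeout=30)
#   if status == "completed":
#       messages = OPEN_AI_CLIENT.beta.threads.messages.list(thread_id=thread.id)
#       response_text = messages.data[0].content[0].text.value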
def streaming_chat_with_open_ai(user_message, chat_history, password, user_data, thread_id, trascript, key_moments, content_subject, content_grade): | |
verify_password(password) | |
print("=====user_data=====") | |
print(f"user_data: {user_data}") | |
print("===streaming_chat_with_open_ai===") | |
print(thread_id) | |
    # First check that user_message is no longer than 1500 characters
    if len(user_message) > 1500:
        error_msg = "ไฝ ็่จๆฏๅคช้ทไบ๏ผ่ซ็ธฎ็ญ่จๆฏ้ทๅบฆ่ณๅไบ็พๅญไปฅๅ ง"
raise gr.Error(error_msg) | |
    # If chat_history already has more than 10 messages, abort with a limit error
if chat_history is not None and len(chat_history) > 11: | |
error_msg = "ๆญคๆฌกๅฐ่ฉฑ่ถ ้ไธ้๏ผๅฐ่ฉฑไธ่ผช10ๆฌก๏ผ" | |
raise gr.Error(error_msg) | |
try: | |
assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq" #GPT 4 turbo | |
# assistant_id = "asst_5SaUElqvL3U0ybSi9PRM8x3P" #GPT 3.5 turbo | |
client = OPEN_AI_CLIENT | |
        # Embed the transcript data directly in the instructions (disabled)
# if isinstance(trascript, str): | |
# trascript_json = json.loads(trascript) | |
# else: | |
# trascript_json = trascript | |
# trascript_text = json.dumps(trascript_json, ensure_ascii=False) | |
        # # Strip \n and spaces from trascript_text
# trascript_text = trascript_text.replace("\n", "").replace(" ", "") | |
if isinstance(key_moments, str): | |
key_moments_json = json.loads(key_moments) | |
else: | |
key_moments_json = key_moments | |
        # Strip images from key_moments_json
for moment in key_moments_json: | |
moment.pop('images', None) | |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False) | |
instructions = get_instructions(content_subject, content_grade, key_moments_text) | |
        # Create the thread (or reuse an existing one)
if not thread_id: | |
thread = client.beta.threads.create() | |
thread_id = thread.id | |
print(f"new thread_id: {thread_id}") | |
else: | |
thread = client.beta.threads.retrieve(thread_id) | |
print(f"old thread_id: {thread_id}") | |
        # Add the user's message to the thread
client.beta.threads.messages.create( | |
thread_id=thread.id, | |
role="user", | |
content=user_message + "/n ่ซๅดๆ ผ้ตๅพชinstructions๏ผๆไปปไธไฝ่ๆ ผๆๅบๅฎถๆ๏ผ่ซไธๅฎ่ฆ็จ็น้ซไธญๆๅ็ญ zh-TW๏ผไธฆ็จๅฐ็ฃไบบ็็ฆฎ่ฒๅฃ่ช่กจ้๏ผๅ็ญๆไธ่ฆ็นๅฅ่ชชๆ้ๆฏๅฐ็ฃไบบ็่ชๆฐฃ๏ผไธ็จๆๅฐใ้ๅญ็จฟใ้ๅ่ฉ๏ผ็จใๅ งๅฎนใไปฃๆฟ))๏ผ่ซๅจๅ็ญ็ๆๅพๆจ่จปใๅ่่ณๆ๏ผ๏ผๆ๏ผ:๏ผๅ๏ผ:๏ผ็ง๏ผใ๏ผ๏ผๅฆๆๆฏๅๅๅญธ็๏ผๅฐฑๅชๅไธๅๅ้ก๏ผ่ซๅนซๅฉๅญธ็ๆดๅฅฝ็็่งฃ่ณๆ๏ผๅญๆธๅจ100ๅญไปฅๅ ง๏ผ" | |
) | |
with client.beta.threads.runs.stream( | |
thread_id=thread.id, | |
assistant_id=assistant_id, | |
instructions=instructions, | |
) as stream: | |
partial_messages = "" | |
for event in stream: | |
if event.data and event.data.object == "thread.message.delta": | |
message = event.data.delta.content[0].text.value | |
partial_messages += message | |
yield partial_messages | |
except Exception as e: | |
print(f"Error: {e}") | |
raise gr.Error(f"Error: {e}") | |
def create_thread_id(): | |
thread = OPEN_AI_CLIENT.beta.threads.create() | |
thread_id = thread.id | |
print(f"create new thread_id: {thread_id}") | |
return thread_id | |
def chatbot_select(chatbot_name): | |
chatbot_select_accordion_visible = gr.update(open=False) | |
chatbot_open_ai_visible = gr.update(visible=False) | |
chatbot_open_ai_streaming_visible = gr.update(visible=False) | |
chatbot_jutor_visible = gr.update(visible=False) | |
if chatbot_name == "chatbot_open_ai": | |
chatbot_open_ai_visible = gr.update(visible=True) | |
elif chatbot_name == "chatbot_open_ai_streaming": | |
chatbot_open_ai_streaming_visible = gr.update(visible=True) | |
elif chatbot_name == "chatbot_jutor": | |
chatbot_jutor_visible = gr.update(visible=True) | |
return chatbot_select_accordion_visible, chatbot_open_ai_visible, chatbot_open_ai_streaming_visible, chatbot_jutor_visible | |
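# chatbot_select returns gr.update objects so that, once wired to the select
# buttons below, it collapses the selection accordion and shows only the row
# of the chosen chatbot.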
# --- Slide mode --- | |
def update_slide(direction): | |
global TRANSCRIPTS | |
global CURRENT_INDEX | |
print("=== ๆดๆฐๆๅฝฑ็ ===") | |
print(f"CURRENT_INDEX: {CURRENT_INDEX}") | |
# print(f"TRANSCRIPTS: {TRANSCRIPTS}") | |
CURRENT_INDEX += direction | |
    if CURRENT_INDEX < 0:
        CURRENT_INDEX = 0  # Prevent the index from going below 0
    elif CURRENT_INDEX >= len(TRANSCRIPTS):
        CURRENT_INDEX = len(TRANSCRIPTS) - 1  # Prevent the index from going past the end
    # Get the current entry's text and screenshot URL
current_transcript = TRANSCRIPTS[CURRENT_INDEX] | |
slide_image = current_transcript["screenshot_path"] | |
slide_text = current_transcript["text"] | |
return slide_image, slide_text | |
def prev_slide(): | |
return update_slide(-1) | |
def next_slide(): | |
return update_slide(1) | |
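# prev_slide/next_slide are zero-argument wrappers so each button can be wired
# directly; update_slide clamps CURRENT_INDEX at both ends.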
# --- Init params --- | |
def init_params(text, request: gr.Request): | |
if request: | |
print("Request headers dictionary:", request.headers) | |
print("IP address:", request.client.host) | |
print("Query parameters:", dict(request.query_params)) | |
# url = request.url | |
print("Request URL:", request.url) | |
youtube_link = "" | |
password_text = "" | |
admin = gr.update(visible=True) | |
reading_passage_admin = gr.update(visible=True) | |
summary_admin = gr.update(visible=True) | |
see_detail = gr.update(visible=True) | |
worksheet_accordion = gr.update(visible=True) | |
lesson_plan_accordion = gr.update(visible=True) | |
exit_ticket_accordion = gr.update(visible=True) | |
chatbot_open_ai = gr.update(visible=False) | |
chatbot_open_ai_streaming = gr.update(visible=False) | |
chatbot_jutor = gr.update(visible=False) | |
# if youtube_link in query_params | |
if "youtube_id" in request.query_params: | |
youtube_id = request.query_params["youtube_id"] | |
youtube_link = f"https://www.youtube.com/watch?v={youtube_id}" | |
print(f"youtube_link: {youtube_link}") | |
# check if origin is from junyiacademy | |
origin = request.headers.get("origin", "") | |
if "junyiacademy" in origin: | |
password_text = "6161" | |
admin = gr.update(visible=False) | |
reading_passage_admin = gr.update(visible=False) | |
summary_admin = gr.update(visible=False) | |
see_detail = gr.update(visible=False) | |
worksheet_accordion = gr.update(visible=False) | |
lesson_plan_accordion = gr.update(visible=False) | |
exit_ticket_accordion = gr.update(visible=False) | |
return admin, reading_passage_admin, summary_admin, see_detail, \ | |
worksheet_accordion, lesson_plan_accordion, exit_ticket_accordion, \ | |
password_text, youtube_link, \ | |
chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor | |
def update_state(content_subject, content_grade, trascript, key_moments, question_1, question_2, question_3): | |
# inputs=[content_subject, content_grade, df_string_output], | |
# outputs=[content_subject_state, content_grade_state, trascript_state] | |
content_subject_state = content_subject | |
content_grade_state = content_grade | |
trascript_json = json.loads(trascript) | |
formatted_simple_transcript = create_formatted_simple_transcript(trascript_json) | |
trascript_state = formatted_simple_transcript | |
key_moments_state = key_moments | |
streaming_chat_thread_id_state = create_thread_id() | |
ai_chatbot_question_1 = question_1 | |
ai_chatbot_question_2 = question_2 | |
ai_chatbot_question_3 = question_3 | |
return content_subject_state, content_grade_state, trascript_state, key_moments_state, \ | |
streaming_chat_thread_id_state, \ | |
ai_chatbot_question_1, ai_chatbot_question_2, ai_chatbot_question_3 | |
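# update_state snapshots the freshly processed video data into gr.State
# components (see data_state below) so later chatbot calls can reuse it
# without re-parsing the raw transcript JSON.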
HEAD = """ | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<style> | |
    svg.markmap {
        width: 100%;
        height: 100vh;
    }
</style> | |
<script src="https://cdn.jsdelivr.net/npm/[email protected]"></script> | |
<script> | |
const mind_map_tab_button = document.querySelector("#mind_map_tab-button"); | |
if (mind_map_tab_button) { | |
mind_map_tab_button.addEventListener('click', function() { | |
const mind_map_markdown = document.querySelector("#mind_map_markdown > label > textarea"); | |
if (mind_map_markdown) { | |
            // When the button is clicked, log the textarea's current value
console.log('Value changed to: ' + mind_map_markdown.value); | |
markmap.autoLoader.renderAll(); | |
} | |
}); | |
} | |
</script> | |
<script> | |
function changeImage(direction, count, galleryIndex) { | |
// Find the current visible image by iterating over possible indices | |
var currentImage = null; | |
var currentIndex = -1; | |
for (var i = 0; i < count; i++) { | |
var img = document.querySelector('.slide-image-' + galleryIndex + '-' + i); | |
if (img && img.style.display !== 'none') { | |
currentImage = img; | |
currentIndex = i; | |
break; | |
} | |
} | |
// If no current image is visible, show the first one and return | |
if (currentImage === null) { | |
document.querySelector('.slide-image-' + galleryIndex + '-0').style.display = 'block'; | |
console.error('No current image found for galleryIndex ' + galleryIndex + ', defaulting to first image.'); | |
return; | |
} | |
// Hide the current image | |
currentImage.style.display = 'none'; | |
// Calculate the index of the next image to show | |
var newIndex = (currentIndex + direction + count) % count; | |
// Select the next image and show it | |
var nextImage = document.querySelector('.slide-image-' + galleryIndex + '-' + newIndex); | |
if (nextImage) { | |
nextImage.style.display = 'block'; | |
} else { | |
console.error('No image found for galleryIndex ' + galleryIndex + ' and newIndex ' + newIndex); | |
} | |
} | |
</script> | |
""" | |
with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.amber, text_size = gr.themes.sizes.text_lg), head=HEAD) as demo: | |
with gr.Row() as admin: | |
password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True) | |
youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True) | |
video_id = gr.Textbox(label="video_id", visible=True) | |
# file_upload = gr.File(label="Upload your CSV or Word file", visible=False) | |
# web_link = gr.Textbox(label="Enter Web Page Link", visible=False) | |
user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True) | |
youtube_link_btn = gr.Button("Submit_YouTube_Link", elem_id="youtube_link_btn", visible=True) | |
with gr.Row() as data_state: | |
        content_subject_state = gr.State()  # gr.State holding content_subject
        content_grade_state = gr.State()  # gr.State holding content_grade
        trascript_state = gr.State()  # gr.State holding the transcript
        key_moments_state = gr.State()  # gr.State holding key_moments
        streaming_chat_thread_id_state = gr.State()  # gr.State holding the streaming-chat thread_id
with gr.Tab("AIๅฐ็ฒพ้"): | |
with gr.Accordion("้ธๆ AI ๅฐ็ฒพ้", open=True) as chatbot_select_accordion: | |
with gr.Row(): | |
with gr.Column(scale=1, variant="panel"): | |
chatbot_avatar_url = "https://junyitopicimg.s3.amazonaws.com/s4byy--icon.jpe?v=20200513013523726" | |
chatbot_description = """Hi๏ผๆๆฏไฝ ็AIๅญธไผดใ้ฃ็น็ฒพ้ใ๏ผ\n | |
ๆๅฏไปฅ้ชไฝ ไธ่ตทๅญธ็ฟๆฌๆฌก็ๅ งๅฎน๏ผๆไป้บผๅ้ก้ฝๅฏไปฅๅๆๅ๏ผ\n | |
๐ค ๅฆๆไฝ ไธ็ฅ้ๆ้บผ็ผๅ๏ผๅฏไปฅ้ปๆๅทฆไธๆน็ๅ้กไธใๅ้กไบใๅ้กไธ๏ผๆๆๅนซไฝ ็ๆๅ้ก๏ผ\n | |
๐ฃ๏ธ ไนๅฏไปฅ้ปๆๅณไธๆน็จ่ช้ณ่ผธๅ ฅ๏ผๆๆๅนซไฝ ่ฝๆๆๆๅญ๏ผๅฒๅฎณๅง๏ผ\n | |
๐ ๆๆฏ็ดๆฅ้ต็ค่ผธๅ ฅไฝ ็ๅ้ก๏ผๆๆ็กๅๅ็ญไฝ ็ๅ้กๅ๏ผ\n | |
๐ค ไฝๆ้ๅจๆ้ท๏ผ้ซๅๆ้๏ผๆฏไธๆฌกๅญธ็ฟๅช่ฝๅ็ญๅๅๅ้ก๏ผ่ซ่ฎๆไผๆฏไธไธๅๅๅ้กๅ๏ผ\n | |
๐ฆ ๅฆๆ้ๅฐไธ้๏ผๆๆฏ้ๅฐ็ฒพ้ๅพ็ดฏ๏ผ่ซๅๅๅ ถไปๆๅ๏ผๅๆฏ้ฃ็น้ณ้่ชช่ฉฑ็้ๅบฆๆฏ่ผๅฟซ๏ผไฝ ๆฏๅฆ่ทๅพไธๅข๏ผไฝ ไนๅฏไปฅๅๅ ถไป็ฒพ้ไบๅ็็ๅ๏ผ\n | |
""" | |
chatbot_open_ai_name = gr.State("chatbot_open_ai") | |
gr.Image(value=chatbot_avatar_url, height=100, width=100, show_label=False, show_download_button=False) | |
chatbot_open_ai_select_btn = gr.Button("๐้ธๆใ้ฃ็น็ฒพ้ใ", elem_id="chatbot_btn", visible=True, variant="primary") | |
gr.Markdown(value=chatbot_description, visible=True) | |
with gr.Column(scale=1, variant="panel"): | |
streaming_chatbot_avatar_url = "https://storage.googleapis.com/wpassets.junyiacademy.org/1/2020/11/1-%E6%98%9F%E7%A9%BA%E9%A0%AD%E8%B2%BC-%E5%A4%AA%E7%A9%BA%E7%8B%90%E7%8B%B8%E8%B2%93-150x150.png" | |
streaming_chatbot_description = """Hi๏ผๆๆฏใ้ฃ็น้ณ้ใ๏ผ \n | |
่ชช่ฉฑๆฏ่ผๅฟซ๏ผไฝๆไป้บผๅ้ก้ฝๅฏไปฅๅๆๅ๏ผ \n | |
๐ ๆๆฒๆ้ ่จญๅ้กใไนๆฒๆ่ช้ณ่ผธๅ ฅ๏ผ้ฉๅๅฟซๅๅฟซ็ญ๏ผไธ่ตท็ทด็ฟๅๅบๅฅฝๅ้กๅง \n | |
๐ ๆ ้ท็จๆๅญ่กจ้็ไฝ ๏ผๅฏไปฅ็จ้ต็ค่ผธๅ ฅไฝ ็ๅ้ก๏ผๆๆ็กๅๅ็ญไฝ ็ๅ้กๅ\n | |
๐ค ๆ้ๅจๆ้ท๏ผ้ซๅๆ้๏ผๆฏไธๆฌกๅญธ็ฟๅช่ฝๅ็ญๅๅๅ้ก๏ผ่ซ่ฎๆไผๆฏไธไธๅๅๅ้กๅ๏ฝ | |
""" | |
chatbot_open_ai_streaming_name = gr.State("chatbot_open_ai_streaming") | |
gr.Image(value=streaming_chatbot_avatar_url, height=100, width=100, show_label=False, show_download_button=False) | |
chatbot_open_ai_streaming_select_btn = gr.Button("๐้ธๆใ้ฃ็น้ณ้ใ", elem_id="streaming_chatbot_btn", visible=True, variant="primary") | |
gr.Markdown(value=streaming_chatbot_description, visible=True) | |
with gr.Column(scale=1, variant="panel"): | |
jutor_chatbot_avatar_url = "https://storage.googleapis.com/wpassets.junyiacademy.org/1/2019/11/%E5%9B%9B%E6%A0%BC%E6%95%85%E4%BA%8B-04.jpg" | |
jutor_chatbot_description = """Hi๏ผๆๅๆฏใๆขจๆขจใ้บฅ้บฅใ็็ธ่ฒใ๏ผ\n | |
ไนๅฏไปฅ้ชไฝ ไธ่ตทๅญธ็ฟๆฌๆฌก็ๅ งๅฎน๏ผๆไป้บผๅ้ก้ฝๅฏไปฅๅๆๅ๏ผ\n | |
๐ค ๅฆๆไฝ ไธ็ฅ้ๆ้บผ็ผๅ๏ผๅฏไปฅ้ปๆๅทฆไธๆน็ๅ้กไธใๅ้กไบใๅ้กไธ๏ผๆๆๅนซไฝ ็ๆๅ้ก๏ผ\n | |
๐ฃ๏ธ ไนๅฏไปฅ้ปๆๅณไธๆน็จ่ช้ณ่ผธๅ ฅ๏ผๆๆๅนซไฝ ่ฝๆๆๆๅญ๏ผๅฒๅฎณๅง๏ผ\n | |
๐ ๆๆฏ็ดๆฅ้ต็ค่ผธๅ ฅไฝ ็ๅ้ก๏ผๆๆ็กๅๅ็ญไฝ ็ๅ้กๅ๏ผ\n | |
๐ค ็ฒพ้ๅ้ซๅ้ฝๆ้๏ผๆฏไธๆฌกๅญธ็ฟๅช่ฝๅ็ญๅๅๅ้ก๏ผ่ซ่ฎๆไผๆฏไธไธๅๅๅ้กๅ๏ผ\n | |
""" | |
chatbot_jutor_name = gr.State("chatbot_jutor") | |
gr.Image(value=jutor_chatbot_avatar_url, height=100, width=100, show_label=False, show_download_button=False) | |
chatbot_jutor_select_btn = gr.Button("๐้ธๆใๆขจๆขจใ้บฅ้บฅใ็็ธ่ฒใ", elem_id="jutor_chatbot_btn", visible=True, variant="primary") | |
gr.Markdown(value=jutor_chatbot_description, visible=True) | |
with gr.Row("้ฃ็น็ฒพ้") as chatbot_open_ai: | |
with gr.Column(): | |
user_avatar = "https://em-content.zobj.net/source/google/263/flushed-face_1f633.png" | |
bot_avatar = "https://junyitopicimg.s3.amazonaws.com/s4byy--icon.jpe?v=20200513013523726" | |
latex_delimiters = [{"left": "$", "right": "$", "display": False}] | |
chatbot_greeting = [[ | |
None, | |
"""Hi๏ผๆๆฏไฝ ็AIๅญธไผดใ้ฃ็น็ฒพ้ใ๏ผๆๅฏไปฅ้ชไฝ ไธ่ตทๅญธ็ฟๆฌๆฌก็ๅ งๅฎน๏ผๆไป้บผๅ้ก้ฝๅฏไปฅๅๆๅ๏ผ | |
๐ค ๅฆๆไฝ ไธ็ฅ้ๆ้บผ็ผๅ๏ผๅฏไปฅ้ปๆๅทฆไธๆน็ๅ้กไธใๅ้กไบใๅ้กไธ๏ผๆๆๅนซไฝ ็ๆๅ้ก๏ผ | |
๐ฃ๏ธ ไนๅฏไปฅ้ปๆๅณไธๆน็จ่ช้ณ่ผธๅ ฅ๏ผๆๆๅนซไฝ ่ฝๆๆๆๅญ๏ผๅฒๅฎณๅง๏ผ | |
๐ ๆๆฏ็ดๆฅ้ต็ค่ผธๅ ฅไฝ ็ๅ้ก๏ผๆๆ็กๅๅ็ญไฝ ็ๅ้กๅ๏ผ | |
๐ค ไฝๆ้ๅจๆ้ท๏ผ้ซๅๆ้๏ผๆฏไธๆฌกๅญธ็ฟๅช่ฝๅ็ญๅๅๅ้ก๏ผ่ซ่ฎๆไผๆฏไธไธๅๅๅ้กๅ๏ผ | |
๐ฆ ๅฆๆ้ๅฐไธ้๏ผๆๆฏ้ๅฐ็ฒพ้ๅพ็ดฏ๏ผ่ซๅๅๅ ถไปๆๅ๏ผๅๆฏ้ฃ็น้ณ้่ชช่ฉฑ็้ๅบฆๆฏ่ผๅฟซ๏ผไฝ ๆฏๅฆ่ทๅพไธๅข๏ผไฝ ไนๅฏไปฅๅๅ ถไป็ฒพ้ไบๅ็็ๅ๏ผ | |
""", | |
]] | |
with gr.Row(): | |
chatbot = gr.Chatbot(avatar_images=[user_avatar, bot_avatar], label="OPEN AI", show_share_button=False, likeable=True, show_label=False, latex_delimiters=latex_delimiters,value=chatbot_greeting) | |
with gr.Row(): | |
thread_id = gr.Textbox(label="thread_id", visible=False) | |
socratic_mode_btn = gr.Checkbox(label="่ๆ ผๆๅบๅฎถๆๅฉ็ๆจกๅผ", value=True, visible=False) | |
with gr.Row(): | |
with gr.Accordion("ไฝ ไนๆ้กไผผ็ๅ้กๆณๅๅ๏ผ", open=False) as ask_questions_accordion: | |
btn_1 = gr.Button("ๅ้กไธ") | |
btn_2 = gr.Button("ๅ้กไธ") | |
btn_3 = gr.Button("ๅ้กไธ") | |
gr.Markdown("### ้ๆฐ็ๆๅ้ก") | |
btn_create_question = gr.Button("็ๆๅ ถไปๅ้ก", variant="primary") | |
openai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath", max_length=60, label="่ช้ณ่ผธๅ ฅ") | |
with gr.Row(): | |
msg = gr.Textbox(label="่จๆฏ",scale=3) | |
send_button = gr.Button("้ๅบ", variant="primary", scale=1) | |
with gr.Row("้ฃ็น้ณ้") as chatbot_open_ai_streaming: | |
with gr.Column(): | |
streaming_chat_greeting = """ | |
Hi๏ผๆๆฏใ้ฃ็น้ณ้ใ๏ผ่ชช่ฉฑๆฏ่ผๅฟซ๏ผไฝๆไป้บผๅ้ก้ฝๅฏไปฅๅๆๅ๏ผ \n | |
๐ ๆๆฒๆ้ ่จญๅ้กใไนๆฒๆ่ช้ณ่ผธๅ ฅ๏ผ้ฉๅๅฟซๅๅฟซ็ญ็ไฝ \n | |
๐ ้ต็ค่ผธๅ ฅไฝ ็ๅ้ก๏ผๆๆ็กๅๅ็ญไฝ ็ๅ้กๅ๏ผ\n | |
๐ค ๆ้ๅจๆ้ท๏ผ้ซๅๆ้๏ผๆฏไธๆฌกๅญธ็ฟๅช่ฝๅ็ญๅๅๅ้ก๏ผ่ซ่ฎๆไผๆฏไธไธๅๅๅ้กๅ๏ผ | |
""" | |
additional_inputs = [password, user_data, streaming_chat_thread_id_state, trascript_state, key_moments_state, content_subject_state, content_grade_state] | |
streaming_chat = gr.ChatInterface( | |
fn=streaming_chat_with_open_ai, | |
additional_inputs=additional_inputs, | |
submit_btn="้ๅบ", | |
retry_btn=None, | |
undo_btn="โช ไธไธๆญฅ", | |
clear_btn="๐๏ธ ๆธ ้คๅ จ้จ", | |
stop_btn=None, | |
description=streaming_chat_greeting | |
) | |
with gr.Row("ๅ ถไป็ฒพ้") as chatbot_jutor: | |
with gr.Column(): | |
ai_chatbot_greeting = [[ | |
None, | |
"""Hi๏ผๆๆฏ้ฃ็น็ฒพ้็ๆๅๅใๆขจๆขจใ้บฅ้บฅใ็็ธ่ฒใ๏ผไนๅฏไปฅ้ชไฝ ไธ่ตทๅญธ็ฟๆฌๆฌก็ๅ งๅฎน๏ผๆไป้บผๅ้ก้ฝๅฏไปฅๅๆๅ๏ผ | |
๐ค ๅฆๆไฝ ไธ็ฅ้ๆ้บผ็ผๅ๏ผๅฏไปฅ้ปๆๅทฆไธๆน็ๅ้กไธใๅ้กไบใๅ้กไธ๏ผๆๆๅนซไฝ ็ๆๅ้ก๏ผ | |
๐ฃ๏ธ ไนๅฏไปฅ้ปๆๅณไธๆน็จ่ช้ณ่ผธๅ ฅ๏ผๆๆๅนซไฝ ่ฝๆๆๆๅญ๏ผๅฒๅฎณๅง๏ผ | |
๐ ๆๆฏ็ดๆฅ้ต็ค่ผธๅ ฅไฝ ็ๅ้ก๏ผๆๆ็กๅๅ็ญไฝ ็ๅ้กๅ๏ผ | |
๐ค ็ฒพ้ๅ้ซๅ้ฝๆ้๏ผๆฏไธๆฌกๅญธ็ฟๅช่ฝๅ็ญๅๅๅ้ก๏ผ่ซ่ฎๆไผๆฏไธไธๅๅๅ้กๅ๏ผ | |
""", | |
]] | |
ai_chatbot_bot_avatar = "https://storage.googleapis.com/wpassets.junyiacademy.org/1/2019/11/%E5%9B%9B%E6%A0%BC%E6%95%85%E4%BA%8B-04.jpg" | |
ai_name = gr.Dropdown(label="้ธๆ AI ๅฉ็", choices=[("ๆขจๆขจ","jutor"), ("้บฅ้บฅ","claude3"), ("็็ธ่ฒ","groq")], value="jutor") | |
ai_chatbot = gr.Chatbot(avatar_images=[user_avatar, ai_chatbot_bot_avatar], label="ai_chatbot", show_share_button=False, likeable=True, show_label=False, latex_delimiters=latex_delimiters, value=ai_chatbot_greeting) | |
ai_chatbot_socratic_mode_btn = gr.Checkbox(label="่ๆ ผๆๅบๅฎถๆๅฉ็ๆจกๅผ", value=True, visible=False) | |
with gr.Row(): | |
with gr.Accordion("ไฝ ไนๆ้กไผผ็ๅ้กๆณๅๅ๏ผ", open=False) as ask_questions_accordion_2: | |
ai_chatbot_question_1 = gr.Button("ๅ้กไธ") | |
ai_chatbot_question_2 = gr.Button("ๅ้กไธ") | |
ai_chatbot_question_3 = gr.Button("ๅ้กไธ") | |
ai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath", max_length=60, label="่ช้ณ่ผธๅ ฅ") | |
with gr.Row(): | |
ai_msg = gr.Textbox(label="่จๆฏ่ผธๅ ฅ",scale=3) | |
ai_send_button = gr.Button("้ๅบ", variant="primary",scale=1) | |
with gr.Tab("ๆ็ซ ๆจกๅผ"): | |
with gr.Row(): | |
reading_passage = gr.Markdown(show_label=False, latex_delimiters = [{"left": "$", "right": "$", "display": False}]) | |
reading_passage_speak_button = gr.Button("Speak", visible=False) | |
reading_passage_audio_output = gr.Audio(label="Audio Output", visible=False) | |
with gr.Tab("้้ปๆ่ฆ"): | |
with gr.Row(): | |
df_summarise = gr.Markdown(show_label=False, latex_delimiters = [{"left": "$", "right": "$", "display": False}]) | |
with gr.Tab("้้ตๆๅป"): | |
with gr.Row(): | |
key_moments_html = gr.HTML(value="") | |
with gr.Tab("ๆๅญธๅ่ชฒ"): | |
with gr.Row(): | |
content_subject = gr.Dropdown(label="้ธๆไธป้ก", choices=["ๆธๅญธ", "่ช็ถ", "ๅๆ", "่ฑๆ", "็คพๆ","็ฉ็", "ๅๅญธ", "็็ฉ", "ๅฐ็", "ๆญทๅฒ", "ๅ ฌๆฐ"], value="", visible=False) | |
content_grade = gr.Dropdown(label="้ธๆๅนด็ด", choices=["ไธๅนด็ด", "ไบๅนด็ด", "ไธๅนด็ด", "ๅๅนด็ด", "ไบๅนด็ด", "ๅ ญๅนด็ด", "ไธๅนด็ด", "ๅ ซๅนด็ด", "ไนๅนด็ด", "ๅๅนด็ด", "ๅไธๅนด็ด", "ๅไบๅนด็ด"], value="", visible=False) | |
content_level = gr.Dropdown(label="ๅทฎ็ฐๅๆๅญธ", choices=["ๅบ็ค", "ไธญ็ด", "้ฒ้"], value="ๅบ็ค") | |
with gr.Row(): | |
with gr.Tab("ๅญธ็ฟๅฎ"): | |
with gr.Row(): | |
with gr.Column(scale=1): | |
with gr.Row(): | |
worksheet_content_type_name = gr.Textbox(value="worksheet", visible=False) | |
worksheet_algorithm = gr.Dropdown(label="้ธๆๆๅญธ็ญ็ฅๆ็่ซ", choices=["Bloom่ช็ฅ้ๅฑค็่ซ", "Polyaๆธๅญธ่งฃ้กๆณ", "CRAๆๅญธๆณ"], value="Bloom่ช็ฅ้ๅฑค็่ซ", visible=False) | |
worksheet_content_btn = gr.Button("็ๆๅญธ็ฟๅฎ ๐", variant="primary") | |
with gr.Accordion("ๅพฎ่ชฟ", open=False): | |
worksheet_exam_result_fine_tune_prompt = gr.Textbox(label="ๆ นๆ็ตๆ๏ผ่ผธๅ ฅไฝ ๆณๆดๆน็ๆณๆณ") | |
worksheet_exam_result_fine_tune_btn = gr.Button("ๅพฎ่ชฟ็ตๆ", variant="primary") | |
worksheet_exam_result_retrun_original = gr.Button("่ฟๅๅๅง็ตๆ") | |
with gr.Accordion("prompt", open=False) as worksheet_accordion: | |
worksheet_prompt = gr.Textbox(label="worksheet_prompt", show_copy_button=True, lines=40) | |
with gr.Column(scale=2): | |
                        # Results generated for the corresponding mode
worksheet_exam_result_prompt = gr.Textbox(visible=False) | |
worksheet_exam_result_original = gr.Textbox(visible=False) | |
# worksheet_exam_result = gr.Textbox(label="ๅๆฌก็ๆ็ตๆ", show_copy_button=True, interactive=True, lines=40) | |
worksheet_exam_result = gr.Markdown(label="ๅๆฌก็ๆ็ตๆ", latex_delimiters = [{"left": "$", "right": "$", "display": False}]) | |
worksheet_download_exam_result_button = gr.Button("่ฝๆ word๏ผๅฎๆๅพ่ซ้ปๆๅณไธ่ง download ๆ้", variant="primary") | |
worksheet_exam_result_word_link = gr.File(label="Download Word") | |
with gr.Tab("่ชฒ็จ่จ็ซ"): | |
with gr.Row(): | |
with gr.Column(scale=1): | |
with gr.Row(): | |
lesson_plan_content_type_name = gr.Textbox(value="lesson_plan", visible=False) | |
lesson_plan_time = gr.Slider(label="้ธๆ่ชฒ็จๆ้(ๅ้)", minimum=10, maximum=120, step=5, value=40) | |
lesson_plan_btn = gr.Button("็ๆ่ชฒ็จ่จ็ซ ๐", variant="primary") | |
with gr.Accordion("ๅพฎ่ชฟ", open=False): | |
lesson_plan_exam_result_fine_tune_prompt = gr.Textbox(label="ๆ นๆ็ตๆ๏ผ่ผธๅ ฅไฝ ๆณๆดๆน็ๆณๆณ") | |
lesson_plan_exam_result_fine_tune_btn = gr.Button("ๅพฎ่ชฟ็ตๆ", variant="primary") | |
lesson_plan_exam_result_retrun_original = gr.Button("่ฟๅๅๅง็ตๆ") | |
with gr.Accordion("prompt", open=False) as lesson_plan_accordion: | |
lesson_plan_prompt = gr.Textbox(label="worksheet_prompt", show_copy_button=True, lines=40) | |
with gr.Column(scale=2): | |
                        # Results generated for the corresponding mode
lesson_plan_exam_result_prompt = gr.Textbox(visible=False) | |
lesson_plan_exam_result_original = gr.Textbox(visible=False) | |
lesson_plan_exam_result = gr.Markdown(label="ๅๆฌก็ๆ็ตๆ", latex_delimiters = [{"left": "$", "right": "$", "display": False}]) | |
lesson_plan_download_exam_result_button = gr.Button("่ฝๆ word๏ผๅฎๆๅพ่ซ้ปๆๅณไธ่ง download ๆ้", variant="primary") | |
lesson_plan_exam_result_word_link = gr.File(label="Download Word") | |
with gr.Tab("ๅบๅ ดๅธ"): | |
with gr.Row(): | |
with gr.Column(scale=1): | |
with gr.Row(): | |
exit_ticket_content_type_name = gr.Textbox(value="exit_ticket", visible=False) | |
exit_ticket_time = gr.Slider(label="้ธๆๅบๅ ดๅธๆ้(ๅ้)", minimum=5, maximum=10, step=1, value=8) | |
exit_ticket_btn = gr.Button("็ๆๅบๅ ดๅธ ๐๏ธ", variant="primary") | |
with gr.Accordion("ๅพฎ่ชฟ", open=False): | |
exit_ticket_exam_result_fine_tune_prompt = gr.Textbox(label="ๆ นๆ็ตๆ๏ผ่ผธๅ ฅไฝ ๆณๆดๆน็ๆณๆณ") | |
exit_ticket_exam_result_fine_tune_btn = gr.Button("ๅพฎ่ชฟ็ตๆ", variant="primary") | |
exit_ticket_exam_result_retrun_original = gr.Button("่ฟๅๅๅง็ตๆ") | |
with gr.Accordion("prompt", open=False) as exit_ticket_accordion: | |
exit_ticket_prompt = gr.Textbox(label="worksheet_prompt", show_copy_button=True, lines=40) | |
with gr.Column(scale=2): | |
                        # Results generated for the corresponding mode
exit_ticket_exam_result_prompt = gr.Textbox(visible=False) | |
exit_ticket_exam_result_original = gr.Textbox(visible=False) | |
exit_ticket_exam_result = gr.Markdown(label="ๅๆฌก็ๆ็ตๆ", latex_delimiters = [{"left": "$", "right": "$", "display": False}]) | |
exit_ticket_download_exam_result_button = gr.Button("่ฝๆ word๏ผๅฎๆๅพ่ซ้ปๆๅณไธ่ง download ๆ้", variant="primary") | |
exit_ticket_exam_result_word_link = gr.File(label="Download Word") | |
# with gr.Tab("็ด ้คๅฐๅ้ฑ่ฎ้ก็ต"): | |
# literacy_oriented_reading_content = gr.Textbox(label="่ผธๅ ฅ้ฑ่ฎๆๆ") | |
# literacy_oriented_reading_content_btn = gr.Button("็ๆ้ฑ่ฎ็่งฃ้ก") | |
# with gr.Tab("่ชๆ่ฉไผฐ"): | |
# self_assessment_content = gr.Textbox(label="่ผธๅ ฅ่ช่ฉๅๅทๆๆชขๆฅ่กจ") | |
# self_assessment_content_btn = gr.Button("็ๆ่ช่ฉๅๅท") | |
# with gr.Tab("่ชๆๅๆ่ฉ้"): | |
# self_reflection_content = gr.Textbox(label="่ผธๅ ฅ่ชๆๅๆๆดปๅ") | |
# self_reflection_content_btn = gr.Button("็ๆ่ชๆๅๆๆดปๅ") | |
# with gr.Tab("ๅพ่จญ่ช็ฅ"): | |
# metacognition_content = gr.Textbox(label="่ผธๅ ฅๅพ่จญ่ช็ฅ็ธ้ๅ้ก") | |
# metacognition_content_btn = gr.Button("็ๆๅพ่จญ่ช็ฅๅ้ก") | |
with gr.Accordion("See Details", open=False) as see_details: | |
with gr.Tab("้ๅญ็จฟๆฌๆ"): | |
with gr.Row() as transcript_admmin: | |
transcript_kind = gr.Textbox(value="transcript", show_label=False) | |
transcript_get_button = gr.Button("ๅๅพ", size="sm", variant="primary") | |
transcript_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary") | |
transcript_update_button = gr.Button("ๅฒๅญ", size="sm", variant="primary") | |
transcript_delete_button = gr.Button("ๅช้ค", size="sm", variant="primary") | |
transcript_create_button = gr.Button("ๅปบ็ซ", size="sm", variant="primary") | |
with gr.Row(): | |
df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True) | |
with gr.Tab("ๆ็ซ ๆฌๆ"): | |
with gr.Row() as reading_passage_admin: | |
with gr.Column(): | |
with gr.Row(): | |
reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False) | |
with gr.Row(): | |
reading_passage_text_to_latex = gr.Button("ๆฐๅข LaTeX", size="sm", variant="primary") | |
reading_passage_get_button = gr.Button("ๅๅพ", size="sm", variant="primary") | |
reading_passage_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary") | |
reading_passage_update_button = gr.Button("ๆดๆฐ", size="sm", variant="primary") | |
reading_passage_delete_button = gr.Button("ๅช้ค", size="sm", variant="primary") | |
reading_passage_create_button = gr.Button("ๅปบ็ซ", size="sm", variant="primary") | |
with gr.Row(): | |
reading_passage_text = gr.Textbox(label="reading_passage", lines=40, interactive=False, show_copy_button=True) | |
with gr.Tab("้้ปๆ่ฆๆฌๆ"): | |
with gr.Row() as summary_admmin: | |
with gr.Column(): | |
with gr.Row(): | |
summary_kind = gr.Textbox(value="summary", show_label=False) | |
with gr.Row(): | |
summary_to_markdown = gr.Button("ๆฐๅข Markdown", size="sm", variant="primary") | |
summary_get_button = gr.Button("ๅๅพ", size="sm", variant="primary") | |
summary_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary") | |
summary_update_button = gr.Button("ๆดๆฐ", size="sm", variant="primary") | |
summary_delete_button = gr.Button("ๅช้ค", size="sm", variant="primary") | |
summary_create_button = gr.Button("ๅปบ็ซ", size="sm", variant="primary") | |
with gr.Row(): | |
summary_text = gr.Textbox(label="Summary", lines=40, interactive=False, show_copy_button=True) | |
with gr.Tab("้้ตๆๅปๆฌๆ"): | |
with gr.Row() as key_moments_admin: | |
key_moments_kind = gr.Textbox(value="key_moments", show_label=False) | |
key_moments_get_button = gr.Button("ๅๅพ", size="sm", variant="primary") | |
key_moments_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary") | |
key_moments_update_button = gr.Button("ๅฒๅญ", size="sm", variant="primary") | |
key_moments_delete_button = gr.Button("ๅช้ค", size="sm", variant="primary") | |
key_moments_create_button = gr.Button("ๅปบ็ซ", size="sm", variant="primary") | |
with gr.Row(): | |
key_moments = gr.Textbox(label="Key Moments", lines=40, interactive=False, show_copy_button=True) | |
with gr.Tab("ๅ้กๆฌๆ"): | |
with gr.Row() as question_list_admin: | |
questions_kind = gr.Textbox(value="questions", show_label=False) | |
questions_get_button = gr.Button("ๅๅพ", size="sm", variant="primary") | |
questions_edit_button = gr.Button("็ทจ่ผฏ", size="sm", variant="primary") | |
questions_update_button = gr.Button("ๅฒๅญ", size="sm", variant="primary") | |
questions_delete_button = gr.Button("ๅช้ค", size="sm", variant="primary") | |
questions_create_button = gr.Button("ๅปบ็ซ", size="sm", variant="primary") | |
with gr.Row(): | |
questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True) | |
with gr.Tab("้ๅญ็จฟ"): | |
simple_html_content = gr.HTML(label="Simple Transcript") | |
with gr.Tab("ๅๆ"): | |
transcript_html = gr.HTML(label="YouTube Transcript and Video") | |
with gr.Tab("ๆๅฝฑ็"): | |
slide_image = gr.Image() | |
slide_text = gr.Textbox() | |
with gr.Row(): | |
prev_button = gr.Button("Previous") | |
next_button = gr.Button("Next") | |
prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text]) | |
next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text]) | |
with gr.Tab("markdown"): | |
gr.Markdown("## ่ซ่ค่ฃฝไปฅไธ markdown ไธฆ่ฒผๅฐไฝ ็ๅฟๆบๅๅทฅๅ ทไธญ๏ผๅปบ่ญฐไฝฟ็จ๏ผhttps://markmap.js.org/repl") | |
mind_map = gr.Textbox(container=True, show_copy_button=True, lines=40, elem_id="mind_map_markdown") | |
with gr.Tab("ๅฟๆบๅ",elem_id="mind_map_tab"): | |
mind_map_html = gr.HTML() | |
# --- Event --- | |
# CHATBOT SELECT | |
chatbot_open_ai_select_btn.click( | |
chatbot_select, | |
inputs=[chatbot_open_ai_name], | |
outputs=[chatbot_select_accordion, chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor] | |
) | |
chatbot_open_ai_streaming_select_btn.click( | |
chatbot_select, | |
inputs=[chatbot_open_ai_streaming_name], | |
outputs=[chatbot_select_accordion, chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor] | |
) | |
chatbot_jutor_select_btn.click( | |
chatbot_select, | |
inputs=[chatbot_jutor_name], | |
outputs=[chatbot_select_accordion, chatbot_open_ai, chatbot_open_ai_streaming, chatbot_jutor] | |
) | |
    # OPENAI ASSISTANT CHATBOT mode
send_button.click( | |
chat_with_opan_ai_assistant, | |
inputs=[password, video_id, user_data, thread_id, trascript_state, key_moments, msg, chatbot, content_subject, content_grade, socratic_mode_btn], | |
outputs=[msg, chatbot, thread_id], | |
scroll_to_output=True | |
) | |
openai_chatbot_audio_input.change( | |
process_open_ai_audio_to_chatbot, | |
inputs=[password, openai_chatbot_audio_input], | |
outputs=[msg] | |
) | |
    # OPENAI ASSISTANT CHATBOT: quick-question button click events
btn_1_chat_with_opan_ai_assistant_input =[password, video_id, user_data, thread_id, trascript_state, key_moments, btn_1, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn] | |
btn_2_chat_with_opan_ai_assistant_input =[password, video_id, user_data, thread_id, trascript_state, key_moments, btn_2, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn] | |
btn_3_chat_with_opan_ai_assistant_input =[password, video_id, user_data, thread_id, trascript_state, key_moments, btn_3, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn] | |
btn_1.click( | |
chat_with_opan_ai_assistant, | |
inputs=btn_1_chat_with_opan_ai_assistant_input, | |
outputs=[msg, chatbot, thread_id], | |
scroll_to_output=True | |
) | |
btn_2.click( | |
chat_with_opan_ai_assistant, | |
inputs=btn_2_chat_with_opan_ai_assistant_input, | |
outputs=[msg, chatbot, thread_id], | |
scroll_to_output=True | |
) | |
btn_3.click( | |
chat_with_opan_ai_assistant, | |
inputs=btn_3_chat_with_opan_ai_assistant_input, | |
outputs=[msg, chatbot, thread_id], | |
scroll_to_output=True | |
) | |
btn_create_question.click( | |
change_questions, | |
inputs = [password, df_string_output], | |
outputs = [btn_1, btn_2, btn_3] | |
) | |
    # Other genies: ai_chatbot mode
ai_send_button.click( | |
chat_with_ai, | |
inputs=[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_msg, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn], | |
outputs=[ai_msg, ai_chatbot], | |
scroll_to_output=True | |
) | |
    # Other genies: ai_chatbot quick-question button click events
ai_chatbot_question_1_chat_with_ai_input =[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_chatbot_question_1, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn] | |
ai_chatbot_question_2_chat_with_ai_input =[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_chatbot_question_2, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn] | |
ai_chatbot_question_3_chat_with_ai_input =[ai_name, password, video_id, user_data, trascript_state, key_moments, ai_chatbot_question_3, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn] | |
ai_chatbot_question_1.click( | |
chat_with_ai, | |
inputs=ai_chatbot_question_1_chat_with_ai_input, | |
outputs=[ai_msg, ai_chatbot], | |
scroll_to_output=True | |
) | |
ai_chatbot_question_2.click( | |
chat_with_ai, | |
inputs=ai_chatbot_question_2_chat_with_ai_input, | |
outputs=[ai_msg, ai_chatbot], | |
scroll_to_output=True | |
) | |
ai_chatbot_question_3.click( | |
chat_with_ai, | |
inputs=ai_chatbot_question_3_chat_with_ai_input, | |
outputs=[ai_msg, ai_chatbot], | |
scroll_to_output=True | |
) | |
# file_upload.change(process_file, inputs=file_upload, outputs=df_string_output) | |
# file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output]) | |
    # Triggered when a YouTube link is entered
process_youtube_link_inputs = [password, youtube_link] | |
process_youtube_link_outputs = [ | |
video_id, | |
questions_json, | |
btn_1, | |
btn_2, | |
btn_3, | |
df_string_output, | |
summary_text, | |
df_summarise, | |
key_moments, | |
key_moments_html, | |
mind_map, | |
mind_map_html, | |
transcript_html, | |
simple_html_content, | |
slide_image, | |
slide_text, | |
reading_passage_text, | |
reading_passage, | |
content_subject, | |
content_grade, | |
] | |
update_state_inputs = [ | |
content_subject, | |
content_grade, | |
df_string_output, | |
key_moments, | |
btn_1, | |
btn_2, | |
btn_3 | |
] | |
update_state_outputs = [ | |
content_subject_state, | |
content_grade_state, | |
trascript_state, | |
key_moments_state, | |
streaming_chat_thread_id_state, | |
ai_chatbot_question_1, | |
ai_chatbot_question_2, | |
ai_chatbot_question_3 | |
] | |
youtube_link.change( | |
process_youtube_link, | |
inputs=process_youtube_link_inputs, | |
outputs=process_youtube_link_outputs | |
).then( | |
update_state, | |
inputs=update_state_inputs, | |
outputs=update_state_outputs | |
) | |
youtube_link_btn.click( | |
process_youtube_link, | |
inputs=process_youtube_link_inputs, | |
outputs=process_youtube_link_outputs | |
).then( | |
update_state, | |
inputs=update_state_inputs, | |
outputs=update_state_outputs | |
) | |
    # Triggered when a web page link is entered (disabled)
# web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output]) | |
# reading_passage event | |
reading_passage_text_to_latex.click( | |
reading_passage_add_latex_version, | |
inputs=[video_id], | |
outputs=[reading_passage_text] | |
) | |
reading_passage_get_button.click( | |
get_LLM_content, | |
inputs=[video_id, reading_passage_kind], | |
outputs=[reading_passage_text] | |
) | |
reading_passage_create_button.click( | |
create_LLM_content, | |
inputs=[video_id, df_string_output, reading_passage_kind], | |
outputs=[reading_passage_text] | |
) | |
reading_passage_delete_button.click( | |
delete_LLM_content, | |
inputs=[video_id, reading_passage_kind], | |
outputs=[reading_passage_text] | |
) | |
reading_passage_edit_button.click( | |
enable_edit_mode, | |
inputs=[], | |
outputs=[reading_passage_text] | |
) | |
reading_passage_update_button.click( | |
update_LLM_content, | |
inputs=[video_id, reading_passage_text, reading_passage_kind], | |
outputs=[reading_passage_text] | |
) | |
# summary event | |
summary_to_markdown.click( | |
summary_add_markdown_version, | |
inputs=[video_id], | |
outputs=[summary_text] | |
) | |
summary_get_button.click( | |
get_LLM_content, | |
inputs=[video_id, summary_kind], | |
outputs=[summary_text] | |
) | |
summary_create_button.click( | |
create_LLM_content, | |
inputs=[video_id, df_string_output, summary_kind], | |
outputs=[summary_text] | |
) | |
summary_delete_button.click( | |
delete_LLM_content, | |
inputs=[video_id, summary_kind], | |
outputs=[summary_text] | |
) | |
summary_edit_button.click( | |
enable_edit_mode, | |
inputs=[], | |
outputs=[summary_text] | |
) | |
summary_update_button.click( | |
update_LLM_content, | |
inputs=[video_id, summary_text, summary_kind], | |
outputs=[summary_text] | |
) | |
# transcript event | |
transcript_get_button.click( | |
get_LLM_content, | |
inputs=[video_id, transcript_kind], | |
outputs=[df_string_output] | |
) | |
transcript_create_button.click( | |
create_LLM_content, | |
inputs=[video_id, df_string_output, transcript_kind], | |
outputs=[df_string_output] | |
) | |
transcript_delete_button.click( | |
delete_LLM_content, | |
inputs=[video_id, transcript_kind], | |
outputs=[df_string_output] | |
) | |
transcript_edit_button.click( | |
enable_edit_mode, | |
inputs=[], | |
outputs=[df_string_output] | |
) | |
transcript_update_button.click( | |
update_LLM_content, | |
inputs=[video_id, df_string_output, transcript_kind], | |
outputs=[df_string_output] | |
) | |
# key_moments event | |
key_moments_get_button.click( | |
get_LLM_content, | |
inputs=[video_id, key_moments_kind], | |
outputs=[key_moments] | |
) | |
key_moments_create_button.click( | |
create_LLM_content, | |
inputs=[video_id, df_string_output, key_moments_kind], | |
outputs=[key_moments] | |
) | |
key_moments_delete_button.click( | |
delete_LLM_content, | |
inputs=[video_id, key_moments_kind], | |
outputs=[key_moments] | |
) | |
key_moments_edit_button.click( | |
enable_edit_mode, | |
inputs=[], | |
outputs=[key_moments] | |
) | |
key_moments_update_button.click( | |
update_LLM_content, | |
inputs=[video_id, key_moments, key_moments_kind], | |
outputs=[key_moments] | |
) | |
# question_list event | |
questions_get_button.click( | |
get_LLM_content, | |
inputs=[video_id, questions_kind], | |
outputs=[questions_json] | |
) | |
questions_create_button.click( | |
create_LLM_content, | |
inputs=[video_id, df_string_output, questions_kind], | |
outputs=[questions_json] | |
) | |
questions_delete_button.click( | |
delete_LLM_content, | |
inputs=[video_id, questions_kind], | |
outputs=[questions_json] | |
) | |
questions_edit_button.click( | |
enable_edit_mode, | |
inputs=[], | |
outputs=[questions_json] | |
) | |
questions_update_button.click( | |
update_LLM_content, | |
inputs=[video_id, questions_json, questions_kind], | |
outputs=[questions_json] | |
) | |
    # Teacher edition
worksheet_content_btn.click( | |
get_ai_content, | |
inputs=[password, video_id, df_string_output, content_subject, content_grade, content_level, worksheet_algorithm, worksheet_content_type_name], | |
outputs=[worksheet_exam_result_original, worksheet_exam_result, worksheet_prompt, worksheet_exam_result_prompt] | |
) | |
lesson_plan_btn.click( | |
get_ai_content, | |
inputs=[password, video_id, df_string_output, content_subject, content_grade, content_level, lesson_plan_time, lesson_plan_content_type_name], | |
outputs=[lesson_plan_exam_result_original, lesson_plan_exam_result, lesson_plan_prompt, lesson_plan_exam_result_prompt] | |
) | |
exit_ticket_btn.click( | |
get_ai_content, | |
inputs=[password, video_id, df_string_output, content_subject, content_grade, content_level, exit_ticket_time, exit_ticket_content_type_name], | |
outputs=[exit_ticket_exam_result_original, exit_ticket_exam_result, exit_ticket_prompt, exit_ticket_exam_result_prompt] | |
) | |
    # Fine-tune the generated results
worksheet_exam_result_fine_tune_btn.click( | |
generate_exam_fine_tune_result, | |
inputs=[password, worksheet_exam_result_prompt, df_string_output, worksheet_exam_result, worksheet_exam_result_fine_tune_prompt], | |
outputs=[worksheet_exam_result] | |
) | |
worksheet_download_exam_result_button.click( | |
download_exam_result, | |
inputs=[worksheet_exam_result], | |
outputs=[worksheet_exam_result_word_link] | |
) | |
worksheet_exam_result_retrun_original.click( | |
return_original_exam_result, | |
inputs=[worksheet_exam_result_original], | |
outputs=[worksheet_exam_result] | |
) | |
lesson_plan_exam_result_fine_tune_btn.click( | |
generate_exam_fine_tune_result, | |
inputs=[password, lesson_plan_exam_result_prompt, df_string_output, lesson_plan_exam_result, lesson_plan_exam_result_fine_tune_prompt], | |
outputs=[lesson_plan_exam_result] | |
) | |
lesson_plan_download_exam_result_button.click( | |
download_exam_result, | |
inputs=[lesson_plan_exam_result], | |
outputs=[lesson_plan_exam_result_word_link] | |
) | |
lesson_plan_exam_result_retrun_original.click( | |
return_original_exam_result, | |
inputs=[lesson_plan_exam_result_original], | |
outputs=[lesson_plan_exam_result] | |
) | |
exit_ticket_exam_result_fine_tune_btn.click( | |
generate_exam_fine_tune_result, | |
inputs=[password, exit_ticket_exam_result_prompt, df_string_output, exit_ticket_exam_result, exit_ticket_exam_result_fine_tune_prompt], | |
outputs=[exit_ticket_exam_result] | |
) | |
exit_ticket_download_exam_result_button.click( | |
download_exam_result, | |
inputs=[exit_ticket_exam_result], | |
outputs=[exit_ticket_exam_result_word_link] | |
) | |
exit_ticket_exam_result_retrun_original.click( | |
return_original_exam_result, | |
inputs=[exit_ticket_exam_result_original], | |
outputs=[exit_ticket_exam_result] | |
) | |
# init_params | |
init_outputs = [ | |
admin, | |
reading_passage_admin, | |
summary_admmin, | |
see_details, | |
worksheet_accordion, | |
lesson_plan_accordion, | |
exit_ticket_accordion, | |
password, | |
youtube_link, | |
chatbot_open_ai, | |
chatbot_open_ai_streaming, | |
chatbot_jutor | |
] | |
demo.load( | |
init_params, | |
inputs =[youtube_link], | |
outputs = init_outputs | |
) | |
demo.launch(allowed_paths=["videos"]) | |