# ani-quiz / youtube_worksheet.py
# JUNGU's picture
# Update youtube_worksheet.py
# 22a2604 verified
from youtube_transcript_api import YouTubeTranscriptApi
import google.generativeai as genai
from docx import Document
import re
class YouTubeWorksheet:
    """Turn a YouTube video's transcript into a fill-in-the-blank worksheet.

    Typical flow: ``get_transcript(url)`` -> ``create_worksheet(text)`` ->
    ``save_to_docx(table_text)``.
    """

    # Column titles of the worksheet table. They are written into the first
    # row of the .docx table and also used to recognise a header row that the
    # model echoes back in its answer.
    _HEADERS = ('원문장', '빈칸 문제', '한국어 번역')

    # An 11-character video ID preceded by "v=" or "/". The negative
    # lookahead rejects longer tokens (channel IDs, long path segments) whose
    # first 11 characters would otherwise be mistaken for a video ID.
    _VIDEO_ID_RE = re.compile(r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])')

    # One cell of a Markdown separator row, e.g. "---", ":---" or ":---:".
    _SEPARATOR_CELL_RE = re.compile(r':?-+:?')

    def __init__(self, api_key, model_name='gemini-1.5-pro'):
        """Configure the Gemini client and create the generative model.

        Args:
            api_key: API key passed to ``genai.configure``.
            model_name: Name handed to ``genai.GenerativeModel``; defaults to
                the model the class has always used.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def get_video_id(self, url):
        """Extract the 11-character video ID from a YouTube URL.

        Args:
            url: URL text such as ``https://www.youtube.com/watch?v=<id>`` or
                ``https://youtu.be/<id>``.

        Returns:
            The video ID, or ``None`` when ``url`` is empty, not a string, or
            contains no ID-shaped token.
        """
        if not isinstance(url, str) or not url:
            return None
        match = self._VIDEO_ID_RE.search(url)
        return match.group(1) if match else None

    def get_transcript(self, url):
        """Fetch the video's transcript and return it as one string.

        Args:
            url: YouTube URL of the video.

        Returns:
            The transcript snippets joined with single spaces, or ``None``
            when no video ID can be extracted or the transcript lookup fails
            (the error is printed, not raised).
        """
        video_id = self.get_video_id(url)
        if not video_id:
            return None
        try:
            # List every available transcript first, then pick English or
            # Korean (the original author's note says this is meant to
            # include auto-generated captions).
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            transcript = transcript_list.find_transcript(['en', 'ko'])
            # NOTE(review): entries are dicts in the API this code was
            # written against; newer releases of the library reportedly
            # yield objects with a ``.text`` attribute. Accept both --
            # confirm against the installed version.
            return ' '.join(
                entry['text'] if isinstance(entry, dict) else entry.text
                for entry in transcript.fetch()
            )
        except Exception as e:
            # Deliberate best-effort: report the problem and signal failure
            # with None instead of propagating library/network errors.
            print(f"자막 추출 오류: {e}")
            print(f"비디오 ID: {video_id}")
            return None

    def create_worksheet(self, transcript):
        """Ask the model to turn a transcript into a three-column table.

        The prompt (in Korean) asks for each sentence, a cloze version with
        key words replaced by ``___``, and a Korean translation, printed as
        pipe-separated rows.

        Args:
            transcript: Transcript text to embed in the prompt.

        Returns:
            The ``text`` attribute of the model's response.
        """
        # Built line by line so the text sent to the model does not depend on
        # how this method happens to be indented.
        prompt = (
            "\n"
            "다음 텍스트를 문장별로 나누고, 각 문장에 대해:\n"
            "1. 빈칸 문제 만들기 (중요 단어를 ___로 대체)\n"
            "2. 한국어로 번역하기\n"
            f"텍스트: {transcript}\n"
            "표 형식으로 출력:\n"
            "원문장|빈칸 문제|한국어 번역\n"
        )
        response = self.model.generate_content(prompt)
        return response.text

    @classmethod
    def _parse_table_rows(cls, content):
        """Parse pipe-delimited table text into rows of exactly three cells.

        Lines without a ``|`` (blank lines, surrounding prose) and Markdown
        separator rows are skipped. The header row is dropped when it is
        followed by a separator row or when it equals the known column
        titles. Short rows are padded with empty strings; surplus cells are
        folded into the last column so no text is lost and no row can be
        wider than the table.

        Args:
            content: Raw table text; ``None`` or empty yields no rows.

        Returns:
            A list of rows, each a list of ``len(_HEADERS)`` stripped strings.
        """
        width = len(cls._HEADERS)
        parsed = []
        for line in (content or '').splitlines():
            line = line.strip()
            if '|' not in line:
                continue
            # Markdown rows are usually wrapped in outer pipes; remove them
            # so they do not produce empty first/last cells.
            if line.startswith('|'):
                line = line[1:]
            if line.endswith('|'):
                line = line[:-1]
            cells = [cell.strip() for cell in line.split('|')]
            if all(cls._SEPARATOR_CELL_RE.fullmatch(cell) for cell in cells):
                # A separator directly after the first row marks that row as
                # the header, whatever its wording.
                if len(parsed) == 1:
                    parsed.clear()
                continue
            if not any(cells):
                continue
            if len(cells) > width:
                cells = cells[:width - 1] + ['|'.join(cells[width - 1:])]
            else:
                cells = cells + [''] * (width - len(cells))
            parsed.append(cells)
        # Header echoed without a separator line.
        if parsed and tuple(parsed[0]) == cls._HEADERS:
            parsed.pop(0)
        return parsed

    def save_to_docx(self, content, output_file="worksheet.docx"):
        """Write the worksheet table to a Word document.

        Args:
            content: Pipe-delimited table text, e.g. the return value of
                ``create_worksheet``.
            output_file: Path of the .docx file to write.

        Returns:
            ``output_file``, unchanged.
        """
        doc = Document()
        doc.add_heading('YouTube 학습 활동지', 0)

        rows = self._parse_table_rows(content)
        # One extra row at the top for the column titles.
        table = doc.add_table(rows=len(rows) + 1, cols=len(self._HEADERS))
        table.style = 'Table Grid'

        for col, header in enumerate(self._HEADERS):
            table.cell(0, col).text = header

        for row_idx, row in enumerate(rows, start=1):
            for col, text in enumerate(row):
                table.cell(row_idx, col).text = text

        doc.save(output_file)
        return output_file