import re

import google.generativeai as genai
from docx import Document
from youtube_transcript_api import YouTubeTranscriptApi


class YouTubeWorksheet:
    """Build a fill-in-the-blank study worksheet from a YouTube transcript.

    Typical flow: ``get_transcript(url)`` -> ``create_worksheet(text)`` ->
    ``save_to_docx(table_text)``.
    """

    # Column titles of the worksheet table. The same titles are requested from
    # the model in the prompt, so they also identify an echoed header row.
    _HEADERS = ('원문장', '빈칸 문제', '한국어 번역')

    # Eleven ID characters that directly follow either "v=" or a "/".
    _VIDEO_ID_RE = re.compile(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*')

    def __init__(self, api_key, model_name='gemini-1.5-pro'):
        """Configure the Gemini client and create the generative model.

        Args:
            api_key: API key handed to ``genai.configure``.
            model_name: Model identifier handed to ``genai.GenerativeModel``.
                Defaults to the value that used to be hard-coded.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def get_video_id(self, url):
        """Extract the 11-character video ID from a YouTube URL.

        Args:
            url: URL text such as ``https://www.youtube.com/watch?v=<id>`` or
                ``https://youtu.be/<id>``.

        Returns:
            The first run of 11 ID characters found after ``v=`` or ``/``,
            or ``None`` when there is no such run or ``url`` is not a string.
        """
        # A non-string (e.g. None) would make re raise TypeError; treat it as
        # "no ID found" like any other unparsable input.
        if not isinstance(url, str):
            return None
        match = self._VIDEO_ID_RE.search(url)
        return match.group(1) if match else None

    def get_transcript(self, url, languages=('en', 'ko')):
        """Fetch the transcript of a video as one space-joined string.

        Args:
            url: YouTube URL; the video ID is taken from it via
                ``get_video_id``.
            languages: Language codes passed to ``find_transcript`` (as a
                list). Defaults to English, then Korean.

        Returns:
            The transcript text, or ``None`` when the URL has no video ID or
            any error occurs while fetching (the error is printed).
        """
        video_id = self.get_video_id(url)
        if not video_id:
            return None

        try:
            # Older releases expose a static list_transcripts(); newer ones
            # reportedly use an instance-level list() instead.
            # NOTE(review): confirm against the pinned youtube-transcript-api
            # version.
            if hasattr(YouTubeTranscriptApi, 'list_transcripts'):
                transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            else:
                transcript_list = YouTubeTranscriptApi().list(video_id)

            transcript = transcript_list.find_transcript(list(languages))
            return ' '.join(
                self._entry_text(entry) for entry in transcript.fetch()
            )
        except Exception as e:
            # Deliberate best-effort boundary: report and signal failure with
            # None instead of propagating library/network errors.
            print(f"자막 추출 오류: {e}")
            print(f"비디오 ID: {video_id}")
            return None

    @staticmethod
    def _entry_text(entry):
        """Return the text of one transcript entry (dict or ``.text`` object)."""
        if isinstance(entry, dict):
            return entry['text']
        return entry.text

    def create_worksheet(self, transcript):
        """Ask the model to turn a transcript into a worksheet table.

        The prompt (in Korean) asks the model to split the text into
        sentences and, for each one, produce a fill-in-the-blank version and
        a Korean translation, formatted as pipe-separated table rows.

        Args:
            transcript: Text to process. It is interpolated into the prompt
                as-is, so callers should check for the ``None`` that
                ``get_transcript`` returns on failure before calling this.

        Returns:
            ``response.text`` of the model's reply.
        """
        prompt = f"""
        다음 텍스트를 문장별로 나누고, 각 문장에 대해:
        1. 빈칸 문제 만들기 (중요 단어를 ___로 대체)
        2. 한국어로 번역하기

        텍스트: {transcript}

        표 형식으로 출력:
        원문장|빈칸 문제|한국어 번역
        """

        response = self.model.generate_content(prompt)
        return response.text

    @classmethod
    def _parse_rows(cls, content):
        """Split pipe-delimited table text into rows of at most 3 cells.

        Blank lines, Markdown separator rows (e.g. ``---|---|---``), the row
        directly above a separator (Markdown's header position) and any row
        equal to the expected headers are left out. Empty cells produced by
        optional outer pipes are dropped, and surplus cells are folded into
        the last column so no row is wider than the table.

        Args:
            content: Table text; ``None`` or empty text yields no rows.

        Returns:
            A list of rows, each a list of stripped cell strings.
        """
        width = len(cls._HEADERS)
        expected_header = list(cls._HEADERS)
        rows = []
        prev_kept = False  # was the previous non-blank line appended to rows?

        for raw_line in (content or '').splitlines():
            line = raw_line.strip()
            if not line:
                continue

            # Separator row: only pipes, dashes, colons and whitespace.
            if '|' in line and '-' in line and not line.strip('|-: \t'):
                if prev_kept:
                    # The line above a separator is the table header,
                    # whatever its wording; it is not worksheet data.
                    rows.pop()
                prev_kept = False
                continue

            cells = [cell.strip() for cell in line.split('|')]
            # Outer pipes ("| a | b | c |") yield empty edge cells.
            if len(cells) > width and not cells[0]:
                cells = cells[1:]
            if len(cells) > width and not cells[-1]:
                cells = cells[:-1]
            # A '|' inside a sentence: keep the text, stay within the table.
            if len(cells) > width:
                cells[width - 1:] = ['|'.join(cells[width - 1:])]

            if cells == expected_header:
                prev_kept = False
                continue

            rows.append(cells)
            prev_kept = True

        return rows

    def save_to_docx(self, content, output_file="worksheet.docx"):
        """Write the worksheet table to a .docx file.

        Args:
            content: Pipe-delimited table text, e.g. the value returned by
                ``create_worksheet``.
            output_file: Destination path passed to ``Document.save``.

        Returns:
            ``output_file``.
        """
        doc = Document()
        doc.add_heading('YouTube 학습 활동지', 0)

        rows = self._parse_rows(content)
        # One extra row at the top for the column titles.
        table = doc.add_table(rows=len(rows) + 1, cols=len(self._HEADERS))
        table.style = 'Table Grid'

        for col, header in enumerate(self._HEADERS):
            table.cell(0, col).text = header

        for row_idx, cells in enumerate(rows, start=1):
            for col, text in enumerate(cells):
                table.cell(row_idx, col).text = text

        doc.save(output_file)
        return output_file