File size: 2,494 Bytes
9ad2423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22a2604
 
 
 
 
 
9ad2423
 
22a2604
9ad2423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from youtube_transcript_api import YouTubeTranscriptApi
import google.generativeai as genai
from docx import Document
import re

class YouTubeWorksheet:
    """Turn a YouTube video's transcript into a three-column .docx worksheet.

    Pipeline: extract the video ID from a URL, fetch the transcript text,
    ask a Gemini model to produce a pipe-delimited table (original sentence,
    fill-in-the-blank version, Korean translation), and write that table
    into a Word document.
    """

    # "v=" or "/" followed by exactly 11 ID characters.  The negative
    # lookahead rejects the first 11 characters of a longer token (e.g. the
    # host "youtube-nocookie" or a long path segment), which would otherwise
    # be mistaken for a video ID.
    _VIDEO_ID_RE = re.compile(r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])')

    def __init__(self, api_key, model_name='gemini-1.5-pro'):
        """Configure the Gemini client and create the generative model.

        Args:
            api_key: API key passed to ``genai.configure``.
            model_name: Name of the Gemini model to instantiate. Defaults to
                the model the class originally hard-coded.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def get_video_id(self, url):
        """Extract the 11-character video ID from a YouTube URL.

        Args:
            url: URL text containing ``v=<id>`` or ``/<id>``.

        Returns:
            The video ID string, or ``None`` when ``url`` is not a string or
            contains no 11-character ID token.
        """
        if not isinstance(url, str):
            return None
        match = self._VIDEO_ID_RE.search(url)
        return match.group(1) if match else None

    def get_transcript(self, url):
        """Fetch the transcript of the video at ``url`` as one string.

        Args:
            url: YouTube URL; its video ID is taken via ``get_video_id``.

        Returns:
            The transcript snippets joined with single spaces, or ``None``
            when no video ID can be extracted or fetching fails (the error
            and the video ID are printed in that case).
        """
        video_id = self.get_video_id(url)
        if not video_id:
            return None
        try:
            # List every transcript available for the video first.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            # Request a transcript for the language codes 'en' / 'ko'.
            transcript = transcript_list.find_transcript(['en', 'ko'])

            # Older library releases yield dicts with a 'text' key; newer
            # ones yield snippet objects exposing a ``text`` attribute.
            # Accept both so the join does not depend on the installed version.
            texts = [
                entry['text'] if isinstance(entry, dict) else entry.text
                for entry in transcript.fetch()
            ]
            return ' '.join(texts)
        except Exception as e:
            # Best-effort: report the failure and signal it with None.
            print(f"์ž๋ง‰ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
            print(f"๋น„๋””์˜ค ID: {video_id}")
            return None

    def create_worksheet(self, transcript):
        """Ask the model to turn ``transcript`` into a pipe-delimited table.

        Args:
            transcript: Transcript text interpolated into the prompt.

        Returns:
            The ``text`` attribute of the model response.
        """
        prompt = f"""
        ๋‹ค์Œ ํ…์ŠคํŠธ๋ฅผ ๋ฌธ์žฅ๋ณ„๋กœ ๋‚˜๋ˆ„๊ณ , ๊ฐ ๋ฌธ์žฅ์— ๋Œ€ํ•ด:
        1. ๋นˆ์นธ ๋ฌธ์ œ ๋งŒ๋“ค๊ธฐ (์ค‘์š” ๋‹จ์–ด๋ฅผ ___๋กœ ๋Œ€์ฒด)
        2. ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญํ•˜๊ธฐ
        
        ํ…์ŠคํŠธ: {transcript}
        
        ํ‘œ ํ˜•์‹์œผ๋กœ ์ถœ๋ ฅ:
        ์›๋ฌธ์žฅ|๋นˆ์นธ ๋ฌธ์ œ|ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ
        """

        response = self.model.generate_content(prompt)
        return response.text

    @staticmethod
    def _parse_table_rows(content, headers):
        """Parse pipe-delimited text into rows of exactly ``len(headers)`` cells.

        Skipped lines: blank lines, lines without any ``|``, the header row
        (cells equal to ``headers``), and markdown separator rows such as
        ``|---|:-:|---|``.  One leading and one trailing ``|`` are treated as
        table borders and removed.  Short rows are padded with empty strings;
        surplus cells are merged back into the last column so no text is lost.

        Args:
            content: Raw table text (may be empty or ``None``).
            headers: Column titles; their count fixes the row width.

        Returns:
            A list of rows, each a list of stripped cell strings.
        """
        if not content:
            return []

        width = len(headers)
        header_cells = list(headers)
        parsed = []
        for raw_line in content.strip().split('\n'):
            line = raw_line.strip()
            if '|' not in line:
                continue  # not a table row (blank line, preamble, code fence)

            # Drop markdown-style outer borders before splitting.
            if line.startswith('|'):
                line = line[1:]
            if line.endswith('|'):
                line = line[:-1]

            cells = [cell.strip() for cell in line.split('|')]
            if not any(cells) or cells == header_cells:
                continue
            if all(cell and '-' in cell and set(cell) <= {'-', ':'} for cell in cells):
                continue  # markdown header/body separator row

            if len(cells) > width:
                # A stray '|' inside a cell: keep the text, fold it into the
                # last column instead of indexing past the table edge.
                cells = cells[:width - 1] + ['|'.join(cells[width - 1:])]
            else:
                cells += [''] * (width - len(cells))
            parsed.append(cells)
        return parsed

    def save_to_docx(self, content, output_file="worksheet.docx"):
        """Write the worksheet table to a Word document.

        Args:
            content: Pipe-delimited table text, one row per line.
            output_file: Path of the .docx file to write.

        Returns:
            ``output_file``, after the document has been saved.
        """
        doc = Document()
        doc.add_heading('YouTube ํ•™์Šต ํ™œ๋™์ง€', 0)

        headers = ['์›๋ฌธ์žฅ', '๋นˆ์นธ ๋ฌธ์ œ', 'ํ•œ๊ตญ์–ด ๋ฒˆ์—ญ']
        rows = self._parse_table_rows(content, headers)

        # One extra row at the top for the column titles.
        table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
        table.style = 'Table Grid'

        for col, header in enumerate(headers):
            table.cell(0, col).text = header

        for row_index, row in enumerate(rows, start=1):
            for col, text in enumerate(row):
                table.cell(row_index, col).text = text

        doc.save(output_file)
        return output_file