lcjln committed on
Commit
f6beab7
·
verified ·
1 Parent(s): 64a29d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -125
app.py CHANGED
@@ -1,126 +1,83 @@
1
- import streamlit as st
2
- import yt_dlp
3
  import os
4
-
5
# Page title and usage instructions.
# The Korean UI strings below were mis-encoded in this copy of the file and
# have been restored to proper UTF-8 text.
st.title('유튜브 영상 다운로드')
st.write("<br>", unsafe_allow_html=True)  # one blank line
st.write("사용방법")
st.write("<br>", unsafe_allow_html=True)  # one blank line
st.write("1. 다운받으려는 유튜브 영상의 링크를 입력해주세요")
st.write("2. 링크를 입력하고 확인버튼을 누르면 해당 영상의 썸네일이 등장합니다")
st.write("3. 썸네일 밑의 선택란을 통해 해상도와 영상 용량을 확인한 후 원하는 해상도를 선택하고 추출 버튼을 눌러주세요")
st.write("4. 잠시 후 다운받으려는 영상이 밑에 등장하고 재생하거나 다운로드 받을 수 있습니다 (영상의 용량이 클수록 추출하는데 시간이 더 걸립니다. 20분 영상 추출하는데 1분내로 소요)")

# Read the YouTube link from the user; the input sits in the wide left column.
col1, col2 = st.columns([4, 1])
with col1:
    youtube_url = st.text_input('Enter YouTube video URL')
19
-
20
def fetch_video_info(youtube_url):
    """Return the metadata dictionary yt-dlp extracts for ``youtube_url``.

    Nothing is downloaded: ``extract_info`` is called with ``download=False``.
    The ``nocheckcertificate`` option is passed to yt-dlp unchanged.
    """
    options = {'nocheckcertificate': True}
    with yt_dlp.YoutubeDL(options) as downloader:
        return downloader.extract_info(youtube_url, download=False)
25
-
26
def get_available_formats(info_dict):
    """Map each downloadable MP4 resolution to its format id and a display label.

    Args:
        info_dict: Metadata dictionary as returned by ``fetch_video_info``.
            Its ``'formats'`` entry is a list of per-format dictionaries.

    Returns:
        A dict keyed by resolution label (``"720p"``). Each value is
        ``{'id': <format_id>, 'info': "<resolution> - <size> MB"}``.
        Only formats with a truthy ``height``, ``ext == 'mp4'`` and a known,
        non-zero ``filesize`` are kept. When several formats share a
        resolution the last one listed wins. A missing or ``None``
        ``'formats'`` entry yields an empty dict instead of raising.
    """
    formats = {}
    # ``or []`` covers both a missing key and an explicit None value.
    for fmt in info_dict.get('formats') or []:
        height = fmt.get('height')
        filesize = fmt.get('filesize')
        if not height or fmt.get('ext') != 'mp4' or not filesize:
            continue
        resolution = f"{height}p"
        filesize_mb = round(filesize / (1024 * 1024), 2)
        formats[resolution] = {
            'id': fmt['format_id'],
            'info': f"{resolution} - {filesize_mb} MB",
        }
    return formats
39
-
40
def reset_session_state():
    """Remove every per-video key from ``st.session_state``.

    Keys that are not present are ignored.
    """
    per_video_keys = (
        'info_dict',
        'thumbnail_url',
        'formats',
        'youtube_url',
        'confirmed',
        'selected_format_id',
    )
    for key in per_video_keys:
        st.session_state.pop(key, None)
47
-
48
def delete_downloaded_video():
    """Delete ``downloaded_video.mp4`` from the working directory if it exists.

    The file is removed directly and a missing file is ignored, so there is
    no window between an existence check and the removal in which another
    session could delete it first.
    """
    try:
        os.remove('downloaded_video.mp4')
    except FileNotFoundError:
        # Nothing was downloaded yet (or it is already gone): nothing to do.
        pass
51
-
52
- # "확인" λ²„νŠΌμ„ λˆ„λ₯΄λ©΄ μ²˜λ¦¬ν•  둜직
53
confirm_clicked = st.button('확인')
if confirm_clicked and not youtube_url:
    st.warning('Please enter a valid YouTube video URL')
elif confirm_clicked:
    try:
        # Remove the video left over from a previous download.
        delete_downloaded_video()

        # A different URL than last time invalidates everything stored so far.
        url_changed = (
            'youtube_url' in st.session_state
            and st.session_state['youtube_url'] != youtube_url
        )
        if url_changed:
            reset_session_state()

        st.session_state.youtube_url = youtube_url
        video_info = fetch_video_info(youtube_url)
        st.session_state.info_dict = video_info
        st.session_state.thumbnail_url = video_info.get('thumbnail')
        st.session_state.formats = get_available_formats(video_info)
        st.session_state.confirmed = True
    except Exception as e:
        st.error(f"An error occurred: {e}")
73
-
74
if st.session_state.get('confirmed', False):
    st.image(st.session_state.thumbnail_url, caption='Video Thumbnail', use_column_width=True)

    # Offer one entry per available resolution, labelled "<res> - <size> MB".
    available = st.session_state.formats
    format_options = [entry['info'] for entry in available.values()]
    format_selection = st.selectbox('Select resolution', format_options)

    # Remember the format id behind the chosen label (None when nothing matches).
    chosen_id = None
    for entry in available.values():
        if entry['info'] == format_selection:
            chosen_id = entry['id']
            break
    st.session_state.selected_format_id = chosen_id
85
-
86
def download_video(youtube_url, format_id):
    """Download ``youtube_url`` in the given video format, merged with the best audio.

    The output template is ``downloaded_video.%(ext)s`` and the merge output
    format is ``mp4``; the callers read the result back as
    ``downloaded_video.mp4``.
    """
    options = dict(
        format=f"{format_id}+bestaudio/best",
        merge_output_format='mp4',
        outtmpl='downloaded_video.%(ext)s',
        # Author's intent: disable caching.
        # NOTE(review): confirm that YoutubeDL recognises a 'nocache' option.
        nocache=True,
        # Author's intent: always use a fresh URL.
        # NOTE(review): confirm this option does what is intended for YouTube links.
        force_generic_extractor=True,
    )
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([youtube_url])
96
-
97
- # "μΆ”μΆœ" λ²„νŠΌ μΆ”κ°€
98
# NOTE(review): indentation was lost in this copy of the file. This handler
# reads session keys that are only set after confirmation, so it presumably
# belongs inside the ``confirmed`` branch - confirm against the original.
# The Korean button and metadata labels were mis-encoded here and have been
# restored to proper UTF-8 text.
if st.button('추출'):
    format_id = st.session_state.selected_format_id
    if format_id is None:
        st.error('Selected format not found.')
    else:
        try:
            # Use the URL stored at confirmation time, not the live text input.
            download_video(st.session_state.youtube_url, format_id)
            st.success('Video downloaded successfully!')
            st.video('downloaded_video.mp4')

            # Offer the downloaded file to the user.
            with open('downloaded_video.mp4', 'rb') as file:
                st.download_button(
                    label="Download Video File",
                    data=file,
                    file_name='downloaded_video.mp4',
                    mime='video/mp4'
                )

            # Show the metadata of the extracted video.
            st.write(f"**제목:** {st.session_state.info_dict['title']}")
            st.write(f"**채널명:** {st.session_state.info_dict['uploader']}")
            st.write(f"**업로드 날짜:** {st.session_state.info_dict['upload_date']}")

            # Clear the per-video state once the download has been served.
            reset_session_state()
        except Exception as e:
            st.error(f"An error occurred during download: {e}")
 
 
 
1
  import os
2
+ import streamlit as st
3
+ import torch
4
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
5
+ import librosa
6
+ import moviepy.editor as mp
7
+ import srt
8
+ from datetime import timedelta
9
+ from tempfile import NamedTemporaryFile
10
+
11
+ # λͺ¨λΈ 및 ν”„λ‘œμ„Έμ„œ λ‘œλ“œ
12
+ @st.cache_resource
13
+ def load_model():
14
+ model = WhisperForConditionalGeneration.from_pretrained("lcjln/AIME_Project_The_Final")
15
+ processor = WhisperProcessor.from_pretrained("lcjln/AIME_The_Final")
16
+ return model, processor
17
+
18
model, processor = load_model()

SAMPLE_RATE = 16000  # sampling rate used for librosa and the Whisper processor
WINDOW_SECONDS = 30  # the Whisper feature extractor pads/truncates each input to 30 s


def _transcribe_windows(waveform, report):
    """Transcribe ``waveform`` one 30-second window at a time.

    A single processor call only keeps the first 30 seconds of its input, so
    the audio is cut into consecutive windows and each one is decoded on its
    own.

    Args:
        waveform: 1-D audio samples at ``SAMPLE_RATE``.
        report: Callable receiving the fraction of windows finished (0-1].

    Returns:
        A list of ``(start_seconds, end_seconds, text)`` tuples, one per
        window that produced non-empty text.
    """
    window = WINDOW_SECONDS * SAMPLE_RATE
    # Never ask for more tokens than the decoder has positions for.
    token_limit = getattr(model.config, "max_target_positions", 448)
    offsets = range(0, len(waveform), window)
    segments = []
    for done, offset in enumerate(offsets, start=1):
        piece = waveform[offset:offset + window]
        features = processor(piece, return_tensors="pt", sampling_rate=SAMPLE_RATE)
        with torch.no_grad():
            predicted_ids = model.generate(features["input_features"], max_length=token_limit)
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
        if text:
            segments.append((offset / SAMPLE_RATE, (offset + len(piece)) / SAMPLE_RATE, text))
        report(done / len(offsets))
    return segments


def _build_subtitles(segments):
    """Convert timed text segments into ``srt.Subtitle`` entries.

    Each segment's text is split on ``". "`` and its sentences share the
    segment's time span evenly, so subtitles stay close to where the speech
    actually occurred.
    """
    subtitles = []
    for start, end, text in segments:
        sentences = [part.strip() for part in text.split(". ") if part.strip()]
        if not sentences:
            continue
        step = (end - start) / len(sentences)
        for position, sentence in enumerate(sentences):
            subtitles.append(srt.Subtitle(
                index=len(subtitles) + 1,
                start=timedelta(seconds=start + position * step),
                end=timedelta(seconds=start + (position + 1) * step),
                content=sentence,
            ))
    return subtitles


# Web application interface.
# The Korean UI strings were mis-encoded in this copy of the file and have
# been restored to proper UTF-8 text.
st.title("Whisper 자막 생성기")
st.write("영상 파일을 업로드하여 자막을 생성하세요.")

# Video file upload
uploaded_file = st.file_uploader("여기에 영상 파일을 드래그 앤 드롭 하세요", type=["mp4", "mkv", "mov"])

if uploaded_file is not None:
    progress_bar = st.progress(0)

    # Extract the audio track from the uploaded video.
    st.write("영상을 처리하는 중입니다...")
    progress_bar.progress(10)

    upload_stem, upload_ext = os.path.splitext(os.path.basename(uploaded_file.name))
    # Keep the real extension so mkv/mov uploads are not mislabelled as mp4.
    with NamedTemporaryFile(suffix=upload_ext or ".mp4") as temp_video_file:
        temp_video_file.write(uploaded_file.getvalue())
        # MoviePy reopens the file by name; flush so it sees the full content.
        temp_video_file.flush()
        audio_path = os.path.splitext(temp_video_file.name)[0] + ".wav"
        video = mp.VideoFileClip(temp_video_file.name)
        try:
            if video.audio is None:
                st.error("영상에서 오디오 트랙을 찾을 수 없습니다.")
                st.stop()
            video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            video.close()

    progress_bar.progress(30)

    # Load the extracted audio, then remove the temporary WAV file.
    st.write("오디오 파일을 처리하는 중입니다...")
    try:
        audio, _ = librosa.load(audio_path, sr=SAMPLE_RATE)
    finally:
        os.remove(audio_path)

    progress_bar.progress(50)

    # Transcribe with the Whisper model; the bar moves from 50 to 80 as windows finish.
    st.write("모델을 통해 자막을 생성하는 중입니다...")
    segments = _transcribe_windows(
        audio,
        lambda fraction: progress_bar.progress(50 + int(30 * fraction)),
    )

    progress_bar.progress(80)

    # Build the SRT document in memory; no temporary file is left behind.
    st.write("SRT 파일을 생성하는 중입니다...")
    srt_content = srt.compose(_build_subtitles(segments))

    progress_bar.progress(100)

    st.success("SRT 파일이 성공적으로 생성되었습니다!")

    # SRT download button, named after the uploaded video.
    st.download_button(
        label="SRT 파일 다운로드",
        data=srt_content.encode("utf-8"),
        file_name=f"{upload_stem}.srt",
        mime="text/srt",
    )