lcjln commited on
Commit
8f5fb37
·
verified ·
1 Parent(s): 3085b46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -48
app.py CHANGED
@@ -3,10 +3,8 @@ import streamlit as st
3
  import torch
4
  from transformers import WhisperForConditionalGeneration, WhisperProcessor
5
  import librosa
6
- import moviepy.editor as mp
7
  import srt
8
  from datetime import timedelta
9
- from tempfile import NamedTemporaryFile
10
 
11
  # λͺ¨λΈ 및 ν”„λ‘œμ„Έμ„œ λ‘œλ“œ
12
  @st.cache_resource
@@ -19,65 +17,73 @@ model, processor = load_model()
19
 
20
  # μ›Ή μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μΈν„°νŽ˜μ΄μŠ€
21
  st.title("Whisper μžλ§‰ 생성기")
22
- st.write("μ˜μƒ νŒŒμΌμ„ μ—…λ‘œλ“œν•˜μ—¬ μžλ§‰μ„ μƒμ„±ν•˜μ„Έμš”.")
23
 
24
- # μ˜μƒ 파일 μ—…λ‘œλ“œ
25
- uploaded_file = st.file_uploader("여기에 μ˜μƒ νŒŒμΌμ„ λ“œλž˜κ·Έ μ•€ λ“œλ‘­ ν•˜μ„Έμš”", type=["mp4", "mkv", "mov"])
26
 
27
- if uploaded_file is not None:
28
- # μ§„ν–‰λ°” μ΄ˆκΈ°ν™”
29
- progress_bar = st.progress(0)
30
-
31
- # μ—…λ‘œλ“œλœ μ˜μƒ νŒŒμΌμ—μ„œ μ˜€λ””μ˜€ μΆ”μΆœ
32
- st.write("μ˜μƒμ„ μ²˜λ¦¬ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
33
- progress_bar.progress(10)
34
 
35
- with NamedTemporaryFile(suffix=".mp4") as temp_video_file:
36
- temp_video_file.write(uploaded_file.read())
37
- video = mp.VideoFileClip(temp_video_file.name)
38
- audio_path = temp_video_file.name.replace(".mp4", ".wav")
39
- video.audio.write_audiofile(audio_path, codec='pcm_s16le')
40
 
41
- progress_bar.progress(30)
 
42
 
43
- # μ˜€λ””μ˜€ 파일 λ‘œλ“œ 및 처리
44
- st.write("μ˜€λ””μ˜€ νŒŒμΌμ„ μ²˜λ¦¬ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
45
- audio, sr = librosa.load(audio_path, sr=16000)
46
 
47
- progress_bar.progress(50)
 
 
48
 
49
- # Whisper λͺ¨λΈλ‘œ λ³€ν™˜
50
- st.write("λͺ¨λΈμ„ 톡해 μžλ§‰μ„ μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
51
- inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
52
- with torch.no_grad():
53
- predicted_ids = model.generate(inputs["input_features"], max_length=2048)
54
 
55
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
 
 
 
 
56
 
57
- progress_bar.progress(80)
58
 
59
- # μžλ§‰μ„ SRT 파일둜 μ €μž₯
60
- st.write("SRT νŒŒμΌμ„ μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
61
- lines = transcription.split(". ")
62
- subs = []
63
- step = len(audio) / sr / len(lines)
64
- start_time = 0.0
65
 
66
- for i, line in enumerate(lines):
67
- end_time = start_time + step
68
- subs.append(srt.Subtitle(index=i+1, start=timedelta(seconds=start_time), end=timedelta(seconds=end_time), content=line))
69
- start_time = end_time
 
70
 
71
- srt_content = srt.compose(subs)
 
 
 
 
 
 
72
 
73
- srt_file_path = audio_path.replace(".wav", ".srt")
74
- with open(srt_file_path, "w", encoding="utf-8") as f:
75
- f.write(srt_content)
76
 
77
- progress_bar.progress(100)
78
 
79
- st.success("SRT 파일이 μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
 
 
80
 
81
- # SRT 파일 λ‹€μš΄λ‘œλ“œ λ²„νŠΌ
82
- with open(srt_file_path, "rb") as srt_file:
83
- st.download_button(label="SRT 파일 λ‹€μš΄λ‘œλ“œ", data=srt_file, file_name=os.path.basename(srt_file_path), mime="text/srt")
 
 
 
 
 
 
 
3
  import torch
4
  from transformers import WhisperForConditionalGeneration, WhisperProcessor
5
  import librosa
 
6
  import srt
7
  from datetime import timedelta
 
8
 
9
  # λͺ¨λΈ 및 ν”„λ‘œμ„Έμ„œ λ‘œλ“œ
10
  @st.cache_resource
 
17
 
18
  # μ›Ή μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μΈν„°νŽ˜μ΄μŠ€
19
  st.title("Whisper μžλ§‰ 생성기")
20
+ st.write("WAV νŒŒμΌμ„ μ—…λ‘œλ“œν•˜μ—¬ μžλ§‰μ„ μƒμ„±ν•˜μ„Έμš”.")
21
 
22
+ # μ—¬λŸ¬ WAV 파일 μ—…λ‘œλ“œ
23
+ uploaded_files = st.file_uploader("여기에 WAV νŒŒμΌλ“€μ„ λ“œλž˜κ·Έ μ•€ λ“œλ‘­ ν•˜μ„Έμš”", type=["wav"], accept_multiple_files=True)
24
 
25
+ # 파일 λͺ©λ‘μ„ λ³΄μ—¬μ€Œ
26
+ if uploaded_files:
27
+ st.write("μ—…λ‘œλ“œλœ 파일 λͺ©λ‘:")
28
+ for uploaded_file in uploaded_files:
29
+ st.write(uploaded_file.name)
 
 
30
 
31
+ # μ‹€ν–‰ λ²„νŠΌ
32
+ if st.button("μ‹€ν–‰"):
33
+ combined_subs = []
34
+ last_end_time = timedelta(0)
35
+ subtitle_index = 1
36
 
37
+ for uploaded_file in uploaded_files:
38
+ st.write(f"처리 쀑: {uploaded_file.name}")
39
 
40
+ # μ§„ν–‰λ°” μ΄ˆκΈ°ν™”
41
+ progress_bar = st.progress(0)
 
42
 
43
+ # WAV 파일 λ‘œλ“œ 및 처리
44
+ st.write("μ˜€λ””μ˜€ νŒŒμΌμ„ μ²˜λ¦¬ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
45
+ audio, sr = librosa.load(uploaded_file, sr=16000)
46
 
47
+ progress_bar.progress(50)
 
 
 
 
48
 
49
+ # Whisper λͺ¨λΈλ‘œ λ³€ν™˜
50
+ st.write("λͺ¨λΈμ„ 톡해 μžλ§‰μ„ μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
51
+ inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
52
+ with torch.no_grad():
53
+ predicted_ids = model.generate(inputs["input_features"], max_length=2048)
54
 
55
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
56
 
57
+ progress_bar.progress(80)
 
 
 
 
 
58
 
59
+ # SRT μžλ§‰ 생성
60
+ st.write("SRT νŒŒμΌμ„ μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
61
+ lines = transcription.split(". ")
62
+ step = len(audio) / sr / len(lines)
63
+ start_time = last_end_time
64
 
65
+ for line in lines:
66
+ end_time = start_time + timedelta(seconds=step)
67
+ combined_subs.append(
68
+ srt.Subtitle(index=subtitle_index, start=start_time, end=end_time, content=line)
69
+ )
70
+ start_time = end_time
71
+ subtitle_index += 1
72
 
73
+ last_end_time = start_time # λ‹€μŒ 파일의 μ‹œμž‘ μ‹œκ°„μ„ μ‘°μ •ν•˜κΈ° μœ„ν•΄ λ§ˆμ§€λ§‰ 끝 μ‹œκ°„μ„ 기둝
 
 
74
 
75
+ progress_bar.progress(100)
76
 
77
+ # λͺ¨λ“  μžλ§‰μ„ ν•˜λ‚˜μ˜ SRT 파일둜 μ €μž₯
78
+ st.write("μ΅œμ’… SRT νŒŒμΌμ„ μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
79
+ srt_content = srt.compose(combined_subs)
80
 
81
+ final_srt_file_path = "combined_output.srt"
82
+ with open(final_srt_file_path, "w", encoding="utf-8") as f:
83
+ f.write(srt_content)
84
+
85
+ st.success("μ΅œμ’… SRT 파일이 μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
86
+
87
+ # μ΅œμ’… SRT 파일 λ‹€μš΄λ‘œλ“œ λ²„νŠΌ
88
+ with open(final_srt_file_path, "rb") as srt_file:
89
+ st.download_button(label="SRT 파일 λ‹€μš΄λ‘œλ“œ", data=srt_file, file_name=final_srt_file_path, mime="text/srt")