Update app.py
Browse files
app.py
CHANGED
@@ -3,10 +3,8 @@ import streamlit as st
|
|
3 |
import torch
|
4 |
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
5 |
import librosa
|
6 |
-
import moviepy.editor as mp
|
7 |
import srt
|
8 |
from datetime import timedelta
|
9 |
-
from tempfile import NamedTemporaryFile
|
10 |
|
11 |
# 모델 및 프로세서 로드
|
12 |
@st.cache_resource
|
@@ -19,65 +17,73 @@ model, processor = load_model()
|
|
19 |
|
20 |
# 웹 애플리케이션 인터페이스
|
21 |
st.title("Whisper μλ§ μμ±κΈ°")
|
22 |
-
st.write("
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
st.write("μμμ μ²λ¦¬νλ μ€μ
λλ€...")
|
33 |
-
progress_bar.progress(10)
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
|
|
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
audio, sr = librosa.load(audio_path, sr=16000)
|
46 |
|
47 |
-
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
st.write("λͺ¨λΈμ ν΅ν΄ μλ§μ μμ±νλ μ€μ
λλ€...")
|
51 |
-
inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
|
52 |
-
with torch.no_grad():
|
53 |
-
predicted_ids = model.generate(inputs["input_features"], max_length=2048)
|
54 |
|
55 |
-
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
58 |
|
59 |
-
|
60 |
-
st.write("SRT νμΌμ μμ±νλ μ€μ
λλ€...")
|
61 |
-
lines = transcription.split(". ")
|
62 |
-
subs = []
|
63 |
-
step = len(audio) / sr / len(lines)
|
64 |
-
start_time = 0.0
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
70 |
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
with open(srt_file_path, "w", encoding="utf-8") as f:
|
75 |
-
f.write(srt_content)
|
76 |
|
77 |
-
|
78 |
|
79 |
-
|
|
|
|
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import torch
|
4 |
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
5 |
import librosa
|
|
|
6 |
import srt
|
7 |
from datetime import timedelta
|
|
|
8 |
|
9 |
# 모델 및 프로세서 로드
|
10 |
@st.cache_resource
|
|
|
17 |
|
18 |
# 웹 애플리케이션 인터페이스
|
19 |
# Streamlit front end: upload one or more WAV files, transcribe each with
# Whisper, and offer a single combined SRT whose timestamps continue from one
# file to the next.
#
# NOTE(review): the Korean UI strings below were reconstructed from
# mis-encoded text (UTF-8 bytes rendered as ISO-8859-7); confirm against the
# deployed app before merging.
st.title("Whisper 자막 생성기")
st.write("WAV 파일을 업로드하여 자막을 생성하세요.")

# Sample rate the audio is resampled to before it is handed to the processor.
TARGET_SAMPLE_RATE = 16000
# The Whisper feature extractor pads/truncates every call to one 30-second
# window, so anything longer has to be fed to the model window by window;
# otherwise the tail of the recording is silently dropped.
WHISPER_WINDOW_SECONDS = 30
WINDOW_SAMPLES = WHISPER_WINDOW_SECONDS * TARGET_SAMPLE_RATE


def _transcribe_window(samples):
    """Return the stripped transcription of one clip of at most 30 seconds.

    Uses the module-level ``processor`` and ``model`` created earlier in the
    file.
    """
    inputs = processor(
        samples, return_tensors="pt", sampling_rate=TARGET_SAMPLE_RATE
    )
    with torch.no_grad():
        predicted_ids = model.generate(inputs["input_features"], max_length=2048)
    decoded = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return decoded[0].strip()


def _window_subtitles(text, window_start, window_seconds, first_index):
    """Spread the sentences of ``text`` evenly across one audio window.

    Args:
        text: Transcription of the window.
        window_start: ``timedelta`` offset of the window in the combined
            timeline.
        window_seconds: Duration of the window in seconds.
        first_index: SRT index to give the first subtitle produced.

    Returns:
        A list of ``srt.Subtitle`` objects; empty when ``text`` contains no
        non-blank sentence.
    """
    sentences = [part for part in text.split(". ") if part.strip()]
    if not sentences:
        return []
    # Whisper's own timestamps are not used here; each sentence simply gets an
    # equal share of the window it was heard in.
    step = timedelta(seconds=window_seconds / len(sentences))
    return [
        srt.Subtitle(
            index=first_index + position,
            start=window_start + position * step,
            end=window_start + (position + 1) * step,
            content=sentence,
        )
        for position, sentence in enumerate(sentences)
    ]


# Upload widget for multiple WAV files.
uploaded_files = st.file_uploader(
    "여기에 WAV 파일들을 드래그 앤 드롭 하세요",
    type=["wav"],
    accept_multiple_files=True,
)

# Show the names of the files that were uploaded.
if uploaded_files:
    st.write("업로드된 파일 목록:")
    for uploaded_file in uploaded_files:
        st.write(uploaded_file.name)

# Run button.
if st.button("실행"):
    if not uploaded_files:
        # Without this guard an empty SRT would be produced and reported as a
        # success.
        st.warning("먼저 WAV 파일을 업로드하세요.")
    else:
        combined_subs = []
        # Where the next file starts in the combined timeline.
        last_end_time = timedelta(0)

        for uploaded_file in uploaded_files:
            st.write(f"처리 중: {uploaded_file.name}")
            progress_bar = st.progress(0)

            # Decode the upload and resample it to the rate Whisper expects.
            st.write("오디오 파일을 처리하는 중입니다...")
            audio, sr = librosa.load(uploaded_file, sr=TARGET_SAMPLE_RATE)
            total_samples = len(audio)
            progress_bar.progress(50)

            # Transcribe window by window so recordings longer than 30 seconds
            # are covered in full.
            st.write("모델을 통해 자막을 생성하는 중입니다...")
            window_texts = []
            for offset in range(0, total_samples, WINDOW_SAMPLES):
                clip = audio[offset:offset + WINDOW_SAMPLES]
                window_texts.append((offset, len(clip), _transcribe_window(clip)))
                done = offset + len(clip)
                # Transcription accounts for the 50% -> 80% stretch of the bar.
                progress_bar.progress(50 + int(30 * done / total_samples))
            progress_bar.progress(80)

            # Turn every window's text into timed subtitles.
            st.write("SRT 파일을 생성하는 중입니다...")
            for offset, clip_samples, text in window_texts:
                combined_subs.extend(
                    _window_subtitles(
                        text,
                        last_end_time + timedelta(seconds=offset / sr),
                        clip_samples / sr,
                        len(combined_subs) + 1,
                    )
                )

            # Shift the timeline by this file's full duration so the next
            # file's subtitles follow on from it.
            last_end_time += timedelta(seconds=total_samples / sr)
            progress_bar.progress(100)

        # Compose every subtitle into one SRT document.
        st.write("최종 SRT 파일을 생성하는 중입니다...")
        srt_content = srt.compose(combined_subs)

        st.success("최종 SRT 파일이 성공적으로 생성되었습니다!")

        # Serve the SRT straight from memory: a fixed file name in the working
        # directory is shared by every session of the app and can be
        # overwritten by another user between the write and the read.
        st.download_button(
            label="SRT 파일 다운로드",
            data=srt_content,
            file_name="combined_output.srt",
            mime="text/srt",
        )
|