Update app.py
app.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import streamlit as st
-import torch
 from transformers import WhisperForConditionalGeneration, WhisperProcessor
+import torch
 import librosa
 import srt
 from datetime import timedelta
@@ -15,9 +15,8 @@ def load_model():
 
 model, processor = load_model()
 
-# Web application interface
+# Streamlit web application interface
 st.title("Whisper Subtitle Generator")
-st.write("Upload WAV files to generate subtitles.")
 
 # Upload multiple WAV files
 uploaded_files = st.file_uploader("Drag and drop WAV files here", type=["wav"], accept_multiple_files=True)
@@ -48,31 +47,37 @@ if uploaded_files:
 
         # Transcribe with the Whisper model
        st.write("Generating subtitles with the model...")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        segments = split_audio(audio, sr, segment_duration=5)
+
+        for i, segment in enumerate(segments):
+            inputs = processor(segment, return_tensors="pt", sampling_rate=16000)
+            with torch.no_grad():
+                outputs = model.generate(inputs["input_features"], max_length=2048, return_dict_in_generate=True, output_scores=True)
+
+            # Decode the text
+            transcription = processor.batch_decode(outputs.sequences, skip_special_tokens=True)[0].strip()
+
+            # Compute a confidence score (used for additional confidence filtering)
+            avg_logit_score = torch.mean(outputs.scores[-1]).item()
+
+            # Skip the segment if the confidence score is low or the text is empty
+            if transcription and avg_logit_score > -5.0:
+                segment_duration = librosa.get_duration(y=segment, sr=sr)
+                end_time = last_end_time + timedelta(seconds=segment_duration)
+
+                combined_subs.append(
+                    srt.Subtitle(
+                        index=subtitle_index,
+                        start=last_end_time,
+                        end=end_time,
+                        content=transcription
+                    )
+                )
+                last_end_time = end_time
+                subtitle_index += 1
 
         progress_bar.progress(100)
+        st.success(f"Subtitles for {uploaded_file.name} were generated successfully!")
 
     # Save all subtitles into a single SRT file
     st.write("Generating the final SRT file...")
@@ -86,4 +91,11 @@ if uploaded_files:
 
     # Final SRT file download button
     with open(final_srt_file_path, "rb") as srt_file:
-        st.download_button(label="Download SRT file", data=srt_file, file_name=final_srt_file_path, mime="text/srt")
+        st.download_button(label="Download SRT file", data=srt_file, file_name=final_srt_file_path, mime="text/srt")
+
+def split_audio(audio, sr, segment_duration=5):
+    segments = []
+    for i in range(0, len(audio), int(segment_duration * sr)):
+        segment = audio[i:i + int(segment_duration * sr)]
+        segments.append(segment)
+    return segments
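
Note on the confidence filter added above: with return_dict_in_generate=True and output_scores=True, outputs.scores is a tuple holding one logits tensor per generated token over the whole vocabulary, so torch.mean(outputs.scores[-1]) averages the final step's full-vocabulary logits. A per-token alternative is compute_transition_scores from the same transformers generation API. The sketch below is only an illustration of that API, not part of this commit; the whisper-tiny checkpoint, the silent dummy segment, and the printout are assumptions.

# Illustrative sketch: per-token confidence via compute_transition_scores.
# The model choice, dummy audio, and threshold below are assumptions, not app.py code.
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

segment = np.zeros(16000 * 5, dtype=np.float32)  # stand-in for one 5-second chunk
inputs = processor(segment, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    outputs = model.generate(
        inputs["input_features"],
        return_dict_in_generate=True,
        output_scores=True,
    )

# Log-probability of each token that was actually generated (one value per step),
# instead of the mean over the final step's full-vocabulary logits.
token_scores = model.compute_transition_scores(
    outputs.sequences, outputs.scores, normalize_logits=True
)
avg_logprob = token_scores.mean().item()
text = processor.batch_decode(outputs.sequences, skip_special_tokens=True)[0].strip()
print(f"{avg_logprob:.2f}  {text!r}")

With normalized log-probabilities the cutoff would sit near 0 (for example around -1.0) rather than the -5.0 raw-logit threshold used in the diff; the exact value would need to be tuned empirically.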
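
The new hunks build up combined_subs, last_end_time, and subtitle_index, but the code that actually writes final_srt_file_path lies outside the changed lines. As a rough sketch of how the srt package composes such a list into SubRip text (the file name and example contents below are assumptions, not the repository's code):

# Hypothetical example of turning a list shaped like combined_subs into an .srt file.
from datetime import timedelta

import srt

combined_subs = [
    srt.Subtitle(index=1, start=timedelta(seconds=0), end=timedelta(seconds=5),
                 content="first segment"),
    srt.Subtitle(index=2, start=timedelta(seconds=5), end=timedelta(seconds=10),
                 content="second segment"),
]

final_srt_file_path = "combined.srt"  # placeholder name, not the app's actual path
with open(final_srt_file_path, "w", encoding="utf-8") as f:
    f.write(srt.compose(combined_subs))  # renders numbered cues with HH:MM:SS,mmm timestamps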