lcjln committed (verified)
Commit d20cd0c · Parent(s): 8f5fb37

Update app.py

Files changed (1): app.py (+39 -27)
app.py CHANGED

@@ -1,7 +1,7 @@
 import os
 import streamlit as st
-import torch
 from transformers import WhisperForConditionalGeneration, WhisperProcessor
+import torch
 import librosa
 import srt
 from datetime import timedelta
@@ -15,9 +15,8 @@ def load_model():
 
 model, processor = load_model()
 
-# Web application interface
+# Streamlit web application interface
 st.title("Whisper Subtitle Generator")
-st.write("Upload WAV files to generate subtitles.")
 
 # Upload multiple WAV files
 uploaded_files = st.file_uploader("Drag and drop WAV files here", type=["wav"], accept_multiple_files=True)
@@ -48,31 +47,37 @@ if uploaded_files:
 
         # Transcribe with the Whisper model
         st.write("Generating subtitles with the model...")
-        inputs = processor(audio, return_tensors="pt", sampling_rate=16000)
-        with torch.no_grad():
-            predicted_ids = model.generate(inputs["input_features"], max_length=2048)
-
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0].strip()
-
-        progress_bar.progress(80)
-
-        # Generate the SRT subtitles
-        st.write("Generating the SRT file...")
-        lines = transcription.split(". ")
-        step = len(audio) / sr / len(lines)
-        start_time = last_end_time
-
-        for line in lines:
-            end_time = start_time + timedelta(seconds=step)
-            combined_subs.append(
-                srt.Subtitle(index=subtitle_index, start=start_time, end=end_time, content=line)
-            )
-            start_time = end_time
-            subtitle_index += 1
-
-        last_end_time = start_time  # Record the last end time so the next file's timing starts from here
+        segments = split_audio(audio, sr, segment_duration=5)
+
+        for i, segment in enumerate(segments):
+            inputs = processor(segment, return_tensors="pt", sampling_rate=16000)
+            with torch.no_grad():
+                outputs = model.generate(inputs["input_features"], max_length=2048, return_dict_in_generate=True, output_scores=True)
+
+            # Decode the text
+            transcription = processor.batch_decode(outputs.sequences, skip_special_tokens=True)[0].strip()
+
+            # Compute a confidence score (used for additional confidence filtering)
+            avg_logit_score = torch.mean(outputs.scores[-1]).item()
+
+            # Skip the segment if the confidence score is low or the text is empty
+            if transcription and avg_logit_score > -5.0:
+                segment_duration = librosa.get_duration(y=segment, sr=sr)
+                end_time = last_end_time + timedelta(seconds=segment_duration)
+
+                combined_subs.append(
+                    srt.Subtitle(
+                        index=subtitle_index,
+                        start=last_end_time,
+                        end=end_time,
+                        content=transcription
+                    )
+                )
+                last_end_time = end_time
+                subtitle_index += 1
 
         progress_bar.progress(100)
+        st.success(f"Subtitles for {uploaded_file.name} were generated successfully!")
 
     # Save all subtitles into a single SRT file
     st.write("Generating the final SRT file...")
@@ -86,4 +91,11 @@ if uploaded_files:
 
     # Final SRT file download button
     with open(final_srt_file_path, "rb") as srt_file:
-        st.download_button(label="Download SRT file", data=srt_file, file_name=final_srt_file_path, mime="text/srt")
+        st.download_button(label="Download SRT file", data=srt_file, file_name=final_srt_file_path, mime="text/srt")
+
+def split_audio(audio, sr, segment_duration=5):
+    segments = []
+    for i in range(0, len(audio), int(segment_duration * sr)):
+        segment = audio[i:i + int(segment_duration * sr)]
+        segments.append(segment)
+    return segments
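For context, here is a minimal, self-contained sketch of the flow the updated app.py follows: split the audio into fixed-length chunks, transcribe each chunk with Whisper, drop empty or low-confidence results, and append the rest to a running SRT timeline. It is an illustration rather than the app's exact code: the function name `transcribe_to_subtitles`, the `openai/whisper-small` checkpoint, and the `example.wav` input are placeholders (the commit does not show which checkpoint `load_model()` uses), and the `split_audio` helper is simply defined above the code that calls it.

```python
from datetime import timedelta

import librosa
import srt
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor


def split_audio(audio, sr, segment_duration=5):
    """Split a 1-D audio array into consecutive chunks of segment_duration seconds."""
    step = int(segment_duration * sr)
    return [audio[i:i + step] for i in range(0, len(audio), step)]


def transcribe_to_subtitles(audio, sr, model, processor, score_threshold=-5.0):
    """Transcribe each chunk and collect the surviving results as srt.Subtitle entries."""
    subs, last_end, index = [], timedelta(0), 1
    for segment in split_audio(audio, sr, segment_duration=5):
        inputs = processor(segment, sampling_rate=16000, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_features"],
                return_dict_in_generate=True,
                output_scores=True,
            )
        text = processor.batch_decode(outputs.sequences, skip_special_tokens=True)[0].strip()

        # Same rough confidence proxy as in the commit: mean of the last step's logits.
        confidence = torch.mean(outputs.scores[-1]).item()
        if not text or confidence <= score_threshold:
            continue  # drop empty or low-confidence segments

        end = last_end + timedelta(seconds=librosa.get_duration(y=segment, sr=sr))
        subs.append(srt.Subtitle(index=index, start=last_end, end=end, content=text))
        last_end, index = end, index + 1
    return subs


if __name__ == "__main__":
    # "openai/whisper-small" and "example.wav" are placeholders, not values from the app.
    processor = WhisperProcessor.from_pretrained("openai/whisper-small")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
    audio, sr = librosa.load("example.wav", sr=16000)
    print(srt.compose(transcribe_to_subtitles(audio, sr, model, processor)))
```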
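On the scoring side, `torch.mean(outputs.scores[-1])` averages the raw logits of the final generation step over the entire vocabulary, so the -5.0 cut-off is a fairly blunt heuristic. If a finer-grained measure is ever wanted, one alternative (not used in this commit) is the `compute_transition_scores` helper that transformers exposes on generation models; the sketch below averages the log-probabilities of the tokens that were actually generated. The -1.0 threshold shown is illustrative only.

```python
import torch


def mean_token_logprob(model, outputs):
    """Average log-probability of the generated tokens (greedy decoding assumed)."""
    # With normalize_logits=True the returned transition scores are log-probabilities,
    # one per generated token, with shape (batch, generated_length).
    transition_scores = model.compute_transition_scores(
        outputs.sequences, outputs.scores, normalize_logits=True
    )
    return transition_scores.mean().item()


# Possible use inside the per-segment loop (illustrative threshold):
# if transcription and mean_token_logprob(model, outputs) > -1.0:
#     ...
```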