"""Streamlit app: upload audio files, detect their language, and run live transcription and translation."""
import pickle
from io import BytesIO
from urllib.parse import quote

import streamlit as st
import torch

from audio_processing import detect_language, process_long_audio, load_and_resample_audio
from config import SAMPLING_RATE
from model_utils import load_models
# Free any cached GPU memory left over from a previous Streamlit rerun.
# Fix: empty_cache() raises on CPU-only installs — only call it when a
# CUDA device is actually available.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Load the transcription/translation models once at app startup.
load_models()
# --- Page header ---
st.title("Audio Player with Live Transcription and Translation")

# --- Sidebar: multi-file upload (mp3/wav only) plus an explicit submit ---
st.sidebar.header("Upload Audio Files")
uploaded_files = st.sidebar.file_uploader(
    "Choose audio files",
    type=["mp3", "wav"],
    accept_multiple_files=True,
)
submit_button = st.sidebar.button("Submit")
# Per-session containers holding uploaded files and derived results.
# Fix: the original only tested for 'audio_files', so a session where that
# key existed but the others did not (e.g. after a code reload) crashed on a
# missing key. setdefault initialises each key independently.
for _key, _default in (
    ("audio_files", []),
    ("transcriptions", {}),
    ("translations", {}),
    ("detected_languages", []),
    ("waveforms", []),
):
    st.session_state.setdefault(_key, _default)
# --- Process a new submission ---
# Fix: the multi-file uploader returns a (possibly empty) list, so the old
# `is not None` test was always True; truthiness skips empty submissions.
if submit_button and uploaded_files:
    st.session_state.audio_files = uploaded_files
    st.session_state.detected_languages = []
    st.session_state.waveforms = []
    # Fix: clear stale results from a previous submission — otherwise old
    # transcriptions/translations (keyed by index) display under new files.
    st.session_state.transcriptions = {}
    st.session_state.translations = {}
    for uploaded_file in uploaded_files:
        # Decode + resample the raw bytes once; the cached waveform is
        # reused for language detection, transcription and translation.
        waveform = load_and_resample_audio(BytesIO(uploaded_file.read()))
        st.session_state.waveforms.append(waveform)
        st.session_state.detected_languages.append(detect_language(waveform))
# --- Display each uploaded file with transcribe/translate actions ---
if st.session_state.get("audio_files"):
    for i, uploaded_file in enumerate(st.session_state.audio_files):
        st.write(f"**File name**: {uploaded_file.name}")
        st.audio(uploaded_file, format=uploaded_file.type)
        st.write(f"**Detected Language**: {st.session_state.detected_languages[i]}")

        col1, col2 = st.columns(2)

        with col1:
            if st.button(f"Transcribe {uploaded_file.name}"):
                with st.spinner("Transcribing..."):
                    transcription = process_long_audio(
                        st.session_state.waveforms[i], SAMPLING_RATE
                    )
                    st.session_state.transcriptions[i] = transcription
            if st.session_state.transcriptions.get(i):
                st.write("**Transcription**:")
                st.text_area(
                    "",
                    st.session_state.transcriptions[i],
                    height=200,
                    key=f"transcription_{i}",
                )
                # Fix: percent-encode the payload — raw '#', '%', '&', '<'
                # or newlines in the text would otherwise truncate/break the
                # data: URI and leak unescaped text into the raw HTML.
                payload = quote(st.session_state.transcriptions[i])
                st.markdown(
                    f'<div style="text-align: right;">'
                    f'<a href="data:text/plain;charset=UTF-8,{payload}" '
                    f'download="transcription_{uploaded_file.name}.txt">'
                    f"Download Transcription</a></div>",
                    unsafe_allow_html=True,
                )

        with col2:
            if st.button(f"Translate {uploaded_file.name}"):
                with st.spinner("Translating..."):
                    # Map the detected language code to its display name.
                    # NOTE(review): languages.pkl is a local artifact loaded
                    # via pickle — safe only as long as it ships with the app.
                    with open("languages.pkl", "rb") as f:
                        lang_dict = pickle.load(f)
                    detected_language_name = lang_dict[
                        st.session_state.detected_languages[i]
                    ]
                    translation = process_long_audio(
                        st.session_state.waveforms[i],
                        SAMPLING_RATE,
                        task="translate",
                        language=detected_language_name,
                    )
                    st.session_state.translations[i] = translation
            if st.session_state.translations.get(i):
                st.write("**Translation**:")
                st.text_area(
                    "",
                    st.session_state.translations[i],
                    height=200,
                    key=f"translation_{i}",
                )
                # Same encoding fix as the transcription download link.
                payload = quote(st.session_state.translations[i])
                st.markdown(
                    f'<div style="text-align: right;">'
                    f'<a href="data:text/plain;charset=UTF-8,{payload}" '
                    f'download="translation_{uploaded_file.name}.txt">'
                    f"Download Translation</a></div>",
                    unsafe_allow_html=True,
                )