Spaces:
Sleeping
Sleeping
File size: 1,223 Bytes
fdf092c cd7ce5f fdf092c cd7ce5f fdf092c cd7ce5f fdf092c cd7ce5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import streamlit as st
import numpy as np
import torch
from transformers import pipeline
import librosa
# Load the pipelines
@st.cache_resource
def _load_pipelines():
    """Build the ASR, translation and TTS pipelines once and reuse them.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loader keeps the three models from being
    instantiated again on each rerun.

    Returns:
        A ``(asr, translation, tts)`` tuple of ``transformers`` pipelines.
    """
    asr = pipeline(
        "automatic-speech-recognition",
        model="alvanlii/whisper-small-cantonese",
        # Process long uploads in 30-second chunks instead of handing the
        # model a single over-long input.
        chunk_length_s=30,
    )
    translation = pipeline(
        "translation",
        model="raptorkwok/cantonese-chinese-translation",
    )
    tts = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")
    return asr, translation, tts


asr_pipe, translation_pipe, tts_pipe = _load_pipelines()

# Streamlit UI
st.title("Cantonese to Chinese Translator")
st.write("Upload your Cantonese audio file (WAV format) below.")

# File upload
uploaded_file = st.file_uploader("Choose a WAV file", type="wav")

if uploaded_file is not None:
    # Load the audio file, resampled to 16 kHz
    audio, sr = librosa.load(uploaded_file, sr=16000)

    # Recognize Cantonese speech. The waveform is passed as a numpy array
    # together with its sampling rate (the pipeline's documented dict input)
    # rather than as a torch tensor.
    result = asr_pipe({"raw": audio, "sampling_rate": sr})
    cantonese_text = result["text"]
    st.write(f"Cantonese Text: {cantonese_text}")

    if not cantonese_text.strip():
        # Nothing was transcribed, so there is nothing to translate or speak.
        st.warning("No speech was recognized in the uploaded audio.")
    else:
        # Translate Cantonese to Chinese
        chinese_text = translation_pipe(cantonese_text)[0]["translation_text"]
        st.write(f"Chinese Text: {chinese_text}")

        # Convert Chinese text to speech
        tts_output = tts_pipe(chinese_text)

        # Play back the Chinese output. The TTS result is a raw waveform, so
        # st.audio needs the sampling rate to encode it for the browser.
        st.audio(
            tts_output["audio"],
            format="audio/wav",
            sample_rate=tts_output["sampling_rate"],
        )