Spaces:
Sleeping
Sleeping
File size: 1,664 Bytes
a18c706 52d87a4 a18c706 52d87a4 a18c706 52d87a4 a18c706 52d87a4 a18c706 52d87a4 a18c706 52d87a4 5b6f753 52d87a4 5b6f753 52d87a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import librosa
import numpy as np
def preprocess_audio(file_path):
    """Load an audio file and return its mel spectrogram.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        The value produced by ``librosa.feature.melspectrogram`` for the
        loaded signal.
    """
    # sr=16000 asks librosa to deliver the signal at 16 kHz.
    waveform, sample_rate = librosa.load(file_path, sr=16000)
    return librosa.feature.melspectrogram(y=waveform, sr=sample_rate)
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer, MarianMTModel, MarianTokenizer
# Checkpoint identifiers for the two pre-trained pipelines loaded at import time.
_SPEECH_CHECKPOINT = "facebook/wav2vec2-large-xlsr-53"
_TRANSLATION_CHECKPOINT = "Helsinki-NLP/opus-mt-en-hi"

# Speech recognition: CTC model plus the tokenizer used to encode audio and
# decode predicted ids.
# NOTE(review): this checkpoint may be a pretraining-only release without a
# fine-tuned CTC head/vocabulary -- confirm it yields usable transcriptions.
speech_to_text_model = Wav2Vec2ForCTC.from_pretrained(_SPEECH_CHECKPOINT)
speech_to_text_tokenizer = Wav2Vec2Tokenizer.from_pretrained(_SPEECH_CHECKPOINT)

# Text translation (checkpoint name suggests English -> Hindi).
translation_model = MarianMTModel.from_pretrained(_TRANSLATION_CHECKPOINT)
translation_tokenizer = MarianTokenizer.from_pretrained(_TRANSLATION_CHECKPOINT)
def translate_audio(file_path):
    """Transcribe an audio file and translate the transcription.

    The audio is decoded to a waveform, transcribed with the module-level
    speech-to-text model, and the resulting text is translated with the
    module-level translation model.

    Args:
        file_path: Path of the audio file to transcribe and translate.

    Returns:
        The translated text for the first (only) decoded sequence.
    """
    # The speech model consumes raw audio samples, not a path string and not
    # a mel spectrogram, so decode the file to a waveform first. 16 kHz is
    # the same rate preprocess_audio uses.
    waveform, _ = librosa.load(file_path, sr=16000)

    # Convert speech to text: greedy decoding picks the highest-scoring
    # token id at every frame.
    audio_input = speech_to_text_tokenizer(waveform, return_tensors="pt").input_values
    logits = speech_to_text_model(audio_input).logits
    predicted_ids = logits.argmax(dim=-1)
    transcription = speech_to_text_tokenizer.batch_decode(predicted_ids)[0]

    # Translate the transcription.
    translation_input = translation_tokenizer(transcription, return_tensors="pt")
    translated_output = translation_model.generate(**translation_input)
    return translation_tokenizer.batch_decode(
        translated_output, skip_special_tokens=True
    )[0]
import datetime
# Fixed UTC+05:30 offset for Indian Standard Time; India observes no DST.
_IST = datetime.timezone(datetime.timedelta(hours=5, minutes=30), "IST")


def should_translate():
    """Return True when the current time in IST is 6 PM or later.

    The hour is evaluated in Indian Standard Time rather than the host's
    local time, so the result matches the "after 6 PM IST" message shown to
    users regardless of the server's timezone setting.

    Returns:
        bool: True from 18:00 up to midnight IST, False otherwise.
    """
    now = datetime.datetime.now(_IST)
    return now.hour >= 18
def handle_translation(file_path):
    """Translate the audio file if translation is currently allowed.

    Args:
        file_path: Path of the audio file, forwarded to ``translate_audio``.

    Returns:
        The result of ``translate_audio(file_path)`` when
        ``should_translate()`` is true, otherwise a fixed notice string.
    """
    # Guard clause: outside the allowed window, answer with the notice only.
    if not should_translate():
        return "Translation is only available after 6 PM IST."
    return translate_audio(file_path)
|