from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torch
import gradio as gr
import torchaudio
# load model and processor
processor = Wav2Vec2Processor.from_pretrained("maher13/arabic-iti")
model = Wav2Vec2ForCTC.from_pretrained("maher13/arabic-iti").eval()
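# optional sanity check (an assumption worth verifying for any checkpoint):
# the processor's feature extractor should report the 16 kHz rate assumed below
# assert processor.feature_extractor.sampling_rate == 16000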
# read an audio file and return mono 16 kHz speech as a numpy array
def map_to_array(file):
    speech, sr = torchaudio.load(file)
    # wav2vec2 was trained on 16 kHz audio, so resample anything else
    if sr != 16000:
        transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
        speech = transform(speech)
    # keep the first channel and hand back a numpy array
    speech = speech[0]
    return speech.numpy()
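# e.g. map_to_array("clip.wav") -> 1-D float32 array at 16 kHz
# ("clip.wav" is a hypothetical path, used only for illustration)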
# run a forward pass and greedily decode the CTC output
def transcribe(path):
    input_values = processor(
        map_to_array(path), sampling_rate=16000, return_tensors="pt", padding="longest"
    ).input_values  # batch size 1
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.tokenizer.batch_decode(predicted_ids)[0]

def inference(audio_file, audio_file2):
    # each input is an optional file object from gradio; .name holds its path
    transcription1 = transcribe(audio_file.name) if audio_file else "N/A"
    transcription2 = transcribe(audio_file2.name) if audio_file2 else "N/A"
    return transcription1, transcription2
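# quick local check without the UI (hypothetical path; gradio's "file" type
# passes objects whose .name attribute holds the temp file's path):
#   import types
#   print(inference(types.SimpleNamespace(name="clip.wav"), None))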
gradio_ui = gr.Interface(
    fn=inference,
    title="Speech to Text Graduation project \n sponsored by TensorGraph",
    inputs=[
        gr.inputs.Audio(source="microphone", type="file", optional=True),
        gr.inputs.Audio(source="upload", type="file", optional=True),
    ],
    outputs=[
        gr.outputs.Textbox(label="Auto-Transcript (microphone)"),
        gr.outputs.Textbox(label="Auto-Transcript (upload)"),
    ],
)
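# share=True also requests a temporary public gradio.live link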
gradio_ui.launch(share=True)