# NOTE(review): the lines below were web-scrape residue from the Hugging Face
# Spaces file viewer (page chrome, git blame hashes, line-number gutter) and
# were not part of the program; preserved here as a comment so the file parses.
# Spaces: Sleeping / Sleeping / File size: 2,123 Bytes
import os
import numpy as np
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch
############### HF ###########################
# Hub token read from the HF_TOKEN environment variable; None if unset,
# in which case the dataset saver below will fail to authenticate.
HF_TOKEN = os.getenv("HF_TOKEN")
# Flagging callback: appends flagged (audio, transcription) samples to the
# "Urdu-ASR-flags" dataset repo on the Hub. Wired into main()'s Interface.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")
############## Inference ##############################
# ASR pipeline backed by wav2vec2 XLS-R fine-tuned for Urdu; presumably
# downloads the model weights on first construction — network required.
transcriber = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
def transcribe(audio):
    """Transcribe a Gradio audio input to Urdu text.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        ``(sample_rate, samples)`` as produced by ``gr.Audio`` with numpy
        output, or ``None`` if the user submitted without recording.

    Returns
    -------
    str
        The transcription produced by the ASR pipeline (empty string when
        no audio was provided).
    """
    if audio is None:
        # User hit submit without recording anything.
        return ""
    sr, y = audio
    # Stereo recordings arrive as a 2-D (samples, channels) array —
    # down-mix to mono before feeding the pipeline.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    # Guard against a silent clip: dividing by a zero peak would turn the
    # whole buffer into NaNs and corrupt the model input.
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
# NOTE(review): this module-level Interface is built but never launched;
# main() below constructs its own, richer Interface. `demo` looks redundant,
# unless an external runner (e.g. Hugging Face Spaces auto-loading a
# module-level `demo`) relies on it — confirm before removing.
demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"]),
    "text",
)
################### Gradio Web APP ################################
# UI copy consumed by the Interface built in main().
title = "Urdu Automatic Speech Recognition"
description = """
<p>
<center>
This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
</center>
</p>
"""
# Footer links rendered below the interface. The original text contained
# "π€" — CP1252 mojibake of the UTF-8 bytes for the 🤗 emoji — fixed here.
article = "<p style='text-align: center'><a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>Source Code on DagsHub</a></p><p style='text-align: center'><a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>Fine-tuning XLS-R for Multi-Lingual ASR with 🤗 Transformers</a></p></center><center><img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA' alt='visitor badge'></center></p>"
# Bundled audio clips offered as clickable examples under the interface.
examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]
def main():
    """Build the full Gradio interface (with Hub-backed flagging) and launch it."""
    iface = gr.Interface(
        transcribe,
        gr.Audio(sources=["microphone"]),
        "text",
        title=title,
        allow_flagging="manual",
        flagging_callback=hf_writer,
        description=description,
        article=article,
        examples=examples,
        theme="JohnSmith9982/small_and_pretty",
    )
    # `enable_queue` was removed from launch() in Gradio 4; the
    # `sources=["microphone"]` kwarg above implies Gradio 4+, so enable
    # queueing via .queue() instead of the old launch kwarg.
    iface.queue().launch()
    # To gate access, pass auth=("admin", "pass1234") to launch().
# Standard script entry point: launch the app only when run directly,
# not when imported (e.g. by a Spaces runner picking up `demo`).
if __name__ == "__main__":
    main()