# NOTE(review): the lines below were web-scrape residue from the Hugging Face
# Spaces file viewer (page chrome, git blame hashes, line-number gutter) and
# were not part of the program; preserved here as a comment so the file parses.
# Spaces: Sleeping / Sleeping / File size: 2,123 Bytes
import os
import numpy as np
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch
############### HF ###########################
# Hub token read from the HF_TOKEN environment variable; None if unset,
# in which case the dataset saver below will fail to authenticate.
HF_TOKEN = os.getenv("HF_TOKEN")
# Flagging callback: appends flagged (audio, transcription) samples to the
# "Urdu-ASR-flags" dataset repo on the Hub. Wired into main()'s Interface.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")
############## Inference ##############################
# ASR pipeline backed by wav2vec2 XLS-R fine-tuned for Urdu; presumably
# downloads the model weights on first construction — network required.
transcriber = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
def transcribe(audio):
    """Transcribe a Gradio audio input to Urdu text.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        ``(sample_rate, samples)`` as produced by ``gr.Audio`` with numpy
        output, or ``None`` if the user submitted without recording.

    Returns
    -------
    str
        The transcription produced by the ASR pipeline (empty string when
        no audio was provided).
    """
    if audio is None:
        # User hit submit without recording anything.
        return ""
    sr, y = audio
    # Stereo recordings arrive as a 2-D (samples, channels) array —
    # down-mix to mono before feeding the pipeline.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    # Guard against a silent clip: dividing by a zero peak would turn the
    # whole buffer into NaNs and corrupt the model input.
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
# NOTE(review): this module-level Interface is built but never launched;
# main() below constructs its own, richer Interface. `demo` looks redundant,
# unless an external runner (e.g. Hugging Face Spaces auto-loading a
# module-level `demo`) relies on it — confirm before removing.
demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"]),
    "text",
)
################### Gradio Web APP ################################
# UI copy consumed by the Interface built in main().
title = "Urdu Automatic Speech Recognition"
description = """
<p>
<center>
This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
</center>
</p>
"""
# Footer links rendered below the interface. The original text contained
# "π€" — CP1252 mojibake of the UTF-8 bytes for the 🤗 emoji — fixed here.
article = "<p style='text-align: center'><a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>Source Code on DagsHub</a></p><p style='text-align: center'><a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>Fine-tuning XLS-R for Multi-Lingual ASR with 🤗 Transformers</a></p></center><center><img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA' alt='visitor badge'></center></p>"
# Bundled audio clips offered as clickable examples under the interface.
examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]
def main():
    """Build the full Gradio interface (with Hub-backed flagging) and launch it."""
    iface = gr.Interface(
        transcribe,
        gr.Audio(sources=["microphone"]),
        "text",
        title=title,
        allow_flagging="manual",
        flagging_callback=hf_writer,
        description=description,
        article=article,
        examples=examples,
        theme="JohnSmith9982/small_and_pretty",
    )
    # `enable_queue` was removed from launch() in Gradio 4; the
    # `sources=["microphone"]` kwarg above implies Gradio 4+, so enable
    # queueing via .queue() instead of the old launch kwarg.
    iface.queue().launch()
    # To gate access, pass auth=("admin", "pass1234") to launch().
# Standard script entry point: launch the app only when run directly,
# not when imported (e.g. by a Spaces runner picking up `demo`).
if __name__ == "__main__":
    main()