import os
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch
############### HF ###########################
# Read the Hugging Face access token from the environment rather than embedding
# it in the source: a token committed to the repository is exposed to every
# reader and has to be revoked. Because the value now comes from the
# environment, os.environ["HF_TOKEN"] is already populated and needs no
# write-back.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "The HF_TOKEN environment variable is not set; it is required to "
        "save flagged samples to the 'Urdu-ASR-flags' dataset."
    )
# Flagging callback that stores flagged samples in the "Urdu-ASR-flags" dataset.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")
############## DagsHub ################################
# Model identifier handed to `pipeline(..., model=Model)` inside asr().
Model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
# The DagsHub streaming hooks below are disabled: they stopped working after
# Hugging Face completely changed its git server.
# from dagshub.streaming import install_hooks
# install_hooks()
############## Inference ##############################
# Lazily created speech-recognition pipeline shared by every call to asr().
_ASR_PIPELINE = None


def _get_asr_pipeline():
    """Return the shared ASR pipeline, building it on first use."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        # Building the pipeline loads the model, so do it once instead of on
        # every request.
        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model=Model)
    return _ASR_PIPELINE


def asr(audio):
    """Transcribe ``audio`` with the model named by ``Model``.

    Args:
        audio: The input forwarded unchanged to the speech-recognition
            pipeline (presumably a file path or array supplied by the Gradio
            audio component -- confirm against the interface definition).
            ``None`` means no audio was provided.

    Returns:
        The pipeline's ``"text"`` result normalized to Unicode NFC, or an
        empty string when ``audio`` is ``None``.
    """
    if audio is None:
        # Nothing to transcribe; avoid handing None to the pipeline.
        return ""
    # The audio is processed in 30-second chunks.
    prediction = _get_asr_pipeline()(audio, chunk_length_s=30)
    return unicodedata.normalize("NFC", prediction["text"])
################### Gradio Web APP ################################
#
title = "Automatic Speech Recognition System for Urdu Language"
description = """