In [1]:
import gradio as gr
import librosa
import numpy as np
import pywt
import nbimporter
from scipy.signal import butter, lfilter, wiener
from scipy.io.wavfile import write
from transformers import pipeline
from text2int import text_to_int
from isNumber import is_number
from Text2List import text_to_list
from convert2list import convert_to_list
from processDoubles import process_doubles
from replaceWords import replace_words

# Speech-recognition pipeline built once at module load; recognize_speech()
# looks it up by this global name on every call.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-tamil_new",
)

def high_pass_filter(audio, sr, cutoff=300):
    """Apply a first-order Butterworth high-pass filter to ``audio``.

    Args:
        audio: Samples to filter; handed unchanged to ``scipy.signal.lfilter``.
        sr: Sampling rate of ``audio``, in the same unit as ``cutoff``.
        cutoff: Corner frequency of the filter. Defaults to 300.

    Returns:
        The filtered samples as returned by ``lfilter``.
    """
    # With analog=False and no ``fs`` argument, ``butter`` takes the corner
    # frequency as a fraction of the Nyquist frequency (half the sample rate).
    nyquist = 0.5 * sr
    numerator, denominator = butter(
        1, cutoff / nyquist, btype='high', analog=False
    )
    return lfilter(numerator, denominator, audio)

def wavelet_denoise(audio, wavelet='db1', level=1):
    """Denoise a 1-D signal by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec`` (periodization mode, default
    depth), a noise level is estimated from one detail band, every detail band
    is soft-thresholded with the resulting universal threshold, and the signal
    is reconstructed.

    Args:
        audio: 1-D sequence of samples.
        wavelet: Wavelet name passed to PyWavelets. Defaults to ``'db1'``.
        level: Which detail band, counted from the finest (``coeffs[-level]``),
            is used for the noise estimate. It does not set the decomposition
            depth.

    Returns:
        The denoised signal, with exactly ``len(audio)`` samples. Empty input
        is returned unchanged (as an array).
    """
    n_samples = len(audio)
    if n_samples == 0:
        # Nothing to decompose; also avoids log(0) in the threshold below.
        return np.asarray(audio)

    coeffs = pywt.wavedec(audio, wavelet, mode='per')

    # NOTE(review): the conventional MAD-to-sigma constant is 0.6745; dividing
    # by 0.5 yields a larger sigma and so a more aggressive threshold. Left
    # unchanged because it may be deliberate tuning -- confirm.
    sigma = np.median(np.abs(coeffs[-level])) / 0.5
    uthresh = sigma * np.sqrt(2 * np.log(n_samples))

    # Keep the approximation band (index 0) intact; shrink only the details.
    coeffs[1:] = [
        pywt.threshold(band, value=uthresh, mode='soft') for band in coeffs[1:]
    ]
    denoised = pywt.waverec(coeffs, wavelet, mode='per')

    # Periodization pads odd-length signals, so the reconstruction can be one
    # sample longer than the input; trim to keep the length stable.
    return denoised[:n_samples]

def apply_wiener_filter(audio):
    """Reduce noise in ``audio`` using ``scipy.signal.wiener`` defaults.

    Args:
        audio: Array of samples to filter.

    Returns:
        The array returned by ``wiener``; no window size or noise power is
        supplied, so SciPy's defaults apply.
    """
    smoothed = wiener(audio)
    return smoothed

def recognize_speech(audio_file):
    """Transcribe an audio file and post-process the transcript.

    Steps, in order: load the file at 16 kHz, high-pass filter, Wiener filter,
    wavelet denoise, run the ASR pipeline, strip ``<s>`` markers, then pass the
    text through ``convert_to_list``, ``process_doubles``, ``replace_words``
    and ``text_to_int``. Each intermediate text stage is printed for debugging.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an empty
            string (e.g. the form was submitted with no recording) is accepted.

    Returns:
        The value produced by ``text_to_int`` for the processed transcript, or
        an empty string when no audio file was supplied.
    """
    # The UI can invoke this with no audio attached; return an empty result
    # rather than letting the loader fail on a missing path.
    if not audio_file:
        return ""

    # Noise-reduction chain applied before recognition.
    audio, sr = librosa.load(audio_file, sr=16000)
    audio = high_pass_filter(audio, sr)
    audio = apply_wiener_filter(audio)
    denoised_audio = wavelet_denoise(audio)

    result = asr_model(denoised_audio)
    text_value = result['text']
    # Drop the "<s>" markers that can appear in the raw transcript.
    cleaned_text = text_value.replace("<s>", "")
    print(cleaned_text)

    # Text normalisation chain; each stage is echoed to aid debugging.
    converted_to_list = convert_to_list(cleaned_text, text_to_list())
    print(converted_to_list)
    processed_doubles = process_doubles(converted_to_list)
    print(processed_doubles)
    replaced_words = replace_words(processed_doubles)
    print(replaced_words)
    converted_text = text_to_int(replaced_words)
    print(converted_text)
    return converted_text

# Gradio Interface: takes audio from the microphone or an uploaded file, passes
# its file path to recognize_speech, and shows the returned text.
# The title/description name Tamil because the loaded model is
# "cdactvm/w2v-bert-tamil_new".
gr.Interface(
    fn=recognize_speech,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Speech Recognition with Advanced Noise Reduction & Tamil ASR",
    description="Record or upload an audio file, and the system will use high-pass filtering, Wiener filtering, and wavelet-based denoising, then a Tamil ASR model will transcribe the clean audio."
).launch()


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




--------
ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\WCHL\anaconda3\envs\RunInference2\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 404, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\WCHL\anaconda3\envs\RunInference2\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 84, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\WCHL\anaconda3\envs\RunInference2\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\WCHL\anaconda3\envs\RunInference2\Lib\site-packages\starlette\applications.py", line 123, in __call__
    await self.middleware_stack(scope, receive, send)
  File "C:\Users\WCHL\anaconda3\envs\RunInference2\Lib\site-packages\starlette\middleware\errors.py", li

எண்பது
எண்பது
எண்பது
eighty
80
