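"""Hate-speech detection pipeline for audio.

Uploaded audio is transcribed offline with Vosk, then scored by three
Hugging Face Spaces (an English text classifier, a Hinglish text classifier,
and a HuBERT audio classifier) plus a Hinglish profanity word list. The
combined verdict is served through a Gradio interface.
"""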
import json
import wave

import gradio as gr
import pandas as pd
from gradio_client import Client, handle_file
from vosk import Model, KaldiRecognizer
# Remote Hugging Face Spaces used as classifiers.
clientEngText = Client("dj-dawgs-ipd/IPD-Text-English-Finetune")
clientHingText = Client("dj-dawgs-ipd/IPD-Text-Hinglish")
clientAud = Client("dj-dawgs-ipd/IPD_Audio_HuBERT")

# Hinglish profanity word list; use a set for fast membership checks
# (membership on a raw pandas Series would test the index, not the values).
profanity_df = pd.read_csv('Hinglish_Profanity_List.csv', encoding='utf-8')
profanity_hn = set(profanity_df['profanity_hn'])

# Offline English speech-to-text model.
vosk_model = Model(lang="en-us")
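# The three Spaces above are queried remotely via gradio_client, so they must
# be running and reachable when this app is used. Model(lang="en-us") downloads
# a small English Vosk model on first run if one is not already cached locally.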
# import whisper
# def stt_whisper(file_path):
#     model = whisper.load_model("base")
#     try:
#         result = model.transcribe(file_path)
#         return result["text"]
#     except Exception as e:
#         print(e)
#         return ""
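# Whisper (above) is kept only as a commented-out alternative STT backend;
# Vosk below is the backend actually used.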
def stt_vosk(file_path):
    """Transcribe an audio file with Vosk; return "" on any failure.

    Vosk works best on 16-bit mono PCM WAV input.
    """
    try:
        wf = wave.open(file_path, "rb")
        rec = KaldiRecognizer(vosk_model, wf.getframerate())
        rec.SetWords(True)
        rec.SetPartialWords(True)
        # Feed the audio to the recognizer in ~4000-frame chunks.
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            rec.AcceptWaveform(data)
        wf.close()
        result = json.loads(rec.FinalResult())
        return result["text"]
    except Exception as e:
        print(e)
        return ""
def extract_text(audio_path):
    # Lowercased transcript, used for both the text classifiers and profanity matching.
    return stt_vosk(audio_path).lower()
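
# Decision logic, in order:
#   1. If any transcript word appears in the Hinglish profanity list, flag as
#      hate immediately (no confidence score).
#   2. Otherwise flag as hate if any of the three classifiers predicts a
#      hateful label with confidence above 0.6; the reported confidence is the
#      mean of the three per-model hate scores.
#   3. Otherwise report not_hate.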
def predict_hate_speech(audio_path):
    # Audio-level classification via the HuBERT Space.
    audResult = clientAud.predict(
        audio_path=handle_file(audio_path),
        api_name="/predict"
    )
    # The Space returns a dict rendered as a string with 'Classification' and
    # 'Confidence' keys; swap quote styles so it parses as JSON.
    audResult = json.loads(audResult.replace("'", '"'))

    # Text-level classification on the Vosk transcript.
    stt_text = extract_text(audio_path)
    # English and Hinglish text classifiers; only the first 200 characters are sent.
    engResult = clientEngText.predict(
        text=stt_text[:200],
        api_name="/predict"
    )
    hingResult = clientHingText.predict(
        text=stt_text[:200],
        api_name="/predict"
    )
    # Transcript words that appear in the Hinglish profanity list.
    profanityFound = [word for word in stt_text.split() if word in profanity_hn]

    threshold = 0.6
    # Hate if any model predicts a hateful label above the threshold.
    # "NEITHER" is the English model's non-hate label; "NAG" is the Hinglish
    # model's non-hate label.
    isHate = (engResult[0] != "NEITHER" and engResult[1] > threshold) or (
        hingResult[0] != "NAG" and hingResult[1] > threshold) or (
        audResult['Classification'] == 'Hate Speech\n' and audResult['Confidence'] > threshold)

    # Per-model probability of hate (flip the score when the model predicted
    # the non-hate label), averaged across the three models.
    engConf = engResult[1] if engResult[0] != "NEITHER" else (1 - engResult[1])
    hingConf = hingResult[1] if hingResult[0] != "NAG" else (1 - hingResult[1])
    audConf = audResult['Confidence'] if audResult['Classification'] == 'Hate Speech\n' else (1 - audResult['Confidence'])
    confidence = (engConf + hingConf + audConf) / 3
    if len(profanityFound) > 0:
        return {
            'prediction': 'hate',
            'language': 'Hindi',
            'label': 'Profanity found',
            'confidence': None,
            'hate_text': ",".join(profanityFound)
        }
    if isHate:
        return {
            'prediction': 'hate',
            'language': 'English' if engConf > hingConf else 'Hinglish',
            'label': None,
            'confidence': confidence,
            'hate_text': stt_text
        }
    return {
        'prediction': 'not_hate',
        'language': None,
        'label': None,
        'confidence': None,
        'hate_text': None
    }
iface = gr.Interface(
    fn=predict_hate_speech,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=gr.JSON(),
    title="Hate Speech Audio Pipeline",
    description="Upload an audio file to detect potential hate speech content.",
    examples=[
        ["hate_1.wav"],
        ["hate_2.wav"]
    ],
    allow_flagging="manual"
)
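# The example clips (hate_1.wav, hate_2.wav) are assumed to sit next to this
# script; drop the `examples` argument if they are not available.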
if __name__ == "__main__":
    iface.launch()