Spaces:
Sleeping
Sleeping
shubhamjaiswar
commited on
Upload 3 files
Browse files- Hinglish_Profanity_List.csv +211 -0
- app.py +102 -0
- requirements.txt +6 -0
Hinglish_Profanity_List.csv
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
profanity_hn,profanity_en,ind
|
2 |
+
badir,idiot,1
|
3 |
+
badirchand,idiot,1
|
4 |
+
bakland,idiot,1
|
5 |
+
bhadva,pimp,2
|
6 |
+
bhootnika,son of a witch,3
|
7 |
+
chinaal,whore,3
|
8 |
+
chup,shut up,1
|
9 |
+
chutia,fucker ,5
|
10 |
+
ghasti,hooker,4
|
11 |
+
chutiya,fucker,5
|
12 |
+
haraami,bastard,5
|
13 |
+
haraam,bastard,5
|
14 |
+
hijra,transsexual ,3
|
15 |
+
hinjda,transsexual ,3
|
16 |
+
jaanvar,animal,1
|
17 |
+
kutta,dog,2
|
18 |
+
kutiya,bitch,3
|
19 |
+
khota,donkey,1
|
20 |
+
auladheen,sonless,2
|
21 |
+
jaat,breed,1
|
22 |
+
najayaz,illegitimate,3
|
23 |
+
gandpaidaish,badborn,2
|
24 |
+
saala,sister's husband,2
|
25 |
+
kutti,bitch,2
|
26 |
+
soover,swine,3
|
27 |
+
tatti,shit,3
|
28 |
+
potty,shit,3
|
29 |
+
bahenchod,sister fucker,9
|
30 |
+
bahanchod,sister fucker,9
|
31 |
+
bahencho,sister fucker,9
|
32 |
+
bancho,sister fucker,7
|
33 |
+
bhenchod,profanity,7
|
34 |
+
bahenke,sister's ,6
|
35 |
+
laude,dick,7
|
36 |
+
takke,balls,7
|
37 |
+
betichod,daughter fucker,9
|
38 |
+
bhaichod,brother fucker,8
|
39 |
+
bhains,buffalo,1
|
40 |
+
jhalla,faggot,6
|
41 |
+
jhant,pubic,7
|
42 |
+
nabaal,hairless,4
|
43 |
+
pissu,bug,3
|
44 |
+
kutte,dog,2
|
45 |
+
maadherchod,mother fucker,10
|
46 |
+
madarchod,motherfucker,10
|
47 |
+
padma,fat bitch,6
|
48 |
+
raand,whore,9
|
49 |
+
jamai,son-in-law,5
|
50 |
+
randwa,male prostitute,8
|
51 |
+
randi,hooker,8
|
52 |
+
bachachod,son fucker,8
|
53 |
+
bachichod,daughter fucker,8
|
54 |
+
soower,swine,3
|
55 |
+
bachchechod,children fucker,8
|
56 |
+
ullu,idiot,1
|
57 |
+
pathe,idiot,1
|
58 |
+
banda,semi-dick,7
|
59 |
+
booblay,boobs,7
|
60 |
+
booby,boobs,7
|
61 |
+
buble,boobs,7
|
62 |
+
babla,boobs,7
|
63 |
+
bhonsriwala,fucker,8
|
64 |
+
bhonsdiwala,fucker,8
|
65 |
+
ched,pussy,8
|
66 |
+
chut,pussy,8
|
67 |
+
chod,fuck,7
|
68 |
+
chodu,fucker,7
|
69 |
+
chodra,fucker,7
|
70 |
+
choochi,boobs,8
|
71 |
+
chuchi,boobs,8
|
72 |
+
gaandu,asshole,9
|
73 |
+
gandu,asshole,9
|
74 |
+
gaand,ass,9
|
75 |
+
lavda, dick ,8
|
76 |
+
lawda,dick,8
|
77 |
+
lauda,dick,8
|
78 |
+
lund, dick,8
|
79 |
+
balchod,hair fucker,7
|
80 |
+
lavander,dick head,8
|
81 |
+
muth,masturbate ,7
|
82 |
+
maacho,mother fucker,10
|
83 |
+
mammey,boobs,8
|
84 |
+
tatte,boobs,8
|
85 |
+
toto,penis,8
|
86 |
+
toota,broken,7
|
87 |
+
backar,gossip,2
|
88 |
+
bhandwe,pimp,7
|
89 |
+
bhosadchod,ass fucker,9
|
90 |
+
bhosad,pussy,8
|
91 |
+
bumchod,ass fucker,9
|
92 |
+
bum,ass,8
|
93 |
+
bur,pussy,9
|
94 |
+
chatani,ketchup,1
|
95 |
+
cunt,pussy,8
|
96 |
+
cuntmama,pussy,8
|
97 |
+
chipkali,lizard,1
|
98 |
+
pasine,sweat,1
|
99 |
+
jhaat,cunt,8
|
100 |
+
chodela,fucked up,6
|
101 |
+
bhagatchod,saint fucker,7
|
102 |
+
chhola,clit,7
|
103 |
+
chudai,fucking,7
|
104 |
+
chudaikhana,whore house,9
|
105 |
+
chunni,clit,8
|
106 |
+
choot,pussy,8
|
107 |
+
bhoot,ghost,1
|
108 |
+
dhakkan,idiot,1
|
109 |
+
bhajiye,snack,1
|
110 |
+
fateychu,torn pussy,9
|
111 |
+
gandnatije,Bad result,2
|
112 |
+
lundtopi,condom,4
|
113 |
+
gaandu,ass,8
|
114 |
+
gaandfat,ass,8
|
115 |
+
gaandmasti,ass,8
|
116 |
+
makhanchudai,fucking,8
|
117 |
+
gaandmarau,ass fuck,9
|
118 |
+
gandu,faggot,8
|
119 |
+
chaatu,licker,6
|
120 |
+
beej,semen,6
|
121 |
+
choosu,sucker,7
|
122 |
+
fakeerchod,saint fucker,8
|
123 |
+
lundoos,dick,8
|
124 |
+
shorba,semen,7
|
125 |
+
binbheja,brainless,3
|
126 |
+
bhadwe,pimp,6
|
127 |
+
parichod,angel fucker,9
|
128 |
+
nirodh,condom.,5
|
129 |
+
pucchi,pussy,8
|
130 |
+
baajer,fucker,8
|
131 |
+
choud,fuck,8
|
132 |
+
bhosda,pussy,9
|
133 |
+
sadi,stinking,5
|
134 |
+
choos,suck,5
|
135 |
+
maka,mother's,7
|
136 |
+
chinaal,prostitute,7
|
137 |
+
gadde,boobs,7
|
138 |
+
joon,bug,3
|
139 |
+
chullugand,handful dirt,4
|
140 |
+
doob,drown,1
|
141 |
+
khatmal,bug,1
|
142 |
+
gandkate,ass,6
|
143 |
+
bambu,bamboo,4
|
144 |
+
lassan,garlic,1
|
145 |
+
danda,stick,2
|
146 |
+
keera,bug,2
|
147 |
+
keeda,bug,2
|
148 |
+
hazaarchu,thousand pussy,7
|
149 |
+
paidaishikeeda,born bug,5
|
150 |
+
kali,nigger,5
|
151 |
+
safaid,american,2
|
152 |
+
poot,son,2
|
153 |
+
behendi,sister,5
|
154 |
+
chus,sucker,6
|
155 |
+
machudi,mother fucker,10
|
156 |
+
chodoonga,fuck,8
|
157 |
+
baapchu,father pussy,9
|
158 |
+
laltern,lantern,5
|
159 |
+
suhaagchudai,wedding fuck,8
|
160 |
+
raatchuda,night fuck,7
|
161 |
+
kaalu,migga,6
|
162 |
+
neech,low caste,7
|
163 |
+
chikna,gay,6
|
164 |
+
meetha,gay,6
|
165 |
+
beechka,gay,6
|
166 |
+
chooche,boobs,8
|
167 |
+
patichod,husband,8
|
168 |
+
rundi,prostitute,7
|
169 |
+
makkhi,fly,1
|
170 |
+
biwichod,wife fucker,9
|
171 |
+
chodhunga,fuck,8
|
172 |
+
haathi,elephant,1
|
173 |
+
kute,dog,2
|
174 |
+
jhanten,pubic hair,8
|
175 |
+
kaat,cut,3
|
176 |
+
gandi,filthy,3
|
177 |
+
gadha,donkey,1
|
178 |
+
bimaar,ill,2
|
179 |
+
badboodar,smelly,2
|
180 |
+
dum,tail,2
|
181 |
+
raandsaala,sister's brother pimp,7
|
182 |
+
phudi,pussy,7
|
183 |
+
chute,pussy,7
|
184 |
+
kussi,ass,7
|
185 |
+
khandanchod,family fucker,9
|
186 |
+
ghussa,fuck,6
|
187 |
+
maarey,dead,4
|
188 |
+
chipkili,lizard,1
|
189 |
+
unday,eggs,1
|
190 |
+
budh,cunt,7
|
191 |
+
chaarpai,cot,1
|
192 |
+
chodun,fuck,5
|
193 |
+
chatri,condom,3
|
194 |
+
chode,fuck,6
|
195 |
+
chodho,fuck,6
|
196 |
+
mullekatue,Derogatory abuse to muslims,6
|
197 |
+
mullikatui,Derogatory Abuse to female muslim,6
|
198 |
+
mullekebaal,Derogatory Abuse to muslim,6
|
199 |
+
momedankatue,Derogatory Abuse to muslim,6
|
200 |
+
katua,dick cut,8
|
201 |
+
chutiyapa,fuck all,8
|
202 |
+
bc,sister fucker,10
|
203 |
+
mc,mother fucker,10
|
204 |
+
chudwaya,fuck,7
|
205 |
+
kutton,dog,2
|
206 |
+
jungli,wild,2
|
207 |
+
vahiyaat,disgusting,4
|
208 |
+
jihadi,terrorist,4
|
209 |
+
atankvadi,terrorist,4
|
210 |
+
atankwadi,terrorist,4
|
211 |
+
aatanki,terrorist,4
|
app.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gradio_client import Client, handle_file
|
2 |
+
import pandas as pd
|
3 |
+
import gradio as gr
|
4 |
+
from vosk import Model, KaldiRecognizer
|
5 |
+
import json
|
6 |
+
import wave
|
7 |
+
|
8 |
+
# Gradio clients for the three remote Spaces queried by predict_hate_speech:
# two receive the transcript as `text`, one receives the audio file itself.
clientEngText = Client("dj-dawgs-ipd/IPD-Text-English-Finetune")
clientHingText = Client("dj-dawgs-ipd/IPD-Text-Hinglish")
clientAud = Client("dj-dawgs-ipd/IPD_Audio_HuBERT")

# Profanity lexicon loaded once at import; only the 'profanity_hn' column is
# kept for matching against transcript words.
# NOTE(review): matching is exact, so stray whitespace or mis-encoded
# characters in this column prevent a hit -- verify the file decodes cleanly
# as UTF-8.
profanity_df = pd.read_csv('Hinglish_Profanity_List.csv', encoding='utf-8')
profanity_hn = profanity_df['profanity_hn']
# Vosk speech model shared by every stt_vosk call.
# NOTE(review): an "en-us" model is used although the lexicon is Hinglish --
# presumably intentional; confirm.
vosk_model = Model(lang="en-us")
|
15 |
+
|
16 |
+
|
17 |
+
# import whisper
|
18 |
+
# def stt_whisper(file_path):
|
19 |
+
# model = whisper.load_model("base")
|
20 |
+
# try:
|
21 |
+
# result = model.transcribe(file_path)
|
22 |
+
# return result["text"]
|
23 |
+
# except Exception as e:
|
24 |
+
# print(e)
|
25 |
+
# return ""
|
26 |
+
|
27 |
+
|
28 |
+
def stt_vosk(file_path):
    """Transcribe a WAV file to text with the module-level Vosk model.

    Args:
        file_path: Path of an audio file readable by the standard ``wave``
            module.

    Returns:
        The ``"text"`` field of the recognizer's final result, or ``""`` when
        the file cannot be opened or recognition fails for any reason.
    """
    import logging

    try:
        # The context manager guarantees the file handle is released even if
        # the recognizer raises part-way through.
        with wave.open(file_path, "rb") as wf:
            rec = KaldiRecognizer(vosk_model, wf.getframerate())
            rec.SetWords(True)
            rec.SetPartialWords(True)
            # Feed the audio in 4000-frame chunks; readframes() yields empty
            # bytes once the file is exhausted, which ends the iteration.
            for chunk in iter(lambda: wf.readframes(4000), b""):
                rec.AcceptWaveform(chunk)
            result = json.loads(rec.FinalResult())
        return result["text"]
    except Exception:
        # Best-effort by design: callers expect an empty transcript rather
        # than a crash. Unlike a bare ``except``, this lets KeyboardInterrupt
        # and SystemExit propagate, and the cause is logged, not discarded.
        logging.getLogger(__name__).exception(
            "Vosk transcription failed for %r", file_path
        )
        return ""
|
43 |
+
|
44 |
+
|
45 |
+
def extract_text(audio_path):
    """Return the lower-cased Vosk transcript of the audio at *audio_path*."""
    transcript = stt_vosk(audio_path)
    return transcript.lower()
|
47 |
+
|
48 |
+
|
49 |
+
def predict_hate_speech(audio_path):
    """Run an audio clip through the hate-speech pipeline.

    The clip is sent to the remote audio classifier and transcribed locally.
    The first 200 characters of the transcript are sent to the English and
    Hinglish text classifiers, and the transcript's whitespace-separated words
    are checked against the Hinglish profanity list.

    Args:
        audio_path: Filesystem path of the audio clip to analyse.

    Returns:
        ``["hate", "Result: Profanity Found", "Text: <transcript>"]`` when a
        listed profanity appears in the transcript; otherwise
        ``["hate", "Confidence: <mean>", "Text: <transcript>"]`` when any
        classifier reports a hate label with a score above the threshold;
        otherwise ``["not_hate", "No hate found, yay!"]``.
    """
    threshold = 0.6

    audResult = clientAud.predict(
        audio_path=handle_file(audio_path),
        api_name="/predict"
    )
    # The reply is parsed as JSON after swapping single quotes for double
    # quotes -- presumably it is a Python-repr style dict.
    # NOTE(review): this breaks if any value ever contains an apostrophe.
    audResult = json.loads(audResult.replace("'", '"'))

    stt_text = extract_text(audio_path)
    # Both text classifiers receive the same 200-character prefix.
    snippet = stt_text[:200]

    engResult = clientEngText.predict(
        text=snippet,
        api_name="/predict"
    )

    hingResult = clientHingText.predict(
        text=snippet,
        api_name="/predict"
    )

    # Split the transcript once instead of once per lexicon entry.
    spoken_words = set(stt_text.split())
    profanityFound = any(word in spoken_words for word in profanity_hn)

    # Each text result is indexed as (label, score). Any label other than the
    # classifier's "nothing found" label counts as hate.
    eng_is_hate = engResult[0] != "NEITHER"
    hing_is_hate = hingResult[0] != "NAG"
    aud_is_hate = audResult['Classification'] == 'Hate Speech\n'

    isHate = (
        (eng_is_hate and engResult[1] > threshold)
        or (hing_is_hate and hingResult[1] > threshold)
        or (aud_is_hate and audResult['Confidence'] > threshold)
    )

    # Express every score as "confidence that this is hate". The Hinglish
    # branch previously compared against "NEITHER", so a "NAG" score was
    # never inverted and wrongly raised the average.
    engConf = engResult[1] if eng_is_hate else (1 - engResult[1])
    hingConf = hingResult[1] if hing_is_hate else (1 - hingResult[1])
    audConf = audResult['Confidence'] if aud_is_hate else (1 - audResult['Confidence'])

    confidence = (engConf + hingConf + audConf) / 3

    # A lexicon hit overrides the classifiers entirely.
    if profanityFound:
        return ["hate", "Result: Profanity Found", f"Text: {stt_text}"]
    if isHate:
        return ["hate", f"Confidence: {confidence}", f"Text: {stt_text}"]

    return ["not_hate", "No hate found, yay!"]
|
87 |
+
|
88 |
+
|
89 |
+
# Gradio UI: a single audio input, handed to predict_hate_speech as a file
# path, and a single text box that displays whatever the function returns.
iface = gr.Interface(
    fn=predict_hate_speech,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=gr.Textbox(label="Hate Speech Analysis"),
    title="Hate Speech Audio Pipeline",
    description="Upload an audio file to detect potential hate speech content.",
    examples=[
        # NOTE(review): referenced by relative path -- confirm this clip is
        # shipped alongside app.py.
        ["hate_video_3_3_snippet2.wav"]
    ],
    allow_flagging="manual"
)

# Launch the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
gradio_client
|
3 |
+
pandas
|
4 |
+
vosk
|
5 |
+
json
|
6 |
+
wave
|