shubhamjaiswar committed on
Commit
0d2e8f8
·
verified ·
1 Parent(s): 11f9787

Upload 3 files

Browse files
Files changed (3) hide show
  1. Hinglish_Profanity_List.csv +211 -0
  2. app.py +102 -0
  3. requirements.txt +6 -0
Hinglish_Profanity_List.csv ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ profanity_hn,profanity_en,ind
2
+ badir,idiot,1
3
+ badirchand,idiot,1
4
+ bakland,idiot,1
5
+ bhadva,pimp,2
6
+ bhootnika,son of a witch,3
7
+ chinaal,whore,3
8
+ chup,shut up,1
9
+ chutia,fucker ,5
10
+ ghasti,hooker,4
11
+ chutiya,fucker,5
12
+ haraami,bastard,5
13
+ haraam,bastard,5
14
+ hijra,transsexual ,3
15
+ hinjda,transsexual ,3
16
+ jaanvar,animal,1
17
+ kutta,dog,2
18
+ kutiya,bitch,3
19
+ khota,donkey,1
20
+ auladheen,sonless,2
21
+ jaat,breed,1
22
+ najayaz,illegitimate,3
23
+ gandpaidaish,badborn,2
24
+ saala,sister's husband,2
25
+ kutti,bitch,2
26
+ soover,swine,3
27
+ tatti,shit,3
28
+ potty,shit,3
29
+ bahenchod,sister fucker,9
30
+ bahanchod,sister fucker,9
31
+ bahencho,sister fucker,9
32
+ bancho,sister fucker,7
33
+ bhenchod,profanity,7
34
+ bahenke,sister's ,6
35
+ laude,dick,7
36
+ takke,balls,7
37
+ betichod,daughter fucker,9
38
+ bhaichod,brother fucker,8
39
+ bhains,buffalo,1
40
+ jhalla,faggot,6
41
+ jhant,pubic,7
42
+ nabaal,hairless,4
43
+ pissu,bug,3
44
+ kutte,dog,2
45
+ maadherchod,mother fucker,10
46
+ madarchod,motherfucker,10
47
+ padma,fat bitch,6
48
+ raand,whore,9
49
+ jamai,son-in-law,5
50
+ randwa,male prostitute,8
51
+ randi,hooker,8
52
+ bachachod,son fucker,8
53
+ bachichod,daughter fucker,8
54
+ soower,swine,3
55
+ bachchechod,children fucker,8
56
+ ullu,idiot,1
57
+ pathe,idiot,1
58
+ banda,semi-dick,7
59
+ booblay,boobs,7
60
+ booby,boobs,7
61
+ buble,boobs,7
62
+ babla,boobs,7
63
+ bhonsriwala,fucker,8
64
+ bhonsdiwala,fucker,8
65
+ ched,pussy,8
66
+ chut,pussy,8
67
+ chod,fuck,7
68
+ chodu,fucker,7
69
+ chodra,fucker,7
70
+ choochi,boobs,8
71
+ chuchi,boobs,8
72
+ gaandu,asshole,9
73
+ gandu,asshole,9
74
+ gaand,ass,9
75
+ lavda, dick ,8
76
+ lawda,dick,8
77
+ lauda,dick,8
78
+ lund, dick,8
79
+ balchod,hair fucker,7
80
+ lavander,dick head,8
81
+ muth,masturbate ,7
82
+ maacho,mother fucker,10
83
+ mammey,boobs,8
84
+ tatte,boobs,8
85
+ toto,penis,8
86
+ toota,broken,7
87
+ backar,gossip,2
88
+ bhandwe,pimp,7
89
+ bhosadchod,ass fucker,9
90
+ bhosad,pussy,8
91
+ bumchod,ass fucker,9
92
+ bum,ass,8
93
+ bur,pussy,9
94
+ chatani,ketchup,1
95
+ cunt,pussy,8
96
+ cuntmama,pussy,8
97
+ chipkali,lizard,1
98
+ pasine,sweat,1
99
+ jhaat,cunt,8
100
+ chodela,fucked up,6
101
+ bhagatchod,saint fucker,7
102
+ chhola,clit,7
103
+ chudai,fucking,7
104
+ chudaikhana,whore house,9
105
+ chunni,clit,8
106
+ choot,pussy,8
107
+ bhoot,ghost,1
108
+ dhakkan,idiot,1
109
+ bhajiye,snack,1
110
+ fateychu,torn pussy,9
111
+ gandnatije,Bad result,2
112
+ lundtopi,condom,4
113
+ gaandu,ass,8
114
+ gaandfat,ass,8
115
+ gaandmasti,ass,8
116
+ makhanchudai,fucking,8
117
+ gaandmarau,ass fuck,9
118
+ gandu,faggot,8
119
+ chaatu,licker,6
120
+ beej,semen,6
121
+ choosu,sucker,7
122
+ fakeerchod,saint fucker,8
123
+ lundoos,dick,8
124
+ shorba,semen,7
125
+ binbheja,brainless,3
126
+ bhadwe,pimp,6
127
+ parichod,angel fucker,9
128
+ nirodh,condom.,5
129
+ pucchi,pussy,8
130
+ baajer,fucker,8
131
+ choud,fuck,8
132
+ bhosda,pussy,9
133
+ sadi,stinking,5
134
+ choos,suck,5
135
+ maka,mother's,7
136
+ chinaal,prostitute,7
137
+ gadde,boobs,7
138
+ joon,bug,3
139
+ chullugand,handful dirt,4
140
+ doob,drown,1
141
+ khatmal,bug,1
142
+ gandkate,ass,6
143
+ bambu,bamboo,4
144
+ lassan,garlic,1
145
+ danda,stick,2
146
+ keera,bug,2
147
+ keeda,bug,2
148
+ hazaarchu,thousand pussy,7
149
+ paidaishikeeda,born bug,5
150
+ kali,nigger,5
151
+ safaid,american,2
152
+ poot,son,2
153
+ behendi,sister,5
154
+ chus,sucker,6
155
+ machudi,mother fucker,10
156
+ chodoonga,fuck,8
157
+ baapchu,father pussy,9
158
+ laltern,lantern,5
159
+ suhaagchudai,wedding fuck,8
160
+ raatchuda,night fuck,7
161
+ kaalu,migga,6
162
+ neech,low caste,7
163
+ chikna,gay,6
164
+ meetha,gay,6
165
+ beechka,gay,6
166
+ chooche,boobs,8
167
+ patichod,husband,8
168
+ rundi,prostitute,7
169
+ makkhi,fly,1
170
+ biwichod,wife fucker,9
171
+ chodhunga,fuck,8
172
+ haathi,elephant,1
173
+ kute,dog,2
174
+ jhanten,pubic hair,8
175
+ kaat,cut,3
176
+ gandi,filthy,3
177
+ gadha,donkey,1
178
+ bimaar,ill,2
179
+ badboodar,smelly,2
180
+ dum,tail,2
181
+ raandsaala,sister's brother pimp,7
182
+ phudi,pussy,7
183
+ chute,pussy,7
184
+ kussi,ass,7
185
+ khandanchod,family fucker,9
186
+ ghussa,fuck,6
187
+ maarey,dead,4
188
+ chipkili,lizard,1
189
+ unday,eggs,1
190
+ budh,cunt,7
191
+ chaarpai,cot,1
192
+ chodun,fuck,5
193
+ chatri,condom,3
194
+ chode,fuck,6
195
+ chodho,fuck,6
196
+ mullekatue,Derogatory abuse to muslims,6
197
+ mullikatui,Derogatory Abuse to female muslim,6
198
+ mullekebaal,Derogatory Abuse to muslim,6
199
+ momedankatue,Derogatory Abuse to muslim,6
200
+ katua,dick cut,8
201
+ chutiyapa,fuck all,8
202
+ bc,sister fucker,10
203
+ mc,mother fucker,10
204
+ chudwaya,fuck,7
205
+ kutton,dog,2
206
+ jungli,wild,2
207
+ vahiyaat,disgusting,4
208
+ jihadi,terrorist,4
209
+ atankvadi,terrorist,4
210
+ atankwadi,terrorist,4
211
+ aatanki,terrorist,4
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client, handle_file
2
+ import pandas as pd
3
+ import gradio as gr
4
+ from vosk import Model, KaldiRecognizer
5
+ import json
6
+ import wave
7
+
8
# Remote Hugging Face Spaces used for classification. The clients are
# created at import time, so importing this module requires the Spaces to be
# reachable (presumably over the network -- confirm against gradio_client).
clientEngText = Client("dj-dawgs-ipd/IPD-Text-English-Finetune")
clientHingText = Client("dj-dawgs-ipd/IPD-Text-Hinglish")
clientAud = Client("dj-dawgs-ipd/IPD_Audio_HuBERT")

# Hinglish profanity lexicon. The CSV contains a few bytes that are not valid
# UTF-8 (they render as U+FFFD), so decode leniently instead of letting a
# strict decode abort start-up.
profanity_df = pd.read_csv(
    'Hinglish_Profanity_List.csv',
    encoding='utf-8',
    encoding_errors='replace',
)

# Normalise the lookup column so every entry can actually match a token of the
# lower-cased transcript: drop empty cells, remove replacement characters,
# trim padding and lower-case. The result is still a pandas Series.
profanity_hn = (
    profanity_df['profanity_hn']
    .dropna()
    .astype(str)
    .str.replace('\ufffd', '', regex=False)
    .str.strip()
    .str.lower()
)
profanity_hn = profanity_hn[profanity_hn != '']

# Speech-to-text model shared by every stt_vosk() call.
vosk_model = Model(lang="en-us")
15
+
16
+
17
+ # import whisper
18
+ # def stt_whisper(file_path):
19
+ # model = whisper.load_model("base")
20
+ # try:
21
+ # result = model.transcribe(file_path)
22
+ # return result["text"]
23
+ # except Exception as e:
24
+ # print(e)
25
+ # return ""
26
+
27
+
28
def stt_vosk(file_path):
    """Transcribe a WAV file with the module-level Vosk model.

    Args:
        file_path: Path of a WAV file that the ``wave`` module can open.

    Returns:
        The ``"text"`` field of the recogniser's final result, or ``""`` when
        the file cannot be read or recognition fails (best-effort contract).
    """
    import logging

    try:
        # The context manager guarantees the file handle is released even if
        # the recogniser raises part-way through.
        with wave.open(file_path, "rb") as wf:
            rec = KaldiRecognizer(vosk_model, wf.getframerate())
            rec.SetWords(True)
            rec.SetPartialWords(True)
            # Feed the audio in 4000-frame chunks until the file is exhausted.
            for chunk in iter(lambda: wf.readframes(4000), b""):
                rec.AcceptWaveform(chunk)
        result = json.loads(rec.FinalResult())
        return result["text"]
    except Exception:
        # Keep the "empty transcript on failure" behaviour, but record why it
        # failed and let KeyboardInterrupt/SystemExit propagate.
        logging.getLogger(__name__).exception(
            "Vosk transcription failed for %r", file_path
        )
        return ""
43
+
44
+
45
def extract_text(audio_path):
    """Return the lower-cased transcript that stt_vosk produces for *audio_path*."""
    transcript = stt_vosk(audio_path)
    return transcript.lower()
47
+
48
+
49
def _hate_confidence(label, score, neutral_label):
    """Return *score* for a non-neutral label, otherwise its complement."""
    return score if label != neutral_label else (1 - score)


def predict_hate_speech(audio_path):
    """Classify an audio clip as hate / not hate.

    The clip is sent to the audio classifier, transcribed with
    ``extract_text``, and the first 200 characters of the transcript are sent
    to the English and Hinglish text classifiers. A transcript token that
    appears in the profanity lexicon short-circuits the verdict to "hate".

    Args:
        audio_path: Filesystem path of the audio clip to analyse.

    Returns:
        ``["hate", <detail>, "Text: <transcript>"]`` when profanity or hate is
        detected, otherwise ``["not_hate", "No hate found, yay!"]``.
    """
    threshold = 0.6
    audio_hate_label = 'Hate Speech\n'

    audResult = clientAud.predict(
        audio_path=handle_file(audio_path),
        api_name="/predict"
    )
    # The audio Space's reply is parsed as JSON after swapping single quotes
    # for double quotes -- presumably it is a Python-dict style string;
    # confirm against the Space.
    audResult = json.loads(audResult.replace("'", '"'))

    stt_text = extract_text(audio_path)

    # Only the first 200 characters of the transcript are classified.
    engResult = clientEngText.predict(
        text=stt_text[:200],
        api_name="/predict"
    )

    hingResult = clientHingText.predict(
        text=stt_text[:200],
        api_name="/predict"
    )

    # Tokenise once; the original re-split the transcript for every lexicon
    # entry.
    spoken_words = set(stt_text.split())
    profanityFound = any(word in spoken_words for word in profanity_hn)

    # Each text result is indexed as (label, confidence).
    audio_is_hate = audResult['Classification'] == audio_hate_label
    isHate = (
        (engResult[0] != "NEITHER" and engResult[1] > threshold)
        or (hingResult[0] != "NAG" and hingResult[1] > threshold)
        or (audio_is_hate and audResult['Confidence'] > threshold)
    )

    engConf = _hate_confidence(engResult[0], engResult[1], "NEITHER")
    # The Hinglish model's non-hate label is "NAG" (see the hate decision
    # above), so the inversion must test for "NAG", not "NEITHER".
    hingConf = _hate_confidence(hingResult[0], hingResult[1], "NAG")
    audConf = (
        audResult['Confidence'] if audio_is_hate
        else (1 - audResult['Confidence'])
    )

    # Unweighted mean of the three per-model hate confidences.
    confidence = (engConf + hingConf + audConf) / 3

    if profanityFound:
        return ["hate", "Result: Profanity Found", f"Text: {stt_text}"]
    if isHate:
        return ["hate", f"Confidence: {confidence}", f"Text: {stt_text}"]

    return ["not_hate", "No hate found, yay!"]
87
+
88
+
89
# Gradio front end: one audio upload (passed to the handler as a file path)
# and one text box showing the analysis result.
_audio_input = gr.Audio(type="filepath", label="Upload Audio")
_analysis_output = gr.Textbox(label="Hate Speech Analysis")
_example_clips = [["hate_video_3_3_snippet2.wav"]]

iface = gr.Interface(
    fn=predict_hate_speech,
    inputs=_audio_input,
    outputs=_analysis_output,
    title="Hate Speech Audio Pipeline",
    description="Upload an audio file to detect potential hate speech content.",
    examples=_example_clips,
    allow_flagging="manual",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ gradio_client
3
+ pandas
4
+ vosk
5
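+ # json  (Python standard library module; not a pip-installable requirement)
6
+ # wave  (Python standard library module; not a pip-installable requirement)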