DavidLoccus committed on
Commit
7ca009e
·
1 Parent(s): b706e43

Change output layout from score to figure.

Browse files
Files changed (1) hide show
  1. app.py +70 -32
app.py CHANGED
@@ -10,9 +10,13 @@ import random
10
  from huggingface_hub import HfApi
11
  import pandas as pd
12
  from moviepy.editor import *
 
 
13
 
14
  FS=16000
15
- MAX_SIZE = FS * 30
 
 
16
 
17
  HF_TOKEN_DEMO=os.getenv("HF_TOKEN_DEMO")
18
  MODEL_REPO=os.getenv("MODEL_REPO")
@@ -107,48 +111,87 @@ def process_youtube_address(youtube_address):
107
  return audiowav
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
 
112
  def process_micro(micro):
 
113
  x=preprocess_audio(micro)
114
- output = MODEL(x)
 
115
  print(output)
116
  result = postprocess_output(output)
117
 
118
- return result
 
 
119
 
120
  def process_file(file):
 
121
  x,fs = librosa.load(file, sr=FS)
122
  x=preprocess_audio((fs,x))
123
  print("Running model")
124
- output = MODEL(x)
125
  print(output)
126
  result = postprocess_output(output)
127
 
128
- return result
 
129
 
130
- def process_files(files):
131
 
 
 
132
  resout=[]
133
- res2out=[]
134
  fnames=[]
135
  for f in files:
136
  file=f.name
137
  x,fs = librosa.load(file, sr=FS)
138
  x=preprocess_audio((fs,x))
139
  print("Running model")
140
- output = MODEL(x)
141
  print(output)
142
  result = postprocess_output(output)
143
  resout.append(result)
144
- #res2out.append(res2)
145
 
146
 
147
  fnames.append(os.path.basename(file))
148
 
149
 
150
  resout = pd.DataFrame({"File":fnames, "Probability of Real": resout})
151
- #return resout, res2out
152
  return resout
153
 
154
  def process_video(file):
@@ -182,14 +225,16 @@ def process_youtube(youtube_address):
182
 
183
  with gr.Blocks(title="Audio Fake Detector") as demo:
184
  with gr.Tab("Individual Processing"):
185
- gr.Markdown("""# Welcome to Loccus' Authenticity Verification demo!
186
- This is a showcase of our solution. It provides a probability of a voice being real or AI-generated. It is designed for the following context:
187
- * To detect voice clones
188
- * Focus on English and Spanish languages
189
- * For short audio samples (3 to 10 seconds)
190
- * For audio from digital channels (at 16 kHz or more)
191
-
192
- Please test it accordingly. Variations of the above (e.g. a 1 minute audio file of an off-the-shelf TTS voice in Japanese) can compromise the accuracy and performance of the solution. We keep improving the solution adding new features every week.""")
 
 
193
 
194
 
195
 
@@ -201,21 +246,21 @@ with gr.Blocks(title="Audio Fake Detector") as demo:
201
  #y = gr.Textbox(label="Enter YouTube address here")
202
  #v = gr.Video(label="Enter a video", include_audio=True, scale=0.5)
203
 
204
- with gr.Column():
205
  with gr.Row(equal_height=True):
206
 
207
- text = gr.Textbox(label="Probability of Real Voice")
208
 
209
  #file= gr.Audio(source="upload", type="filepath", optional=True)
210
  #button_clear = gr.ClearButton([m,f,y,v,text])
211
- button_clear = gr.ClearButton([m,f,text])
212
- m.stop_recording(process_micro, inputs=[m], outputs=text)
213
- f.upload(process_file,inputs=[f], outputs=text)
214
  #y.submit(process_youtube, inputs=[y], outputs=text)
215
  #v.upload(process_video, inputs=[v], outputs=[text])
216
 
217
  with gr.Tab("Batch Processing"):
218
- gr.Markdown("# Welcome to Loccus' Authenticity Verification demo!")
219
 
220
  with gr.Row():
221
  with gr.Column():
@@ -229,19 +274,12 @@ with gr.Blocks(title="Audio Fake Detector") as demo:
229
  headers=["File", "Probability of Real"],
230
  datatype=["str", "str"],
231
  )
232
-
233
- #text = gr.Textbox(label="Probability of Real Voice")
234
- #text2 = gr.Textbox(label="Amp Mean Score")
235
 
236
  button_clear = gr.ClearButton([f,textbatch])
237
 
238
  f.upload(process_files,inputs=[f], outputs=[textbatch])
239
 
240
-
241
-
242
- #btn = gr.Button("Run")
243
- #btn.click(fn=update, inputs=inp, outputs=out)
244
-
245
  demo.launch(auth=[(username,password),(username0,password0),(username1,password1),(username2,password2),(username3,password3),(username4,password4),(username5,password5),(username6,password6),(username7,password7),(username8,password8),(username9,password9),(username10,password10), \
246
  (username11,password11),(username12,password12),(username13,password13),(username14,password14)])
247
 
 
10
  from huggingface_hub import HfApi
11
  import pandas as pd
12
  from moviepy.editor import *
13
+ import matplotlib.pyplot as plt
14
+
15
 
16
  FS=16000
17
+ MAX_SIZE = FS * 60
18
+ CHUNK_SIZE = 4
19
+ N = CHUNK_SIZE * FS
20
 
21
  HF_TOKEN_DEMO=os.getenv("HF_TOKEN_DEMO")
22
  MODEL_REPO=os.getenv("MODEL_REPO")
 
111
  return audiowav
112
 
113
 
def create_chunk_plot(x, ini, end, scores, lvec, scr):
    """Plot the normalised waveform with the per-chunk score curve on top.

    Args:
        x: Audio samples; must support ``squeeze()`` and ``size(0)``
            (presumably a torch tensor -- TODO confirm against the model
            pre-processing).
        ini: Number of leading samples that carry no score (drawn as a gap).
        end: Sample index at which the scored region stops; the remaining
            ``T - end`` samples carry no score.
        scores: Per-chunk scores exposing ``tolist()``; each is scaled by 100.
        lvec: Per-chunk lengths in samples exposing ``tolist()``; chunk ``i``
            contributes ``int(lvec[i])`` points to the curve.
        scr: Value interpolated into the figure title.

    Returns:
        A matplotlib ``Figure`` holding the waveform (rescaled to 0-100) and
        the score curve against time in seconds.

    Raises:
        ValueError: If the assembled score curve does not have one point per
            audio sample (previously an ``assert``, which is stripped
            under ``python -O``).
    """
    # Build the Figure directly instead of through pyplot: pyplot keeps every
    # figure it creates in a global registry until it is explicitly closed,
    # which leaks memory in a long-running server process.
    from matplotlib.figure import Figure

    wave = x.squeeze()
    T = wave.size(0)
    ini, end = int(ini), int(end)
    t = np.arange(T) / FS

    # One (score, length) pair per chunk; zip() keeps the original behaviour
    # of silently truncating to the shorter of the two sequences.
    pairs = list(zip(scores.tolist(), lvec.tolist()))
    chunk_scores = np.array([100 * s for s, _ in pairs], dtype=float)
    chunk_lengths = np.array([max(int(n), 0) for _, n in pairs], dtype=int)

    # NaN padding makes matplotlib leave the unscored head/tail blank.
    curve = np.concatenate([
        np.full(max(ini, 0), np.nan),
        np.repeat(chunk_scores, chunk_lengths),
        np.full(max(T - end, 0), np.nan),
    ])
    if len(curve) != T:
        raise ValueError(f"Length result: {len(curve)} - Length audio {T}")

    # Explicit conversion so tensors on an accelerator or tracking gradients
    # can still be plotted.
    wave = wave.detach().cpu().numpy().astype(float)
    if T:
        wave = wave - wave.min()
        peak = wave.max()
        # A constant (e.g. silent) signal has peak == 0; leave it flat at 0
        # rather than dividing by zero and plotting NaNs.
        if peak > 0:
            wave = wave / peak * 100

    fig = Figure()
    ax = fig.add_subplot(111)
    ax.plot(t, wave, alpha=0.3)
    ax.plot(t, curve, color='tab:red')
    ax.set_ylabel('Probability of Real')
    ax.set_xlabel('Time (s)')
    ax.set_title(f"Prob. of real audio = {scr}")
    ax.set_yticks(np.arange(11) * 10)

    return fig
146
+
147
+
148
 
149
 
def process_micro(micro):
    """Run the detector on a microphone capture and return its chunk plot.

    Args:
        micro: Recording handed over by the microphone input; it is passed
            unchanged to ``preprocess_audio`` (presumably a
            ``(sample_rate, samples)`` pair -- TODO confirm).

    Returns:
        The figure produced by ``create_chunk_plot`` for this recording.
    """
    print("Micro processing")
    waveform = preprocess_audio(micro)

    print("Running model")
    overall, chunk_scores, chunk_lengths, first_sample, last_sample = MODEL(waveform)
    print(overall)

    # The post-processed overall score is what ends up in the plot title.
    return create_chunk_plot(
        waveform,
        first_sample,
        last_sample,
        chunk_scores,
        chunk_lengths,
        postprocess_output(overall),
    )
161
 
def process_file(file):
    """Run the detector on one uploaded audio file and return its chunk plot.

    Args:
        file: Path of the audio file; it is loaded with ``librosa.load`` and
            resampled to ``FS``.

    Returns:
        The figure produced by ``create_chunk_plot`` for this file.
    """
    print("File processing")
    samples, rate = librosa.load(file, sr=FS)
    waveform = preprocess_audio((rate, samples))

    print("Running model")
    overall, chunk_scores, chunk_lengths, first_sample, last_sample = MODEL(waveform)
    print(overall)

    # The post-processed overall score is what ends up in the plot title.
    return create_chunk_plot(
        waveform,
        first_sample,
        last_sample,
        chunk_scores,
        chunk_lengths,
        postprocess_output(overall),
    )
175
 
def process_files(files):
    """Score a batch of uploaded files and tabulate one result per file.

    Args:
        files: Iterable of upload objects; each one's ``.name`` attribute is
            used as the path of the audio file to load.

    Returns:
        A ``pandas.DataFrame`` with a ``"File"`` column (base names) and a
        ``"Probability of Real"`` column (post-processed overall scores).
    """
    print("Batch processing")
    names = []
    probabilities = []

    for upload in files:
        path = upload.name
        samples, rate = librosa.load(path, sr=FS)
        waveform = preprocess_audio((rate, samples))

        print("Running model")
        # Only the overall score is needed here; per-chunk details are unused.
        overall, _chunk_scores, _chunk_lengths, _first, _last = MODEL(waveform)
        print(overall)

        probabilities.append(postprocess_output(overall))
        names.append(os.path.basename(path))

    return pd.DataFrame({"File": names, "Probability of Real": probabilities})
196
 
197
  def process_video(file):
 
225
 
226
  with gr.Blocks(title="Audio Fake Detector") as demo:
227
  with gr.Tab("Individual Processing"):
228
+ gr.Markdown("""# [Loccus.ai](http://www.loccus.ai) - AI Voice detection demo
229
+ This is a demo of our Authenticity Verification solution, aimed at detecting if a voice is real or not.
230
+ * Input - audio file in any format
231
+ * Output - probability of that voice being real or AI-generated (1.0 - Real / 0.0 AI-generated)
232
+
233
+ There are two testing modes:
234
+ * Individual processing - for single files. You will see a time-based view and scores for each 4-second chunk. Best for single long files.
235
+ * Batch processing - for a batch of files. You will see a single overall score per file. Best to assess multiple short files.
236
+
237
+ Only the first minute of audio is analyzed.""")
238
 
239
 
240
 
 
246
  #y = gr.Textbox(label="Enter YouTube address here")
247
  #v = gr.Video(label="Enter a video", include_audio=True, scale=0.5)
248
 
249
+ with gr.Column(scale=2):
250
  with gr.Row(equal_height=True):
251
 
252
+ img = gr.Plot(show_label=False)
253
 
254
  #file= gr.Audio(source="upload", type="filepath", optional=True)
255
  #button_clear = gr.ClearButton([m,f,y,v,text])
256
+ button_clear = gr.ClearButton([m,f,img])
257
+ m.stop_recording(process_micro, inputs=[m], outputs=img)
258
+ f.upload(process_file,inputs=[f], outputs=img)
259
  #y.submit(process_youtube, inputs=[y], outputs=text)
260
  #v.upload(process_video, inputs=[v], outputs=[text])
261
 
262
  with gr.Tab("Batch Processing"):
263
+ gr.Markdown("# [Loccus.ai](http://www.loccus.ai) - AI Voice detection demo")
264
 
265
  with gr.Row():
266
  with gr.Column():
 
274
  headers=["File", "Probability of Real"],
275
  datatype=["str", "str"],
276
  )
277
+
 
 
278
 
279
  button_clear = gr.ClearButton([f,textbatch])
280
 
281
  f.upload(process_files,inputs=[f], outputs=[textbatch])
282
 
 
 
 
 
 
283
  demo.launch(auth=[(username,password),(username0,password0),(username1,password1),(username2,password2),(username3,password3),(username4,password4),(username5,password5),(username6,password6),(username7,password7),(username8,password8),(username9,password9),(username10,password10), \
284
  (username11,password11),(username12,password12),(username13,password13),(username14,password14)])
285