Spaces: Running on CPU Upgrade
Commit · 7ca009e
1 Parent(s): b706e43
Change output layout from score to figure.
app.py CHANGED
@@ -10,9 +10,13 @@ import random
 from huggingface_hub import HfApi
 import pandas as pd
 from moviepy.editor import *
+import matplotlib.pyplot as plt
+

 FS=16000
-MAX_SIZE = FS *
+MAX_SIZE = FS * 60
+CHUNK_SIZE = 4
+N = CHUNK_SIZE * FS

 HF_TOKEN_DEMO=os.getenv("HF_TOKEN_DEMO")
 MODEL_REPO=os.getenv("MODEL_REPO")
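The new constants pin the analysis window: at FS = 16000 Hz, MAX_SIZE caps processing at one minute of samples and N = CHUNK_SIZE * FS = 64000 samples is one 4-second scoring chunk. This hunk does not show how app.py consumes them; the snippet below is only an illustrative sketch of the arithmetic, and split_into_chunks is a hypothetical helper name, not something defined in the app.

import numpy as np

FS = 16000           # sample rate used throughout app.py
MAX_SIZE = FS * 60   # at most 60 s of audio is analyzed
CHUNK_SIZE = 4       # seconds per scoring chunk
N = CHUNK_SIZE * FS  # 64000 samples per chunk

def split_into_chunks(x):
    """Hypothetical helper: cap the waveform at MAX_SIZE samples and slice it
    into consecutive N-sample chunks (the last chunk may be shorter)."""
    x = np.asarray(x)[:MAX_SIZE]
    return [x[i:i + N] for i in range(0, len(x), N)]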
@@ -107,48 +111,87 @@ def process_youtube_address(youtube_address):
     return audiowav


+def create_chunk_plot(x, ini, end, scores, lvec, scr):
+    x=x.squeeze()
+    T=x.size(0)
+    t = np.array(list(range(T))) / FS
+
+    result=[np.nan for _ in range(ini)]
+
+    for s,l in zip(scores.tolist(),lvec.tolist()):
+        resi=[100*s for _ in range(int(l))]
+        result.extend(resi)
+
+    reslast=[np.nan for _ in range(T-end)]
+    result.extend(reslast)
+
+    assert len(result)==T, f"Length result: {len(result)} - Length audio {T}"
+    assert len(t)==T, f"Length time: {len(result)} - Length audio {T}"
+
+    x=x-torch.min(x)
+    x=x/torch.max(x)*100
+
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    ax.plot(t, x, alpha=0.3)
+    ax.plot(t, result, color='tab:red')
+    ax.set_ylabel('Probability of Real')
+    ax.set_xlabel('Time (s)')
+    ax.set_title(f"Prob. of real audio = {scr}")
+
+    yticks=np.arange(11)*10
+    ax.set_yticks(yticks)
+
+    return fig
+
+

 def process_micro(micro):
+    print("Micro processing")
     x=preprocess_audio(micro)
-    output = MODEL(x)
+    print("Running model")
+    output, output_arr, lvec, ls, ts = MODEL(x)
     print(output)
     result = postprocess_output(output)

-    return result
+    fig = create_chunk_plot(x, ls, ts, output_arr, lvec, result)
+
+    return fig

 def process_file(file):
+    print("File processing")
     x,fs = librosa.load(file, sr=FS)
     x=preprocess_audio((fs,x))
     print("Running model")
-    output = MODEL(x)
+    output, output_arr, lvec, ls, ts = MODEL(x)
     print(output)
     result = postprocess_output(output)

-    return result
+    fig = create_chunk_plot(x, ls, ts, output_arr, lvec, result)

-def process_files(files):
+    return fig
+
+def process_files(files):
+    print("Batch processing")
     resout=[]
-    res2out=[]
     fnames=[]
     for f in files:
         file=f.name
         x,fs = librosa.load(file, sr=FS)
         x=preprocess_audio((fs,x))
         print("Running model")
-        output = MODEL(x)
+        output, _, _, _, _ = MODEL(x)
         print(output)
         result = postprocess_output(output)
         resout.append(result)
-        #res2out.append(res2)

         fnames.append(os.path.basename(file))

     resout = pd.DataFrame({"File":fnames, "Probability of Real": resout})
-    #return resout, res2out
     return resout

 def process_video(file):
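create_chunk_plot paints each chunk's score (scaled to 0-100) as a horizontal red segment over the normalized waveform, leaving NaN gaps before the first scored sample (ini) and after the last (end), so the asserted lengths must add up to the audio length T. Below is a hedged usage sketch with synthetic tensors; the tensor shapes and the meaning of the five values unpacked from MODEL(x) (overall score, per-chunk scores, chunk lengths in samples, start index, end index) are inferred from how process_file calls the function, not from any model documentation. It assumes FS and create_chunk_plot from app.py are in scope.

import torch

# Synthetic 10 s clip at FS = 16000, scored in two 4 s chunks.
x = torch.randn(1, 10 * FS)             # waveform shaped like preprocess_audio output (assumption)
lvec = torch.tensor([4 * FS, 4 * FS])   # samples covered by each scored chunk
scores = torch.tensor([0.92, 0.15])     # per-chunk probability of real speech (made-up values)
ini, end = 0, int(lvec.sum())           # first and last scored sample indices
fig = create_chunk_plot(x, ini, end, scores, lvec, scr=0.54)
fig.savefig("chunk_scores.png")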
@@ -182,14 +225,16 @@ def process_youtube(youtube_address):

 with gr.Blocks(title="Audio Fake Detector") as demo:
     with gr.Tab("Individual Processing"):
-        gr.Markdown("""#
-        This is a
-        *
-        *
-
-
-
-
+        gr.Markdown("""# [Loccus.ai](http://www.loccus.ai) - AI Voice detection demo
+        This is a demo of our Authenticity Verification solution, aimed at detecting if a voice is real or not.
+        * Input - audio file in any format
+        * Output - probability of that voice being real or AI-generated (1.0 - Real / 0.0 AI-generated)
+
+        There are two testing modes:
+        * Individual processing - for single files. You will see a time-based view and scores for each 4-second chunk. Best for single long files.
+        * Batch processing - for a batch of files. You will see a single overall score per file. Best to assess multiple short files.
+
+        Only the first minute of audio is analyzed.""")



@@ -201,21 +246,21 @@ with gr.Blocks(title="Audio Fake Detector") as demo:
            #y = gr.Textbox(label="Enter YouTube address here")
            #v = gr.Video(label="Enter a video", include_audio=True, scale=0.5)

-           with gr.Column():
+           with gr.Column(scale=2):
                with gr.Row(equal_height=True):

-
+                   img = gr.Plot(show_label=False)

        #file= gr.Audio(source="upload", type="filepath", optional=True)
        #button_clear = gr.ClearButton([m,f,y,v,text])
-       button_clear = gr.ClearButton([m,f,
-       m.stop_recording(process_micro, inputs=[m], outputs=
-       f.upload(process_file,inputs=[f], outputs=
+       button_clear = gr.ClearButton([m,f,img])
+       m.stop_recording(process_micro, inputs=[m], outputs=img)
+       f.upload(process_file,inputs=[f], outputs=img)
        #y.submit(process_youtube, inputs=[y], outputs=text)
        #v.upload(process_video, inputs=[v], outputs=[text])

    with gr.Tab("Batch Processing"):
-       gr.Markdown("#
+       gr.Markdown("# [Loccus.ai](http://www.loccus.ai) - AI Voice detection demo")

        with gr.Row():
            with gr.Column():
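With the output switched from a textbox to gr.Plot, the microphone and upload handlers simply return the matplotlib Figure built by create_chunk_plot and Gradio renders it. A minimal standalone sketch of that pattern follows; the dummy handler and component choices are illustrative, not the app's actual components.

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np

def dummy_handler(audio_file):
    # Stand-in for process_file: ignore the input and return a Figure for gr.Plot.
    fig, ax = plt.subplots()
    ax.plot(np.linspace(0, 4, 200), np.random.rand(200), color="tab:red")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Probability of Real")
    return fig

with gr.Blocks() as demo:
    f = gr.File(label="Audio file")
    img = gr.Plot(show_label=False)
    f.upload(dummy_handler, inputs=[f], outputs=img)

demo.launch()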
@@ -229,19 +274,12 @@ with gr.Blocks(title="Audio Fake Detector") as demo:
                    headers=["File", "Probability of Real"],
                    datatype=["str", "str"],
                )
-
-           #text = gr.Textbox(label="Probability of Real Voice")
-           #text2 = gr.Textbox(label="Amp Mean Score")
+

        button_clear = gr.ClearButton([f,textbatch])

        f.upload(process_files,inputs=[f], outputs=[textbatch])

-
-
-       #btn = gr.Button("Run")
-       #btn.click(fn=update, inputs=inp, outputs=out)
-
 demo.launch(auth=[(username,password),(username0,password0),(username1,password1),(username2,password2),(username3,password3),(username4,password4),(username5,password5),(username6,password6),(username7,password7),(username8,password8),(username9,password9),(username10,password10), \
            (username11,password11),(username12,password12),(username13,password13),(username14,password14)])
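In the batch tab, process_files still returns a pandas DataFrame, which the gr.Dataframe component (textbatch) renders one row per uploaded file. A small, hedged sketch of that pairing; the stand-in handler and placeholder scores are illustrative only.

import os
import gradio as gr
import pandas as pd

def dummy_batch_handler(files):
    # Stand-in for process_files: one row per uploaded file with a placeholder score.
    # getattr covers both file objects (with .name) and plain path strings.
    names = [os.path.basename(getattr(f, "name", str(f))) for f in files]
    return pd.DataFrame({"File": names, "Probability of Real": ["0.87"] * len(names)})

with gr.Blocks() as demo:
    f = gr.File(file_count="multiple", label="Audio files")
    textbatch = gr.Dataframe(headers=["File", "Probability of Real"], datatype=["str", "str"])
    f.upload(dummy_batch_handler, inputs=[f], outputs=[textbatch])

demo.launch()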