Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
2e81dfe
1
Parent(s):
af58e15
Batch of files processing.
Browse files
Audio from video processing.
- app.py +87 -19
- requirements.txt +2 -1
app.py
CHANGED
@@ -8,6 +8,8 @@ from pytube import YouTube
|
|
8 |
import os
|
9 |
import random
|
10 |
from huggingface_hub import HfApi
|
|
|
|
|
11 |
|
12 |
FS=16000
|
13 |
MAX_SIZE = FS * 30
|
@@ -15,6 +17,7 @@ MAX_SIZE = FS * 30
|
|
15 |
HF_TOKEN_DEMO=os.getenv("HF_TOKEN_DEMO")
|
16 |
MODEL_REPO=os.getenv("MODEL_REPO")
|
17 |
MODELNAME=os.getenv("MODELNAME")
|
|
|
18 |
username=os.getenv("username")
|
19 |
password=os.getenv("password")
|
20 |
|
@@ -23,7 +26,7 @@ hf_api = HfApi(
|
|
23 |
token=HF_TOKEN_DEMO, # Token is not persisted on the machine.
|
24 |
)
|
25 |
|
26 |
-
modelfile = hf_api.hf_hub_download(MODEL_REPO,
|
27 |
MODEL = torch.jit.load(modelfile)
|
28 |
|
29 |
|
@@ -79,7 +82,7 @@ def process_youtube_address(youtube_address):
|
|
79 |
|
80 |
def process_micro(micro):
|
81 |
x=preprocess_audio(micro)
|
82 |
-
output = MODEL(x)
|
83 |
print(output)
|
84 |
result = postprocess_output(output)
|
85 |
|
@@ -89,12 +92,51 @@ def process_file(file):
|
|
89 |
x,fs = librosa.load(file, sr=FS)
|
90 |
x=preprocess_audio((fs,x))
|
91 |
print("Running model")
|
92 |
-
output = MODEL(x)
|
93 |
print(output)
|
94 |
result = postprocess_output(output)
|
95 |
|
96 |
return result
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
def process_youtube(youtube_address):
|
99 |
audiofile=process_youtube_address(youtube_address)
|
100 |
|
@@ -110,27 +152,53 @@ def process_youtube(youtube_address):
|
|
110 |
|
111 |
|
112 |
with gr.Blocks(title="Audio Fake Detector") as demo:
|
113 |
-
gr.
|
|
|
114 |
|
115 |
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
|
129 |
#file= gr.Audio(source="upload", type="filepath", optional=True)
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
|
136 |
#btn = gr.Button("Run")
|
|
|
8 |
import os
|
9 |
import random
|
10 |
from huggingface_hub import HfApi
|
11 |
+
import pandas as pd
|
12 |
+
from moviepy.editor import *
|
13 |
|
14 |
FS=16000
|
15 |
MAX_SIZE = FS * 30
|
|
|
17 |
HF_TOKEN_DEMO=os.getenv("HF_TOKEN_DEMO")
|
18 |
MODEL_REPO=os.getenv("MODEL_REPO")
|
19 |
MODELNAME=os.getenv("MODELNAME")
|
20 |
+
MODELNAME2=os.getenv("MODELNAME2")
|
21 |
username=os.getenv("username")
|
22 |
password=os.getenv("password")
|
23 |
|
|
|
26 |
token=HF_TOKEN_DEMO, # Token is not persisted on the machine.
|
27 |
)
|
28 |
|
29 |
+
modelfile = hf_api.hf_hub_download(MODEL_REPO,MODELNAME2)
|
30 |
MODEL = torch.jit.load(modelfile)
|
31 |
|
32 |
|
|
|
82 |
|
83 |
def process_micro(micro):
|
84 |
x=preprocess_audio(micro)
|
85 |
+
output,_ = MODEL(x)
|
86 |
print(output)
|
87 |
result = postprocess_output(output)
|
88 |
|
|
|
92 |
x,fs = librosa.load(file, sr=FS)
|
93 |
x=preprocess_audio((fs,x))
|
94 |
print("Running model")
|
95 |
+
output,_ = MODEL(x)
|
96 |
print(output)
|
97 |
result = postprocess_output(output)
|
98 |
|
99 |
return result
|
100 |
|
101 |
+
def process_files(files):
    """Score a batch of uploaded audio files and tabulate the results.

    Each entry of ``files`` must expose a ``name`` attribute holding the
    path of the file on disk. Every file is loaded with librosa at the
    module sampling rate ``FS``, passed through ``preprocess_audio`` and
    ``MODEL``, and the first value returned by ``postprocess_output`` is
    kept as the score for that file.

    Args:
        files: Iterable of uploaded file objects (each with ``.name``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``"File"`` (base name of
        each input) and ``"Probability of Real"`` (its score), in input
        order.
    """
    names = []
    scores = []

    for upload in files:
        path = upload.name

        # librosa resamples to FS on load; preprocess_audio takes (rate, samples).
        samples, rate = librosa.load(path, sr=FS)
        features = preprocess_audio((rate, samples))

        print("Running model")
        # MODEL yields two values; only the first one is used here.
        output, _ = MODEL(features)
        print(output)

        # NOTE(review): this unpacks two values from postprocess_output,
        # the second of which is currently unused — confirm against its
        # definition.
        score, _secondary = postprocess_output(output)

        scores.append(score)
        names.append(os.path.basename(path))

    return pd.DataFrame({"File": names, "Probability of Real": scores})
|
124 |
+
|
125 |
+
def process_video(file):
    """Extract the audio track of a video and score it with ``process_file``.

    The audio is written to a uniquely named temporary ``.wav`` file under
    the local ``tmp`` directory, handed to ``process_file``, and removed
    afterwards. The video clip is always closed and the temporary file is
    always deleted, even when extraction or scoring fails.

    Args:
        file: Path of the video file to analyse.

    Returns:
        Whatever ``process_file`` returns for the extracted audio.

    Raises:
        ValueError: If the video contains no audio track.
    """
    import uuid  # local import so the block does not rely on file-level changes

    video = VideoFileClip(file)
    try:
        audio = video.audio
        if audio is None:
            # Without this guard the failure would be an opaque
            # AttributeError on ``None``.
            raise ValueError("The uploaded video has no audio track.")

        os.makedirs('tmp', exist_ok=True)
        # A UUID avoids the name collisions that a random number in
        # [0, 1000] would cause between concurrent requests.
        audiowav = os.path.join('tmp', "audio-" + uuid.uuid4().hex + ".wav")
        try:
            # ``write_audiofile`` is the documented name; ``to_audiofile``
            # is only a deprecated alias of it.
            audio.write_audiofile(audiowav)
            result = process_file(audiowav)
        finally:
            # Clean up even if writing or scoring raised.
            if os.path.exists(audiowav):
                os.remove(audiowav)
    finally:
        # Release the underlying reader resources held by the clip.
        video.close()

    return result
|
139 |
+
|
140 |
def process_youtube(youtube_address):
|
141 |
audiofile=process_youtube_address(youtube_address)
|
142 |
|
|
|
152 |
|
153 |
|
154 |
with gr.Blocks(title="Audio Fake Detector") as demo:
|
155 |
+
with gr.Tab("Individual Processing"):
|
156 |
+
gr.Markdown("# Welcome to Loccus.ai synthetic voice detection demo!")
|
157 |
|
158 |
|
159 |
|
160 |
+
with gr.Row():
|
161 |
+
with gr.Column():
|
162 |
+
m = gr.Audio(source="microphone", type="numpy",label="Micro")
|
163 |
+
f = gr.Audio(source="upload", type="filepath", label="Audio file")
|
164 |
+
y = gr.Textbox(label="Enter YouTube address here")
|
165 |
+
v = gr.Video(label="Enter a video", include_audio=True, scale=0.5)
|
166 |
+
|
167 |
+
with gr.Column():
|
168 |
+
with gr.Row(equal_height=True):
|
169 |
+
|
170 |
+
text = gr.Textbox(label="Probability of Real Voice")
|
171 |
|
172 |
#file= gr.Audio(source="upload", type="filepath", optional=True)
|
173 |
+
button_clear = gr.ClearButton([m,f,y,v,text])
|
174 |
+
m.stop_recording(process_micro, inputs=[m], outputs=text)
|
175 |
+
f.upload(process_file,inputs=[f], outputs=text)
|
176 |
+
y.submit(process_youtube, inputs=[y], outputs=text)
|
177 |
+
v.upload(process_video, inputs=[v], outputs=[text])
|
178 |
+
|
179 |
+
with gr.Tab("Batch Processing"):
|
180 |
+
gr.Markdown("# Welcome to Loccus.ai synthetic voice detection demo!")
|
181 |
+
|
182 |
+
with gr.Row():
|
183 |
+
with gr.Column():
|
184 |
+
f = gr.File(file_types=["audio"], label="Audio file", file_count="multiple")
|
185 |
+
|
186 |
+
|
187 |
+
with gr.Column():
|
188 |
+
with gr.Row(equal_height=True):
|
189 |
+
|
190 |
+
textbatch = gr.Dataframe(
|
191 |
+
headers=["File", "Probability of Real"],
|
192 |
+
datatype=["str", "str"],
|
193 |
+
)
|
194 |
+
|
195 |
+
#text = gr.Textbox(label="Probability of Real Voice")
|
196 |
+
#text2 = gr.Textbox(label="Amp Mean Score")
|
197 |
+
|
198 |
+
button_clear = gr.ClearButton([f,textbatch])
|
199 |
+
|
200 |
+
f.upload(process_files,inputs=[f], outputs=[textbatch])
|
201 |
+
|
202 |
|
203 |
|
204 |
#btn = gr.Button("Run")
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ soundfile
|
|
4 |
librosa
|
5 |
numpy
|
6 |
pytube
|
7 |
-
huggingface_hub
|
|
|
|
4 |
librosa
|
5 |
numpy
|
6 |
pytube
|
7 |
+
huggingface_hub
|
8 |
+
moviepy
|