David Thomas committed
Commit 8ee452e
1 Parent(s): 386449f
Files changed (4)
  1. app.py +102 -9
  2. extract_feature_print.py +0 -0
  3. main.py +6 -0
  4. utils.py +1 -0
app.py CHANGED
@@ -221,6 +221,7 @@ from vc_infer_pipeline import VC
 from config import Config
 
 config = Config()
+cpt=None
 # from trainset_preprocess_pipeline import PreProcess
 logging.getLogger("numba").setLevel(logging.WARNING)
 
@@ -248,6 +249,91 @@ index_paths = ["./logs/joel/added_IVF479_Flat_nprobe_1.index","./logs/jenny/adde
 file_index=None
 
 
+# Define a function to calculate a "similarity score" to identify potential copyright infringement
+def calculate_similarity_score(
+    audio0,
+    index_file,
+    sid0,
+    version="v1",
+    #protect=0.3,
+    index_rate=0.67,
+    #pitch
+): # ,file_index,file_big_npy
+    """ Extract features from audio using the Hubert model """
+    extracted_feats = None
+    model = None
+    if sid0 == "joel.pth":
+        big_npy = "./logs/joel/total_fea.npy"
+    elif sid0 == "jenny.pth":
+        big_npy = "./logs/jenny/total_fea.npy"
+    try:
+        audio = load_audio(audio0, 16000, DoFormant, Quefrency, Timbre)
+        logging.log(logging.INFO, "audio loaded")
+        audio_max = np.abs(audio).max() / 0.95
+        if audio_max > 1:
+            audio /= audio_max
+    except TypeError as e:
+        print(e)
+        return None
+    feats = torch.from_numpy(audio)
+    # Use the "load_hubert_model" function to load the model
+    if hubert_model is None:
+        load_hubert()
+    # Set the "model" variable to the loaded model
+    model = hubert_model
+    # If the model is half precision, convert the features to half precision
+    if config.is_half:
+        feats = feats.half()
+    else:
+        feats = feats.float()
+    if feats.dim() == 2:  # double channels
+        feats = feats.mean(-1)
+    assert feats.dim() == 1, feats.dim()
+    feats = feats.view(1, -1)
+    padding_mask = torch.BoolTensor(feats.shape).to(config.device).fill_(False)
+    inputs = {
+        "source": feats.to(config.device),
+        "padding_mask": padding_mask,
+        "output_layer": 9 if version == "v1" else 12,
+    }
+    with torch.no_grad():
+        logits = model.extract_features(**inputs)
+        feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
+    #if protect < 0.5 and pitch != None and pitchf != None:
+    #    feats0 = feats.clone()
+    if (
+        isinstance(index_file, type(None)) == False
+        and isinstance(big_npy, type(None)) == False
+        and index_rate != 0
+    ):
+        npy = feats[0].cpu().numpy()
+        if config.is_half:
+            npy = npy.astype("float32")
+
+        extracted_feats = npy
+
+        if config.is_half:
+            extracted_feats = extracted_feats.astype("float32")
+        # Convert the big_npy file to a numpy array and match the type
+        # to the extracted features
+        big_npy = np.load(big_npy)
+        if config.is_half:
+            big_npy = big_npy.astype("float32")
+
+        # Use the extracted features and the big_npy file to calculate whether or
+        # not the audio vocalist is the same as the one in the big_npy file
+        # compare the distances between the extracted features and the big_npy file
+        # to determine the similarity score
+        index = faiss.read_index(index_file)
+        D, I = index.search(big_npy, k=1)  # search index for nearest match
+        distances = np.sqrt(D[:, 0])  # use L2 distance
+        threshold = np.percentile(distances, 50)  # set threshold to exclude outliers
+        score, ix = index.search(extracted_feats, k=1)
+        if score[0][0] < threshold:
+            print("Potential unauthorized use detected!")
+
+    return f"Score {score[0][0]}, {distances}"
+
 
 def vc_single(
     sid,
@@ -1556,9 +1642,8 @@ with gr.Blocks(theme=gr.themes.Base(), title='RVC RULE1 v1') as app:
         # face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
         with gr.Row():
             animation = gr.Video(type='filepath')
-        refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
-        # with gr.Row():
-        #     animate_button = gr.Button('Animate')
+        refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
+        animate_button = gr.Button('Animate')
 
     with gr.Column():
         with gr.Accordion("Index Settings", open=False):
@@ -1584,12 +1669,20 @@ with gr.Blocks(theme=gr.themes.Base(), title='RVC RULE1 v1') as app:
                 value=0.66,
                 interactive=True,
             )
-        vc_output2 = gr.Audio(
-            label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
-            type='filepath',
-            interactive=False,
-        )
-        #animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
+        with gr.Row():
+            vc_output2 = gr.Audio(
+                label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
+                type='filepath',
+                interactive=False,
+            )
+        with gr.Row():
+            # Create a new button to calculate the similarity score
+            similarity_button = gr.Button("Calculate Similarity Score", variant="primary")
+        with gr.Row():
+            similarity_score = gr.Textbox(label="Similarity Score", type="text", interactive=False)
+        similarity_button.click(fn=calculate_similarity_score, inputs=[input_audio0, file_index1, sid0], outputs=[similarity_score])
+        print(file_index1)
+        #animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
         with gr.Accordion("Advanced Settings", open=False):
            f0method0 = gr.Radio(
                label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
extract_feature_print.py ADDED
File without changes
main.py ADDED
@@ -0,0 +1,6 @@
+""" Main file to run the application. """
+import uvicorn
+from app.app import app
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
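Note: the committed main.py imports app from app.app, which presumes a package layout (app/app.py) and an object uvicorn can serve. Since app.py builds its UI as `with gr.Blocks(...) as app:`, one possible arrangement, shown here only as a hedged sketch and not part of this commit, is to mount that Blocks object into a FastAPI application before handing it to uvicorn.

""" Hypothetical variant of main.py (an assumption, not the committed code). """
import gradio as gr
import uvicorn
from fastapi import FastAPI

from app import app as blocks  # the gr.Blocks UI defined in app.py, assumed importable

server = FastAPI()
# gr.mount_gradio_app wraps the Blocks UI so an ASGI server can host it at the given path
server = gr.mount_gradio_app(server, blocks, path="/")

if __name__ == "__main__":
    uvicorn.run(server, host="0.0.0.0", port=8000)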
utils.py CHANGED
@@ -150,3 +150,4 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
         converted = False
 
     return np.frombuffer(out, np.float32).flatten()
+