David Thomas
committed on
Commit
·
8ee452e
1
Parent(s):
386449f
ui tweaks
Browse files- app.py +102 -9
- extract_feature_print.py +0 -0
- main.py +6 -0
- utils.py +1 -0
app.py
CHANGED
@@ -221,6 +221,7 @@ from vc_infer_pipeline import VC
|
|
221 |
from config import Config
|
222 |
|
223 |
config = Config()
|
|
|
224 |
# from trainset_preprocess_pipeline import PreProcess
|
225 |
logging.getLogger("numba").setLevel(logging.WARNING)
|
226 |
|
@@ -248,6 +249,91 @@ index_paths = ["./logs/joel/added_IVF479_Flat_nprobe_1.index","./logs/jenny/adde
|
|
248 |
file_index=None
|
249 |
|
250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
def vc_single(
|
253 |
sid,
|
@@ -1556,9 +1642,8 @@ with gr.Blocks(theme=gr.themes.Base(), title='RVC RULE1 v1') as app:
|
|
1556 |
# face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
|
1557 |
with gr.Row():
|
1558 |
animation = gr.Video(type='filepath')
|
1559 |
-
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
|
1560 |
-
|
1561 |
-
# animate_button = gr.Button('Animate')
|
1562 |
|
1563 |
with gr.Column():
|
1564 |
with gr.Accordion("Index Settings", open=False):
|
@@ -1584,12 +1669,20 @@ with gr.Blocks(theme=gr.themes.Base(), title='RVC RULE1 v1') as app:
|
|
1584 |
value=0.66,
|
1585 |
interactive=True,
|
1586 |
)
|
1587 |
-
|
1588 |
-
|
1589 |
-
|
1590 |
-
|
1591 |
-
|
1592 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1593 |
with gr.Accordion("Advanced Settings", open=False):
|
1594 |
f0method0 = gr.Radio(
|
1595 |
label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
|
|
|
221 |
from config import Config
|
222 |
|
223 |
config = Config()
|
224 |
+
cpt=None
|
225 |
# from trainset_preprocess_pipeline import PreProcess
|
226 |
logging.getLogger("numba").setLevel(logging.WARNING)
|
227 |
|
|
|
249 |
file_index=None
|
250 |
|
251 |
|
252 |
+
# Define a function to calculate a "similarity score" to identify potential copyright infringement
def calculate_similarity_score(
    audio0,
    index_file,
    sid0,
    version="v1",
    #protect=0.3,
    index_rate=0.67,
    #pitch
): # ,file_index,file_big_npy
    """Compare input audio against a voice model's feature set and report a similarity score.

    Extracts HuBERT features from the audio at ``audio0``, searches them
    against the FAISS index built for the selected voice model, and returns
    a textual score. A search distance below the median self-distance of the
    model's own stored features is flagged as potential unauthorized use.

    Parameters:
        audio0: path to the audio file to analyse.
        index_file: path to the FAISS index for the voice model, or None.
        sid0: voice model filename ("joel.pth" or "jenny.pth"); any other
            value yields no stored-feature file and the function returns None.
        version: selects the HuBERT output layer — 9 for "v1", 12 otherwise.
        index_rate: kept for interface compatibility; 0 skips the search.

    Returns:
        A string "Score <distance>, <distances>" on success, or None when the
        audio cannot be loaded or no comparison could be made.
    """
    # Map the selected voice model to its stored feature matrix.
    # BUG FIX: big_npy was previously unbound for unknown sid0 values and
    # raised UnboundLocalError further down; default it to None instead.
    big_npy = None
    if sid0 == "joel.pth":
        big_npy = "./logs/joel/total_fea.npy"
    elif sid0 == "jenny.pth":
        big_npy = "./logs/jenny/total_fea.npy"

    try:
        audio = load_audio(audio0, 16000, DoFormant, Quefrency, Timbre)
        logging.log(logging.INFO, "audio loaded")
        # Normalise so the peak amplitude is at most ~0.95 of full scale.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
    except TypeError as e:
        print(e)
        return None

    feats = torch.from_numpy(audio)
    # Lazily load the global HuBERT model on first use.
    if hubert_model is None:
        load_hubert()
    model = hubert_model
    # Match the feature dtype to the model precision.
    if config.is_half:
        feats = feats.half()
    else:
        feats = feats.float()
    if feats.dim() == 2:  # double channels: average to mono
        feats = feats.mean(-1)
    assert feats.dim() == 1, feats.dim()
    feats = feats.view(1, -1)
    padding_mask = torch.BoolTensor(feats.shape).to(config.device).fill_(False)
    inputs = {
        "source": feats.to(config.device),
        "padding_mask": padding_mask,
        "output_layer": 9 if version == "v1" else 12,
    }
    with torch.no_grad():
        logits = model.extract_features(**inputs)
        feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
    #if protect < 0.5 and pitch != None and pitchf != None:
    #    feats0 = feats.clone()

    if index_file is not None and big_npy is not None and index_rate != 0:
        # FAISS requires float32 input; the half-precision path produces
        # float16 features, so cast once (no-op when already float32).
        # (Replaces the original's redundant double astype("float32").)
        extracted_feats = feats[0].cpu().numpy().astype("float32")

        # Convert the big_npy file to a numpy array and match the type
        # to the extracted features.
        big_npy = np.load(big_npy)
        if config.is_half:
            big_npy = big_npy.astype("float32")

        # Use the extracted features and the big_npy file to estimate whether
        # the audio vocalist is the same as the one in the big_npy file: the
        # median nearest-neighbour self-distance of the stored features acts
        # as the detection threshold.
        index = faiss.read_index(index_file)
        D, _ = index.search(big_npy, k=1)  # nearest match for each stored feature
        distances = np.sqrt(D[:, 0])  # use L2 distance
        threshold = np.percentile(distances, 50)  # set threshold to exclude outliers
        score, _ = index.search(extracted_feats, k=1)
        if score[0][0] < threshold:
            print("Potential unauthorized use detected!")

        return f"Score {score[0][0]}, {distances}"
    # No index / stored features available (unknown voice or index_rate == 0).
    return None
|
336 |
+
|
337 |
|
338 |
def vc_single(
|
339 |
sid,
|
|
|
1642 |
# face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
|
1643 |
with gr.Row():
|
1644 |
animation = gr.Video(type='filepath')
|
1645 |
+
refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation]) # with gr.Row():
|
1646 |
+
animate_button = gr.Button('Animate')
|
|
|
1647 |
|
1648 |
with gr.Column():
|
1649 |
with gr.Accordion("Index Settings", open=False):
|
|
|
1669 |
value=0.66,
|
1670 |
interactive=True,
|
1671 |
)
|
1672 |
+
with gr.Row():
|
1673 |
+
vc_output2 = gr.Audio(
|
1674 |
+
label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
|
1675 |
+
type='filepath',
|
1676 |
+
interactive=False,
|
1677 |
+
)
|
1678 |
+
with gr.Row():
|
1679 |
+
# Create a new button to calculate the similarity score
|
1680 |
+
similarity_button = gr.Button("Calculate Similarity Score", variant="primary")
|
1681 |
+
with gr.Row():
|
1682 |
+
similarity_score = gr.Textbox(label="Similarity Score", type="text", interactive=False)
|
1683 |
+
similarity_button.click(fn=calculate_similarity_score, inputs=[input_audio0, file_index1, sid0], outputs=[similarity_score])
|
1684 |
+
print(file_index1)
|
1685 |
+
#animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
|
1686 |
with gr.Accordion("Advanced Settings", open=False):
|
1687 |
f0method0 = gr.Radio(
|
1688 |
label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
|
extract_feature_print.py
ADDED
File without changes
|
main.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" Main file to run the application. """
|
2 |
+
import uvicorn
|
3 |
+
from app.app import app
|
4 |
+
|
5 |
+
if __name__ == "__main__":
|
6 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
utils.py
CHANGED
@@ -150,3 +150,4 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
|
150 |
converted = False
|
151 |
|
152 |
return np.frombuffer(out, np.float32).flatten()
|
|
|
|
150 |
converted = False
|
151 |
|
152 |
return np.frombuffer(out, np.float32).flatten()
|
153 |
+
|