Spaces:

nateraw
/

voice-cloning

Running

App Files Community

nateraw committed on Apr 25, 2023

Commit

7fc71d5

1 Parent(s): 606a5bf

Synced repo using 'sync_with_huggingface' Github Action

Browse files

Files changed (1) hide show

app.py +5 -4

app.py CHANGED Viewed

@@ -30,6 +30,8 @@ default_f0_method = "crepe"
 # The default ratio of cluster inference to SVC inference.
 # If cluster_model_name is not found in the repo, this is set to 0.
 default_cluster_infer_ratio = 0.5
 ###################################################################
 # Figure out the latest generator by taking highest value one.
@@ -135,7 +137,7 @@ def predict(
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
-    audio, _ = librosa.load(audio, sr=model.target_sample)
     audio = model.infer_silence(
         audio.astype(np.float32),
         speaker=speaker,
@@ -167,6 +169,7 @@ def predict_song_from_yt(
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
     original_track_filepath = download_youtube_clip(
         ytid_or_url,
         start,
@@ -201,9 +204,7 @@ description = f"""
 <center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></center>
-## This app uses models trained with [so-vits-svc-fork](https://github.com/voicepaw/so-vits-svc-fork) to clone a voice. Model currently being used is https://hf.co/{repo_id}.
-#### To change the model being served, duplicate the space and update the `repo_id`/other settings in `app.py`.
 #### Train Your Own: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/voice-cloning/blob/main/training_so_vits_svc_fork.ipynb)
 """.strip()

 # The default ratio of cluster inference to SVC inference.
 # If cluster_model_name is not found in the repo, this is set to 0.
 default_cluster_infer_ratio = 0.5
+# Limit, in seconds, on the duration of audio processed at inference time. Increase it if you can.
+duration_limit = 30
 ###################################################################
 # Figure out the latest generator by taking highest value one.
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
+    audio, _ = librosa.load(audio, sr=model.target_sample, duration=duration_limit)
     audio = model.infer_silence(
         audio.astype(np.float32),
         speaker=speaker,
     chunk_seconds: float = 0.5,
     absolute_thresh: bool = False,
 ):
+    end = min(start + duration_limit, end)
     original_track_filepath = download_youtube_clip(
         ytid_or_url,
         start,
 <center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></center>
+#### This app uses models trained with [so-vits-svc-fork](https://github.com/voicepaw/so-vits-svc-fork) to clone a voice. Model currently being used is https://hf.co/{repo_id}. To change the model being served, duplicate the space and update the `repo_id`/other settings in `app.py`.
 #### Train Your Own: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/voice-cloning/blob/main/training_so_vits_svc_fork.ipynb)
 """.strip()