Update app.py
app.py CHANGED
@@ -24,17 +24,6 @@ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
 from TTS.api import TTS
 import pickle
 
-# Try to install OpenVoice from GitHub if not found
-try:
-    from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
-    from openvoice.se_extractor import get_se
-except ImportError:
-    print("Installing OpenVoice from GitHub...")
-    import subprocess
-    subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
-    from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
-    from openvoice.se_extractor import get_se
-
 # Suppress warnings
 warnings.filterwarnings("ignore")
 
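Note: the block removed above tried to pip-install OpenVoice at import time, which is fragile on Spaces. For a local install, an optional-import guard of the following shape keeps the rest of the app importable when the package is missing. This is a minimal sketch outside the commit; the OPENVOICE_AVAILABLE flag is an illustrative name.

# Minimal sketch (not part of this commit): guard the optional OpenVoice import
# instead of installing it at import time. OPENVOICE_AVAILABLE is an illustrative name.
try:
    from openvoice.api import ToneColorConverter
    from openvoice.se_extractor import get_se
    OPENVOICE_AVAILABLE = True
except ImportError:
    ToneColorConverter = None
    get_se = None
    OPENVOICE_AVAILABLE = False
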
@@ -337,30 +326,9 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
 
-# === Voice Cloning
-def clone_voice(
-
-    source_se, _ = get_se(source_audio)
-    target_se, _ = get_se(target_audio)
-
-    # Generate base TTS
-    out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
-    tts.tts_to_file(text=text, file_path=out_path)
-
-    # Apply voice conversion
-    tone_converter.convert(
-        audio_src_path=out_path,
-        src_se=source_se,
-        tgt_se=target_se,
-        output_path=out_path
-    )
-
-    return out_path
-except Exception as e:
-    return f"⚠️ Cloning failed: {str(e)}"
-
-tone_converter = ToneColorConverter().to("cuda" if torch.cuda.is_available() else "cpu")
-openvoice_tts = OpenVoiceTTS(lang='en')
+# === Dummy Voice Cloning Tab – Works on Hugging Face ===
+def clone_voice(*args):
+    return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
 
 # === Speaker Diarization ("Who Spoke When?") ===
 try:
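For reference, the cloning path that the deleted lines sketched would read roughly as below when run locally with OpenVoice and Coqui TTS installed. This is a reconstruction of the removed code, not tested here; get_se, tts and tone_converter are the helpers the removed block created, and the installed OpenVoice version may require extra arguments (for example a converter checkpoint) that the original code omitted.

# Reconstruction of the deleted local cloning path (assumes OpenVoice + Coqui TTS
# are installed; tts and tone_converter are initialised elsewhere, as in the old code).
import os
import tempfile

def clone_voice(source_audio, target_audio, text):
    try:
        # Speaker embeddings for the source and target voices
        source_se, _ = get_se(source_audio)
        target_se, _ = get_se(target_audio)

        # Generate base speech from the text
        out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
        tts.tts_to_file(text=text, file_path=out_path)

        # Re-colour the generated audio with the target speaker's tone
        tone_converter.convert(
            audio_src_path=out_path,
            src_se=source_se,
            tgt_se=target_se,
            output_path=out_path,
        )
        return out_path
    except Exception as e:
        return f"⚠️ Cloning failed: {str(e)}"
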
@@ -376,19 +344,21 @@ except Exception as e:
     print(f"⚠️ Failed to load diarization: {e}")
 
 def diarize_and_transcribe(audio_path):
-    if diarize_pipeline
+    if not diarize_pipeline:
         return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
 
+    # Run diarization
     audio = AudioSegment.from_file(audio_path)
     temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
     audio.export(temp_wav, format="wav")
 
     try:
+        from pyannote.audio import Pipeline as DiarizationPipeline
         diarization = diarize_pipeline(temp_wav)
 
         result = whisper.transcribe(temp_wav)
-        segments = []
 
+        segments = []
         for turn, _, speaker in diarization.itertracks(yield_label=True):
             text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
             segments.append({
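The new guard checks diarize_pipeline, which is loaded in the try/except further up in app.py (outside this diff). A typical way to create it with pyannote.audio looks like the sketch below; the model id and the HF_TOKEN environment variable are illustrative choices, and newer pyannote releases may name the token argument differently.

# Illustrative loading of the diarization pipeline (the real code is outside this diff).
import os
from pyannote.audio import Pipeline

diarize_pipeline = None
try:
    diarize_pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",             # illustrative model id
        use_auth_token=os.environ.get("HF_TOKEN"),  # gated model needs an HF token
    )
except Exception as e:
    print(f"⚠️ Failed to load diarization: {e}")
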
@@ -492,8 +462,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Convert voice to text and edit it before exporting again."
         )
 
-    # --- Voice Cloning (Dubbing) ===
-    with gr.Tab("🎭 Voice Cloning (
+    # --- Voice Cloning (Dubbing) – Dummy for Hugging Face ===
+    with gr.Tab("🎭 Voice Cloning (Local Only)"):
         gr.Interface(
             fn=clone_voice,
             inputs=[
@@ -502,7 +472,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
                 gr.Textbox(label="Text to Clone", lines=5)
             ],
             outputs=gr.Audio(label="Cloned Output", type="filepath"),
-            title="Replace One Voice With Another",
+            title="Replace One Voice With Another (Local Only)",
             description="Clone voice from source to target speaker using AI"
         )
 
@@ -543,7 +513,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         return None, None, None, None
 
     with gr.Tab("🧾 Auto-Save & Resume"):
-        gr.Markdown("Save your current state and resume later.")
+        gr.Markdown("Save your current state and resume editing later.")
 
         action_radio = gr.Radio(["save", "load"], label="Action", value="save")
         audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
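The Auto-Save & Resume tab offers save and load actions, and app.py imports pickle for this purpose. A minimal handler of the shape below would cover both actions; the state file path, dictionary fields and function name are assumptions, since the real handler is not part of this diff.

# Illustrative pickle-based save/resume handler (names and fields are assumptions).
import os
import pickle
import tempfile

STATE_PATH = os.path.join(tempfile.gettempdir(), "audio_studio_state.pkl")

def autosave(action, audio_path):
    if action == "save":
        with open(STATE_PATH, "wb") as f:
            pickle.dump({"audio_path": audio_path}, f)
        return audio_path
    # action == "load": restore the last saved audio path, if any
    if os.path.exists(STATE_PATH):
        with open(STATE_PATH, "rb") as f:
            return pickle.load(f).get("audio_path")
    return None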