tee342 committed on
Commit
6085d7e
·
verified ·
1 Parent(s): aea8d70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -41
app.py CHANGED
@@ -24,17 +24,6 @@ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
24
  from TTS.api import TTS
25
  import pickle
26
 
27
- # Try to install OpenVoice from GitHub if not found
28
- try:
29
- from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
30
- from openvoice.se_extractor import get_se
31
- except ImportError:
32
- print("Installing OpenVoice from GitHub...")
33
- import subprocess
34
- subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
35
- from openvoice.api import TTS as OpenVoiceTTS, ToneColorConverter
36
- from openvoice.se_extractor import get_se
37
-
38
  # Suppress warnings
39
  warnings.filterwarnings("ignore")
40
 
@@ -337,30 +326,9 @@ def mix_tracks(track1, track2, volume_offset=0):
337
  mixed.export(out_path, format="wav")
338
  return out_path
339
 
340
- # === Voice Cloning / Dubbing Tab ===
341
- def clone_voice(source_audio, target_audio, text):
342
- try:
343
- source_se, _ = get_se(source_audio)
344
- target_se, _ = get_se(target_audio)
345
-
346
- # Generate base TTS
347
- out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
348
- tts.tts_to_file(text=text, file_path=out_path)
349
-
350
- # Apply voice conversion
351
- tone_converter.convert(
352
- audio_src_path=out_path,
353
- src_se=source_se,
354
- tgt_se=target_se,
355
- output_path=out_path
356
- )
357
-
358
- return out_path
359
- except Exception as e:
360
- return f"⚠️ Cloning failed: {str(e)}"
361
-
362
- tone_converter = ToneColorConverter().to("cuda" if torch.cuda.is_available() else "cpu")
363
- openvoice_tts = OpenVoiceTTS(lang='en')
364
 
365
  # === Speaker Diarization ("Who Spoke When?") ===
366
  try:
@@ -376,19 +344,21 @@ except Exception as e:
376
  print(f"⚠️ Failed to load diarization: {e}")
377
 
378
  def diarize_and_transcribe(audio_path):
379
- if diarize_pipeline is None:
380
  return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
381
 
 
382
  audio = AudioSegment.from_file(audio_path)
383
  temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
384
  audio.export(temp_wav, format="wav")
385
 
386
  try:
 
387
  diarization = diarize_pipeline(temp_wav)
388
 
389
  result = whisper.transcribe(temp_wav)
390
- segments = []
391
 
 
392
  for turn, _, speaker in diarization.itertracks(yield_label=True):
393
  text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
394
  segments.append({
@@ -492,8 +462,8 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
492
  description="Convert voice to text and edit it before exporting again."
493
  )
494
 
495
- # --- Voice Cloning (Dubbing) ===
496
- with gr.Tab("🎭 Voice Cloning (Dubbing)"):
497
  gr.Interface(
498
  fn=clone_voice,
499
  inputs=[
@@ -502,7 +472,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
502
  gr.Textbox(label="Text to Clone", lines=5)
503
  ],
504
  outputs=gr.Audio(label="Cloned Output", type="filepath"),
505
- title="Replace One Voice With Another",
506
  description="Clone voice from source to target speaker using AI"
507
  )
508
 
@@ -543,7 +513,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
543
  return None, None, None, None
544
 
545
  with gr.Tab("🧾 Auto-Save & Resume"):
546
- gr.Markdown("Save your current state and resume later.")
547
 
548
  action_radio = gr.Radio(["save", "load"], label="Action", value="save")
549
  audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
 
24
  from TTS.api import TTS
25
  import pickle
26
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Suppress warnings
28
  warnings.filterwarnings("ignore")
29
 
 
326
  mixed.export(out_path, format="wav")
327
  return out_path
328
 
329
+ # === Dummy Voice Cloning Tab (placeholder so the app still runs on Hugging Face) ===
330
+ def clone_voice(*args):
331
+ return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
  # === Speaker Diarization ("Who Spoke When?") ===
334
  try:
 
344
  print(f"⚠️ Failed to load diarization: {e}")
345
 
346
  def diarize_and_transcribe(audio_path):
347
+ if not diarize_pipeline:
348
  return "⚠️ Diarization pipeline not loaded – check HF token or install pyannote.audio"
349
 
350
+ # Run diarization
351
  audio = AudioSegment.from_file(audio_path)
352
  temp_wav = os.path.join(tempfile.gettempdir(), "diarize.wav")
353
  audio.export(temp_wav, format="wav")
354
 
355
  try:
356
+ from pyannote.audio import Pipeline as DiarizationPipeline
357
  diarization = diarize_pipeline(temp_wav)
358
 
359
  result = whisper.transcribe(temp_wav)
 
360
 
361
+ segments = []
362
  for turn, _, speaker in diarization.itertracks(yield_label=True):
363
  text = " ".join([seg["text"] for seg in result["segments"] if seg["start"] >= turn.start and seg["end"] <= turn.end])
364
  segments.append({
 
462
  description="Convert voice to text and edit it before exporting again."
463
  )
464
 
465
+ # === Voice Cloning (Dubbing) – Dummy for Hugging Face ===
466
+ with gr.Tab("🎭 Voice Cloning (Local Only)"):
467
  gr.Interface(
468
  fn=clone_voice,
469
  inputs=[
 
472
  gr.Textbox(label="Text to Clone", lines=5)
473
  ],
474
  outputs=gr.Audio(label="Cloned Output", type="filepath"),
475
+ title="Replace One Voice With Another (Local Only)",
476
  description="Clone voice from source to target speaker using AI"
477
  )
478
 
 
513
  return None, None, None, None
514
 
515
  with gr.Tab("🧾 Auto-Save & Resume"):
516
+ gr.Markdown("Save your current state and resume editing later.")
517
 
518
  action_radio = gr.Radio(["save", "load"], label="Action", value="save")
519
  audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")