Hammad112 committed on
Commit
31beb62
·
verified ·
1 Parent(s): 410e3d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -7
app.py CHANGED
@@ -4,6 +4,9 @@ from scipy.io.wavfile import write
4
  import tempfile
5
  import os
6
  from pydub import AudioSegment
 
 
 
7
 
8
  # Initialize model configuration
9
  model_config = outetts.HFModelConfig_v1(
@@ -21,6 +24,7 @@ st.write("Enter text below to generate speech.")
21
  # Sidebar for reference voice
22
  st.sidebar.title("Voice Cloning")
23
  reference_audio = st.sidebar.file_uploader("Upload a reference audio (any format)", type=["wav", "mp3", "ogg", "flac", "m4a"])
 
24
 
25
  # Function to convert audio to WAV format
26
  def convert_to_wav(audio_file):
@@ -29,28 +33,51 @@ def convert_to_wav(audio_file):
29
  audio.export(temp_audio.name, format="wav")
30
  return temp_audio.name
31
 
32
- if reference_audio:
33
  ref_audio_path = convert_to_wav(reference_audio)
 
 
 
 
34
  else:
35
- ref_audio_path = None
36
 
37
  # Recording functionality
38
- if ref_audio_path is None:
 
 
 
 
 
 
 
 
 
 
 
 
39
  st.sidebar.write("Or record your voice below:")
40
  if st.sidebar.button("Record Voice"):
41
- st.sidebar.warning("Recording functionality not implemented yet. Please upload a file.")
 
 
 
 
 
 
 
42
 
43
  text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")
44
 
45
  if st.button("Generate Speech"):
46
  with st.spinner("Generating audio..."):
47
- # Generate speech with reference audio
48
  output = interface.generate(
49
  text=text_input,
50
  temperature=0.1,
51
  repetition_penalty=1.1,
52
  max_length=4096,
53
- speaker_wav=ref_audio_path if ref_audio_path else None
54
  )
55
 
56
  # Save the synthesized speech to a file
@@ -62,5 +89,5 @@ if st.button("Generate Speech"):
62
  st.success("Speech generated successfully!")
63
 
64
  # Clean up temporary files
65
- if ref_audio_path:
66
  os.remove(ref_audio_path)
 
4
  import tempfile
5
  import os
6
  from pydub import AudioSegment
7
+ import sounddevice as sd
8
+ import wave
9
+ import numpy as np
10
 
11
  # Initialize model configuration
12
  model_config = outetts.HFModelConfig_v1(
 
24
  # Sidebar for reference voice
25
  st.sidebar.title("Voice Cloning")
26
  reference_audio = st.sidebar.file_uploader("Upload a reference audio (any format)", type=["wav", "mp3", "ogg", "flac", "m4a"])
27
+ transcript = st.sidebar.text_area("Transcription of the reference audio")
28
 
29
  # Function to convert audio to WAV format
30
  def convert_to_wav(audio_file):
 
33
  audio.export(temp_audio.name, format="wav")
34
  return temp_audio.name
35
 
36
+ if reference_audio and transcript:
37
  ref_audio_path = convert_to_wav(reference_audio)
38
+ # Create speaker profile
39
+ speaker = interface.create_speaker(ref_audio_path, transcript)
40
+ # Save the speaker profile
41
+ interface.save_speaker(speaker, "speaker.json")
42
  else:
43
+ speaker = None
44
 
45
  # Recording functionality
46
+ def record_audio(duration=5, samplerate=44100):
47
+ st.sidebar.write("Recording...")
48
+ recording = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype=np.int16)
49
+ sd.wait()
50
+ temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
51
+ with wave.open(temp_audio_path, "wb") as wf:
52
+ wf.setnchannels(1)
53
+ wf.setsampwidth(2)
54
+ wf.setframerate(samplerate)
55
+ wf.writeframes(recording.tobytes())
56
+ return temp_audio_path
57
+
58
+ if not speaker:
59
  st.sidebar.write("Or record your voice below:")
60
  if st.sidebar.button("Record Voice"):
61
+ ref_audio_path = record_audio()
62
+ st.sidebar.success("Recording complete!")
63
+ transcript = st.sidebar.text_area("Transcription of the recorded audio")
64
+ if transcript:
65
+ # Create speaker profile from recorded audio
66
+ speaker = interface.create_speaker(ref_audio_path, transcript)
67
+ # Save the speaker profile
68
+ interface.save_speaker(speaker, "speaker.json")
69
 
70
  text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")
71
 
72
  if st.button("Generate Speech"):
73
  with st.spinner("Generating audio..."):
74
+ # Generate speech with or without the speaker profile
75
  output = interface.generate(
76
  text=text_input,
77
  temperature=0.1,
78
  repetition_penalty=1.1,
79
  max_length=4096,
80
+ speaker=speaker
81
  )
82
 
83
  # Save the synthesized speech to a file
 
89
  st.success("Speech generated successfully!")
90
 
91
  # Clean up temporary files
92
+ if reference_audio:
93
  os.remove(ref_audio_path)