Hammad112 committed on
Commit
084f05a
·
verified ·
1 Parent(s): 31beb62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -39
app.py CHANGED
@@ -4,9 +4,6 @@ from scipy.io.wavfile import write
4
  import tempfile
5
  import os
6
  from pydub import AudioSegment
7
- import sounddevice as sd
8
- import wave
9
- import numpy as np
10
 
11
  # Initialize model configuration
12
  model_config = outetts.HFModelConfig_v1(
@@ -24,7 +21,6 @@ st.write("Enter text below to generate speech.")
24
  # Sidebar for reference voice
25
  st.sidebar.title("Voice Cloning")
26
  reference_audio = st.sidebar.file_uploader("Upload a reference audio (any format)", type=["wav", "mp3", "ogg", "flac", "m4a"])
27
- transcript = st.sidebar.text_area("Transcription of the reference audio")
28
 
29
  # Function to convert audio to WAV format
30
  def convert_to_wav(audio_file):
@@ -33,51 +29,21 @@ def convert_to_wav(audio_file):
33
  audio.export(temp_audio.name, format="wav")
34
  return temp_audio.name
35
 
36
- if reference_audio and transcript:
37
  ref_audio_path = convert_to_wav(reference_audio)
38
- # Create speaker profile
39
- speaker = interface.create_speaker(ref_audio_path, transcript)
40
- # Save the speaker profile
41
- interface.save_speaker(speaker, "speaker.json")
42
  else:
43
- speaker = None
44
-
45
- # Recording functionality
46
- def record_audio(duration=5, samplerate=44100):
47
- st.sidebar.write("Recording...")
48
- recording = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype=np.int16)
49
- sd.wait()
50
- temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
51
- with wave.open(temp_audio_path, "wb") as wf:
52
- wf.setnchannels(1)
53
- wf.setsampwidth(2)
54
- wf.setframerate(samplerate)
55
- wf.writeframes(recording.tobytes())
56
- return temp_audio_path
57
-
58
- if not speaker:
59
- st.sidebar.write("Or record your voice below:")
60
- if st.sidebar.button("Record Voice"):
61
- ref_audio_path = record_audio()
62
- st.sidebar.success("Recording complete!")
63
- transcript = st.sidebar.text_area("Transcription of the recorded audio")
64
- if transcript:
65
- # Create speaker profile from recorded audio
66
- speaker = interface.create_speaker(ref_audio_path, transcript)
67
- # Save the speaker profile
68
- interface.save_speaker(speaker, "speaker.json")
69
 
70
  text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")
71
 
72
  if st.button("Generate Speech"):
73
  with st.spinner("Generating audio..."):
74
- # Generate speech with or without the speaker profile
75
  output = interface.generate(
76
  text=text_input,
77
  temperature=0.1,
78
  repetition_penalty=1.1,
79
- max_length=4096,
80
- speaker=speaker
81
  )
82
 
83
  # Save the synthesized speech to a file
@@ -89,5 +55,5 @@ if st.button("Generate Speech"):
89
  st.success("Speech generated successfully!")
90
 
91
  # Clean up temporary files
92
- if reference_audio:
93
  os.remove(ref_audio_path)
 
4
  import tempfile
5
  import os
6
  from pydub import AudioSegment
 
 
 
7
 
8
  # Initialize model configuration
9
  model_config = outetts.HFModelConfig_v1(
 
21
  # Sidebar for reference voice
22
  st.sidebar.title("Voice Cloning")
23
  reference_audio = st.sidebar.file_uploader("Upload a reference audio (any format)", type=["wav", "mp3", "ogg", "flac", "m4a"])
 
24
 
25
  # Function to convert audio to WAV format
26
  def convert_to_wav(audio_file):
 
29
  audio.export(temp_audio.name, format="wav")
30
  return temp_audio.name
31
 
32
+ if reference_audio:
33
  ref_audio_path = convert_to_wav(reference_audio)
 
 
 
 
34
  else:
35
+ ref_audio_path = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")
38
 
39
  if st.button("Generate Speech"):
40
  with st.spinner("Generating audio..."):
41
+ # Generate speech with reference audio
42
  output = interface.generate(
43
  text=text_input,
44
  temperature=0.1,
45
  repetition_penalty=1.1,
46
+ max_length=4096
 
47
  )
48
 
49
  # Save the synthesized speech to a file
 
55
  st.success("Speech generated successfully!")
56
 
57
  # Clean up temporary files
58
+ if ref_audio_path:
59
  os.remove(ref_audio_path)