pradeep4321 committed on
Commit
374b25a
·
verified ·
1 Parent(s): d22d3f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -80
app.py CHANGED
@@ -1,80 +1,63 @@
1
- import os
2
- import tempfile
3
- import pyttsx3
4
- from docx import Document
5
- from PyPDF2 import PdfReader
6
- import streamlit as st
7
- import base64 # Add this import
8
- from io import BytesIO
9
-
10
def text_to_speech(text, output_file):
    """Synthesize *text* with pyttsx3 and save the audio to *output_file*.

    Args:
        text: The text to speak.
        output_file: Path of the audio file the engine should write.

    The call blocks until the engine has finished processing the queued
    save command.
    """
    engine = pyttsx3.init()
    engine.setProperty('rate', 150)
    voices = engine.getProperty('voices')
    # Prefer the second installed voice, but keep the engine's default voice
    # when fewer than two are installed instead of failing with IndexError.
    if len(voices) > 1:
        engine.setProperty('voice', voices[1].id)
    engine.save_to_file(text, output_file)
    engine.runAndWait()
17
-
18
def convert_docx_to_text(docx_file):
    """Return the text of every paragraph in *docx_file*, one per line.

    Args:
        docx_file: Path or file-like object accepted by ``Document``.

    Returns:
        The paragraph texts concatenated, each followed by a newline.
    """
    document = Document(docx_file)
    return "".join(para.text + "\n" for para in document.paragraphs)
24
-
25
def convert_pdf_to_text(pdf_file):
    """Return the extracted text of every page in *pdf_file*.

    Args:
        pdf_file: Path or file-like object accepted by ``PdfReader``.

    Returns:
        The per-page text concatenated in page order, with no separator
        inserted between pages.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() for page in reader.pages)
31
-
32
def main():
    """Run the Streamlit page: upload a document, then play and offer its speech.

    The uploaded ``txt``, ``docx`` or ``pdf`` file is copied to a temporary
    file, converted to plain text, synthesized into ``output.mp3`` and then
    presented with an audio player and a download link. Any exception is
    reported on the page rather than propagated.
    """
    st.title("Text to Speech Converter")

    uploaded_file = st.file_uploader("Upload a text, docx, or pdf file", type=["txt", "docx", "pdf"])

    if not uploaded_file:
        return

    temp_path = None
    try:
        # delete=False so the copy can be reopened by name after it is
        # closed; it is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name
            temp_file.write(uploaded_file.read())

        file_extension = uploaded_file.name.split('.')[-1].lower()

        if file_extension == 'txt':
            with open(temp_path, 'r', encoding='utf-8') as txt_file:
                text = txt_file.read()
        elif file_extension == 'docx':
            text = convert_docx_to_text(temp_path)
        elif file_extension == 'pdf':
            text = convert_pdf_to_text(temp_path)
        else:
            st.error("Unsupported file format")
            return

        with st.spinner("Converting text to speech..."):
            output_audio_file = "output.mp3"
            text_to_speech(text, output_audio_file)

        st.audio(output_audio_file, format="audio/mp3", start_time=0)

        # Provide a download link for the audio file
        with open(output_audio_file, 'rb') as audio_file:
            audio_bytes = audio_file.read()
        st.markdown(get_binary_file_downloader_html(audio_bytes, output_audio_file), unsafe_allow_html=True)

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
    finally:
        # Runs on success, on the early "unsupported format" return, and on
        # errors, so the temporary copy of the upload is never left behind.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)
70
-
71
# Build the HTML download link shown under the audio player
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an HTML anchor that downloads *bin_file* through a data URI.

    Args:
        bin_file: The raw bytes to embed in the link.
        file_label: Used both as the suggested download file name and in
            the visible link text.

    Returns:
        The ``<a>`` element as a string, with the payload base64-encoded
        inline.
    """
    with st.spinner("Preparing download link..."):
        encoded = base64.b64encode(bin_file).decode()
        link = (
            f'<a href="data:application/octet-stream;base64,{encoded}" '
            f'download="{file_label}" target="_blank">Download {file_label}</a>'
        )
    return link
78
-
79
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 
1
+ import streamlit as st
2
+ from TTS.api import TTS
3
+ import tempfile
4
+ import os
5
+
6
# Initialize TTS model (only once)
@st.cache_resource
def load_tts_model():
    """Load the XTTS v2 model, using the GPU only when CUDA is available.

    ``st.cache_resource`` keeps the returned model so that script reruns
    reuse it instead of loading it again.

    Returns:
        The ``TTS`` instance for
        ``tts_models/multilingual/multi-dataset/xtts_v2``.
    """
    # Imported locally so this function does not rely on a file-level import.
    import torch

    # Requesting the GPU unconditionally fails on CPU-only hosts, so fall
    # back to CPU inference when no CUDA device is present.
    use_gpu = torch.cuda.is_available()
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=use_gpu)
10
+
11
tts = load_tts_model()

# App title
st.title("🔊 Voice Cloning with XTTS v2")

# Text input
text_input = st.text_area("Enter the text you want to synthesize", height=150)

# Speaker file uploader
speaker_file = st.file_uploader("Upload a speaker WAV file", type=["wav"])

# Button to generate
if st.button("Generate Speech"):
    # Treat whitespace-only input as empty: there is nothing to synthesize.
    if not text_input.strip():
        st.error("Please enter text.")
    elif not speaker_file:
        st.error("Please upload a speaker WAV file.")
    else:
        speaker_path = None
        output_path = None
        try:
            with st.spinner("Generating voice..."):
                # Save uploaded speaker audio temporarily; the model is
                # given a file path for the reference voice.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as speaker_temp:
                    speaker_path = speaker_temp.name
                    speaker_temp.write(speaker_file.read())

                # Reserve a unique output file per run so that concurrent
                # sessions do not overwrite each other's audio.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as output_temp:
                    output_path = output_temp.name

                # Generate speech
                tts.tts_to_file(
                    text=text_input,
                    file_path=output_path,
                    speaker_wav=speaker_path,
                    language="en",
                )

                # Read the result into memory so both temporary files can
                # be deleted before the widgets are rendered.
                with open(output_path, "rb") as f:
                    audio_bytes = f.read()

            # Playback
            st.audio(audio_bytes, format="audio/wav")

            # Download link
            st.download_button(
                label="Download Audio",
                data=audio_bytes,
                file_name="cloned_voice.wav",
                mime="audio/wav",
            )

        except Exception as e:
            st.error(f"An error occurred: {e}")
        finally:
            # Clean up on success and on failure alike.
            for temp_path in (speaker_path, output_path):
                if temp_path is not None and os.path.exists(temp_path):
                    os.remove(temp_path)