pradeep4321 committed on
Commit
b2cfb3e
·
verified ·
1 Parent(s): f4be530

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +27 -17
src/app.py CHANGED
@@ -2,14 +2,17 @@ import streamlit as st
2
  from docx import Document
3
  from PyPDF2 import PdfReader
4
  from io import BytesIO
5
- from TTS.api import TTS
6
- from scipy.io.wavfile import write
7
- import numpy as np
 
8
 
9
- # Load offline TTS model once
10
  @st.cache_resource
11
def load_tts_model():
    """Construct the offline TTS engine used by the app.

    The engine is created for the ``tts_models/en/ljspeech/tacotron2-DDC``
    model, with the progress bar turned off and ``gpu=False``.

    Returns:
        The ``TTS`` instance built with those settings.
    """
    model_name = "tts_models/en/ljspeech/tacotron2-DDC"
    engine = TTS(model_name=model_name, progress_bar=False, gpu=False)
    return engine
 
 
13
 
14
  def convert_docx_to_text(docx_file):
15
  doc = Document(docx_file)
@@ -19,23 +22,26 @@ def convert_pdf_to_text(pdf_file):
19
  reader = PdfReader(pdf_file)
20
  return "\n".join([page.extract_text() or '' for page in reader.pages])
21
 
22
def text_to_speech(text):
    """Synthesize *text* and return the audio as an in-memory WAV file.

    Args:
        text: The text to convert to speech.

    Returns:
        BytesIO: Buffer holding the WAV data, rewound to position 0 so it
        can be read immediately.
    """
    samples = np.array(load_tts_model().tts(text))

    wav_bytes = BytesIO()
    # NOTE(review): 22050 Hz is hard-coded; presumably the model's output
    # rate -- confirm against the loaded model.
    write(wav_bytes, 22050, samples)  # Save as WAV
    wav_bytes.seek(0)
    return wav_bytes
29
 
30
def get_download_link(audio_data, filename="output.wav"):
    """Build an HTML download link that embeds the audio as a data URI.

    Args:
        audio_data: Buffer exposing ``getvalue()`` that returns the WAV
            bytes (e.g. ``io.BytesIO``).
        filename: Name suggested to the browser for the download and shown
            as the link text.

    Returns:
        str: An ``<a>`` element whose ``href`` is a base64 ``audio/wav``
        data URI.
    """
    # Streamlit has no ``base64`` attribute; use the standard-library module.
    import base64
    import html

    b64 = base64.b64encode(audio_data.getvalue()).decode()
    # Escape the name: it is placed in an attribute and in the link text.
    safe_name = html.escape(filename, quote=True)
    return (
        f'<a href="data:audio/wav;base64,{b64}" '
        f'download="{safe_name}">Download {safe_name}</a>'
    )
34
 
35
  def main():
36
- st.title("Text to Speech Converter (Offline - Hugging Face Safe)")
37
 
38
- uploaded_file = st.file_uploader("Upload a text, docx, or pdf file", type=["txt", "docx", "pdf"])
39
 
40
  if uploaded_file:
41
  ext = uploaded_file.name.split('.')[-1].lower()
@@ -47,15 +53,19 @@ def main():
47
  elif ext == 'pdf':
48
  text = convert_pdf_to_text(uploaded_file)
49
  else:
50
- st.error("Unsupported file format")
51
  return
52
 
53
  if not text.strip():
54
  st.warning("No readable text found.")
55
  return
56
 
57
- with st.spinner("Generating speech..."):
58
- audio_buffer = text_to_speech(text)
 
 
 
 
59
 
60
  st.audio(audio_buffer, format="audio/wav")
61
  st.markdown(get_download_link(audio_buffer), unsafe_allow_html=True)
 
2
  from docx import Document
3
  from PyPDF2 import PdfReader
4
  from io import BytesIO
5
+ import torch
6
+ import torchaudio
7
+ import soundfile as sf
8
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
9
 
10
+ # Load TTS model and processor
11
  @st.cache_resource
12
def load_model():
    """Load the pretrained speech model together with its processor.

    Both objects are obtained via ``from_pretrained`` from the same
    checkpoint id, ``espnet/kan-bayashi_ljspeech_vits``.

    Returns:
        tuple: ``(model, processor)``.
    """
    # NOTE(review): confirm this checkpoint is loadable through the
    # transformers auto classes and that the model class emits audio.
    checkpoint = "espnet/kan-bayashi_ljspeech_vits"
    speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(checkpoint)
    speech_processor = AutoProcessor.from_pretrained(checkpoint)
    return speech_model, speech_processor
16
 
17
  def convert_docx_to_text(docx_file):
18
  doc = Document(docx_file)
 
22
  reader = PdfReader(pdf_file)
23
  return "\n".join([page.extract_text() or '' for page in reader.pages])
24
 
25
def text_to_speech(text, model, processor):
    """Run *text* through the model and return the result as a WAV buffer.

    Args:
        text: The text to convert to speech.
        model: Object whose ``generate(**inputs)`` produces the output
            tensor.
        processor: Callable that turns ``text`` into the model's keyword
            inputs (called with ``return_tensors="pt"``).

    Returns:
        BytesIO: Buffer holding the WAV data, rewound to position 0.
    """
    encoded = processor(text, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(**encoded)

    samples = generated.squeeze().cpu().numpy()

    wav_bytes = BytesIO()
    # NOTE(review): 22050 Hz is hard-coded; presumably the checkpoint's
    # sampling rate -- confirm against the model configuration.
    sf.write(wav_bytes, samples, 22050, format="WAV")
    wav_bytes.seek(0)
    return wav_bytes
35
 
36
def get_download_link(audio_buffer, filename="output.wav"):
    """Build an HTML download link that embeds the audio as a data URI.

    Args:
        audio_buffer: Buffer exposing ``getvalue()`` that returns the WAV
            bytes (e.g. ``io.BytesIO``).
        filename: Name suggested to the browser for the download and shown
            as the link text.

    Returns:
        str: An ``<a>`` element whose ``href`` is a base64 ``audio/wav``
        data URI.
    """
    # Streamlit has no ``base64`` attribute; use the standard-library module.
    import base64
    import html

    b64 = base64.b64encode(audio_buffer.getvalue()).decode()
    # Escape the name: it is placed in an attribute and in the link text.
    safe_name = html.escape(filename, quote=True)
    return (
        f'<a href="data:audio/wav;base64,{b64}" '
        f'download="{safe_name}">Download {safe_name}</a>'
    )
40
 
41
  def main():
42
+ st.title("Text to Speech with Transformers (Offline Hugging Face)")
43
 
44
+ uploaded_file = st.file_uploader("Upload a TXT, DOCX, or PDF file", type=["txt", "docx", "pdf"])
45
 
46
  if uploaded_file:
47
  ext = uploaded_file.name.split('.')[-1].lower()
 
53
  elif ext == 'pdf':
54
  text = convert_pdf_to_text(uploaded_file)
55
  else:
56
+ st.error("Unsupported file type")
57
  return
58
 
59
  if not text.strip():
60
  st.warning("No readable text found.")
61
  return
62
 
63
+ st.subheader("Extracted Text:")
64
+ st.write(text[:1000] + ("..." if len(text) > 1000 else ""))
65
+
66
+ with st.spinner("Generating audio..."):
67
+ model, processor = load_model()
68
+ audio_buffer = text_to_speech(text, model, processor)
69
 
70
  st.audio(audio_buffer, format="audio/wav")
71
  st.markdown(get_download_link(audio_buffer), unsafe_allow_html=True)