File size: 2,727 Bytes
8751284
76a7f8a
 
 
 
 
 
 
61f242d
 
11f1864
76a7f8a
 
11f1864
76a7f8a
 
 
 
8751284
 
 
 
 
11f1864
 
 
f140b92
a15612e
da2b4c1
61f242d
76a7f8a
 
 
 
 
c50ad1a
76a7f8a
 
 
 
 
 
 
11f1864
 
76a7f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11f1864
 
 
 
e522a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92e10eb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import spaces
import PyPDF2 
from docx import Document
import os
from unittest.mock import patch
from TTS.api import TTS
import torch
import gradio as gr
from torchaudio import load

# Startup progress marker: every import above has finished.
print("completed imported packages")
# Set the COQUI_TOS_AGREED flag in the environment before the model is built below.
# NOTE(review): presumably this pre-accepts the Coqui terms-of-service prompt — confirm against the TTS docs.
os.environ["COQUI_TOS_AGREED"] = "1"


# Stand-in for ``input`` that answers every prompt with 'y'.
def always_yes(*_ignored_args, **_ignored_kwargs):
    """Return the string ``'y'`` regardless of the arguments passed."""
    answer = 'y'
    return answer

# Patch the input function to always return 'y', so any interactive prompt
# raised while the model is being constructed is answered without a user.
with patch('builtins.input', always_yes):
    # Use the first CUDA device when one is available; otherwise fall back to CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # Build the xtts_v2 model once at import time and move it to the chosen device.
    tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=False).to(device)



# Startup progress marker: the model object now exists.
print("loded the model")
# Path of the recording passed as ``speaker_wav`` to every synthesis call.
sa = 'sampleaudio.wav'

@spaces.GPU(enable_queue=True)
def text_to_speech(text):
    """Synthesize ``text`` into a WAV file with the module-level ``tts`` model.

    The module-level ``sa`` path is passed as the speaker reference
    (``speaker_wav``) and the language is fixed to ``"en"``.

    Args:
        text: The text to speak.

    Returns:
        The output path ``"outputz.wav"`` on success, or the exception
        message as a string if synthesis raises.
    """
    output_file = "outputz.wav"
    try:
        synthesis_options = {
            "speaker_wav": sa,
            "language": "en",
            "file_path": output_file,
            "split_sentences": True,
        }
        tts.tts_to_file(text, **synthesis_options)
    except Exception as err:
        # Failures are reported to the caller as text rather than raised.
        return str(err)
    else:
        return output_file
        
# Startup progress marker: text_to_speech has been defined.
print("first function")

def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path``.

    Pages are joined with a newline so the last word of one page is not
    fused with the first word of the next (the same separator
    ``extract_text_from_doc`` uses between paragraphs). A page for which
    the extractor yields nothing contributes an empty string.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The page texts concatenated into a single string.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # ``or ""`` guards against a None extraction result, which would
        # otherwise make join() raise TypeError.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

def extract_text_from_doc(file_path):
    """Return the text of all paragraphs in the document at ``file_path``, one per line."""
    paragraph_texts = []
    for paragraph in Document(file_path).paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)

def file_to_audio(file_path):
    """Convert an uploaded PDF or Word document to speech.

    The file type is taken from the file extension, compared
    case-insensitively so that ``REPORT.PDF`` is handled like
    ``report.pdf``. A name without an extension is never treated as a
    supported type.

    Args:
        file_path: Path of the uploaded file.

    Returns:
        Whatever ``text_to_speech`` returns for the extracted text, or the
        string ``"Unsupported file format"`` for any other extension.
    """
    # splitext only inspects the final path component, so a dot in a
    # directory name is never mistaken for the file's extension.
    extension = os.path.splitext(file_path)[1].lower()
    if extension == '.pdf':
        text = extract_text_from_pdf(file_path)
    elif extension in ('.doc', '.docx'):
        # NOTE(review): extract_text_from_doc reads the file via docx.Document;
        # confirm that it can actually open legacy .doc files.
        text = extract_text_from_doc(file_path)
    else:
        return "Unsupported file format"

    return text_to_speech(text)
def handle_inputs(text, file):
    """Route the Gradio inputs to speech synthesis and build the two outputs.

    Exactly one of ``text`` and ``file`` is expected. The text is stripped
    first, so ``None`` and whitespace-only text count as "no text".

    Args:
        text: Contents of the text box.
        file: Path of the uploaded file, or a falsy value when nothing was
            uploaded.

    Returns:
        An ``(audio_path, message)`` tuple. ``audio_path`` is ``None``
        whenever no audio file was produced; ``message`` then explains why.
    """
    text = (text or "").strip()
    if text and file:
        return None, "Please provide only one input at a time: text or file."

    if text:
        result = text_to_speech(text)
        thanks = "Thanks for your text input"
    elif file:
        result = file_to_audio(file)
        thanks = 'Thanks for your File'
    else:
        return None, "No input provided."

    # text_to_speech and file_to_audio report failures by returning the
    # error text in place of a path. Only hand a real file to the audio
    # output; otherwise show the returned text as the message.
    if not result or not os.path.isfile(result):
        return None, (str(result) if result else "Audio generation failed.")
    return result, thanks


# Startup progress marker: all helper functions above have been defined.
print("last  function")



# Build the web UI: a text box and a file upload side by side, a button,
# and two outputs (the generated audio and a status message).
with gr.Blocks() as demo:
    gr.Markdown("## Welcome to Text to Speech 📝 ➡️ 🎧")

    # The two alternative inputs share one row.
    with gr.Row():
        text_input = gr.Textbox(label="Text to Speak", placeholder="Enter text...")
        # type="filepath" is requested so the handler gets a path; file_to_audio calls .split on it.
        file_input = gr.File(type="filepath", label="Upload a PDF or Word file", file_count="single")

    submit_btn = gr.Button("Generate Voice")

    output_audio = gr.Audio(label="Audio Output", type="filepath")
    output_message = gr.Textbox(label="Message")

    # handle_inputs returns (audio_path, message), matching the order of ``outputs``.
    submit_btn.click(fn=handle_inputs, inputs=[text_input, file_input], outputs=[output_audio, output_message])

# Launch the Gradio app
demo.launch()