File size: 2,908 Bytes
f2a547a
 
 
 
 
 
 
c8c664f
f2a547a
 
 
 
1bce0f9
 
 
f2a547a
 
60761c3
 
 
f2a547a
 
 
 
 
 
 
 
 
 
 
 
60761c3
 
f2a547a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1bce0f9
 
 
 
 
f2a547a
5202de8
c369440
f2a547a
 
 
 
5202de8
 
f2a547a
 
 
 
c8c664f
 
c369440
c8c664f
f2a547a
 
 
 
5202de8
 
f2a547a
 
 
 
 
5202de8
f2a547a
c8c664f
f2a547a
 
 
 
c8c664f
f2a547a
 
 
 
c369440
f2a547a
c369440
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from TTS.api import TTS
from bs4 import BeautifulSoup
import requests
import streamlit as st
import tempfile
import os
import json
import datetime

# Load the application settings once at import time. The encoding is pinned to
# UTF-8 so non-ASCII text in the JSON (e.g. in the description) is decoded the
# same way on every platform instead of depending on the locale default.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Required keys: a missing one raises KeyError here rather than later in the UI.
APP_NAME = config['APP_NAME']                # page title and heading
APP_LOGO = config['APP_LOGO']                # image shown under the heading
APP_DESCRIPTION = config['APP_DESCRIPTION']  # markdown shown under the logo
LANGUAGES_URL = config['LANGUAGES_URL']      # page scraped by get_iso_languages()

def contains_only_ascii(input_string):
    """Return True when every character of *input_string* is ASCII.

    A character is ASCII when its code point is below 128. The empty string
    has no non-ASCII characters and therefore also returns True.
    """
    # str.isascii() performs the same "all code points < 128" check natively.
    return input_string.isascii()

def get_iso_languages(timeout=10):
    """Scrape the page at ``LANGUAGES_URL`` into a language-name -> code map.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Every ``<p>`` element after the first is split on whitespace; elements
    that yield exactly two tokens are read as ``<iso_code> <language_name>``.
    Entries whose name contains non-ASCII characters are skipped.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        dict: Language name mapped to its ISO code. If the same name appears
        more than once, the last occurrence wins.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(LANGUAGES_URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty
    # dictionary, which would leave the language picker silently empty.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    iso_language_dict = {}

    # Skip the first <p>, which contains the header.
    for p_tag in soup.find_all('p')[1:]:
        parts = p_tag.get_text().split()
        if len(parts) != 2:
            # Not a "<code> <name>" row (e.g. multi-word text); ignore it.
            continue
        iso_code, language_name = parts
        if contains_only_ascii(language_name):
            iso_language_dict[language_name] = iso_code

    return iso_language_dict

def create_temp_file(input_wav):
    """Copy an uploaded file-like object into a named temporary file.

    Args:
        input_wav: Binary file-like object; its remaining content is read
            with ``read()`` and written to the temporary file.

    Returns:
        The still-open temporary file object. Its ``name`` attribute is the
        on-disk path. The file is created with ``delete=False``, so the
        caller is responsible for closing and removing it (see
        ``remove_temp_file``).
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file.write(input_wav.read())
    # The file is handed to other code by *name* while this handle stays open,
    # so push the buffered bytes to disk now; otherwise a reader opening the
    # path could see an empty or truncated file.
    temp_file.flush()
    return temp_file

def remove_temp_file(temp_file):
    """Close *temp_file* and delete the file it refers to from disk.

    Args:
        temp_file: Open file object exposing ``close()`` and a ``name``
            attribute holding its path, such as the object returned by
            ``create_temp_file``.

    Raises:
        OSError: If the path in ``temp_file.name`` cannot be removed.
    """
    # Release the handle first, then drop the directory entry.
    temp_file.close()
    os.unlink(temp_file.name)

def update_progress(percent, text):
    """Update the on-page progress bar and status line.

    Relies on the module-level names ``progress_bar`` and ``status_text``,
    which the main script assigns just before generation starts; calling
    this before they exist raises ``NameError``.

    Args:
        percent: Value forwarded to ``progress_bar.progress`` (the script
            passes 0, 50 and 100).
        text: Message forwarded to ``status_text.text``.
    """
    progress_bar.progress(percent)
    status_text.text(text)

# Language data for the picker: the names fill the select box and the
# name -> ISO code map is used below to choose the TTS model.
iso_languages = get_iso_languages()
languages = list(iso_languages.keys())

# Page header.
st.set_page_config(page_title=APP_NAME)
st.title(APP_NAME)
st.image(APP_LOGO, use_column_width=True)
st.markdown(APP_DESCRIPTION)

# Input widgets.
language = st.selectbox('Select a language', languages)
prompt = st.text_input('Enter your prompt')
# The widget is labelled "WAV", so WAV uploads must be accepted; "mp3" stays in
# the list so uploads that were accepted before keep working.
input_wav = st.file_uploader("Upload a WAV file", type=["wav", "mp3"])

# Generation only starts once a file has been uploaded. A separate "no file"
# error inside this branch could never be reached, so it is not checked here.
if input_wav:
    if not prompt:
        st.error('Please write prompt')
    else:
        # Assigned at module level on purpose: update_progress() reads these
        # two names as globals.
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Timestamped output name, e.g. recording_2024-01-31_235959.mp3.
        # NOTE(review): the output is named .mp3 and served as audio/mp3;
        # confirm the TTS backend really writes MP3 data to this path.
        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H%M%S")
        output_filename = f"recording_{formatted_datetime}.mp3"

        # The TTS call takes the speaker sample as a path, so the upload is
        # copied to a temporary file first.
        temp_file = create_temp_file(input_wav)
        try:
            iso_code = iso_languages[language]

            print(f'Language: {language}, prompt: {prompt}')

            update_progress(0, 'Loading TTS model...')
            api = TTS(f"tts_models/{iso_code}/fairseq/vits")

            update_progress(50, 'Generating audio...')
            api.tts_with_vc_to_file(
                prompt,
                speaker_wav=temp_file.name,
                file_path=output_filename
            )
        finally:
            # Always delete the uploaded copy, even when model loading or
            # synthesis raises, so failed runs do not leak temp files.
            remove_temp_file(temp_file)

        # Read the result with a context manager so the handle is closed.
        with open(output_filename, 'rb') as audio_file:
            audio_bytes = audio_file.read()

        update_progress(100, 'Audio generated successfully!')

        st.audio(audio_bytes, format='audio/mp3')

        st.download_button('Download mp3', data=audio_bytes, file_name='output.mp3')