Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,53 +1,14 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
from transformers import pipeline
|
4 |
-
import gradio as gr
|
5 |
-
import tempfile
|
6 |
-
from typing import Optional
|
7 |
-
import numpy as np
|
8 |
from TTS.utils.manage import ModelManager
|
9 |
from TTS.utils.synthesizer import Synthesizer
|
10 |
-
import
|
11 |
-
import
|
12 |
-
import huggingface_hub
|
13 |
-
from huggingface_hub import Repository, hf_hub_download, upload_file
|
14 |
-
from datetime import datetime
|
15 |
-
|
16 |
-
# π Setup dataset repo π
|
17 |
-
# Created new dataset as awacke1/MindfulStory.csv
|
18 |
-
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
|
19 |
-
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
|
20 |
-
DATA_FILENAME = "MindfulStory.csv"
|
21 |
-
DATA_FILE = os.path.join("data", DATA_FILENAME)
|
22 |
-
HF_TOKEN = os.environ.get("HF_TOKEN")
|
23 |
-
|
24 |
-
# π Oops! Try downloading the dataset (We hope it works!)
|
25 |
-
try:
|
26 |
-
hf_hub_download(
|
27 |
-
repo_id=DATASET_REPO_ID,
|
28 |
-
filename=DATA_FILENAME,
|
29 |
-
cache_dir="data",
|
30 |
-
force_filename=DATA_FILENAME
|
31 |
-
)
|
32 |
-
except:
|
33 |
-
print("😬 File not found, we’ll act like it’s not a problem...")
|
34 |
-
|
35 |
-
# 🧠 AI Memory: Because forgetting is for humans π€
|
36 |
-
def AIMemory(name: str, message: str):
|
37 |
-
if name and message:
|
38 |
-
with open(DATA_FILE, "a") as csvfile:
|
39 |
-
writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
|
40 |
-
writer.writerow({"name": name, "message": message, "time": str(datetime.now())})
|
41 |
-
commit_url = repo.push_to_hub()
|
42 |
-
return {"name": name, "message": message, "time": str(datetime.now())}
|
43 |
-
|
44 |
-
# π Repository setup! Let’s clone like pros 👨‍💻
|
45 |
-
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
|
46 |
|
47 |
-
#
|
48 |
-
|
|
|
49 |
|
50 |
-
# π€ Set up TTS Models. Let’s find that sweet robotic voice!
|
51 |
MODEL_NAMES = [
|
52 |
"en/ljspeech/tacotron2-DDC",
|
53 |
"en/ljspeech/glow-tts",
|
@@ -58,97 +19,49 @@ MODEL_NAMES = [
|
|
58 |
"de/thorsten/tacotron2-DCA",
|
59 |
]
|
60 |
|
61 |
-
# 🛠️ Use Model Manager to load vocoders (Fancy tech magic here)
|
62 |
-
MODELS = {}
|
63 |
-
manager = ModelManager()
|
64 |
for MODEL_NAME in MODEL_NAMES:
|
65 |
print(f"π Downloading {MODEL_NAME}... because waiting is fun!")
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
#
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
sentiment = classifier(text)[0]["label"]
|
94 |
-
return sentiment
|
95 |
-
|
96 |
-
# π¦ Saving it for later: Store this priceless info!
|
97 |
-
def upsert(text):
|
98 |
-
date_time = str(datetime.datetime.today())
|
99 |
-
doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
|
100 |
-
doc_ref.set({
|
101 |
-
u'firefield': 'Recognize Speech',
|
102 |
-
u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
|
103 |
-
u'last': text,
|
104 |
-
u'born': date_time,
|
105 |
-
})
|
106 |
-
saved = select('TTS-STT', date_time)
|
107 |
-
return saved
|
108 |
-
|
109 |
-
# π Retrieve all records: Gotta catch ’em all!
|
110 |
-
def selectall(text):
|
111 |
-
docs = db.collection('Text2SpeechSentimentSave').stream()
|
112 |
-
doclist = ''
|
113 |
-
for doc in docs:
|
114 |
-
r = (f'{doc.id} => {doc.to_dict()}')
|
115 |
-
doclist += r
|
116 |
-
return doclist
|
117 |
-
|
118 |
-
# 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
|
119 |
def tts(text: str, model_name: str):
|
120 |
print(text, model_name)
|
121 |
synthesizer = MODELS.get(model_name, None)
|
|
|
122 |
if synthesizer is None:
|
123 |
-
raise NameError("
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
-
# ποΈ Gradio UI with Emoji and Fun Comments π
|
131 |
-
demo = gr.Blocks()
|
132 |
-
|
133 |
-
with demo:
|
134 |
-
# π€ Microphone input to capture your golden voice π€
|
135 |
-
audio_file = gr.Audio(source="microphone", type="filepath")
|
136 |
-
|
137 |
-
# π Textbox to display transcribed text π
|
138 |
-
text = gr.Textbox(label="Speech to Text")
|
139 |
-
|
140 |
-
# ποΈ Radio input to choose the best Text to Speech model ποΈ
|
141 |
-
TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
|
142 |
-
|
143 |
-
# π Audio player to play back the robot’s voice π
|
144 |
-
audio = gr.Audio(label="Output", interactive=False)
|
145 |
-
|
146 |
-
# π Buttons for all your needs π
|
147 |
-
b1 = gr.Button("π€ Recognize Speech")
|
148 |
-
b5 = gr.Button("π Read It Back Aloud")
|
149 |
-
|
150 |
-
# π±οΈ Click buttons to perform actions! π±οΈ
|
151 |
-
b1.click(speech_to_text, inputs=audio_file, outputs=text)
|
152 |
-
b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
|
153 |
-
|
154 |
-
demo.launch(share=True)
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
|
|
|
|
|
|
|
|
|
|
3 |
from TTS.utils.manage import ModelManager
|
4 |
from TTS.utils.synthesizer import Synthesizer
|
5 |
+
import tempfile
|
6 |
+
from typing import Optional
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
# 🛠️ Use Model Manager to load vocoders
|
9 |
+
MODELS = {}
|
10 |
+
manager = ModelManager()
|
11 |
|
|
|
12 |
MODEL_NAMES = [
|
13 |
"en/ljspeech/tacotron2-DDC",
|
14 |
"en/ljspeech/glow-tts",
|
|
|
19 |
"de/thorsten/tacotron2-DCA",
|
20 |
]
|
21 |
|
|
|
|
|
|
|
22 |
for MODEL_NAME in MODEL_NAMES:
|
23 |
print(f"π Downloading {MODEL_NAME}... because waiting is fun!")
|
24 |
+
|
25 |
+
try:
|
26 |
+
model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
|
27 |
+
vocoder_name: Optional[str] = model_item["default_vocoder"]
|
28 |
+
vocoder_path = None
|
29 |
+
vocoder_config_path = None
|
30 |
+
|
31 |
+
if vocoder_name is not None:
|
32 |
+
vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
|
33 |
+
|
34 |
+
# 🧙‍♂️ Load the synthesizer with vocoder and safe loading of weights
|
35 |
+
synthesizer = Synthesizer(
|
36 |
+
model_path,
|
37 |
+
config_path,
|
38 |
+
None,
|
39 |
+
vocoder_path,
|
40 |
+
vocoder_config_path,
|
41 |
+
use_cuda=False # Make sure you're not forcing CUDA unless needed
|
42 |
+
)
|
43 |
+
|
44 |
+
MODELS[MODEL_NAME] = synthesizer
|
45 |
+
|
46 |
+
except Exception as e:
|
47 |
+
print(f"😬 Failed to load model {MODEL_NAME}: {str(e)}")
|
48 |
+
continue
|
49 |
+
|
50 |
+
# 🗣️ Text to Speech (because speaking is fun, but robots do it better)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def tts(text: str, model_name: str):
|
52 |
print(text, model_name)
|
53 |
synthesizer = MODELS.get(model_name, None)
|
54 |
+
|
55 |
if synthesizer is None:
|
56 |
+
raise NameError("Model not found, check if it's loaded properly!")
|
57 |
|
58 |
+
try:
|
59 |
+
wavs = synthesizer.tts(text)
|
60 |
+
|
61 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
62 |
+
synthesizer.save_wav(wavs, fp)
|
63 |
+
return fp.name
|
64 |
+
except Exception as e:
|
65 |
+
print(f"😬 Error generating speech: {str(e)}")
|
66 |
+
return None
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|