Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -2,40 +2,37 @@ import streamlit as st
 import datetime
 from transformers import pipeline
 import gradio as gr
-
 import tempfile
 from typing import Optional
 import numpy as np
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
-
-# PersistDataset -----
 import os
 import csv
-import gradio as gr
-from gradio import inputs, outputs
 import huggingface_hub
 from huggingface_hub import Repository, hf_hub_download, upload_file
 from datetime import datetime
 
-#
+# 🎉 Setup dataset repo 🎉
+# Created new dataset as awacke1/MindfulStory.csv
 DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
 DATASET_REPO_ID = "awacke1/MindfulStory.csv"
 DATA_FILENAME = "MindfulStory.csv"
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-#
+# 📥 Oops! Try downloading the dataset (We hope it works!)
 try:
     hf_hub_download(
         repo_id=DATASET_REPO_ID,
         filename=DATA_FILENAME,
-        cache_dir=
+        cache_dir="data",
         force_filename=DATA_FILENAME
     )
 except:
-    print("
-
+    print("😬 File not found, we'll act like it's not a problem...")
+
+# 🧠 AI Memory: Because forgetting is for humans 🤖
 def AIMemory(name: str, message: str):
     if name and message:
         with open(DATA_FILE, "a") as csvfile:
@@ -44,16 +41,13 @@ def AIMemory(name: str, message: str):
         commit_url = repo.push_to_hub()
     return {"name": name, "message": message, "time": str(datetime.now())}
 
-
-
-
-# Set up cloned dataset from repo for operations
-repo = Repository( local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
+# 🚀 Repository setup! Let's clone like pros 👨‍💻
+repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
 
-#
+# 🗣️ Set up Speech Recognition
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 
-#
+# 🤖 Set up TTS Models. Let's find that sweet robotic voice!
 MODEL_NAMES = [
     "en/ljspeech/tacotron2-DDC",
     "en/ljspeech/glow-tts",
@@ -64,11 +58,11 @@ MODEL_NAMES = [
     "de/thorsten/tacotron2-DCA",
 ]
 
-# Use Model Manager to load vocoders
+# 🛠️ Use Model Manager to load vocoders (Fancy tech magic here)
 MODELS = {}
 manager = ModelManager()
 for MODEL_NAME in MODEL_NAMES:
-    print(f"
+    print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
     model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
     vocoder_name: Optional[str] = model_item["default_vocoder"]
     vocoder_path = None
@@ -81,80 +75,80 @@ for MODEL_NAME in MODEL_NAMES:
     )
     MODELS[MODEL_NAME] = synthesizer
 
-#
+# 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
 def transcribe(audio):
     text = asr(audio)["text"]
     return text
 
-#
+# 🔖 Text classifier (because we love labeling things, right?)
 classifier = pipeline("text-classification")
 
-
+# 🎤 Speech to Text: Give me your voice, I'll give you text!
 def speech_to_text(speech):
     text = asr(speech)["text"]
-    #rMem = AIMemory("STT", text)
     return text
 
+# 💖 Sentiment Analysis (because even robots care about feelings 😊)
 def text_to_sentiment(text):
     sentiment = classifier(text)[0]["label"]
-    #rMem = AIMemory(text, sentiment)
     return sentiment
 
+# 📦 Saving it for later: Store this priceless info!
 def upsert(text):
-    date_time =str(datetime.datetime.today())
+    date_time = str(datetime.datetime.today())
     doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
-    doc_ref.set({
+    doc_ref.set({
+        u'firefield': 'Recognize Speech',
+        u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
+        u'last': text,
+        u'born': date_time,
+    })
     saved = select('TTS-STT', date_time)
     return saved
-
-def select(collection, document):
-    doc_ref = db.collection(collection).document(document)
-    doc = doc_ref.get()
-    docid = ("The id is: ", doc.id)
-    contents = ("The contents are: ", doc.to_dict())
-    return contents
-
+
+# 🔍 Retrieve all records: Gotta catch 'em all!
 def selectall(text):
     docs = db.collection('Text2SpeechSentimentSave').stream()
-    doclist=''
+    doclist = ''
     for doc in docs:
-        r=(f'{doc.id} => {doc.to_dict()}')
+        r = (f'{doc.id} => {doc.to_dict()}')
         doclist += r
     return doclist
-
+
+# 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
 def tts(text: str, model_name: str):
     print(text, model_name)
     synthesizer = MODELS.get(model_name, None)
     if synthesizer is None:
-        raise NameError("
+        raise NameError("😬 Oops! Model not found.")
+
     wavs = synthesizer.tts(text)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         synthesizer.save_wav(wavs, fp)
-
-    #rMem = AIMemory("TTS", text + model_name)
-
     return fp.name
 
+# 🎛️ Gradio UI with Emoji and Fun Comments 🎉
 demo = gr.Blocks()
+
 with demo:
-
+    # 🎤 Microphone input to capture your golden voice 🎤
+    audio_file = gr.Audio(source="microphone", type="filepath")
+
+    # 📝 Textbox to display transcribed text 📝
     text = gr.Textbox(label="Speech to Text")
-
-    #
-
-
+
+    # 🎛️ Radio input to choose the best Text to Speech model 🎛️
+    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
+
+    # 🔊 Audio player to play back the robot's voice 🔊
    audio = gr.Audio(label="Output", interactive=False)
 
-
-
-
-
-
-
+    # 🔘 Buttons for all your needs 🔘
+    b1 = gr.Button("🎤 Recognize Speech")
+    b5 = gr.Button("🔊 Read It Back Aloud")
+
+    # 🖱️ Click buttons to perform actions! 🖱️
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
-
-    #b3.click(upsert, inputs=text, outputs=saved)
-    #b4.click(selectall, inputs=text, outputs=savedAll)
-    b5.click(tts, inputs=[text,TTSchoice], outputs=audio)
+    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
 
-demo.launch(share=True)
+demo.launch(share=True)
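One runtime hazard survives this commit: the file keeps both import datetime and from datetime import datetime, so the name datetime ends up bound to the class, and the datetime.datetime.today() call inside upsert raises AttributeError whenever that path runs; that alone is a plausible source of the Space's "Runtime error" badge. A minimal sketch of the disambiguation, using a module alias (my naming, not the commit's):

# Sketch: alias the module so the later class import cannot shadow it.
import datetime as dt
from datetime import datetime  # the class; datetime.now() keeps working

date_time = str(dt.datetime.today())  # module access goes through the alias
timestamp = str(datetime.now())       # class access unchanged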
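The body of AIMemory falls between the first two hunks, so only the open(DATA_FILE, "a") line and the repo.push_to_hub() call are visible. For orientation, a hedged sketch of a CSV-append-and-push helper of this shape; the writer setup and field names are illustrative guesses, not the commit's code:

# Illustrative only: the real middle of AIMemory is not shown in this diff.
def AIMemory(name: str, message: str):
    if name and message:
        with open(DATA_FILE, "a") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow({"name": name, "message": message, "time": str(datetime.now())})
        commit_url = repo.push_to_hub()  # sync the appended row back to the dataset repo
    return {"name": name, "message": message, "time": str(datetime.now())}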
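The single pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h") line is the whole speech-to-text engine here. A quick usage sketch (the audio path is a stand-in, not a file from the repo):

from transformers import pipeline

asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
result = asr("sample.wav")  # accepts a filepath or a raw audio array
print(result["text"])       # the transcription string used throughout the app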
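The model-loading loop is mostly elided between the last two hunks; only the closing parenthesis and the MODELS[MODEL_NAME] = synthesizer assignment appear. A sketch of the usual Coqui TTS pattern for that elided region; the keyword names follow the TTS package's Synthesizer API and are my assumption, not a quote of the commit:

# Assumed shape of the elided loop body: fetch the model's default vocoder,
# then build a Synthesizer from the TTS checkpoint plus the vocoder checkpoint.
vocoder_config_path = None
if vocoder_name is not None:
    vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
synthesizer = Synthesizer(
    tts_checkpoint=model_path,
    tts_config_path=config_path,
    vocoder_checkpoint=vocoder_path,
    vocoder_config=vocoder_config_path,
)
MODELS[MODEL_NAME] = synthesizer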
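Finally, upsert, select, and selectall all call into a db object that nothing in this diff defines; the db.collection(...).document(...) chain matches the Google Cloud Firestore client API. A sketch of the initialization those helpers appear to assume (the client and its credentials are assumptions, not part of the commit):

# Assumed, not in the diff: a Firestore client bound to the global `db`.
# Needs the google-cloud-firestore package plus GCP credentials, e.g.
# GOOGLE_APPLICATION_CREDENTIALS pointing at a service-account key file.
from google.cloud import firestore

db = firestore.Client()

The commit also deletes the commented-out b3/b4 click wiring, so the UI never triggers these helpers, but upsert and selectall remain live code in the file and the missing client still matters if they are ever called.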