# TTS-STT-Blocks / app.py
# (Hugging Face Space page header — awacke1, commit 5b90064, 5.41 kB —
#  commented out so this scraped file parses as Python.)
import streamlit as st
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import os
import csv
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# 🌟 Setup dataset repo 🌟
# Created new dataset as awacke1/MindfulStory.csv
# Hugging Face dataset repository that backs persistent story/chat storage.
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATA_FILENAME = "MindfulStory.csv"
# Local path of the CSV inside the cloned repo (Repository(local_dir="data", ...) below).
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Auth token for pushing to the dataset repo; None when the env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# πŸ˜… Oops! Try downloading the dataset (We hope it works!)
try:
hf_hub_download(
repo_id=DATASET_REPO_ID,
filename=DATA_FILENAME,
cache_dir="data",
force_filename=DATA_FILENAME
)
except:
print("😬 File not found, we’ll act like it’s not a problem...")
# 🧠 AI Memory: Because forgetting is for humans 🤖
def AIMemory(name: str, message: str):
    """Append one (name, message, time) row to the dataset CSV and push it to the Hub.

    Returns the stored record as a dict, or None (implicitly) when either
    *name* or *message* is empty.
    """
    if name and message:
        # Capture the timestamp once so the CSV row and the returned dict agree
        # (previously datetime.now() was called twice and could differ).
        timestamp = str(datetime.now())
        # newline="" per the csv module docs — avoids blank rows on Windows.
        with open(DATA_FILE, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow({"name": name, "message": message, "time": timestamp})
        # `repo` is the module-level Repository cloned below; the push URL was
        # previously assigned to an unused local, so just push.
        repo.push_to_hub()
        return {"name": name, "message": message, "time": timestamp}
# 🌍 Repository setup! Let’s clone like pros 👨‍💻
# Clone the dataset repo into ./data so DATA_FILE is a real, pushable file.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
# 🗣️ Set up Speech Recognition
# wav2vec2 ASR pipeline shared by transcribe() and speech_to_text() below.
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
# 🎤 Set up TTS Models. Let’s find that sweet robotic voice!
# Coqui-TTS model identifiers; the "tts_models/" prefix is added at download
# time in the loading loop below. These names also populate the UI radio choices.
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]
# πŸ› οΈ Use Model Manager to load vocoders (Fancy tech magic here)
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
print(f"πŸš€ Downloading {MODEL_NAME}... because waiting is fun!")
model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
vocoder_name: Optional[str] = model_item["default_vocoder"]
vocoder_path = None
vocoder_config_path = None
if vocoder_name is not None:
vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
synthesizer = Synthesizer(
model_path, config_path, None, vocoder_path, vocoder_config_path,
)
MODELS[MODEL_NAME] = synthesizer
# πŸ§™β€β™‚οΈ Transcribe function: Turning audio into text with a sprinkle of magic!
def transcribe(audio):
text = asr(audio)["text"]
return text
# πŸ“Š Text classifier (because we love labeling things, right?)
classifier = pipeline("text-classification")
# 🎤 Speech to Text: Give me your voice, I’ll give you text!
def speech_to_text(speech):
    """Transcribe *speech* (an audio filepath) with the shared ASR pipeline."""
    recognized = asr(speech)
    return recognized["text"]
# 😎 Sentiment Analysis (because even robots care about feelings 💔)
def text_to_sentiment(text):
    """Return the label of the classifier's top prediction for *text*."""
    top_prediction = classifier(text)[0]
    return top_prediction["label"]
# πŸ“¦ Saving it for later: Store this priceless info!
def upsert(text):
date_time = str(datetime.datetime.today())
doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
doc_ref.set({
u'firefield': 'Recognize Speech',
u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
u'last': text,
u'born': date_time,
})
saved = select('TTS-STT', date_time)
return saved
# πŸ” Retrieve all records: Gotta catch β€˜em all!
def selectall(text):
docs = db.collection('Text2SpeechSentimentSave').stream()
doclist = ''
for doc in docs:
r = (f'{doc.id} => {doc.to_dict()}')
doclist += r
return doclist
# πŸ—£οΈ Text to Speech (Because speaking is fun, but robots do it better)
def tts(text: str, model_name: str):
print(text, model_name)
synthesizer = MODELS.get(model_name, None)
if synthesizer is None:
raise NameError("😬 Oops! Model not found.")
wavs = synthesizer.tts(text)
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
synthesizer.save_wav(wavs, fp)
return fp.name
# πŸŽ›οΈ Gradio UI with Emoji and Fun Comments πŸŽ‰
demo = gr.Blocks()
with demo:
# 🎀 Microphone input to capture your golden voice 🎀
audio_file = gr.Audio(source="microphone", type="filepath")
# πŸ“œ Textbox to display transcribed text πŸ“œ
text = gr.Textbox(label="Speech to Text")
# πŸŽ™οΈ Radio input to choose the best Text to Speech model πŸŽ™οΈ
TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
# πŸ”Š Audio player to play back the robot’s voice πŸ”Š
audio = gr.Audio(label="Output", interactive=False)
# πŸŽ‰ Buttons for all your needs πŸŽ‰
b1 = gr.Button("🎀 Recognize Speech")
b5 = gr.Button("πŸ”Š Read It Back Aloud")
# πŸ–±οΈ Click buttons to perform actions! πŸ–±οΈ
b1.click(speech_to_text, inputs=audio_file, outputs=text)
b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
demo.launch(share=True)