# Hugging Face Spaces page header captured along with this file.
# Status shown for the Space at capture time: "Runtime error".
import csv
import datetime
import os
import tempfile
# NOTE: this rebinds the name `datetime` to the class, shadowing the module
# imported above; code below must call `datetime.now()`, not
# `datetime.datetime.now()`.
from datetime import datetime
from typing import Optional

import gradio as gr
import huggingface_hub
import numpy as np
import streamlit as st
from huggingface_hub import Repository, hf_hub_download, upload_file
from transformers import pipeline
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Dataset repository used to persist the CSV log
# (created as the dataset awacke1/MindfulStory.csv).
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
# Derive the URL from the ID so the two values cannot drift apart.
DATASET_REPO_URL = f"https://huggingface.co/datasets/{DATASET_REPO_ID}"
DATA_FILENAME = "MindfulStory.csv"
# Location of the CSV inside the local "data" directory.
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Best-effort download of the existing CSV from the Hub. A failure here is
# deliberately non-fatal so the app can still start without the file.
# NOTE(review): DATASET_REPO_ID names a dataset repository, but no `repo_type`
# is passed to hf_hub_download — confirm whether `repo_type="dataset"` is
# intended. Not changed here because a successful download would populate
# "data" before the Repository clone into the same directory.
# NOTE(review): `force_filename` may not be accepted by newer huggingface_hub
# releases — verify against the version this Space pins.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir="data",
        force_filename=DATA_FILENAME,
    )
except Exception as err:
    # `except Exception` keeps the best-effort behaviour but no longer
    # swallows KeyboardInterrupt / SystemExit like the bare `except:` did.
    print("π¬ File not found, weβll act like itβs not a problem...")
    # Surface the real cause so a non-"not found" failure is diagnosable.
    print(f"hf_hub_download failed: {err!r}")
# AI memory: persist a (name, message, time) record to the CSV log.
def AIMemory(name: str, message: str) -> dict:
    """Append a message to the CSV log, push it, and return the record.

    When both ``name`` and ``message`` are non-empty, one row is appended to
    ``DATA_FILE`` and the module-level ``repo`` is pushed to the Hub. When
    either is empty nothing is written or pushed, but the record is still
    returned.

    Args:
        name: Author of the message.
        message: Message text.

    Returns:
        A dict with the keys ``"name"``, ``"message"`` and ``"time"``, where
        ``"time"`` is ``str(datetime.now())``.
    """
    # Take the timestamp once so the stored row and the returned record agree
    # (the original called datetime.now() twice and the values differed).
    record = {"name": name, "message": message, "time": str(datetime.now())}
    if name and message:
        # newline="" lets the csv module control line endings; the explicit
        # encoding keeps the file independent of the host locale.
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(record)
        # Push only after the file has been closed and flushed.
        repo.push_to_hub()
    return record
# Clone the dataset repository into the local "data" directory; this handle
# is what pushes new rows back to the Hub.
repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)

# Speech-recognition pipeline shared by the transcription helpers below.
asr = pipeline(
    "automatic-speech-recognition",
    "facebook/wav2vec2-base-960h",
)
# Text-to-speech model identifiers offered in the UI. Each one is prefixed
# with "tts_models/" when it is requested from the model manager.
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# Download every model (plus its default vocoder, when it declares one) at
# start-up and keep one ready-to-use Synthesizer per model name.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"π Downloading {MODEL_NAME}... because waiting is fun!")
    model_path, config_path, model_item = manager.download_model(
        f"tts_models/{MODEL_NAME}"
    )
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    if vocoder_name is None:
        # No default vocoder declared: pass None for both vocoder arguments.
        vocoder_path = vocoder_config_path = None
    else:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    MODELS[MODEL_NAME] = Synthesizer(
        model_path,
        config_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
# Transcribe: turn audio into text.
def transcribe(audio):
    """Run the module-level ``asr`` pipeline on ``audio`` and return its "text" field."""
    return asr(audio)["text"]
# Text-classification pipeline; no model is named, so the pipeline's default
# is used.
classifier = pipeline(
    "text-classification",
)
# Speech to text: callback wired to the "Recognize Speech" button.
def speech_to_text(speech):
    """Return the transcript that the ``asr`` pipeline produces for ``speech``."""
    recognition = asr(speech)
    return recognition["text"]
# Sentiment analysis: label a piece of text with the classifier.
def text_to_sentiment(text):
    """Return the "label" of the first result ``classifier`` gives for ``text``."""
    first_result = classifier(text)[0]
    return first_result["label"]
# Save recognised text for later retrieval.
def upsert(text):
    """Store ``text`` as a document keyed by the current timestamp.

    Writes a document into the 'Text2SpeechSentimentSave' collection whose id
    is the stringified current time, then returns the result of
    ``select('TTS-STT', <timestamp>)``.

    NOTE(review): ``db`` and ``select`` are not defined in the visible part of
    this file; they must be provided elsewhere for this function to run.

    Args:
        text: Text to store in the document's 'last' field.

    Returns:
        Whatever ``select`` returns for the freshly written document.
    """
    # Import the class under a private alias. At module level the name
    # `datetime` is rebound from the module to the class, so the original
    # `datetime.datetime.today()` raised AttributeError.
    from datetime import datetime as _datetime

    date_time = str(_datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({
        'firefield': 'Recognize Speech',
        'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
        'last': text,
        'born': date_time,
    })
    saved = select('TTS-STT', date_time)
    return saved
# Retrieve all saved records.
def selectall(text):
    """Return every stored document rendered as ``"<id> => <fields>"``.

    The rendered documents are concatenated with no separator. ``text`` is
    accepted but not used.

    NOTE(review): ``db`` is not defined in the visible part of this file.
    """
    documents = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{doc.id} => {doc.to_dict()}' for doc in documents)
# Text to speech: synthesise text with the chosen model.
def tts(text: str, model_name: str):
    """Synthesise ``text`` with the named model and return a WAV file path.

    Args:
        text: Text to speak.
        model_name: Key into ``MODELS`` selecting the synthesizer.

    Returns:
        Path of a temporary ".wav" file; it is created with ``delete=False``,
        so it is not removed automatically.

    Raises:
        NameError: If ``model_name`` has no entry in ``MODELS``.
    """
    print(text, model_name)
    engine = MODELS.get(model_name)
    if engine is None:
        raise NameError("π¬ Oops! Model not found.")
    waveform = engine.tts(text)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        engine.save_wav(waveform, wav_file)
    return wav_file.name
# Gradio UI: record speech, transcribe it, and read the text back aloud.
# NOTE(review): the leading characters of the two button labels look like
# mis-encoded emoji; they are kept exactly as found.
with gr.Blocks() as demo:
    # Microphone input, delivered to the callback as a file path.
    audio_file = gr.Audio(source="microphone", type="filepath")
    # Textbox that receives the transcript and feeds the TTS callback.
    text = gr.Textbox(label="Speech to Text")
    # Choice of text-to-speech model.
    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
    # Read-only player for the synthesised audio.
    audio = gr.Audio(label="Output", interactive=False)

    b1 = gr.Button("π€ Recognize Speech")
    b5 = gr.Button("π Read It Back Aloud")

    # Wire each button to its callback.
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)