# Source: Hugging Face Space awacke1/TTS-STT-Blocks
# Space status when captured: Runtime error
# File size: 5,405 Bytes

import streamlit as st
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import os
import csv
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime

# 🌟 Setup dataset repo 🌟
# Created new dataset as awacke1/MindfulStory.csv
# Local side: the CSV lives under ./data, the directory the dataset repo is cloned into.
DATA_FILENAME = "MindfulStory.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Remote side: the Hub dataset repo, addressed both by id and by full URL.
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"

# Access token is read from the environment; it is None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# 😅 Oops! Try downloading the dataset (We hope it works!)
# Best-effort prefetch of the CSV: a failure here is reported but must not stop
# the app from starting.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        # The repo is a *dataset*; without this the Hub looks in the model namespace.
        repo_type="dataset",
        cache_dir="data",
        # NOTE(review): `force_filename` is only accepted by older huggingface_hub
        # releases — confirm against the pinned version.
        force_filename=DATA_FILENAME,
    )
except Exception as download_error:
    # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate,
    # and the actual cause is printed instead of being silently discarded.
    print("😬 File not found, we’ll act like it’s not a problem...")
    print(f"Dataset download failed: {download_error!r}")

# 🧠 AI Memory: Because forgetting is for humans 🤖
def AIMemory(name: str, message: str):
    """Record a (name, message, timestamp) entry and return it as a dict.

    When both ``name`` and ``message`` are non-empty, the entry is appended as
    one row to ``DATA_FILE`` (no header row is written) and the module-level
    ``repo`` is pushed to the Hub. When either is empty nothing is written or
    pushed, but the dict is still returned.

    Args:
        name: Author of the entry.
        message: Text of the entry.

    Returns:
        ``{"name": ..., "message": ..., "time": ...}`` where ``time`` is the
        string form of the current local datetime.
    """
    # Take the timestamp once so the returned record matches the row on disk.
    record = {"name": name, "message": message, "time": str(datetime.now())}

    if name and message:
        # newline="" is what the csv module expects; it avoids doubled line
        # endings on Windows. UTF-8 keeps non-ASCII text portable.
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(record)
        repo.push_to_hub()

    return record

# 🌍 Repository setup! Let’s clone like pros 👨‍💻
# Clone the dataset repo into ./data; AIMemory pushes through this handle.
repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)

# 🗣️ Set up Speech Recognition
# Built once at import time and shared by transcribe() and speech_to_text().
asr = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# 🎤 Set up TTS Models. Let’s find that sweet robotic voice!
# Each entry is prefixed with "tts_models/" when it is downloaded below, and
# the same strings are offered as the radio choices in the UI.
MODEL_NAMES = [
    # en/ljspeech/*
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    # en/sam/*
    "en/sam/tacotron-DDC",
    # fr/* and de/*
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# 🛠️ Use Model Manager to load vocoders (Fancy tech magic here)
# Build one Synthesizer per entry in MODEL_NAMES, keyed by that entry.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
    model_path, config_path, model_item = manager.download_model(
        f"tts_models/{MODEL_NAME}"
    )

    # The model's metadata names its default vocoder, or None when it has none;
    # in the latter case both vocoder paths stay None.
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    if vocoder_name is None:
        vocoder_path = None
        vocoder_config_path = None
    else:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # Arguments are deliberately positional, exactly as the original call.
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer

# 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
def transcribe(audio):
    """Run the module-level ``asr`` pipeline on ``audio`` and return its "text" field."""
    return asr(audio)["text"]

# 📊 Text classifier (because we love labeling things, right?)
# No model is named, so the pipeline picks its own default for this task.
classifier = pipeline(task="text-classification")

# 🎤 Speech to Text: Give me your voice, I’ll give you text!
def speech_to_text(speech):
    """Transcribe recorded audio with the module-level ``asr`` pipeline.

    This is the handler behind the "Recognize Speech" button, whose input is
    the microphone component.

    Args:
        speech: Audio to transcribe, as handed over by the audio component;
            ``None`` when nothing has been recorded.

    Returns:
        The recognized text, or an empty string when ``speech`` is ``None``.
    """
    # Clicking the button before recording passes None; return an empty
    # transcript rather than feeding None into the pipeline.
    if speech is None:
        return ""
    return asr(speech)["text"]

# 😎 Sentiment Analysis (because even robots care about feelings 💔)
def text_to_sentiment(text):
    """Classify ``text`` with the module-level ``classifier`` and return the first result's label."""
    first_result = classifier(text)[0]
    return first_result["label"]

# 📦 Saving it for later: Store this priceless info!
def upsert(text):
    """Store ``text`` as a timestamp-keyed document and return the saved record.

    A document whose id is the current timestamp is written to the
    'Text2SpeechSentimentSave' collection, then looked up again through
    ``select``.

    NOTE(review): neither ``db`` nor ``select`` is defined in the visible part
    of this file — calling this function raises NameError until they are
    provided. ``db`` looks like a Firestore-style client; confirm.

    Args:
        text: The text to store (saved under the 'last' field).

    Returns:
        Whatever ``select('TTS-STT', date_time)`` returns.
    """
    # `datetime` is the class here (the later `from datetime import datetime`
    # wins over `import datetime`), so `datetime.datetime.today()` would raise
    # AttributeError.
    date_time = str(datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({
        u'firefield': 'Recognize Speech',
        u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
        u'last': text,
        u'born': date_time,
    })
    saved = select('TTS-STT', date_time)
    return saved

# 🔍 Retrieve all records: Gotta catch ‘em all!
def selectall(text):
    """Return every document in 'Text2SpeechSentimentSave' as one concatenated string.

    Each document contributes ``"<id> => <dict>"``; entries are joined with no
    separator. The ``text`` argument is accepted but not used.

    NOTE(review): ``db`` is not defined in the visible part of this file.
    """
    stored_docs = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{doc.id} => {doc.to_dict()}' for doc in stored_docs)

# 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
def tts(text: str, model_name: str):
    """Synthesize ``text`` with the chosen model and return the path of the WAV file.

    Args:
        text: Text to speak.
        model_name: Key into ``MODELS`` selecting the synthesizer.

    Returns:
        Path of a ``.wav`` temporary file; it is created with ``delete=False``,
        so it stays on disk after this function returns.

    Raises:
        NameError: If ``model_name`` has no entry in ``MODELS``.
    """
    print(text, model_name)

    engine = MODELS.get(model_name)
    if engine is None:
        raise NameError("😬 Oops! Model not found.")

    waveform = engine.tts(text)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        engine.save_wav(waveform, wav_file)
        wav_path = wav_file.name
    return wav_path

# 🎛️ Gradio UI with Emoji and Fun Comments 🎉
# Layout: microphone -> transcript textbox -> model picker -> playback,
# with one button per direction (speech-to-text, text-to-speech).
with gr.Blocks() as demo:
    # Inputs and outputs, created in the order they appear on the page.
    audio_file = gr.Audio(source="microphone", type="filepath")
    text = gr.Textbox(label="Speech to Text")
    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
    audio = gr.Audio(label="Output", interactive=False)

    # Action buttons.
    b1 = gr.Button("🎤 Recognize Speech")
    b5 = gr.Button("🔊 Read It Back Aloud")

    # b1: recorded audio -> transcript textbox.
    b1.click(fn=speech_to_text, inputs=audio_file, outputs=text)
    # b5: transcript + chosen model -> synthesized audio in the output player.
    b5.click(fn=tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)