Spaces:
Runtime error
Runtime error
File size: 5,405 Bytes
e954652 b5daf8c 5b90064 b5daf8c bee8f8a 5b90064 b5daf8c 5b90064 b5daf8c 5b90064 b5daf8c 5b90064 b5daf8c 5b90064 e954652 5b90064 1f705ff 3d7c8ba 1f705ff bee8f8a 5b90064 1f705ff 5b90064 1f705ff 5b90064 e954652 5b90064 e954652 5b90064 e954652 5b90064 e954652 5b90064 e954652 5b90064 e954652 5b90064 caa3d15 e954652 5b90064 e954652 5b90064 e954652 5b90064 e954652 5b90064 1f705ff 5b90064 1f705ff e954652 5b90064 1f705ff 5b90064 e954652 5b90064 bee8f8a 5b90064 821f65e e954652 5b90064 e954652 5b90064 e954652 5b90064 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import streamlit as st
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import os
import csv
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# Dataset repository settings.
# The backing dataset was created on the Hub as awacke1/MindfulStory.csv.
DATA_FILENAME = "MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATASET_REPO_URL = "https://huggingface.co/datasets/" + DATASET_REPO_ID
# Location of the CSV inside the local "data" directory.
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# Best-effort attempt to download the CSV from the dataset repo into "data".
# A failure is tolerated on purpose: start-up continues without the file.
# NOTE(review): no `repo_type` is passed although DATASET_REPO_ID names a
# dataset repo, and `force_filename` may not be accepted by newer
# huggingface_hub releases -- confirm against the pinned version before
# changing either, since a successful download would fill "data" ahead of the
# repository clone that targets the same folder.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir="data",
        force_filename=DATA_FILENAME,
    )
except Exception as download_error:
    # `except Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; the actual reason is reported as well.
    print("π¬ File not found, weβll act like itβs not a problem...")
    print(f"(download failed with {type(download_error).__name__}: {download_error})")
# AI Memory: append a message to the CSV log and push it to the dataset repo.
def AIMemory(name: str, message: str):
    """Record a named message with a timestamp and return the record.

    When both ``name`` and ``message`` are non-empty, one row is appended to
    ``DATA_FILE`` and the module-level ``repo`` is pushed to the Hub.

    Args:
        name: Author of the message.
        message: Text to store.

    Returns:
        A dict with the keys ``"name"``, ``"message"`` and ``"time"``. It is
        returned whether or not a row was written.
    """
    # A single timestamp, so the stored row and the returned record agree.
    record = {"name": name, "message": message, "time": str(datetime.now())}
    if name and message:
        # newline="" is what the csv module expects of files it writes to;
        # the explicit encoding avoids depending on the platform default.
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(record)
        # Push only after the file has been closed, so the new row is on disk.
        repo.push_to_hub()
    return record
# Clone the dataset repository into the local "data" directory, authenticating
# with HF_TOKEN. AIMemory pushes its changes through this handle.
repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)
# Speech-recognition pipeline used to turn recorded audio into text.
asr = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
# Identifiers of the TTS models to load; each one is requested from the model
# manager as "tts_models/<name>" and offered as a choice in the UI.
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]
# Download every model (plus its default vocoder, when it names one) at
# start-up and keep one Synthesizer per model name in MODELS.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"π Downloading {MODEL_NAME}... because waiting is fun!")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    # The optional arguments are passed by keyword so they cannot be shifted
    # onto the wrong parameter if the Synthesizer signature gains positional
    # parameters between releases.
    # NOTE(review): keyword names taken from the Coqui TTS Synthesizer API --
    # confirm against the installed TTS version.
    synthesizer = Synthesizer(
        model_path,
        config_path,
        tts_speakers_file=None,
        vocoder_checkpoint=vocoder_path,
        vocoder_config=vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer
# Transcribe: run speech recognition on an audio input and return the text.
def transcribe(audio):
    """Return the ``"text"`` field of the ASR pipeline's result for ``audio``."""
    return asr(audio)["text"]
# Text-classification pipeline used by text_to_sentiment. No model is named
# here, so the pipeline falls back to whatever default it selects for the task.
classifier = pipeline(task="text-classification")
# Speech to Text: transcribe a recording with the ASR pipeline.
def speech_to_text(speech):
    """Transcribe ``speech`` and return the recognised text.

    Args:
        speech: Audio input handed to the ``asr`` pipeline. The UI wires this
            to an audio component configured with ``type="filepath"``.

    Returns:
        The ``"text"`` field of the ASR result, or ``""`` when ``speech`` is
        ``None``.
    """
    # Presumably the UI passes None when the button is clicked before anything
    # was recorded; return an empty transcript instead of failing in the model.
    if speech is None:
        return ""
    return asr(speech)["text"]
# Sentiment analysis: label a piece of text with the classifier pipeline.
def text_to_sentiment(text):
    """Return the ``"label"`` of the first prediction the classifier gives for ``text``."""
    top_prediction = classifier(text)[0]
    return top_prediction["label"]
# Save for later: store the recognised text as a timestamped document.
def upsert(text):
    """Write ``text`` to the 'Text2SpeechSentimentSave' collection and read it back.

    The document id is the current timestamp as a string.

    Args:
        text: Text stored under the ``'last'`` field.

    Returns:
        Whatever ``select('TTS-STT', date_time)`` returns for the new document.
    """
    # NOTE(review): `db` and `select` are not defined in the visible part of
    # this file -- confirm they are provided before this function is used.
    # `datetime` is the class here (`from datetime import datetime` is the
    # last module-level binding), so `datetime.datetime.today()` would raise
    # AttributeError.
    date_time = str(datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({
        'firefield': 'Recognize Speech',
        'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
        'last': text,
        'born': date_time,
    })
    saved = select('TTS-STT', date_time)
    return saved
# Retrieve all records from the 'Text2SpeechSentimentSave' collection.
def selectall(text):
    """Return every stored document rendered as ``"<id> => <dict>"``, concatenated.

    ``text`` is accepted but not used. The entries are joined without any
    separator.
    """
    stored_docs = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{doc.id} => {doc.to_dict()}' for doc in stored_docs)
# Text to Speech: synthesise text with one of the preloaded models.
def tts(text: str, model_name: str):
    """Synthesise ``text`` with the model registered under ``model_name``.

    Args:
        text: Text to speak.
        model_name: Key into ``MODELS``.

    Returns:
        Path of a ``.wav`` temporary file holding the audio. The file is
        created with ``delete=False``, so it stays on disk after this returns.

    Raises:
        NameError: If ``MODELS`` has no synthesizer for ``model_name``.
    """
    print(text, model_name)
    voice = MODELS.get(model_name)
    if voice is None:
        raise NameError("π¬ Oops! Model not found.")
    audio_samples = voice.tts(text)
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        voice.save_wav(audio_samples, wav_file)
    return wav_file.name
# Gradio UI: record speech, show the transcript, and read it back aloud.
with gr.Blocks() as demo:
    # Microphone recording, delivered to the callback as a file path.
    audio_file = gr.Audio(source="microphone", type="filepath")
    # Shows the transcript; also the text that gets read back.
    text = gr.Textbox(label="Speech to Text")
    # Chooses which loaded TTS model speaks the text.
    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
    # Plays the synthesised audio.
    audio = gr.Audio(label="Output", interactive=False)
    # Action buttons.
    b1 = gr.Button("π€ Recognize Speech")
    b5 = gr.Button("π Read It Back Aloud")
    # Wire each button to its callback.
    b1.click(fn=speech_to_text, inputs=audio_file, outputs=text)
    b5.click(fn=tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)
|