# TTS-STT-Blocks / app.py
# (Hugging Face Space page header — awacke1, commit 5b90064, 5.41 kB —
#  commented out so this scraped file parses as Python.)
import streamlit as st
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import os
import csv
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# 🌟 Setup dataset repo 🌟
# Created new dataset as awacke1/MindfulStory.csv
# Hugging Face dataset repository that backs persistent story/chat storage.
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATA_FILENAME = "MindfulStory.csv"
# Local path of the CSV inside the cloned repo (Repository(local_dir="data", ...) below).
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Auth token for pushing to the dataset repo; None when the env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# πŸ˜… Oops! Try downloading the dataset (We hope it works!)
try:
hf_hub_download(
repo_id=DATASET_REPO_ID,
filename=DATA_FILENAME,
cache_dir="data",
force_filename=DATA_FILENAME
)
except:
print("😬 File not found, we’ll act like it’s not a problem...")
# 🧠 AI Memory: Because forgetting is for humans 🤖
def AIMemory(name: str, message: str):
    """Append one (name, message, time) row to the dataset CSV and push it to the Hub.

    Returns the stored record as a dict, or None (implicitly) when either
    *name* or *message* is empty.
    """
    if name and message:
        # Capture the timestamp once so the CSV row and the returned dict agree
        # (previously datetime.now() was called twice and could differ).
        timestamp = str(datetime.now())
        # newline="" per the csv module docs — avoids blank rows on Windows.
        with open(DATA_FILE, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow({"name": name, "message": message, "time": timestamp})
        # `repo` is the module-level Repository cloned below; the push URL was
        # previously assigned to an unused local, so just push.
        repo.push_to_hub()
        return {"name": name, "message": message, "time": timestamp}
# 🌍 Repository setup! Let’s clone like pros 👨‍💻
# Clone the dataset repo into ./data so DATA_FILE is a real, pushable file.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
# 🗣️ Set up Speech Recognition
# wav2vec2 ASR pipeline shared by transcribe() and speech_to_text() below.
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
# 🎤 Set up TTS Models. Let’s find that sweet robotic voice!
# Coqui-TTS model identifiers; the "tts_models/" prefix is added at download
# time in the loading loop below. These names also populate the UI radio choices.
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]
# πŸ› οΈ Use Model Manager to load vocoders (Fancy tech magic here)
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
print(f"πŸš€ Downloading {MODEL_NAME}... because waiting is fun!")
model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
vocoder_name: Optional[str] = model_item["default_vocoder"]
vocoder_path = None
vocoder_config_path = None
if vocoder_name is not None:
vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
synthesizer = Synthesizer(
model_path, config_path, None, vocoder_path, vocoder_config_path,
)
MODELS[MODEL_NAME] = synthesizer
# πŸ§™β€β™‚οΈ Transcribe function: Turning audio into text with a sprinkle of magic!
def transcribe(audio):
text = asr(audio)["text"]
return text
# πŸ“Š Text classifier (because we love labeling things, right?)
classifier = pipeline("text-classification")
# 🎤 Speech to Text: Give me your voice, I’ll give you text!
def speech_to_text(speech):
    """Transcribe *speech* (an audio filepath) with the shared ASR pipeline."""
    recognized = asr(speech)
    return recognized["text"]
# 😎 Sentiment Analysis (because even robots care about feelings 💔)
def text_to_sentiment(text):
    """Return the label of the classifier's top prediction for *text*."""
    top_prediction = classifier(text)[0]
    return top_prediction["label"]
# πŸ“¦ Saving it for later: Store this priceless info!
def upsert(text):
date_time = str(datetime.datetime.today())
doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
doc_ref.set({
u'firefield': 'Recognize Speech',
u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
u'last': text,
u'born': date_time,
})
saved = select('TTS-STT', date_time)
return saved
# πŸ” Retrieve all records: Gotta catch β€˜em all!
def selectall(text):
docs = db.collection('Text2SpeechSentimentSave').stream()
doclist = ''
for doc in docs:
r = (f'{doc.id} => {doc.to_dict()}')
doclist += r
return doclist
# πŸ—£οΈ Text to Speech (Because speaking is fun, but robots do it better)
def tts(text: str, model_name: str):
print(text, model_name)
synthesizer = MODELS.get(model_name, None)
if synthesizer is None:
raise NameError("😬 Oops! Model not found.")
wavs = synthesizer.tts(text)
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
synthesizer.save_wav(wavs, fp)
return fp.name
# πŸŽ›οΈ Gradio UI with Emoji and Fun Comments πŸŽ‰
demo = gr.Blocks()
with demo:
# 🎀 Microphone input to capture your golden voice 🎀
audio_file = gr.Audio(source="microphone", type="filepath")
# πŸ“œ Textbox to display transcribed text πŸ“œ
text = gr.Textbox(label="Speech to Text")
# πŸŽ™οΈ Radio input to choose the best Text to Speech model πŸŽ™οΈ
TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
# πŸ”Š Audio player to play back the robot’s voice πŸ”Š
audio = gr.Audio(label="Output", interactive=False)
# πŸŽ‰ Buttons for all your needs πŸŽ‰
b1 = gr.Button("🎀 Recognize Speech")
b5 = gr.Button("πŸ”Š Read It Back Aloud")
# πŸ–±οΈ Click buttons to perform actions! πŸ–±οΈ
b1.click(speech_to_text, inputs=audio_file, outputs=text)
b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
demo.launch(share=True)