File size: 5,405 Bytes
e954652
 
 
 
 
 
 
 
 
b5daf8c
 
 
 
 
 
5b90064
 
b5daf8c
 
 
 
 
bee8f8a
5b90064
b5daf8c
 
 
 
5b90064
b5daf8c
 
 
5b90064
 
 
b5daf8c
 
 
 
 
 
 
 
5b90064
 
b5daf8c
5b90064
e954652
 
5b90064
1f705ff
 
3d7c8ba
 
 
 
1f705ff
 
 
bee8f8a
5b90064
1f705ff
 
 
5b90064
1f705ff
 
 
 
 
 
 
 
 
 
 
 
5b90064
e954652
 
 
 
5b90064
e954652
 
5b90064
e954652
 
 
 
5b90064
e954652
 
 
 
5b90064
e954652
5b90064
e954652
5b90064
 
 
 
 
 
caa3d15
e954652
5b90064
 
e954652
 
5b90064
e954652
5b90064
e954652
 
5b90064
 
1f705ff
 
 
 
5b90064
 
1f705ff
 
 
 
e954652
5b90064
1f705ff
5b90064
e954652
5b90064
 
 
 
bee8f8a
5b90064
 
 
 
 
821f65e
e954652
5b90064
 
 
 
 
e954652
5b90064
e954652
5b90064
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import os
import csv
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime

# 🌟 Setup dataset repo 🌟
# Created new dataset as awacke1/MindfulStory.csv
# Hugging Face dataset repository that stores the saved records.
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATA_FILENAME = "MindfulStory.csv"
# Path of the CSV file inside the local "data" directory.
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# 😅 Best-effort download of the existing CSV: a failure is reported and
# start-up simply continues.
# NOTE(review): the repo URL is under /datasets/ but no repo_type is passed
# here — confirm whether this call is expected to succeed.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir="data",
        force_filename=DATA_FILENAME,
    )
except Exception as err:
    # `except Exception` rather than a bare `except:` so that Ctrl-C and
    # SystemExit still stop the app; the reason is printed instead of hidden.
    print("😬 File not found, we’ll act like it’s not a problem...")
    print(f"   (download skipped: {err!r})")

# 🧠 AI Memory: Because forgetting is for humans 🤖
def AIMemory(name: str, message: str):
    """Record a (name, message, time) entry and return it as a dict.

    When both ``name`` and ``message`` are non-empty, the entry is appended
    as one row to ``DATA_FILE`` and ``repo.push_to_hub()`` is called.
    When either is empty nothing is written or pushed.

    Args:
        name: Value stored under the ``"name"`` key.
        message: Value stored under the ``"message"`` key.

    Returns:
        A dict with the keys ``"name"``, ``"message"`` and ``"time"``
        (the current local time as a string).
    """
    # One timestamp for both the stored row and the returned record, so the
    # two can never disagree.
    record = {"name": name, "message": message, "time": str(datetime.now())}
    if name and message:
        # newline="" is what the csv module requires of files it writes to;
        # explicit UTF-8 keeps non-ASCII messages independent of the locale.
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(record)
        repo.push_to_hub()
    return record

# 🌍 Repository setup! Let’s clone like pros 👨‍💻
repo = Repository(
    local_dir="data",  # same directory that DATA_FILE lives in
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)

# 🗣️ Speech recognition pipeline used by the transcription helpers
asr = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# 🎤 Set up TTS models. Let’s find that sweet robotic voice!
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# 🛠️ Download every listed TTS model (plus its default vocoder, when one is
# named) and keep one Synthesizer per model name in MODELS.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    # .get(): an entry without a "default_vocoder" key is handled like an
    # entry whose value is None, instead of raising KeyError.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # Keyword arguments keep the vocoder paths bound to the vocoder
    # parameters even if the constructor's positional order differs between
    # TTS releases.
    synthesizer = Synthesizer(
        tts_checkpoint=model_path,
        tts_config_path=config_path,
        tts_speakers_file=None,
        vocoder_checkpoint=vocoder_path,
        vocoder_config=vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer

# 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
def transcribe(audio):
    """Pass ``audio`` to the module-level ``asr`` pipeline and return the ``"text"`` entry of its result."""
    recognition = asr(audio)
    return recognition["text"]

# 📊 Text classifier (because we love labeling things, right?)
classifier = pipeline(task="text-classification")  # no model is named here

# 🎤 Speech to Text: Give me your voice, I’ll give you text!
def speech_to_text(speech):
    """Return the ``"text"`` entry of what the module-level ``asr`` pipeline produces for ``speech``."""
    return asr(speech)["text"]

# 😎 Sentiment Analysis (because even robots care about feelings 💔)
def text_to_sentiment(text):
    """Classify ``text`` with the module-level ``classifier`` and return the ``"label"`` of its first result."""
    predictions = classifier(text)
    first_prediction = predictions[0]
    return first_prediction["label"]

# 📦 Saving it for later: Store this priceless info!
def upsert(text):
    """Store ``text`` in a document named after the current timestamp.

    The document is written to the ``'Text2SpeechSentimentSave'`` collection
    of ``db``; the function then returns whatever
    ``select('TTS-STT', <timestamp>)`` returns.

    Args:
        text: Value stored under the document's ``'last'`` field.

    Returns:
        The result of ``select('TTS-STT', date_time)``.
    """
    # Local import under a private alias: at module level the name `datetime`
    # is bound first to the module and then to the class, so the previous
    # `datetime.datetime.today()` raised AttributeError.
    from datetime import datetime as _datetime

    date_time = str(_datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({
        'firefield': 'Recognize Speech',
        'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
        'last': text,
        'born': date_time,
    })
    # NOTE(review): neither `db` nor `select` is defined in the visible part
    # of this file — confirm where they are supposed to come from.
    saved = select('TTS-STT', date_time)
    return saved

# 🔍 Retrieve all records: Gotta catch ‘em all!
def selectall(text):
    """Return every document of the ``'Text2SpeechSentimentSave'`` collection as one string.

    Each document contributes ``"<id> => <dict>"``; the pieces are joined
    with no separator. The ``text`` argument is not used.
    """
    records = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{record.id} => {record.to_dict()}' for record in records)

# 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
def tts(text: str, model_name: str):
    """Synthesize ``text`` with the model registered as ``model_name``.

    Args:
        text: Text handed to the synthesizer.
        model_name: Key looked up in ``MODELS``.

    Returns:
        The name of a ``.wav`` temporary file that is not deleted on close.

    Raises:
        NameError: If ``MODELS`` has no synthesizer for ``model_name``.
    """
    print(text, model_name)
    engine = MODELS.get(model_name)
    if engine is None:
        raise NameError("😬 Oops! Model not found.")

    waveform = engine.tts(text)
    # delete=False: the file has to outlive this function so that the
    # returned path stays usable.
    out_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with out_file:
        engine.save_wav(waveform, out_file)
        return out_file.name

# 🎛️ Gradio UI with Emoji and Fun Comments 🎉
demo = gr.Blocks()

with demo:
    # 🎤 Microphone input; type="filepath" hands the recording over as a path
    audio_file = gr.Audio(source="microphone", type="filepath")

    # 📜 Textbox that receives the transcription and feeds the read-back
    text = gr.Textbox(label="Speech to Text")

    # 🎙️ Radio input to choose one of the loaded Text to Speech models
    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)

    # 🔊 Audio player for the synthesized output
    audio = gr.Audio(label="Output", interactive=False)

    # 🎉 Action buttons (labels restored from mis-encoded emoji)
    b1 = gr.Button("🎤 Recognize Speech")
    b5 = gr.Button("🔊 Read It Back Aloud")

    # 🖱️ Wire the buttons: microphone -> text, then text + model -> audio
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)