awacke1 committed on
Commit
c09ad18
·
verified ·
1 Parent(s): 5b90064

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -132
app.py CHANGED
@@ -1,53 +1,14 @@
1
- import streamlit as st
2
- import datetime
3
- from transformers import pipeline
4
- import gradio as gr
5
- import tempfile
6
- from typing import Optional
7
- import numpy as np
8
  from TTS.utils.manage import ModelManager
9
  from TTS.utils.synthesizer import Synthesizer
10
- import os
11
- import csv
12
- import huggingface_hub
13
- from huggingface_hub import Repository, hf_hub_download, upload_file
14
- from datetime import datetime
15
-
16
- # 🌟 Setup dataset repo 🌟
17
- # Created new dataset as awacke1/MindfulStory.csv
18
- DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
19
- DATASET_REPO_ID = "awacke1/MindfulStory.csv"
20
- DATA_FILENAME = "MindfulStory.csv"
21
- DATA_FILE = os.path.join("data", DATA_FILENAME)
22
- HF_TOKEN = os.environ.get("HF_TOKEN")
23
-
24
- # 😅 Oops! Try downloading the dataset (We hope it works!)
25
- try:
26
- hf_hub_download(
27
- repo_id=DATASET_REPO_ID,
28
- filename=DATA_FILENAME,
29
- cache_dir="data",
30
- force_filename=DATA_FILENAME
31
- )
32
- except:
33
- print("😬 File not found, we’ll act like it’s not a problem...")
34
-
35
- # 🧠 AI Memory: Because forgetting is for humans πŸ€–
36
- def AIMemory(name: str, message: str):
37
- if name and message:
38
- with open(DATA_FILE, "a") as csvfile:
39
- writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
40
- writer.writerow({"name": name, "message": message, "time": str(datetime.now())})
41
- commit_url = repo.push_to_hub()
42
- return {"name": name, "message": message, "time": str(datetime.now())}
43
-
44
- # 🌍 Repository setup! Let’s clone like pros πŸ‘¨β€πŸ’»
45
- repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
46
 
47
- # 🗣️ Set up Speech Recognition
48
- asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 
49
 
50
- # 🎀 Set up TTS Models. Let’s find that sweet robotic voice!
51
  MODEL_NAMES = [
52
  "en/ljspeech/tacotron2-DDC",
53
  "en/ljspeech/glow-tts",
@@ -58,97 +19,49 @@ MODEL_NAMES = [
58
  "de/thorsten/tacotron2-DCA",
59
  ]
60
 
61
- # 🛠️ Use Model Manager to load vocoders (Fancy tech magic here)
62
- MODELS = {}
63
- manager = ModelManager()
64
  for MODEL_NAME in MODEL_NAMES:
65
  print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
66
- model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
67
- vocoder_name: Optional[str] = model_item["default_vocoder"]
68
- vocoder_path = None
69
- vocoder_config_path = None
70
- if vocoder_name is not None:
71
- vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
72
-
73
- synthesizer = Synthesizer(
74
- model_path, config_path, None, vocoder_path, vocoder_config_path,
75
- )
76
- MODELS[MODEL_NAME] = synthesizer
77
-
78
- # 🧙‍♂️ Transcribe function: Turning audio into text with a sprinkle of magic!
79
- def transcribe(audio):
80
- text = asr(audio)["text"]
81
- return text
82
-
83
- # 📊 Text classifier (because we love labeling things, right?)
84
- classifier = pipeline("text-classification")
85
-
86
- # 🎀 Speech to Text: Give me your voice, I’ll give you text!
87
- def speech_to_text(speech):
88
- text = asr(speech)["text"]
89
- return text
90
-
91
- # 😎 Sentiment Analysis (because even robots care about feelings πŸ’”)
92
- def text_to_sentiment(text):
93
- sentiment = classifier(text)[0]["label"]
94
- return sentiment
95
-
96
- # 📦 Saving it for later: Store this priceless info!
97
- def upsert(text):
98
- date_time = str(datetime.datetime.today())
99
- doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
100
- doc_ref.set({
101
- u'firefield': 'Recognize Speech',
102
- u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
103
- u'last': text,
104
- u'born': date_time,
105
- })
106
- saved = select('TTS-STT', date_time)
107
- return saved
108
-
109
- # 🔍 Retrieve all records: Gotta catch ‘em all!
110
- def selectall(text):
111
- docs = db.collection('Text2SpeechSentimentSave').stream()
112
- doclist = ''
113
- for doc in docs:
114
- r = (f'{doc.id} => {doc.to_dict()}')
115
- doclist += r
116
- return doclist
117
-
118
- # 🗣️ Text to Speech (Because speaking is fun, but robots do it better)
119
  def tts(text: str, model_name: str):
120
  print(text, model_name)
121
  synthesizer = MODELS.get(model_name, None)
 
122
  if synthesizer is None:
123
- raise NameError("😬 Oops! Model not found.")
124
 
125
- wavs = synthesizer.tts(text)
126
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
127
- synthesizer.save_wav(wavs, fp)
128
- return fp.name
 
 
 
 
 
129
 
130
- # 🎛️ Gradio UI with Emoji and Fun Comments 🎉
131
- demo = gr.Blocks()
132
-
133
- with demo:
134
- # 🎀 Microphone input to capture your golden voice 🎀
135
- audio_file = gr.Audio(source="microphone", type="filepath")
136
-
137
- # 📜 Textbox to display transcribed text 📜
138
- text = gr.Textbox(label="Speech to Text")
139
-
140
- # 🎙️ Radio input to choose the best Text to Speech model 🎙️
141
- TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)
142
-
143
- # 🔊 Audio player to play back the robot’s voice 🔊
144
- audio = gr.Audio(label="Output", interactive=False)
145
-
146
- # 🎉 Buttons for all your needs 🎉
147
- b1 = gr.Button("🎤 Recognize Speech")
148
- b5 = gr.Button("🔊 Read It Back Aloud")
149
-
150
- # 🖱️ Click buttons to perform actions! 🖱️
151
- b1.click(speech_to_text, inputs=audio_file, outputs=text)
152
- b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
153
-
154
- demo.launch(share=True)
 
1
+ import os
2
+ import torch
 
 
 
 
 
3
  from TTS.utils.manage import ModelManager
4
  from TTS.utils.synthesizer import Synthesizer
5
+ import tempfile
6
+ from typing import Optional
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # 🛠️ Use Model Manager to load vocoders
9
+ MODELS = {}
10
+ manager = ModelManager()
11
 
 
12
  MODEL_NAMES = [
13
  "en/ljspeech/tacotron2-DDC",
14
  "en/ljspeech/glow-tts",
 
19
  "de/thorsten/tacotron2-DCA",
20
  ]
21
 
 
 
 
22
  for MODEL_NAME in MODEL_NAMES:
23
  print(f"🚀 Downloading {MODEL_NAME}... because waiting is fun!")
24
+
25
+ try:
26
+ model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
27
+ vocoder_name: Optional[str] = model_item["default_vocoder"]
28
+ vocoder_path = None
29
+ vocoder_config_path = None
30
+
31
+ if vocoder_name is not None:
32
+ vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
33
+
34
+ # 🧙‍♂️ Load the synthesizer with vocoder and safe loading of weights
35
+ synthesizer = Synthesizer(
36
+ model_path,
37
+ config_path,
38
+ None,
39
+ vocoder_path,
40
+ vocoder_config_path,
41
+ use_cuda=False # Make sure you're not forcing CUDA unless needed
42
+ )
43
+
44
+ MODELS[MODEL_NAME] = synthesizer
45
+
46
+ except Exception as e:
47
+ print(f"😬 Failed to load model {MODEL_NAME}: {str(e)}")
48
+ continue
49
+
50
+ # 🗣️ Text to Speech (because speaking is fun, but robots do it better)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def tts(text: str, model_name: str):
52
  print(text, model_name)
53
  synthesizer = MODELS.get(model_name, None)
54
+
55
  if synthesizer is None:
56
+ raise NameError("Model not found, check if it's loaded properly!")
57
 
58
+ try:
59
+ wavs = synthesizer.tts(text)
60
+
61
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
62
+ synthesizer.save_wav(wavs, fp)
63
+ return fp.name
64
+ except Exception as e:
65
+ print(f"😬 Error generating speech: {str(e)}")
66
+ return None
67