awacke1 commited on
Commit
5b90064
·
verified ·
1 Parent(s): 89707c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -57
app.py CHANGED
@@ -2,40 +2,37 @@ import streamlit as st
2
  import datetime
3
  from transformers import pipeline
4
  import gradio as gr
5
-
6
  import tempfile
7
  from typing import Optional
8
  import numpy as np
9
  from TTS.utils.manage import ModelManager
10
  from TTS.utils.synthesizer import Synthesizer
11
-
12
- # PersistDataset -----
13
  import os
14
  import csv
15
- import gradio as gr
16
- from gradio import inputs, outputs
17
  import huggingface_hub
18
  from huggingface_hub import Repository, hf_hub_download, upload_file
19
  from datetime import datetime
20
 
21
- # created new dataset as awacke1/MindfulStory.csv
 
22
  DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
23
  DATASET_REPO_ID = "awacke1/MindfulStory.csv"
24
  DATA_FILENAME = "MindfulStory.csv"
25
  DATA_FILE = os.path.join("data", DATA_FILENAME)
26
  HF_TOKEN = os.environ.get("HF_TOKEN")
27
 
28
- # Download dataset repo using hub download
29
  try:
30
  hf_hub_download(
31
  repo_id=DATASET_REPO_ID,
32
  filename=DATA_FILENAME,
33
- cache_dir=DATA_DIRNAME,
34
  force_filename=DATA_FILENAME
35
  )
36
  except:
37
- print("file not found")
38
-
 
39
  def AIMemory(name: str, message: str):
40
  if name and message:
41
  with open(DATA_FILE, "a") as csvfile:
@@ -44,16 +41,13 @@ def AIMemory(name: str, message: str):
44
  commit_url = repo.push_to_hub()
45
  return {"name": name, "message": message, "time": str(datetime.now())}
46
 
47
- with open('Mindfulness.txt', 'r') as file:
48
- context = file.read()
49
-
50
- # Set up cloned dataset from repo for operations
51
- repo = Repository( local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
52
 
53
- # set up ASR
54
  asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
55
 
56
- # set up TTS
57
  MODEL_NAMES = [
58
  "en/ljspeech/tacotron2-DDC",
59
  "en/ljspeech/glow-tts",
@@ -64,11 +58,11 @@ MODEL_NAMES = [
64
  "de/thorsten/tacotron2-DCA",
65
  ]
66
 
67
- # Use Model Manager to load vocoders
68
  MODELS = {}
69
  manager = ModelManager()
70
  for MODEL_NAME in MODEL_NAMES:
71
- print(f"downloading {MODEL_NAME}")
72
  model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
73
  vocoder_name: Optional[str] = model_item["default_vocoder"]
74
  vocoder_path = None
@@ -81,80 +75,80 @@ for MODEL_NAME in MODEL_NAMES:
81
  )
82
  MODELS[MODEL_NAME] = synthesizer
83
 
84
- # transcribe
85
  def transcribe(audio):
86
  text = asr(audio)["text"]
87
  return text
88
 
89
- #text classifier
90
  classifier = pipeline("text-classification")
91
 
92
-
93
  def speech_to_text(speech):
94
  text = asr(speech)["text"]
95
- #rMem = AIMemory("STT", text)
96
  return text
97
 
 
98
  def text_to_sentiment(text):
99
  sentiment = classifier(text)[0]["label"]
100
- #rMem = AIMemory(text, sentiment)
101
  return sentiment
102
 
 
103
  def upsert(text):
104
- date_time =str(datetime.datetime.today())
105
  doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
106
- doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
 
 
 
 
 
107
  saved = select('TTS-STT', date_time)
108
  return saved
109
-
110
- def select(collection, document):
111
- doc_ref = db.collection(collection).document(document)
112
- doc = doc_ref.get()
113
- docid = ("The id is: ", doc.id)
114
- contents = ("The contents are: ", doc.to_dict())
115
- return contents
116
-
117
  def selectall(text):
118
  docs = db.collection('Text2SpeechSentimentSave').stream()
119
- doclist=''
120
  for doc in docs:
121
- r=(f'{doc.id} => {doc.to_dict()}')
122
  doclist += r
123
  return doclist
124
-
 
125
  def tts(text: str, model_name: str):
126
  print(text, model_name)
127
  synthesizer = MODELS.get(model_name, None)
128
  if synthesizer is None:
129
- raise NameError("model not found")
 
130
  wavs = synthesizer.tts(text)
131
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
132
  synthesizer.save_wav(wavs, fp)
133
-
134
- #rMem = AIMemory("TTS", text + model_name)
135
-
136
  return fp.name
137
 
 
138
  demo = gr.Blocks()
 
139
  with demo:
140
- audio_file = gr.inputs.Audio(source="microphone", type="filepath")
 
 
 
141
  text = gr.Textbox(label="Speech to Text")
142
- #label = gr.Label()
143
- #saved = gr.Textbox(label="Saved")
144
- #savedAll = gr.Textbox(label="SavedAll")
145
- TTSchoice = gr.inputs.Radio( label="Pick a Text to Speech Model", choices=MODEL_NAMES, )
 
146
  audio = gr.Audio(label="Output", interactive=False)
147
 
148
- b1 = gr.Button("Recognize Speech")
149
- #b2 = gr.Button("Classify Sentiment")
150
- #b3 = gr.Button("Save Speech to Text")
151
- #b4 = gr.Button("Retrieve All")
152
- b5 = gr.Button("Read It Back Aloud")
153
-
154
  b1.click(speech_to_text, inputs=audio_file, outputs=text)
155
- #b2.click(text_to_sentiment, inputs=text, outputs=label)
156
- #b3.click(upsert, inputs=text, outputs=saved)
157
- #b4.click(selectall, inputs=text, outputs=savedAll)
158
- b5.click(tts, inputs=[text,TTSchoice], outputs=audio)
159
 
160
- demo.launch(share=True)
 
2
  import datetime
3
  from transformers import pipeline
4
  import gradio as gr
 
5
  import tempfile
6
  from typing import Optional
7
  import numpy as np
8
  from TTS.utils.manage import ModelManager
9
  from TTS.utils.synthesizer import Synthesizer
 
 
10
  import os
11
  import csv
 
 
12
  import huggingface_hub
13
  from huggingface_hub import Repository, hf_hub_download, upload_file
14
  from datetime import datetime
15
 
16
# Setup dataset repo
# Created new dataset as awacke1/MindfulStory.csv
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATA_FILENAME = "MindfulStory.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")  # may be None in local dev; push will then fail

# Best-effort prefetch of the dataset file into the local "data" cache.
# A missing file is non-fatal: the Repository clone later recreates it.
# FIX: narrowed bare `except:` (which also swallowed SystemExit/KeyboardInterrupt)
# to `except Exception`.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir="data",
        force_filename=DATA_FILENAME,
    )
except Exception:
    print("😬 File not found, we’ll act like it’s not a problem...")

35
+ # 🧠 AI Memory: Because forgetting is for humans πŸ€–
36
  def AIMemory(name: str, message: str):
37
  if name and message:
38
  with open(DATA_FILE, "a") as csvfile:
 
41
  commit_url = repo.push_to_hub()
42
  return {"name": name, "message": message, "time": str(datetime.now())}
43
 
44
# Clone the dataset repo into ./data so appended CSV rows can be pushed back
# (AIMemory calls repo.push_to_hub()); auth token comes from the HF_TOKEN env var.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
 
 
 
46
 
47
# Automatic speech recognition pipeline (wav2vec2, English) used by
# transcribe() and speech_to_text().
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
49
 
50
+ # 🎀 Set up TTS Models. Let’s find that sweet robotic voice!
51
  MODEL_NAMES = [
52
  "en/ljspeech/tacotron2-DDC",
53
  "en/ljspeech/glow-tts",
 
58
  "de/thorsten/tacotron2-DCA",
59
  ]
60
 
61
+ # πŸ› οΈ Use Model Manager to load vocoders (Fancy tech magic here)
62
  MODELS = {}
63
  manager = ModelManager()
64
  for MODEL_NAME in MODEL_NAMES:
65
+ print(f"πŸš€ Downloading {MODEL_NAME}... because waiting is fun!")
66
  model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
67
  vocoder_name: Optional[str] = model_item["default_vocoder"]
68
  vocoder_path = None
 
75
  )
76
  MODELS[MODEL_NAME] = synthesizer
77
 
78
def transcribe(audio):
    """Run the ASR pipeline on an audio file and return the recognized text."""
    result = asr(audio)
    return result["text"]
82
 
83
# Default text-classification pipeline; text_to_sentiment() reads its "label".
classifier = pipeline("text-classification")
85
 
86
def speech_to_text(speech):
    """Transcribe recorded speech to text via the wav2vec2 ASR pipeline."""
    return asr(speech)["text"]
90
 
91
def text_to_sentiment(text):
    """Classify *text* and return the top prediction's sentiment label."""
    top_prediction = classifier(text)[0]
    return top_prediction["label"]
95
 
96
def upsert(text):
    """Save *text* to the 'Text2SpeechSentimentSave' collection, keyed by timestamp.

    Returns the stored document as read back via select().

    NOTE(review): relies on module-level names `db` (a Firestore-like client)
    and `select`, neither of which is defined in this file -- confirm they
    exist at runtime before wiring this handler up.
    """
    # BUG FIX: `from datetime import datetime` shadows the `datetime` module,
    # so the original `datetime.datetime.today()` raised AttributeError.
    # Call the class method directly (consistent with datetime.now() elsewhere).
    date_time = str(datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({
        u'firefield': 'Recognize Speech',
        u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/',
        u'last': text,
        u'born': date_time,
    })
    saved = select('TTS-STT', date_time)
    return saved
108
+
109
def selectall(text):
    """Stream every document in 'Text2SpeechSentimentSave' and return the
    concatenation of '<id> => <dict>' strings (the *text* argument is unused)."""
    documents = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{doc.id} => {doc.to_dict()}' for doc in documents)
117
+
118
def tts(text: str, model_name: str):
    """Synthesize speech for *text* with the selected TTS model.

    Returns the path of a temporary .wav file holding the audio.
    Raises NameError when *model_name* is not a key of MODELS.
    """
    print(text, model_name)
    synthesizer = MODELS.get(model_name, None)
    if synthesizer is None:
        raise NameError("😬 Oops! Model not found.")

    audio_frames = synthesizer.tts(text)
    # delete=False: Gradio reads the file after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        synthesizer.save_wav(audio_frames, wav_file)
    return wav_file.name
129
 
130
# Gradio UI: microphone -> transcript -> chosen TTS model -> audio playback.
demo = gr.Blocks()

with demo:
    # Microphone input, delivered to handlers as a file path (ASR expects a path).
    audio_file = gr.Audio(source="microphone", type="filepath")

    # Transcribed text; also serves as the input for text-to-speech.
    text = gr.Textbox(label="Speech to Text")

    # Pick which TTS model voices the reply (keys of MODELS).
    TTSchoice = gr.Radio(label="Pick a Text to Speech Model", choices=MODEL_NAMES)

    # Playback of the synthesized speech; output-only.
    audio = gr.Audio(label="Output", interactive=False)

    b1 = gr.Button("🎀 Recognize Speech")
    b5 = gr.Button("πŸ”Š Read It Back Aloud")

    # Wire buttons to their handlers.
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)