awacke1 committed on
Commit
631754b
·
1 Parent(s): 11ab1dd

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +196 -0
  2. packages.txt +2 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import time
4
+ import librosa
5
+ import soundfile
6
+ import nemo.collections.asr as nemo_asr
7
+ import tempfile
8
+ import os
9
+ import uuid
10
+
11
+ from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
12
+ import torch
13
+
14
+ # PersistDataset -----
15
+ import os
16
+ import csv
17
+ import gradio as gr
18
+ from gradio import inputs, outputs
19
+ import huggingface_hub
20
+ from huggingface_hub import Repository, hf_hub_download, upload_file
21
+ from datetime import datetime
22
# Full URL of the Hugging Face dataset repo used as the persistence backend
# (it is the clone source for the local git checkout created below).
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/Carddata.csv"
# The same dataset in "owner/name" form, as passed to hf_hub_download.
DATASET_REPO_ID = "awacke1/Carddata.csv"
# Name of the CSV file that holds the stored rows.
DATA_FILENAME = "Carddata.csv"
# Path of that CSV inside the local "data" directory.
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
27
+
28
# HTML/JavaScript snippet that clicks the element matching "button.submit"
# once per page; the window.hasBeenRun flag prevents a second click.
# NOTE(review): SCRIPT is not referenced anywhere in the visible part of this
# file -- confirm whether it is still needed.
SCRIPT = """
<script>
if (!window.hasBeenRun) {
window.hasBeenRun = true;
console.log("should only happen once");
document.querySelector("button.submit").click();
}
</script>
"""
37
+
38
+
39
# Best-effort prefetch of the CSV from the Hub. A failure here is deliberately
# non-fatal: the script continues and clones the dataset repo right below.
# NOTE(review): DATA_DIRNAME is not defined in the visible part of this file.
# Unless it is defined elsewhere, this call raises NameError and the fallback
# branch always runs. The real error is printed so that this is visible
# instead of being reported as a generic "file not found".
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME,
    )
except Exception as err:  # narrowed from a bare except: Ctrl-C / SystemExit now propagate
    print("file not found:", repr(err))

# Local git checkout of the dataset repo in ./data; store_message() appends to
# DATA_FILE inside it and pushes through this object.
repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)
51
+
52
def generate_html() -> str:
    """Render the rows of DATA_FILE as an HTML chat log.

    Rows are emitted in reverse file order, so the most recently appended row
    comes first. Every cell is HTML-escaped before it is interpolated, because
    the CSV holds user-supplied text.

    The first span shows the ``inputs`` column and the second the ``outputs``
    column. When a row has no such column, ``name`` / ``message`` (the field
    names written by ``store_message``) are used instead; a cell missing under
    both names renders as an empty string rather than raising ``KeyError``.

    Returns:
        The HTML fragment, or the plain string ``"no messages yet"`` when the
        file contains no data rows.
    """
    # Imported locally (and not as ``html``) so the block is self-contained.
    from html import escape

    # newline="" is what the csv module expects for files it parses itself.
    with open(DATA_FILE, newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))

    if not rows:
        return "no messages yet"

    def cell(row, primary, fallback):
        # Pick the preferred column, fall back to the writer's column name,
        # and always return escaped text.
        value = row.get(primary)
        if value is None:
            value = row.get(fallback)
        return escape("" if value is None else str(value))

    parts = ["<div class='chatbot'>"]
    for row in reversed(rows):
        parts.append("<div>")
        parts.append(f"<span>{cell(row, 'inputs', 'name')}</span>")
        parts.append(f"<span class='outputs'>{cell(row, 'outputs', 'message')}</span>")
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)
70
+
71
+
72
def store_message(name: str, message: str):
    """Append one row to DATA_FILE and push the dataset repo.

    Nothing is written or pushed when either argument is empty/falsy.

    Args:
        name: Value for the ``name`` column (stripped of surrounding whitespace).
        message: Value for the ``message`` column (stripped likewise).

    Returns:
        Always the empty string.
    """
    if name and message:
        # newline="" lets the csv module control line endings itself, as the
        # csv documentation requires for files passed to a writer.
        with open(DATA_FILE, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(
                {
                    "name": name.strip(),
                    "message": message.strip(),
                    "time": str(datetime.now()),
                }
            )
        # Publish the appended row; the returned commit URL is not needed.
        repo.push_to_hub()
    return ""
81
+
82
+
83
# Two-textbox interface that feeds (name, message) into store_message and
# shows its return value as HTML.
# NOTE(review): the name `iface` is assigned again further down, before the
# only iface.launch() call, so this interface object is built but never
# launched -- confirm whether it can be removed.
iface = gr.Interface(
    store_message,
    [
        inputs.Textbox(placeholder="Your name"),
        inputs.Textbox(placeholder="Your message", lines=2),
    ],
    "html",
    css="""
.message {background-color:cornflowerblue;color:white; padding:4px;margin:4px;border-radius:4px; }
""",
    title="Reading/writing to a HuggingFace dataset repo from Spaces",
    description=f"This is a demo of how to do simple *shared data persistence* in a Gradio Space, backed by a dataset repo.",
    article=f"The dataset repo is [{DATASET_REPO_URL}]({DATASET_REPO_URL})",
)
97
+
98
+
99
# Load the Blenderbot checkpoint and its tokenizer once at import time; chat()
# reads both as module-level globals.
# NOTE(review): the name `model` is bound again later in this file to the NeMo
# ASR model, so by the time chat() runs `model` no longer refers to this
# object -- confirm and give the two models distinct names.
mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)
102
+
103
def take_last_tokens(inputs, note_history, history, max_tokens=128):
    """Trim an encoded dialogue to its last ``max_tokens`` tokens.

    When ``inputs['input_ids']`` is longer than ``max_tokens`` along dim 1:

    * ``input_ids`` and ``attention_mask`` are replaced (in ``inputs`` itself)
      by the last ``max_tokens`` positions of their first row;
    * the first two ``'</s> <s>'``-separated notes are dropped from
      ``note_history[0]``;
    * the first entry of ``history`` is dropped.

    Otherwise all three arguments are returned unchanged.

    Args:
        inputs: Mapping holding 2-D ``input_ids`` and ``attention_mask`` tensors.
        note_history: One-element list with the joined dialogue string.
        history: List of past turns.
        max_tokens: Positive number of trailing tokens to keep (default 128,
            the previously hard-coded value).

    Returns:
        The tuple ``(inputs, note_history, history)``.
    """
    if inputs['input_ids'].shape[1] > max_tokens:
        # Slice the tensors directly instead of round-tripping through Python
        # lists: same values and shape (1, max_tokens), but dtype and device
        # are preserved and no per-element conversion takes place.
        inputs['input_ids'] = inputs['input_ids'][:1, -max_tokens:]
        inputs['attention_mask'] = inputs['attention_mask'][:1, -max_tokens:]
        note_history = ['</s> <s>'.join(note_history[0].split('</s> <s>')[2:])]
        history = history[1:]
    return inputs, note_history, history
111
+
112
def add_note_to_history(note, note_history):
    """Return the dialogue history with ``note`` appended, as a one-element list.

    The entries of ``note_history`` followed by ``note`` are joined with the
    ``'</s> <s>'`` separator. The caller's list is left untouched (the
    previous implementation appended to it in place as a side effect).

    Args:
        note: The new utterance (a string).
        note_history: List of strings; empty or holding the joined history.

    Returns:
        ``[joined_history]`` -- a new list containing a single string.
    """
    return ['</s> <s>'.join([*note_history, note])]
117
+
118
+
119
def chat(message, history):
    """Generate a reply to ``message`` given the previous turns.

    The past (user, bot) pairs and the new message are joined with the
    '</s> <s>' separator, tokenized, trimmed by take_last_tokens, and passed
    to ``model.generate``. The decoded reply is appended to ``history`` as a
    (user_text, reply_text) pair and the exchange is saved via store_message.

    NOTE(review): ``model`` is looked up as a module global at call time, and
    this file rebinds that name to the ASR model after the Blenderbot model is
    loaded -- confirm which object this function actually receives.

    Returns:
        ``(history, history)`` -- the updated history, twice.
    """
    history = history or []

    # Flatten the earlier turns into the single separator-joined string.
    history_useful = []
    if history:
        flattened = [str(turn[0]) + '</s> <s>' + str(turn[1]) for turn in history]
        history_useful = ['</s> <s>'.join(flattened)]
    history_useful = add_note_to_history(message, history_useful)

    encoded = tokenizer(history_useful, return_tensors="pt")
    encoded, history_useful, history = take_last_tokens(encoded, history_useful, history)

    reply_ids = model.generate(**encoded)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

    history_useful = add_note_to_history(response, history_useful)
    pieces = history_useful[0].split('</s> <s>')
    latest_exchange = (pieces[-2], pieces[-1])
    history.append(latest_exchange)

    store_message(message, response)  # Save to dataset
    return history, history
135
+
136
# Sample rate used as the resampling target and when writing the temporary
# wav file that is handed to the ASR model.
SAMPLE_RATE = 16000
# Load the pretrained NeMo transducer model once at import time.
# NOTE(review): this rebinds the module-level name `model`, which earlier in
# this file refers to the Blenderbot model used by chat() -- confirm and give
# the two models distinct names.
model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_en_conformer_transducer_xlarge")
# Passing None presumably selects the model's default decoding configuration
# -- TODO confirm against the NeMo documentation.
model.change_decoding_strategy(None)
# Switch the model to evaluation (inference) mode.
model.eval()
140
+
141
def process_audio_file(file):
    """Load an audio file as a mono waveform sampled at SAMPLE_RATE.

    Args:
        file: Path (or other source accepted by ``librosa.load``) of the audio.

    Returns:
        The waveform array returned by ``librosa.load``.
    """
    # Ask librosa for the target rate directly. The previous code loaded with
    # librosa's default ``sr`` (which already resamples the file) and then
    # resampled a second time to SAMPLE_RATE -- two lossy passes instead of
    # one. ``mono=True`` is librosa's default and is spelled out here because
    # it replaces the explicit to_mono() call.
    data, _ = librosa.load(file, sr=SAMPLE_RATE, mono=True)
    return data
148
+
149
+ #def transcribe(audio, state = "", im4 = "", file = ""):
150
+ #def transcribe(audio, state = "", im4 = None, file = None):
151
def transcribe(audio, state=""):
    """Transcribe one audio chunk and append the text to the running state.

    Args:
        audio: Path of the recorded audio file, or ``None`` when there is
            nothing to transcribe.
        state: Text accumulated by earlier calls; ``None`` is treated as "".

    Returns:
        ``(state, state)`` -- the updated transcript twice, once for the
        textbox output and once for the state output. When ``audio`` is
        ``None`` the state is returned unchanged.
    """
    if state is None:
        state = ""
    if audio is None:
        # Presumably a live/streaming interface can invoke the callback with
        # no recording yet -- return early instead of failing while loading.
        return state, state

    audio_data = process_audio_file(audio)
    with tempfile.TemporaryDirectory() as tmpdir:
        # The model is given a file path, so write the processed waveform to a
        # uniquely named temporary wav file first.
        audio_path = os.path.join(tmpdir, f'audio_{uuid.uuid4()}.wav')
        soundfile.write(audio_path, audio_data, SAMPLE_RATE)
        transcriptions = model.transcribe([audio_path])

    # If transcriptions form a 2-tuple (from RNNT), keep just the "best"
    # hypotheses, then take the entry for our single file.
    if isinstance(transcriptions, tuple) and len(transcriptions) == 2:
        transcriptions = transcriptions[0]
    transcriptions = transcriptions[0]

    store_message(transcriptions, state)  # Save to dataset
    state = state + transcriptions + " "
    return state, state
171
+
172
# Live speech-recognition interface: microphone audio (delivered as a file
# path) plus a "state" value go into transcribe(); its two return values fill
# the textbox and the state output.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type='filepath', streaming=True),
        "state",
        #gr.Image(label="Webcam", source="webcam"),
        #gr.File(label="File"),
    ],
    outputs=[
        "textbox",
        "state",
        #gr.HighlightedText(label="HighlightedText", color_map={"punc": "pink", "test 0": "blue"}),
        #gr.HighlightedText(label="HighlightedText", show_legend=True),
        #gr.JSON(label="JSON"),
        #gr.HTML(label="HTML"),
    ],
    layout="horizontal",
    theme="huggingface",
    title="🗣️LiveSpeechRecognition🧠Memory💾",
    description=f"Live Automatic Speech Recognition (ASR) with Memory💾 Dataset.",
    allow_flagging='never',
    live=True,
    article=f"Result Output Saved to Memory💾 Dataset: [{DATASET_REPO_URL}]({DATASET_REPO_URL})"
)
# Start the app; this is the only launch() call in the visible file.
iface.launch()
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ libsndfile1
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ nemo_toolkit[asr]
2
+ transformers
3
+ torch
4
+ gradio
5
+ Werkzeug
6
+ huggingface_hub
7
+ Pillow