hunterschep committed on
Commit d954236 · verified · 1 Parent(s): df1c6c9

Update app.py

Files changed (1)
  1. app.py +91 -231
app.py CHANGED
@@ -1,266 +1,126 @@
  import gradio as gr
- import torch
- import librosa
  from transformers import Wav2Vec2Processor, AutoModelForCTC
- import zipfile
- import os
  import firebase_admin
  from firebase_admin import credentials, firestore, storage
- from datetime import datetime, timedelta
- import json
- import tempfile
- import uuid

- # LOCAL INITIALIZATION - ONLY USE ON YOUR OWN DEVICE
- '''
- os.chdir(os.path.dirname(os.path.abspath(__file__)))
- cred = credentials.Certificate("serviceAccountKey.json")
- '''
- # Deployed Initialization
- firebase_config = json.loads(os.environ.get('firebase_creds'))
  cred = credentials.Certificate(firebase_config)
-
- firebase_admin.initialize_app(cred, {
-     "storageBucket": "amis-asr-corrections-dem-8cf3d.firebasestorage.app"
- })
  db = firestore.client()
  bucket = storage.bucket()

- # Load the ASR model and processor
  MODEL_NAME = "eleferrand/XLSR_paiwan"
  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
  model = AutoModelForCTC.from_pretrained(MODEL_NAME)

-
- # ────────────────────────────────
- # Core ASR helper functions
- # ────────────────────────────────
-
- def transcribe(audio_file: str):
-     """Run ASR on the uploaded audio file and return the raw transcription."""
      try:
-         audio, _ = librosa.load(audio_file, sr=16000)
-         input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
          with torch.no_grad():
-             logits = model(input_values).logits
-         predicted_ids = torch.argmax(logits, dim=-1)
-         transcription = processor.batch_decode(predicted_ids)[0]
-         return transcription.replace("[UNK]", "")
      except Exception as e:
          return f"處理文件錯誤: {e}"

- def transcribe_with_status(audio_file):
-     """Wrapper that provides UI-friendly status messages."""
-     if audio_file is None:
-         return "", "", gr.update(value="請先上傳音訊 (Please upload audio first)", visible=True)
-
-     # Show processing message first
-     processing_msg = gr.update(value="處理中，請稍候… (Processing, please wait…)", visible=True)
-     transcription = transcribe(audio_file)
-     done_msg = gr.update(value="完成！(Done!)", visible=True)
-     return transcription, transcription, done_msg
-
-
- # ────────────────────────────────
- # Firebase helpers
- # ────────────────────────────────
-
- def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
-     """Upload audio (if provided) + transcription pair to Firestore/Storage."""
      try:
-         audio_metadata, audio_file_url = {}, None
-
-         if audio_file and os.path.exists(audio_file):
-             audio, sr = librosa.load(audio_file, sr=44100)
-             audio_metadata = {
-                 "duration": librosa.get_duration(y=audio, sr=sr),
-                 "file_size": os.path.getsize(audio_file),
              }
-             uid = str(uuid.uuid4())
-             dst = f"audio/pai/{uid}.wav"
-             blob = bucket.blob(dst)
-             blob.upload_from_filename(audio_file)
-             audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))
-
-         db.collection("paiwan_transcriptions").add({
-             "transcription_info": {
-                 "original_text": original_transcription,
-                 "corrected_text": corrected_transcription,
-                 "language": "pai",
-             },
-             "audio_data": {
-                 "audio_metadata": audio_metadata,
-                 "audio_file_url": audio_file_url,
-             },
-             "user_info": {
-                 "native_paiwan_speaker": native_speaker,
-                 "age": age,
-             },
-             "timestamp": datetime.now().isoformat(),
-             "model_name": MODEL_NAME,
-         })
          return "校正保存成功! (Correction saved successfully!)"
      except Exception as e:
-         return f"保存失败: {e} (Error saving correction: {e})"

-
- def prepare_download(audio_file, original_transcription, corrected_transcription):
-     """Bundle audio + TXT files into a ZIP for download."""
-     if audio_file is None:
          return None
-
-     tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip").name
-     with zipfile.ZipFile(tmp_zip, "w") as zf:
-         if os.path.exists(audio_file):
-             zf.write(audio_file, arcname="audio.wav")
-         for name, content in [
-             ("original_transcription.txt", original_transcription),
-             ("corrected_transcription.txt", corrected_transcription),
-         ]:
              with open(name, "w", encoding="utf-8") as f:
-                 f.write(content)
-             zf.write(name, arcname=name)
              os.remove(name)
-     return tmp_zip
-
-
- # ────────────────────────────────
- # Dynamic label switching – uses gr.update() so values aren't overwritten
- # ────────────────────────────────
-
- def toggle_language(switch: bool):
-     """Return a tuple of updates for each UI component when the language toggle flips."""
-
-     if switch:  # Traditional Chinese UI
-         return (
-             "排灣語自動語音識別逐字稿與修正系統",  # Title (Markdown)
-             "步驟 1：音訊上傳與逐字稿",            # Step 1 (Markdown)
-             "步驟 2：審閱與編輯逐字稿",            # Step 2 (Markdown)
-             "步驟 3：使用者資訊",                  # Step 3 (Markdown)
-             "步驟 4：儲存與下載",                  # Step 4 (Markdown)
-             gr.update(label="音訊輸入"),           # Audio component label
-             gr.update(value="產生逐字稿"),         # Transcribe button text
-             gr.update(label="原始逐字稿"),         # Original transcription textbox label
-             gr.update(label="更正逐字稿"),         # Corrected transcription textbox label
-             gr.update(label="年齡"),               # Age slider label
-             gr.update(label="母語排灣語使用者?"),  # Native speaker checkbox label
-             gr.update(value="儲存"),               # Save button text
-             gr.update(label="儲存狀態"),           # Save-status textbox label
-             gr.update(value="下載 ZIP 檔案"),      # Download button text
-             gr.update(value="處理中，請稍候…")     # Status message default
-         )
-
-     # English UI
-     return (
-         "Paiwan ASR Transcription & Correction System",
-         "Step 1: Audio Upload & Transcription",
-         "Step 2: Review & Edit Transcription",
-         "Step 3: User Information",
-         "Step 4: Save & Download",
-         gr.update(label="Audio Input"),
-         gr.update(value="Generate Transcript"),
-         gr.update(label="Original Transcription"),
-         gr.update(label="Corrected Transcription"),
-         gr.update(label="Age"),
-         gr.update(label="Native Paiwan Speaker?"),
-         gr.update(value="Save"),
-         gr.update(label="Save Status"),
-         gr.update(value="Download ZIP File"),
-         gr.update(value="Processing, please wait…"),
-     )
-
-
- # ────────────────────────────────
- # Gradio UI
- # ────────────────────────────────

  with gr.Blocks() as demo:
-     lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
-
-     title = gr.Markdown()
-     step1 = gr.Markdown()
-
      with gr.Row():
-         audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")

-     status_message = gr.Markdown(visible=False)
-
-     step2 = gr.Markdown()
      with gr.Row():
-         transcribe_button = gr.Button()
-
-     original_text = gr.Textbox(interactive=False, lines=5)
-     corrected_text = gr.Textbox(interactive=True, lines=5)
-
-     step3 = gr.Markdown()
-     with gr.Row():
-         age_input = gr.Slider(minimum=0, maximum=100, step=1, value=25)
-         native_speaker_input = gr.Checkbox(value=True)
-
-     step4 = gr.Markdown()
      with gr.Row():
-         save_button = gr.Button()
-         save_status = gr.Textbox(interactive=False)
-
-     with gr.Row():
-         download_button = gr.Button()
-         download_output = gr.File()
-
-     # Initialize English labels
-     init_vals = toggle_language(False)
-     (title.value, step1.value, step2.value, step3.value, step4.value,
-      audio_input, transcribe_button, original_text, corrected_text,
-      age_input, native_speaker_input, save_button, save_status,
-      download_button, status_message_init) = init_vals
-
-     audio_input.label = "Audio Input"
-     transcribe_button.value = "Generate Transcript"
-     original_text.label = "Original Transcription"
-     corrected_text.label = "Corrected Transcription"
-     age_input.label = "Age"
-     native_speaker_input.label = "Native Paiwan Speaker?"
-     save_button.value = "Save"
-     save_status.label = "Save Status"
-     download_button.value = "Download ZIP File"
-     status_message.value = status_message_init.value
-
-     # Language switch – won't overwrite component values anymore
-     lang_switch.change(
-         toggle_language,
-         inputs=lang_switch,
-         outputs=[title, step1, step2, step3, step4,
-                  audio_input, transcribe_button, original_text, corrected_text,
-                  age_input, native_speaker_input, save_button, save_status,
-                  download_button, status_message]
-     )
-
-     # Auto transcription on upload
-     audio_input.change(
-         transcribe_with_status,
-         inputs=audio_input,
-         outputs=[original_text, corrected_text, status_message]
-     )
-
-     # Manual transcription button
-     transcribe_button.click(
-         transcribe_with_status,
-         inputs=audio_input,
-         outputs=[original_text, corrected_text, status_message]
-     )
-
-     # Save to Firebase
-     save_button.click(
-         store_correction,
-         inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
-         outputs=save_status,
-     )
-
-     # Download ZIP
-     download_button.click(
-         prepare_download,
-         inputs=[audio_input, original_text, corrected_text],
-         outputs=download_output,
-     )

  demo.launch()
 
  import gradio as gr
+ import torch, librosa, zipfile, os, json, tempfile, uuid
  from transformers import Wav2Vec2Processor, AutoModelForCTC
+ from datetime import datetime, timedelta
  import firebase_admin
  from firebase_admin import credentials, firestore, storage

+ # ---------- Firebase init ----------
+ firebase_config = json.loads(os.environ.get("firebase_creds"))
  cred = credentials.Certificate(firebase_config)
+ firebase_admin.initialize_app(
+     cred, {"storageBucket": "amis-asr-corrections-dem-8cf3d.firebasestorage.app"}
+ )
  db = firestore.client()
  bucket = storage.bucket()

+ # ---------- ASR model ----------
  MODEL_NAME = "eleferrand/XLSR_paiwan"
  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
  model = AutoModelForCTC.from_pretrained(MODEL_NAME)

+ # ---------- Core helpers ----------
+ def transcribe(path):
      try:
+         audio, _ = librosa.load(path, sr=16_000)
+         inputs = processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
          with torch.no_grad():
+             logits = model(inputs).logits
+         ids = torch.argmax(logits, dim=-1)
+         text = processor.batch_decode(ids)[0]
+         return text.replace("[UNK]", "")
      except Exception as e:
          return f"處理文件錯誤: {e}"

+ def transcribe_both(path):
+     txt = transcribe(path)
+     return txt, txt  # original & editable copies

+ def store_correction(orig, corr, audio, age, native):
      try:
+         audio_meta, audio_url = {}, None
+         if audio and os.path.exists(audio):
+             a, sr = librosa.load(audio, sr=44_100)
+             audio_meta = {
+                 "duration": librosa.get_duration(y=a, sr=sr),
+                 "file_size": os.path.getsize(audio),
              }
+             uid = f"{uuid.uuid4()}.wav"
+             blob = bucket.blob(f"audio/pai/{uid}")
+             blob.upload_from_filename(audio)
+             audio_url = blob.generate_signed_url(expiration=timedelta(hours=1))
+
+         db.collection("paiwan_transcriptions").add(
+             {
+                 "transcription_info": {
+                     "original_text": orig,
+                     "corrected_text": corr,
+                     "language": "pai",
+                 },
+                 "audio_data": {"audio_metadata": audio_meta, "audio_file_url": audio_url},
+                 "user_info": {"native_paiwan_speaker": native, "age": age},
+                 "timestamp": datetime.now().isoformat(),
+                 "model_name": MODEL_NAME,
+             }
+         )
          return "校正保存成功! (Correction saved successfully!)"
      except Exception as e:
+         return f"保存失敗: {e} (Error saving correction: {e})"

+ def prepare_download(audio, orig, corr):
+     if not audio:
          return None
+     tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
+     tmp_zip.close()
+     with zipfile.ZipFile(tmp_zip.name, "w") as z:
+         if os.path.exists(audio):
+             z.write(audio, arcname="audio.wav")
+         for name, txt in [("original_transcription.txt", orig),
+                           ("corrected_transcription.txt", corr)]:
              with open(name, "w", encoding="utf-8") as f:
+                 f.write(txt)
+             z.write(name, arcname=name)
              os.remove(name)
+     return tmp_zip.name

+ # ---------- Interface ----------
  with gr.Blocks() as demo:
+     gr.Markdown("# 排灣語自動語音識別逐字稿與修正系統 (Paiwan ASR Transcription & Correction System)")
+
+     # Step 1
+     gr.Markdown("### 步驟 1：音訊上傳 (Audio Upload)")
+     gr.Markdown("上傳後請至步驟 2 按「產生逐字稿」，系統處理時請耐心等待…")
+     audio_input = gr.Audio(["upload", "microphone"], type="filepath",
+                            label="音訊輸入 (Audio Input)")
+
+     # Step 2
+     gr.Markdown("### 步驟 2：產生與編輯逐字稿 (Generate & Edit Transcript)")
+     trans_btn = gr.Button("產生逐字稿 (Generate Transcript)")
+     original = gr.Textbox(label="原始逐字稿 (Original Transcription)",
+                           interactive=False, lines=6)
+     corrected = gr.Textbox(label="更正逐字稿 (Corrected Transcription)",
+                            interactive=True, lines=6)
+
+     # Step 3
+     gr.Markdown("### 步驟 3：使用者資訊 (User Information)")
      with gr.Row():
+         age = gr.Slider(0, 100, step=1, value=25, label="年齡 (Age)")
+         native = gr.Checkbox(value=True, label="母語排灣語使用者？(Native Paiwan Speaker?)")

+     # Step 4
+     gr.Markdown("### 步驟 4：儲存與下載 (Save & Download)")
      with gr.Row():
+         save_btn = gr.Button("儲存 (Save)")
+         save_msg = gr.Textbox(label="儲存狀態 (Save Status)", interactive=False)
      with gr.Row():
+         dl_btn = gr.Button("下載 ZIP 檔案 (Download ZIP File)")
+         dl_out = gr.File()
+
+     # --- wiring ---
+     trans_btn.click(transcribe_both, audio_input, [original, corrected])
+     save_btn.click(store_correction,
+                    [original, corrected, audio_input, age, native],
+                    save_msg)
+     dl_btn.click(prepare_download, [audio_input, original, corrected], dl_out)

  demo.launch()
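
Note: both the old and new versions of app.py read the Firebase service-account JSON from the `firebase_creds` environment variable (the removed local-initialization block instead loaded `serviceAccountKey.json` directly). A minimal local-run sketch, assuming you have such a key file; the filename and launch command below are illustrative, not part of the commit:

    # Local-run sketch (assumption: serviceAccountKey.json is your own service-account key,
    # as referenced in the commented-out local-initialization block this commit removes).
    import os
    import subprocess

    # app.py expects the raw JSON text in the `firebase_creds` environment variable
    # and calls json.loads() on it before initializing firebase_admin.
    with open("serviceAccountKey.json", "r", encoding="utf-8") as f:
        os.environ["firebase_creds"] = f.read()

    # Launch the Gradio app with the credential in place (illustrative command).
    subprocess.run(["python", "app.py"], check=True)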