hunterschep committed on
Commit
df1c6c9
·
verified ·
1 Parent(s): 3bc7caf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -110
app.py CHANGED
@@ -32,11 +32,15 @@ processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
32
  model = AutoModelForCTC.from_pretrained(MODEL_NAME)
33
 
34
 
35
- def transcribe(audio_file):
 
 
 
 
 
36
  try:
37
- audio, rate = librosa.load(audio_file, sr=16000)
38
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
39
-
40
  with torch.no_grad():
41
  logits = model(input_values).logits
42
  predicted_ids = torch.argmax(logits, dim=-1)
@@ -47,117 +51,130 @@ def transcribe(audio_file):
47
 
48
 
49
  def transcribe_with_status(audio_file):
 
50
  if audio_file is None:
51
- return "", "", "請先上傳音訊 (Please upload audio first)"
52
- status = "處理中，請稍候… (Processing, please wait…)"
 
 
53
  transcription = transcribe(audio_file)
54
- return transcription, transcription, "完成！(Done!)"
 
55
 
56
 
 
 
 
 
57
  def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
 
58
  try:
59
- audio_metadata = {}
60
- audio_file_url = None
61
 
62
- # If an audio file is provided, upload it to Firebase Storage
63
  if audio_file and os.path.exists(audio_file):
64
  audio, sr = librosa.load(audio_file, sr=44100)
65
- duration = librosa.get_duration(y=audio, sr=sr)
66
- file_size = os.path.getsize(audio_file)
67
- audio_metadata = {'duration': duration, 'file_size': file_size}
68
-
69
- # Generate a unique identifier for the audio file
70
- unique_id = str(uuid.uuid4())
71
- destination_path = f"audio/pai/{unique_id}.wav"
72
-
73
- # Create a blob and upload the file
74
- blob = bucket.blob(destination_path)
75
  blob.upload_from_filename(audio_file)
76
-
77
- # Generate a signed download URL valid for 1 hour (adjust expiration as needed)
78
  audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))
79
 
80
- combined_data = {
81
- 'transcription_info': {
82
- 'original_text': original_transcription,
83
- 'corrected_text': corrected_transcription,
84
- 'language': 'pai',
85
  },
86
- 'audio_data': {
87
- 'audio_metadata': audio_metadata,
88
- 'audio_file_url': audio_file_url,
89
  },
90
- 'user_info': {
91
- 'native_paiwan_speaker': native_speaker,
92
- 'age': age
93
  },
94
- 'timestamp': datetime.now().isoformat(),
95
- 'model_name': MODEL_NAME
96
- }
97
- # Save data to a collection for that language
98
- db.collection('paiwan_transcriptions').add(combined_data)
99
  return "ๆ กๆญฃไฟๅญ˜ๆˆๅŠŸ! (Correction saved successfully!)"
100
  except Exception as e:
101
  return f"ไฟๅญ˜ๅคฑ่ดฅ: {e} (Error saving correction: {e})"
102
 
103
 
104
  def prepare_download(audio_file, original_transcription, corrected_transcription):
 
105
  if audio_file is None:
106
  return None
107
 
108
- tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
109
- tmp_zip.close()
110
- with zipfile.ZipFile(tmp_zip.name, "w") as zf:
111
  if os.path.exists(audio_file):
112
  zf.write(audio_file, arcname="audio.wav")
 
 
 
 
 
 
 
 
 
113
 
114
- orig_txt = "original_transcription.txt"
115
- with open(orig_txt, "w", encoding="utf-8") as f:
116
- f.write(original_transcription)
117
- zf.write(orig_txt, arcname="original_transcription.txt")
118
- os.remove(orig_txt)
119
 
120
- corr_txt = "corrected_transcription.txt"
121
- with open(corr_txt, "w", encoding="utf-8") as f:
122
- f.write(corrected_transcription)
123
- zf.write(corr_txt, arcname="corrected_transcription.txt")
124
- os.remove(corr_txt)
125
- return tmp_zip.name
126
 
 
 
127
 
128
- def toggle_language(switch):
129
- """Switch UI text between English and Traditional Chinese"""
130
- if switch:
131
- return (
132
- "ๆŽ’็ฃ่ชž่‡ชๅ‹•่ชž้Ÿณ่ญ˜ๅˆฅ้€ๅญ—็จฟ่ˆ‡ไฟฎๆญฃ็ณป็ตฑ", # Title
133
- "ๆญฅ้ฉŸ 1๏ผš้Ÿณ่จŠไธŠๅ‚ณ่ˆ‡้€ๅญ—็จฟ", # Step 1
134
- "ๆญฅ้ฉŸ 2๏ผšๅฏฉ้–ฑ่ˆ‡็ทจ่ผฏ้€ๅญ—็จฟ", # Step 2
135
- "ๆญฅ้ฉŸ 3๏ผšไฝฟ็”จ่€…่ณ‡่จŠ", # Step 3
136
- "ๆญฅ้ฉŸ 4๏ผšๅ„ฒๅญ˜่ˆ‡ไธ‹่ผ‰", # Step 4
137
- "้Ÿณ่จŠ่ผธๅ…ฅ", "็”ข็”Ÿ้€ๅญ—็จฟ", # Audio label, Transcribe button
138
- "ๅŽŸๅง‹้€ๅญ—็จฟ", "ๆ›ดๆญฃ้€ๅญ—็จฟ", # Textboxes
139
- "ๅนด้ฝก", "ๆฏ่ชžๆŽ’็ฃ่ชžไฝฟ็”จ่€…?", # Age, Native speaker?
140
- "ๅ„ฒๅญ˜", "ๅ„ฒๅญ˜็‹€ๆ…‹", # Save button, Save status
141
- "ไธ‹่ผ‰ ZIP ๆช”ๆกˆ", # Download button
142
- "่™•็†ไธญ๏ผŒ่ซ‹็จๅ€™โ€ฆ" # Status message default (Chinese)
143
- )
144
- else:
145
  return (
146
- "Paiwan ASR Transcription & Correction System",
147
- "Step 1: Audio Upload & Transcription",
148
- "Step 2: Review & Edit Transcription",
149
- "Step 3: User Information",
150
- "Step 4: Save & Download",
151
- "Audio Input", "Generate Transcript",
152
- "Original Transcription", "Corrected Transcription",
153
- "Age", "Native Paiwan Speaker?",
154
- "Save", "Save Status",
155
- "Download ZIP File",
156
- "Processing, please waitโ€ฆ"
 
 
 
 
157
  )
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- # Interface
161
  with gr.Blocks() as demo:
162
  lang_switch = gr.Checkbox(label="ๅˆ‡ๆ›ๅˆฐ็น้ซ”ไธญๆ–‡ (Switch to Traditional Chinese)")
163
 
@@ -190,61 +207,60 @@ with gr.Blocks() as demo:
190
  download_button = gr.Button()
191
  download_output = gr.File()
192
 
193
- # Initialize UI texts according to default language (English)
194
- (title_text, step1_text, step2_text, step3_text, step4_text,
195
- audio_label, transcribe_label, orig_label, corr_label,
196
- age_label, native_label, save_label, save_status_label,
197
- download_label, default_status) = toggle_language(False)
198
-
199
- title.value = title_text
200
- step1.value = step1_text
201
- step2.value = step2_text
202
- step3.value = step3_text
203
- step4.value = step4_text
204
- audio_input.label = audio_label
205
- transcribe_button.value = transcribe_label
206
- original_text.label = orig_label
207
- corrected_text.label = corr_label
208
- age_input.label = age_label
209
- native_speaker_input.label = native_label
210
- save_button.value = save_label
211
- save_status.label = save_status_label
212
- download_button.value = download_label
213
- status_message.value = default_status
214
-
215
- # Language switcher
216
  lang_switch.change(
217
  toggle_language,
218
  inputs=lang_switch,
219
- outputs=[title, step1, step2, step3, step4, audio_input, transcribe_button,
220
- original_text, corrected_text, age_input, native_speaker_input,
221
- save_button, save_status, download_button, status_message]
 
222
  )
223
 
224
- # Automatic transcription on audio upload
225
  audio_input.change(
226
  transcribe_with_status,
227
  inputs=audio_input,
228
  outputs=[original_text, corrected_text, status_message]
229
  )
230
 
231
- # Manual transcription button (optional)
232
  transcribe_button.click(
233
  transcribe_with_status,
234
  inputs=audio_input,
235
  outputs=[original_text, corrected_text, status_message]
236
  )
237
 
 
238
  save_button.click(
239
  store_correction,
240
  inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
241
- outputs=save_status
242
  )
243
 
 
244
  download_button.click(
245
  prepare_download,
246
  inputs=[audio_input, original_text, corrected_text],
247
- outputs=download_output
248
  )
249
 
250
  demo.launch()
 
32
  model = AutoModelForCTC.from_pretrained(MODEL_NAME)
33
 
34
 
35
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
36
+ # Core ASR helper functions
37
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
38
+
39
+ def transcribe(audio_file: str):
40
+ """Run ASR on the uploaded audio file and return the raw transcription."""
41
  try:
42
+ audio, _ = librosa.load(audio_file, sr=16000)
43
  input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
 
44
  with torch.no_grad():
45
  logits = model(input_values).logits
46
  predicted_ids = torch.argmax(logits, dim=-1)
 
51
 
52
 
53
  def transcribe_with_status(audio_file):
54
+ """Wrapper that provides UIโ€‘friendly status messages."""
55
  if audio_file is None:
56
+ return "", "", gr.update(value="請先上傳音訊 (Please upload audio first)", visible=True)
57
+
58
+ # Show processing message first
59
+ processing_msg = gr.update(value="處理中，請稍候… (Processing, please wait…)", visible=True)
60
  transcription = transcribe(audio_file)
61
+ done_msg = gr.update(value="完成！(Done!)", visible=True)
62
+ return transcription, transcription, done_msg
63
 
64
 
65
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
66
+ # Firebase helpers
67
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
68
+
69
  def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
70
+ """Upload audio (if provided) + transcription pair to Firestore/Storage."""
71
  try:
72
+ audio_metadata, audio_file_url = {}, None
 
73
 
 
74
  if audio_file and os.path.exists(audio_file):
75
  audio, sr = librosa.load(audio_file, sr=44100)
76
+ audio_metadata = {
77
+ "duration": librosa.get_duration(y=audio, sr=sr),
78
+ "file_size": os.path.getsize(audio_file),
79
+ }
80
+ uid = str(uuid.uuid4())
81
+ dst = f"audio/pai/{uid}.wav"
82
+ blob = bucket.blob(dst)
 
 
 
83
  blob.upload_from_filename(audio_file)
 
 
84
  audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))
85
 
86
+ db.collection("paiwan_transcriptions").add({
87
+ "transcription_info": {
88
+ "original_text": original_transcription,
89
+ "corrected_text": corrected_transcription,
90
+ "language": "pai",
91
  },
92
+ "audio_data": {
93
+ "audio_metadata": audio_metadata,
94
+ "audio_file_url": audio_file_url,
95
  },
96
+ "user_info": {
97
+ "native_paiwan_speaker": native_speaker,
98
+ "age": age,
99
  },
100
+ "timestamp": datetime.now().isoformat(),
101
+ "model_name": MODEL_NAME,
102
+ })
 
 
103
  return "ๆ กๆญฃไฟๅญ˜ๆˆๅŠŸ! (Correction saved successfully!)"
104
  except Exception as e:
105
  return f"ไฟๅญ˜ๅคฑ่ดฅ: {e} (Error saving correction: {e})"
106
 
107
 
108
  def prepare_download(audio_file, original_transcription, corrected_transcription):
109
+ """Bundle audio + TXT files into a ZIP for download."""
110
  if audio_file is None:
111
  return None
112
 
113
+ tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip").name
114
+ with zipfile.ZipFile(tmp_zip, "w") as zf:
 
115
  if os.path.exists(audio_file):
116
  zf.write(audio_file, arcname="audio.wav")
117
+ for name, content in [
118
+ ("original_transcription.txt", original_transcription),
119
+ ("corrected_transcription.txt", corrected_transcription),
120
+ ]:
121
+ with open(name, "w", encoding="utf-8") as f:
122
+ f.write(content)
123
+ zf.write(name, arcname=name)
124
+ os.remove(name)
125
+ return tmp_zip
126
 
 
 
 
 
 
127
 
128
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
129
+ # Dynamic label switching – uses gr.update() so values aren’t overwritten
130
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
 
131
 
132
+ def toggle_language(switch: bool):
133
+ """Return a tuple of updates for each UI component when the language toggle flips."""
134
 
135
+ if switch: # Traditional Chinese UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  return (
137
+ "ๆŽ’็ฃ่ชž่‡ชๅ‹•่ชž้Ÿณ่ญ˜ๅˆฅ้€ๅญ—็จฟ่ˆ‡ไฟฎๆญฃ็ณป็ตฑ", # Title (Markdown)
138
+ "ๆญฅ้ฉŸ 1๏ผš้Ÿณ่จŠไธŠๅ‚ณ่ˆ‡้€ๅญ—็จฟ", # Step 1 (Markdown)
139
+ "ๆญฅ้ฉŸ 2๏ผšๅฏฉ้–ฑ่ˆ‡็ทจ่ผฏ้€ๅญ—็จฟ", # Step 2 (Markdown)
140
+ "ๆญฅ้ฉŸ 3๏ผšไฝฟ็”จ่€…่ณ‡่จŠ", # Step 3 (Markdown)
141
+ "ๆญฅ้ฉŸ 4๏ผšๅ„ฒๅญ˜่ˆ‡ไธ‹่ผ‰", # Step 4 (Markdown)
142
+ gr.update(label="้Ÿณ่จŠ่ผธๅ…ฅ"), # Audio component label
143
+ gr.update(value="็”ข็”Ÿ้€ๅญ—็จฟ"), # Transcribe button text
144
+ gr.update(label="ๅŽŸๅง‹้€ๅญ—็จฟ"), # Original transcription textbox label
145
+ gr.update(label="ๆ›ดๆญฃ้€ๅญ—็จฟ"), # Corrected transcription textbox label
146
+ gr.update(label="ๅนด้ฝก"), # Age slider label
147
+ gr.update(label="ๆฏ่ชžๆŽ’็ฃ่ชžไฝฟ็”จ่€…?"), # Native speaker checkbox label
148
+ gr.update(value="ๅ„ฒๅญ˜"), # Save button text
149
+ gr.update(label="儲存狀態"), # Save‑status textbox label
150
+ gr.update(value="ไธ‹่ผ‰ ZIP ๆช”ๆกˆ"), # Download button text
151
+ gr.update(value="處理中，請稍候…") # Status message default
152
  )
153
 
154
+ # English UI
155
+ return (
156
+ "Paiwan ASR Transcription & Correction System",
157
+ "Step 1: Audio Upload & Transcription",
158
+ "Step 2: Review & Edit Transcription",
159
+ "Step 3: User Information",
160
+ "Step 4: Save & Download",
161
+ gr.update(label="Audio Input"),
162
+ gr.update(value="Generate Transcript"),
163
+ gr.update(label="Original Transcription"),
164
+ gr.update(label="Corrected Transcription"),
165
+ gr.update(label="Age"),
166
+ gr.update(label="Native Paiwan Speaker?"),
167
+ gr.update(value="Save"),
168
+ gr.update(label="Save Status"),
169
+ gr.update(value="Download ZIP File"),
170
+ gr.update(value="Processing, please wait…"),
171
+ )
172
+
173
+
174
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
175
+ # Gradio UI
176
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
177
 
 
178
  with gr.Blocks() as demo:
179
  lang_switch = gr.Checkbox(label="ๅˆ‡ๆ›ๅˆฐ็น้ซ”ไธญๆ–‡ (Switch to Traditional Chinese)")
180
 
 
207
  download_button = gr.Button()
208
  download_output = gr.File()
209
 
210
+ # Initialize English labels
211
+ init_vals = toggle_language(False)
212
+ (title.value, step1.value, step2.value, step3.value, step4.value,
213
+ audio_input, transcribe_button, original_text, corrected_text,
214
+ age_input, native_speaker_input, save_button, save_status,
215
+ download_button, status_message_init) = init_vals
216
+
217
+ audio_input.label = "Audio Input"
218
+ transcribe_button.value = "Generate Transcript"
219
+ original_text.label = "Original Transcription"
220
+ corrected_text.label = "Corrected Transcription"
221
+ age_input.label = "Age"
222
+ native_speaker_input.label = "Native Paiwan Speaker?"
223
+ save_button.value = "Save"
224
+ save_status.label = "Save Status"
225
+ download_button.value = "Download ZIP File"
226
+ status_message.value = status_message_init.value
227
+
228
+ # Language switch – won’t overwrite component values anymore
 
 
 
 
229
  lang_switch.change(
230
  toggle_language,
231
  inputs=lang_switch,
232
+ outputs=[title, step1, step2, step3, step4,
233
+ audio_input, transcribe_button, original_text, corrected_text,
234
+ age_input, native_speaker_input, save_button, save_status,
235
+ download_button, status_message]
236
  )
237
 
238
+ # Auto transcription on upload
239
  audio_input.change(
240
  transcribe_with_status,
241
  inputs=audio_input,
242
  outputs=[original_text, corrected_text, status_message]
243
  )
244
 
245
+ # Manual transcription button
246
  transcribe_button.click(
247
  transcribe_with_status,
248
  inputs=audio_input,
249
  outputs=[original_text, corrected_text, status_message]
250
  )
251
 
252
+ # Save to Firebase
253
  save_button.click(
254
  store_correction,
255
  inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
256
+ outputs=save_status,
257
  )
258
 
259
+ # Download ZIP
260
  download_button.click(
261
  prepare_download,
262
  inputs=[audio_input, original_text, corrected_text],
263
+ outputs=download_output,
264
  )
265
 
266
  demo.launch()