hunterschep committed on
Commit
3bc7caf
·
verified ·
1 Parent(s): 55f2d9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -149
app.py CHANGED
@@ -11,13 +11,13 @@ import json
11
  import tempfile
12
  import uuid
13
 
14
- # LOCAL INITIALIZATION - ONLY USE ON YOUR OWN DEVICE
15
- """
16
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
17
  cred = credentials.Certificate("serviceAccountKey.json")
18
- """
19
  # Deployed Initialization
20
- firebase_config = json.loads(os.environ.get("firebase_creds"))
21
  cred = credentials.Certificate(firebase_config)
22
 
23
  firebase_admin.initialize_app(cred, {
@@ -31,201 +31,220 @@ MODEL_NAME = "eleferrand/XLSR_paiwan"
31
  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
32
  model = AutoModelForCTC.from_pretrained(MODEL_NAME)
33
 
34
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
35
- # Helper functions
36
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
37
 
38
- def transcribe(audio_file: str) -> str:
39
- """Run ASR on *audio_file* and return the raw transcription."""
40
  try:
41
- audio, _ = librosa.load(audio_file, sr=16_000)
42
- input_values = processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
 
43
  with torch.no_grad():
44
  logits = model(input_values).logits
45
  predicted_ids = torch.argmax(logits, dim=-1)
46
  transcription = processor.batch_decode(predicted_ids)[0]
47
  return transcription.replace("[UNK]", "")
48
  except Exception as e:
49
- return f"่™•็†ๆช”ๆกˆๆ™‚็™ผ็”Ÿ้Œฏ่ชค๏ผš{e}"
50
-
51
 
52
- def transcribe_both_and_clear(audio_file: str):
53
- """Transcribe and populate both textboxes, then hide the status message."""
54
- text = transcribe(audio_file)
55
- return text, text, gr.update(visible=False, value="")
56
 
 
 
 
 
 
 
57
 
58
- def show_processing(_):
59
- """Display a temporary โ€˜processingโ€™ notice while ASR runs."""
60
- return gr.update(visible=True, value="โณ ่ฝ‰่ญฏไธญ๏ผŒ่ซ‹็จๅ€™โ€ฆ / Transcribingโ€ฆ please waitโ€ฆ")
61
 
62
-
63
- def store_correction(original: str, corrected: str, audio_file: str, age: int, native: bool):
64
- """Save user correction + optional audio to Firebase."""
65
  try:
66
- audio_meta, audio_url = {}, None
 
 
 
67
  if audio_file and os.path.exists(audio_file):
68
- audio, sr = librosa.load(audio_file, sr=44_100)
69
- audio_meta = {
70
- "duration": librosa.get_duration(y=audio, sr=sr),
71
- "file_size": os.path.getsize(audio_file),
72
- }
73
- uid = f"{uuid.uuid4()}.wav"
74
- path = f"audio/pai/{uid}"
75
- blob = bucket.blob(path)
 
 
 
76
  blob.upload_from_filename(audio_file)
77
- audio_url = blob.generate_signed_url(expiration=timedelta(hours=1))
78
 
79
- payload = {
80
- "transcription_info": {
81
- "original_text": original,
82
- "corrected_text": corrected,
83
- "language": "pai",
 
 
 
84
  },
85
- "audio_data": {"metadata": audio_meta, "url": audio_url},
86
- "user_info": {"native_paiwan_speaker": native, "age": age},
87
- "timestamp": datetime.now().isoformat(),
88
- "model_name": MODEL_NAME,
 
 
 
 
 
 
89
  }
90
- db.collection("paiwan_transcriptions").add(payload)
91
- return "ไฟฎๆญฃๅทฒๆˆๅŠŸๅ„ฒๅญ˜๏ผ(Correction saved successfully!)"
 
92
  except Exception as e:
93
- return f"ๅ„ฒๅญ˜ๅคฑๆ•—๏ผš{e} (Error saving correction: {e})"
94
 
95
 
96
- def prepare_download(audio_file: str, original: str, corrected: str):
97
- """Package audio + transcriptions into a ZIP and return its path."""
98
- if not audio_file:
99
  return None
100
 
101
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
102
- tmp.close()
103
- with zipfile.ZipFile(tmp.name, "w") as zf:
104
  if os.path.exists(audio_file):
105
  zf.write(audio_file, arcname="audio.wav")
106
- for name, content in {
107
- "original_transcription.txt": original,
108
- "corrected_transcription.txt": corrected,
109
- }.items():
110
- with open(name, "w", encoding="utf-8") as fh:
111
- fh.write(content)
112
- zf.write(name, arcname=name)
113
- os.remove(name)
114
- return tmp.name
115
-
116
-
117
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
118
- # Language toggle helpers
119
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
120
-
121
- # Each tuple contains (Markdown value/label, update kwargs) in the same order as *components*
122
- zh_TW_labels = (
123
- ("ๆŽ’็ฃ่ชž่ชž้Ÿณ่พจ่ญ˜่ˆ‡้€ๅญ—็จฟไฟฎๆญฃ็ณป็ตฑ", {"value": "ๆŽ’็ฃ่ชž่ชž้Ÿณ่พจ่ญ˜่ˆ‡้€ๅญ—็จฟไฟฎๆญฃ็ณป็ตฑ"}),
124
- ("ๆญฅ้ฉŸไธ€๏ผšไธŠๅ‚ณ้Ÿณ่จŠ๏ผˆ็ณป็ตฑๅฐ‡่‡ชๅ‹•่ฝ‰่ญฏ๏ผ‰", {"value": "ๆญฅ้ฉŸไธ€๏ผšไธŠๅ‚ณ้Ÿณ่จŠ๏ผˆ็ณป็ตฑๅฐ‡่‡ชๅ‹•่ฝ‰่ญฏ๏ผ‰"}),
125
- ("ๆญฅ้ฉŸไบŒ๏ผšๆชข้–ฑ่ˆ‡็ทจ่ผฏ้€ๅญ—็จฟ", {"value": "ๆญฅ้ฉŸไบŒ๏ผšๆชข้–ฑ่ˆ‡็ทจ่ผฏ้€ๅญ—็จฟ"}),
126
- ("ๆญฅ้ฉŸไธ‰๏ผšไฝฟ็”จ่€…่ณ‡่จŠ", {"value": "ๆญฅ้ฉŸไธ‰๏ผšไฝฟ็”จ่€…่ณ‡่จŠ"}),
127
- ("ๆญฅ้ฉŸๅ››๏ผšๅ„ฒๅญ˜่ˆ‡ไธ‹่ผ‰", {"value": "ๆญฅ้ฉŸๅ››๏ผšๅ„ฒๅญ˜่ˆ‡ไธ‹่ผ‰"}),
128
- (None, {"label": "้Ÿณ่จŠๆช”ๆกˆ"}),
129
- (None, {"label": "ๅŽŸๅง‹้€ๅญ—็จฟ"}),
130
- (None, {"label": "ไฟฎๆญฃ็‰ˆ้€ๅญ—็จฟ"}),
131
- (None, {"label": "ๅนด้ฝก"}),
132
- (None, {"label": "ๆ˜ฏๅฆ็‚บๆŽ’็ฃ่ชžๆฏ่ชž่€…๏ผŸ"}),
133
- (None, {"value": "ๅ„ฒๅญ˜ไฟฎๆญฃ"}),
134
- (None, {"label": "ๅ„ฒๅญ˜็‹€ๆ…‹"}),
135
- (None, {"value": "ไธ‹่ผ‰ ZIP ๆช”"}),
136
- ("", {"value": ""}),
137
- )
138
-
139
- en_labels = (
140
- ("Paiwan ASR Transcription & Correction System", {"value": "Paiwan ASR Transcription & Correction System"}),
141
- ("Step 1: Upload Audio (autoโ€‘transcription)", {"value": "Step 1: Upload Audio (autoโ€‘transcription)"}),
142
- ("Step 2: Review & Edit Transcription", {"value": "Step 2: Review & Edit Transcription"}),
143
- ("Step 3: User Information", {"value": "Step 3: User Information"}),
144
- ("Step 4: Save & Download", {"value": "Step 4: Save & Download"}),
145
- (None, {"label": "Audio File"}),
146
- (None, {"label": "Original Transcription"}),
147
- (None, {"label": "Corrected Transcription"}),
148
- (None, {"label": "Age"}),
149
- (None, {"label": "Native Paiwan Speaker?"}),
150
- (None, {"value": "Save Correction"}),
151
- (None, {"label": "Save Status"}),
152
- (None, {"value": "Download ZIP File"}),
153
- ("", {"value": ""}),
154
- )
155
-
156
-
157
- def toggle_language(switch: bool):
158
- """Return gr.update objects in correct order for all UI components."""
159
- labels = zh_TW_labels if switch else en_labels
160
- updates = []
161
- for comp_text, kwargs in labels:
162
- # Markdown components expect value update; others use kwargs
163
- if comp_text is not None and "value" not in kwargs:
164
- kwargs["value"] = comp_text
165
- updates.append(gr.update(**kwargs))
166
- return tuple(updates)
167
-
168
-
169
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
170
  # Interface
171
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
172
  with gr.Blocks() as demo:
173
  lang_switch = gr.Checkbox(label="ๅˆ‡ๆ›ๅˆฐ็น้ซ”ไธญๆ–‡ (Switch to Traditional Chinese)")
174
 
175
- # Component stubs (order matters!)
176
  title = gr.Markdown()
177
  step1 = gr.Markdown()
178
- step2 = gr.Markdown()
179
- step3 = gr.Markdown()
180
- step4 = gr.Markdown()
181
 
182
- audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
183
- original_text = gr.Textbox(interactive=False, lines=5)
184
- corrected_text = gr.Textbox(interactive=True, lines=5)
185
 
186
- age_input = gr.Slider(minimum=0, maximum=100, step=1, value=25)
187
- native_speaker_input = gr.Checkbox(value=True)
188
 
189
- save_button = gr.Button()
190
- save_status = gr.Textbox(interactive=False)
191
- download_button = gr.Button()
192
- status_msg = gr.Markdown(visible=False)
193
 
194
- components = [
195
- title, step1, step2, step3, step4,
196
- audio_input, original_text, corrected_text,
197
- age_input, native_speaker_input,
198
- save_button, save_status, download_button,
199
- status_msg,
200
- ]
201
 
202
- # Initialise text in English
203
- initial_updates = toggle_language(False)
204
- for comp, upd in zip(components, initial_updates):
205
- comp.update(**upd)
206
 
207
- # Language toggle
208
- lang_switch.change(toggle_language, inputs=lang_switch, outputs=components)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- # Autoโ€‘transcribe when audio added
211
- audio_input.change(show_processing, inputs=audio_input, outputs=status_msg)
212
  audio_input.change(
213
- transcribe_both_and_clear,
 
 
 
 
 
 
 
214
  inputs=audio_input,
215
- outputs=[original_text, corrected_text, status_msg],
216
  )
217
 
218
- # Save & download actions
219
  save_button.click(
220
  store_correction,
221
  inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
222
- outputs=save_status,
223
  )
224
 
225
  download_button.click(
226
  prepare_download,
227
  inputs=[audio_input, original_text, corrected_text],
228
- outputs=download_output,
229
  )
230
 
231
  demo.launch()
 
11
  import tempfile
12
  import uuid
13
 
14
+ # LOCAL INITIALIZATION - ONLY USE ON YOUR OWN DEVICE
15
+ '''
16
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
17
  cred = credentials.Certificate("serviceAccountKey.json")
18
+ '''
19
  # Deployed Initialization
20
+ firebase_config = json.loads(os.environ.get('firebase_creds'))
21
  cred = credentials.Certificate(firebase_config)
22
 
23
  firebase_admin.initialize_app(cred, {
 
31
  processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
32
  model = AutoModelForCTC.from_pretrained(MODEL_NAME)
33
 
 
 
 
34
 
35
def transcribe(audio_file):
    """Run ASR on *audio_file* and return the decoded transcription.

    The audio is resampled to 16 kHz (the rate the Wav2Vec2 model was
    trained on), greedily CTC-decoded, and ``[UNK]`` tokens are stripped.
    On any failure an error-message string is returned instead of raising,
    so the Gradio textbox always receives something displayable.
    """
    try:
        # The returned sample rate is always 16000 here, so discard it.
        audio, _ = librosa.load(audio_file, sr=16000)
        input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values

        # Inference only — no gradients needed.
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription.replace("[UNK]", "")
    except Exception as e:
        # Surfaced directly in the UI textbox rather than raised.
        return f"่™•็†ๆ–‡ไปถ้Œฏ่ชค: {e}"
 
47
 
 
 
 
 
48
 
49
def transcribe_with_status(audio_file):
    """Transcribe *audio_file* and feed both transcript textboxes plus a status line.

    Returns ``(original_text, corrected_text, status)``.  The transcription is
    duplicated so the user can edit one copy while the original stays visible.
    With no audio uploaded, both textboxes are cleared and a prompt is shown.
    """
    if audio_file is None:
        return "", "", "่ซ‹ๅ…ˆไธŠๅ‚ณ้Ÿณ่จŠ (Please upload audio first)"
    # NOTE(review): the previous version assigned an interim "processing"
    # status string here but never returned it — a Gradio handler returns
    # exactly once — so that dead assignment has been removed.
    transcription = transcribe(audio_file)
    return transcription, transcription, "ๅฎŒๆˆ๏ผ(Done!)"
55
 
 
 
 
56
 
57
def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
    """Persist a user-corrected transcription (and optionally its audio) to Firebase.

    Uploads the audio file to Cloud Storage under ``audio/pai/<uuid>.wav``,
    attaches duration/size metadata and a 1-hour signed URL, then writes the
    combined record to the ``paiwan_transcriptions`` Firestore collection.
    Returns a human-readable success or failure message for the status box.
    """
    try:
        meta = {}
        signed_url = None

        # Only touch Storage when a real local audio file was provided.
        if audio_file and os.path.exists(audio_file):
            clip, sample_rate = librosa.load(audio_file, sr=44100)
            meta = {
                'duration': librosa.get_duration(y=clip, sr=sample_rate),
                'file_size': os.path.getsize(audio_file),
            }

            # UUID keeps concurrent uploads from colliding in the bucket.
            blob_path = f"{'audio/pai'}/{uuid.uuid4()}.wav"
            target = bucket.blob(blob_path)
            target.upload_from_filename(audio_file)

            # Signed URL valid for one hour (adjust expiration as needed).
            signed_url = target.generate_signed_url(expiration=timedelta(hours=1))

        record = {
            'transcription_info': {
                'original_text': original_transcription,
                'corrected_text': corrected_transcription,
                'language': 'pai',
            },
            'audio_data': {
                'audio_metadata': meta,
                'audio_file_url': signed_url,
            },
            'user_info': {
                'native_paiwan_speaker': native_speaker,
                'age': age
            },
            'timestamp': datetime.now().isoformat(),
            'model_name': MODEL_NAME
        }
        # One collection per language.
        db.collection('paiwan_transcriptions').add(record)
        return "ๆ กๆญฃไฟๅญ˜ๆˆๅŠŸ! (Correction saved successfully!)"
    except Exception as e:
        return f"ไฟๅญ˜ๅคฑ่ดฅ: {e} (Error saving correction: {e})"
102
 
103
 
104
def prepare_download(audio_file, original_transcription, corrected_transcription):
    """Bundle the audio plus both transcripts into a ZIP and return its path.

    Returns ``None`` when no audio file was provided.  The archive contains
    ``audio.wav`` (only if the source file still exists) and two UTF-8 text
    files with the original and corrected transcriptions.  The temp file is
    created with ``delete=False``; the caller/OS owns its eventual cleanup.
    """
    if audio_file is None:
        return None

    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_zip.close()
    with zipfile.ZipFile(tmp_zip.name, "w") as zf:
        if os.path.exists(audio_file):
            zf.write(audio_file, arcname="audio.wav")
        # Write transcripts straight into the archive.  The previous version
        # created scratch .txt files in the working directory, which collide
        # across concurrent users and leak on exception; writestr() encodes
        # str payloads as UTF-8, matching the old on-disk encoding.
        zf.writestr("original_transcription.txt", original_transcription)
        zf.writestr("corrected_transcription.txt", corrected_transcription)
    return tmp_zip.name
126
+
127
+
128
def toggle_language(switch):
    """Return the full 15-tuple of UI strings for the selected language.

    ``switch`` True selects Traditional Chinese, False selects English.
    Tuple order matches the component list wired to ``lang_switch``:
    title, four step headers, audio label, transcribe button, the two
    transcript labels, age, native-speaker, save button, save status,
    download button, and the default status message.
    """
    zh_tw = (
        "ๆŽ’็ฃ่ชž่‡ชๅ‹•่ชž้Ÿณ่ญ˜ๅˆฅ้€ๅญ—็จฟ่ˆ‡ไฟฎๆญฃ็ณป็ตฑ",          # Title
        "ๆญฅ้ฉŸ 1๏ผš้Ÿณ่จŠไธŠๅ‚ณ่ˆ‡้€ๅญ—็จฟ",                       # Step 1
        "ๆญฅ้ฉŸ 2๏ผšๅฏฉ้–ฑ่ˆ‡็ทจ่ผฏ้€ๅญ—็จฟ",                       # Step 2
        "ๆญฅ้ฉŸ 3๏ผšไฝฟ็”จ่€…่ณ‡่จŠ",                              # Step 3
        "ๆญฅ้ฉŸ 4๏ผšๅ„ฒๅญ˜่ˆ‡ไธ‹่ผ‰",                              # Step 4
        "้Ÿณ่จŠ่ผธๅ…ฅ",                                         # Audio label
        "็”ข็”Ÿ้€ๅญ—็จฟ",                                       # Transcribe button
        "ๅŽŸๅง‹้€ๅญ—็จฟ",                                       # Original textbox
        "ๆ›ดๆญฃ้€ๅญ—็จฟ",                                       # Corrected textbox
        "ๅนด้ฝก",                                             # Age
        "ๆฏ่ชžๆŽ’็ฃ่ชžไฝฟ็”จ่€…?",                               # Native speaker?
        "ๅ„ฒๅญ˜",                                             # Save button
        "ๅ„ฒๅญ˜็‹€ๆ…‹",                                         # Save status
        "ไธ‹่ผ‰ ZIP ๆช”ๆกˆ",                                    # Download button
        "่™•็†ไธญ๏ผŒ่ซ‹็จๅ€™โ€ฆ"                                  # Default status message
    )
    english = (
        "Paiwan ASR Transcription & Correction System",
        "Step 1: Audio Upload & Transcription",
        "Step 2: Review & Edit Transcription",
        "Step 3: User Information",
        "Step 4: Save & Download",
        "Audio Input", "Generate Transcript",
        "Original Transcription", "Corrected Transcription",
        "Age", "Native Paiwan Speaker?",
        "Save", "Save Status",
        "Download ZIP File",
        "Processing, please waitโ€ฆ"
    )
    return zh_tw if switch else english
158
+
159
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
# Interface
with gr.Blocks() as demo:
    lang_switch = gr.Checkbox(label="ๅˆ‡ๆ›ๅˆฐ็น้ซ”ไธญๆ–‡ (Switch to Traditional Chinese)")

    title = gr.Markdown()
    step1 = gr.Markdown()

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")

    # Hidden until a transcription status needs showing.
    status_message = gr.Markdown(visible=False)

    step2 = gr.Markdown()
    with gr.Row():
        transcribe_button = gr.Button()

    original_text = gr.Textbox(interactive=False, lines=5)
    corrected_text = gr.Textbox(interactive=True, lines=5)

    step3 = gr.Markdown()
    with gr.Row():
        age_input = gr.Slider(minimum=0, maximum=100, step=1, value=25)
        native_speaker_input = gr.Checkbox(value=True)

    step4 = gr.Markdown()
    with gr.Row():
        save_button = gr.Button()
        save_status = gr.Textbox(interactive=False)

    with gr.Row():
        download_button = gr.Button()
        download_output = gr.File()

    def _language_updates(switch):
        """Map toggle_language() strings onto per-component gr.update objects.

        Markdown/Button text is the component *value*; for the input widgets
        (Audio, Textbox, Slider, Checkbox) the string is the *label*.
        Returning bare strings — as this handler previously did — would have
        overwritten the input widgets' values instead of relabeling them.
        """
        (title_text, step1_text, step2_text, step3_text, step4_text,
         audio_label, transcribe_label, orig_label, corr_label,
         age_label, native_label, save_label, save_status_label,
         download_label, default_status) = toggle_language(switch)
        return (
            gr.update(value=title_text),
            gr.update(value=step1_text),
            gr.update(value=step2_text),
            gr.update(value=step3_text),
            gr.update(value=step4_text),
            gr.update(label=audio_label),
            gr.update(value=transcribe_label),
            gr.update(label=orig_label),
            gr.update(label=corr_label),
            gr.update(label=age_label),
            gr.update(label=native_label),
            gr.update(value=save_label),
            gr.update(label=save_status_label),
            gr.update(value=download_label),
            gr.update(value=default_status),
        )

    # Initialize UI texts according to default language (English).
    (title_text, step1_text, step2_text, step3_text, step4_text,
     audio_label, transcribe_label, orig_label, corr_label,
     age_label, native_label, save_label, save_status_label,
     download_label, default_status) = toggle_language(False)

    title.value = title_text
    step1.value = step1_text
    step2.value = step2_text
    step3.value = step3_text
    step4.value = step4_text
    audio_input.label = audio_label
    transcribe_button.value = transcribe_label
    original_text.label = orig_label
    corrected_text.label = corr_label
    age_input.label = age_label
    native_speaker_input.label = native_label
    save_button.value = save_label
    save_status.label = save_status_label
    download_button.value = download_label
    status_message.value = default_status

    # Language switcher — one gr.update per component, in the outputs order.
    lang_switch.change(
        _language_updates,
        inputs=lang_switch,
        outputs=[title, step1, step2, step3, step4, audio_input, transcribe_button,
                 original_text, corrected_text, age_input, native_speaker_input,
                 save_button, save_status, download_button, status_message]
    )

    # Automatic transcription on audio upload
    audio_input.change(
        transcribe_with_status,
        inputs=audio_input,
        outputs=[original_text, corrected_text, status_message]
    )

    # Manual transcription button (optional)
    transcribe_button.click(
        transcribe_with_status,
        inputs=audio_input,
        outputs=[original_text, corrected_text, status_message]
    )

    save_button.click(
        store_correction,
        inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
        outputs=save_status
    )

    download_button.click(
        prepare_download,
        inputs=[audio_input, original_text, corrected_text],
        outputs=download_output
    )

demo.launch()