Spaces:

FormosonBankDemos
/

paiwan_transcription

Sleeping

App Files Community

hunterschep committed on Apr 27

Commit

df1c6c9

verified ·

1 Parent(s): 3bc7caf

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -110

app.py CHANGED Viewed

@@ -32,11 +32,15 @@ processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 model = AutoModelForCTC.from_pretrained(MODEL_NAME)
-def transcribe(audio_file):
     try:
-        audio, rate = librosa.load(audio_file, sr=16000)
         input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
         with torch.no_grad():
             logits = model(input_values).logits
         predicted_ids = torch.argmax(logits, dim=-1)
@@ -47,117 +51,130 @@ def transcribe(audio_file):
 def transcribe_with_status(audio_file):
     if audio_file is None:
-        return "", "", "請先上傳音訊 (Please upload audio first)"
-    status = "處理中，請稍候… (Processing, please wait…)"
     transcription = transcribe(audio_file)
-    return transcription, transcription, "完成！(Done!)"
 def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
     try:
-        audio_metadata = {}
-        audio_file_url = None
-        # If an audio file is provided, upload it to Firebase Storage
         if audio_file and os.path.exists(audio_file):
             audio, sr = librosa.load(audio_file, sr=44100)
-            duration = librosa.get_duration(y=audio, sr=sr)
-            file_size = os.path.getsize(audio_file)
-            audio_metadata = {'duration': duration, 'file_size': file_size}
-            # Generate a unique identifier for the audio file
-            unique_id = str(uuid.uuid4())
-            destination_path = f"audio/pai/{unique_id}.wav"
-            # Create a blob and upload the file
-            blob = bucket.blob(destination_path)
             blob.upload_from_filename(audio_file)
-            # Generate a signed download URL valid for 1 hour (adjust expiration as needed)
             audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))
-        combined_data = {
-            'transcription_info': {
-                'original_text': original_transcription,
-                'corrected_text': corrected_transcription,
-                'language': 'pai',
             },
-            'audio_data': {
-                'audio_metadata': audio_metadata,
-                'audio_file_url': audio_file_url,
             },
-            'user_info': {
-                'native_paiwan_speaker': native_speaker,
-                'age': age
             },
-            'timestamp': datetime.now().isoformat(),
-            'model_name': MODEL_NAME
-        }
-        # Save data to a collection for that language
-        db.collection('paiwan_transcriptions').add(combined_data)
         return "校正保存成功! (Correction saved successfully!)"
     except Exception as e:
         return f"保存失败: {e} (Error saving correction: {e})"
 def prepare_download(audio_file, original_transcription, corrected_transcription):
     if audio_file is None:
         return None
-    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
-    tmp_zip.close()
-    with zipfile.ZipFile(tmp_zip.name, "w") as zf:
         if os.path.exists(audio_file):
             zf.write(audio_file, arcname="audio.wav")
-        orig_txt = "original_transcription.txt"
-        with open(orig_txt, "w", encoding="utf-8") as f:
-            f.write(original_transcription)
-        zf.write(orig_txt, arcname="original_transcription.txt")
-        os.remove(orig_txt)
-        corr_txt = "corrected_transcription.txt"
-        with open(corr_txt, "w", encoding="utf-8") as f:
-            f.write(corrected_transcription)
-        zf.write(corr_txt, arcname="corrected_transcription.txt")
-        os.remove(corr_txt)
-    return tmp_zip.name
-def toggle_language(switch):
-    """Switch UI text between English and Traditional Chinese"""
-    if switch:
-        return (
-            "排灣語自動語音識別逐字稿與修正系統",  # Title
-            "步驟 1：音訊上傳與逐字稿",            # Step 1
-            "步驟 2：審閱與編輯逐字稿",            # Step 2
-            "步驟 3：使用者資訊",                # Step 3
-            "步驟 4：儲存與下載",                # Step 4
-            "音訊輸入", "產生逐字稿",             # Audio label, Transcribe button
-            "原始逐字稿", "更正逐字稿",          # Textboxes
-            "年齡", "母語排灣語使用者?",       # Age, Native speaker?
-            "儲存", "儲存狀態",                 # Save button, Save status
-            "下載 ZIP 檔案",                   # Download button
-            "處理中，請稍候…"                  # Status message default (Chinese)
-        )
-    else:
         return (
-            "Paiwan ASR Transcription & Correction System",
-            "Step 1: Audio Upload & Transcription",
-            "Step 2: Review & Edit Transcription",
-            "Step 3: User Information",
-            "Step 4: Save & Download",
-            "Audio Input", "Generate Transcript",
-            "Original Transcription", "Corrected Transcription",
-            "Age", "Native Paiwan Speaker?",
-            "Save", "Save Status",
-            "Download ZIP File",
-            "Processing, please wait…"
         )
-# Interface
 with gr.Blocks() as demo:
     lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
@@ -190,61 +207,60 @@ with gr.Blocks() as demo:
         download_button = gr.Button()
         download_output = gr.File()
-    # Initialize UI texts according to default language (English)
-    (title_text, step1_text, step2_text, step3_text, step4_text,
-     audio_label, transcribe_label, orig_label, corr_label,
-     age_label, native_label, save_label, save_status_label,
-     download_label, default_status) = toggle_language(False)
-    title.value = title_text
-    step1.value = step1_text
-    step2.value = step2_text
-    step3.value = step3_text
-    step4.value = step4_text
-    audio_input.label = audio_label
-    transcribe_button.value = transcribe_label
-    original_text.label = orig_label
-    corrected_text.label = corr_label
-    age_input.label = age_label
-    native_speaker_input.label = native_label
-    save_button.value = save_label
-    save_status.label = save_status_label
-    download_button.value = download_label
-    status_message.value = default_status
-    # Language switcher
     lang_switch.change(
         toggle_language,
         inputs=lang_switch,
-        outputs=[title, step1, step2, step3, step4, audio_input, transcribe_button,
-                 original_text, corrected_text, age_input, native_speaker_input,
-                 save_button, save_status, download_button, status_message]
     )
-    # Automatic transcription on audio upload
     audio_input.change(
         transcribe_with_status,
         inputs=audio_input,
         outputs=[original_text, corrected_text, status_message]
     )
-    # Manual transcription button (optional)
     transcribe_button.click(
         transcribe_with_status,
         inputs=audio_input,
         outputs=[original_text, corrected_text, status_message]
     )
     save_button.click(
         store_correction,
         inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
-        outputs=save_status
     )
     download_button.click(
         prepare_download,
         inputs=[audio_input, original_text, corrected_text],
-        outputs=download_output
     )
 demo.launch()

 model = AutoModelForCTC.from_pretrained(MODEL_NAME)
+# ────────────────────────────────
+# Core ASR helper functions
+# ────────────────────────────────
+def transcribe(audio_file: str):
+    """Run ASR on the uploaded audio file and return the raw transcription."""
     try:
+        audio, _ = librosa.load(audio_file, sr=16000)
         input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
         with torch.no_grad():
             logits = model(input_values).logits
         predicted_ids = torch.argmax(logits, dim=-1)
 def transcribe_with_status(audio_file):
+    """Wrapper that provides UI‑friendly status messages."""
     if audio_file is None:
+        return "", "", gr.update(value="請先上傳音訊 (Please upload audio first)", visible=True)
+    # Show processing message first
+    processing_msg = gr.update(value="處理中，請稍候… (Processing, please wait…)", visible=True)
     transcription = transcribe(audio_file)
+    done_msg = gr.update(value="完成！(Done!)", visible=True)
+    return transcription, transcription, done_msg
+# ────────────────────────────────
+# Firebase helpers
+# ────────────────────────────────
 def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
+    """Upload audio (if provided) + transcription pair to Firestore/Storage."""
     try:
+        audio_metadata, audio_file_url = {}, None
         if audio_file and os.path.exists(audio_file):
             audio, sr = librosa.load(audio_file, sr=44100)
+            audio_metadata = {
+                "duration": librosa.get_duration(y=audio, sr=sr),
+                "file_size": os.path.getsize(audio_file),
+            }
+            uid = str(uuid.uuid4())
+            dst = f"audio/pai/{uid}.wav"
+            blob = bucket.blob(dst)
             blob.upload_from_filename(audio_file)
             audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))
+        db.collection("paiwan_transcriptions").add({
+            "transcription_info": {
+                "original_text": original_transcription,
+                "corrected_text": corrected_transcription,
+                "language": "pai",
             },
+            "audio_data": {
+                "audio_metadata": audio_metadata,
+                "audio_file_url": audio_file_url,
             },
+            "user_info": {
+                "native_paiwan_speaker": native_speaker,
+                "age": age,
             },
+            "timestamp": datetime.now().isoformat(),
+            "model_name": MODEL_NAME,
+        })
         return "校正保存成功! (Correction saved successfully!)"
     except Exception as e:
         return f"保存失败: {e} (Error saving correction: {e})"
 def prepare_download(audio_file, original_transcription, corrected_transcription):
+    """Bundle audio + TXT files into a ZIP for download."""
     if audio_file is None:
         return None
+    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip").name
+    with zipfile.ZipFile(tmp_zip, "w") as zf:
         if os.path.exists(audio_file):
             zf.write(audio_file, arcname="audio.wav")
+        for name, content in [
+            ("original_transcription.txt", original_transcription),
+            ("corrected_transcription.txt", corrected_transcription),
+        ]:
+            with open(name, "w", encoding="utf-8") as f:
+                f.write(content)
+            zf.write(name, arcname=name)
+            os.remove(name)
+    return tmp_zip
+# ────────────────────────────────
+# Dynamic label switching – uses gr.update() so values aren’t overwritten
+# ────────────────────────────────
+def toggle_language(switch: bool):
+    """Return a tuple of updates for each UI component when the language toggle flips."""
+    if switch:  # Traditional Chinese UI
         return (
+            "排灣語自動語音識別逐字稿與修正系統",  # Title (Markdown)
+            "步驟 1：音訊上傳與逐字稿",           # Step 1 (Markdown)
+            "步驟 2：審閱與編輯逐字稿",           # Step 2 (Markdown)
+            "步驟 3：使用者資訊",               # Step 3 (Markdown)
+            "步驟 4：儲存與下載",               # Step 4 (Markdown)
+            gr.update(label="音訊輸入"),          # Audio component label
+            gr.update(value="產生逐字稿"),        # Transcribe button text
+            gr.update(label="原始逐字稿"),        # Original transcription textbox label
+            gr.update(label="更正逐字稿"),        # Corrected transcription textbox label
+            gr.update(label="年齡"),              # Age slider label
+            gr.update(label="母語排灣語使用者?"),  # Native speaker checkbox label
+            gr.update(value="儲存"),              # Save button text
+            gr.update(label="儲存狀態"),          # Save‑status textbox label
+            gr.update(value="下載 ZIP 檔案"),      # Download button text
+            gr.update(value="處理中，請稍候…")     # Status message default
         )
+    # English UI
+    return (
+        "Paiwan ASR Transcription & Correction System",
+        "Step 1: Audio Upload & Transcription",
+        "Step 2: Review & Edit Transcription",
+        "Step 3: User Information",
+        "Step 4: Save & Download",
+        gr.update(label="Audio Input"),
+        gr.update(value="Generate Transcript"),
+        gr.update(label="Original Transcription"),
+        gr.update(label="Corrected Transcription"),
+        gr.update(label="Age"),
+        gr.update(label="Native Paiwan Speaker?"),
+        gr.update(value="Save"),
+        gr.update(label="Save Status"),
+        gr.update(value="Download ZIP File"),
+        gr.update(value="Processing, please wait…"),
+    )
+# ────────────────────────────────
+# Gradio UI
+# ────────────────────────────────
 with gr.Blocks() as demo:
     lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
         download_button = gr.Button()
         download_output = gr.File()
+    # Initialize English labels
+    init_vals = toggle_language(False)
+    (title.value, step1.value, step2.value, step3.value, step4.value,
+     audio_input, transcribe_button, original_text, corrected_text,
+     age_input, native_speaker_input, save_button, save_status,
+     download_button, status_message_init) = init_vals
+    audio_input.label = "Audio Input"
+    transcribe_button.value = "Generate Transcript"
+    original_text.label = "Original Transcription"
+    corrected_text.label = "Corrected Transcription"
+    age_input.label = "Age"
+    native_speaker_input.label = "Native Paiwan Speaker?"
+    save_button.value = "Save"
+    save_status.label = "Save Status"
+    download_button.value = "Download ZIP File"
+    status_message.value = status_message_init.value
+    # Language switch – won’t overwrite component values anymore
     lang_switch.change(
         toggle_language,
         inputs=lang_switch,
+        outputs=[title, step1, step2, step3, step4,
+                 audio_input, transcribe_button, original_text, corrected_text,
+                 age_input, native_speaker_input, save_button, save_status,
+                 download_button, status_message]
     )
+    # Auto transcription on upload
     audio_input.change(
         transcribe_with_status,
         inputs=audio_input,
         outputs=[original_text, corrected_text, status_message]
     )
+    # Manual transcription button
     transcribe_button.click(
         transcribe_with_status,
         inputs=audio_input,
         outputs=[original_text, corrected_text, status_message]
     )
+    # Save to Firebase
     save_button.click(
         store_correction,
         inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
+        outputs=save_status,
     )
+    # Download ZIP
     download_button.click(
         prepare_download,
         inputs=[audio_input, original_text, corrected_text],
+        outputs=download_output,
     )
 demo.launch()