Spaces:

FormosonBankDemos
/

paiwan_transcription

Sleeping

App Files Files Community

hunterschep committed on Apr 27

Commit

3bc7caf

verified ·

1 Parent(s): 55f2d9c

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -149

app.py CHANGED Viewed

@@ -11,13 +11,13 @@ import json
 import tempfile
 import uuid
-# LOCAL INITIALIZATION - ONLY USE ON YOUR OWN DEVICE
-"""
 os.chdir(os.path.dirname(os.path.abspath(__file__)))
 cred = credentials.Certificate("serviceAccountKey.json")
-"""
 # Deployed Initialization
-firebase_config = json.loads(os.environ.get("firebase_creds"))
 cred = credentials.Certificate(firebase_config)
 firebase_admin.initialize_app(cred, {
@@ -31,201 +31,220 @@ MODEL_NAME = "eleferrand/XLSR_paiwan"
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 model = AutoModelForCTC.from_pretrained(MODEL_NAME)
-# ──────────────────────────────────────────────────────────────────────────────
-# Helper functions
-# ──────────────────────────────────────────────────────────────────────────────
-def transcribe(audio_file: str) -> str:
-    """Run ASR on *audio_file* and return the raw transcription."""
     try:
-        audio, _ = librosa.load(audio_file, sr=16_000)
-        input_values = processor(audio, sampling_rate=16_000, return_tensors="pt").input_values
         with torch.no_grad():
             logits = model(input_values).logits
         predicted_ids = torch.argmax(logits, dim=-1)
         transcription = processor.batch_decode(predicted_ids)[0]
         return transcription.replace("[UNK]", "")
     except Exception as e:
-        return f"處理檔案時發生錯誤：{e}"
-def transcribe_both_and_clear(audio_file: str):
-    """Transcribe and populate both textboxes, then hide the status message."""
-    text = transcribe(audio_file)
-    return text, text, gr.update(visible=False, value="")
-def show_processing(_):
-    """Display a temporary ‘processing’ notice while ASR runs."""
-    return gr.update(visible=True, value="⏳ 轉譯中，請稍候… / Transcribing… please wait…")
-def store_correction(original: str, corrected: str, audio_file: str, age: int, native: bool):
-    """Save user correction + optional audio to Firebase."""
     try:
-        audio_meta, audio_url = {}, None
         if audio_file and os.path.exists(audio_file):
-            audio, sr = librosa.load(audio_file, sr=44_100)
-            audio_meta = {
-                "duration": librosa.get_duration(y=audio, sr=sr),
-                "file_size": os.path.getsize(audio_file),
-            }
-            uid = f"{uuid.uuid4()}.wav"
-            path = f"audio/pai/{uid}"
-            blob = bucket.blob(path)
             blob.upload_from_filename(audio_file)
-            audio_url = blob.generate_signed_url(expiration=timedelta(hours=1))
-        payload = {
-            "transcription_info": {
-                "original_text": original,
-                "corrected_text": corrected,
-                "language": "pai",
             },
-            "audio_data": {"metadata": audio_meta, "url": audio_url},
-            "user_info": {"native_paiwan_speaker": native, "age": age},
-            "timestamp": datetime.now().isoformat(),
-            "model_name": MODEL_NAME,
         }
-        db.collection("paiwan_transcriptions").add(payload)
-        return "修正已成功儲存！(Correction saved successfully!)"
     except Exception as e:
-        return f"儲存失敗：{e} (Error saving correction: {e})"
-def prepare_download(audio_file: str, original: str, corrected: str):
-    """Package audio + transcriptions into a ZIP and return its path."""
-    if not audio_file:
         return None
-    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
-    tmp.close()
-    with zipfile.ZipFile(tmp.name, "w") as zf:
         if os.path.exists(audio_file):
             zf.write(audio_file, arcname="audio.wav")
-        for name, content in {
-            "original_transcription.txt": original,
-            "corrected_transcription.txt": corrected,
-        }.items():
-            with open(name, "w", encoding="utf-8") as fh:
-                fh.write(content)
-            zf.write(name, arcname=name)
-            os.remove(name)
-    return tmp.name
-# ──────────────────────────────────────────────────────────────────────────────
-# Language toggle helpers
-# ──────────────────────────────────────────────────────────────────────────────
-# Each tuple contains (Markdown value/label, update kwargs) in the same order as *components*
-zh_TW_labels = (
-    ("排灣語語音辨識與逐字稿修正系統", {"value": "排灣語語音辨識與逐字稿修正系統"}),
-    ("步驟一：上傳音訊（系統將自動轉譯）", {"value": "步驟一：上傳音訊（系統將自動轉譯）"}),
-    ("步驟二：檢閱與編輯逐字稿", {"value": "步驟二：檢閱與編輯逐字稿"}),
-    ("步驟三：使用者資訊", {"value": "步驟三：使用者資訊"}),
-    ("步驟四：儲存與下載", {"value": "步驟四：儲存與下載"}),
-    (None, {"label": "音訊檔案"}),
-    (None, {"label": "原始逐字稿"}),
-    (None, {"label": "修正版逐字稿"}),
-    (None, {"label": "年齡"}),
-    (None, {"label": "是否為排灣語母語者？"}),
-    (None, {"value": "儲存修正"}),
-    (None, {"label": "儲存狀態"}),
-    (None, {"value": "下載 ZIP 檔"}),
-    ("", {"value": ""}),
-)
-en_labels = (
-    ("Paiwan ASR Transcription & Correction System", {"value": "Paiwan ASR Transcription & Correction System"}),
-    ("Step 1: Upload Audio (auto‑transcription)", {"value": "Step 1: Upload Audio (auto‑transcription)"}),
-    ("Step 2: Review & Edit Transcription", {"value": "Step 2: Review & Edit Transcription"}),
-    ("Step 3: User Information", {"value": "Step 3: User Information"}),
-    ("Step 4: Save & Download", {"value": "Step 4: Save & Download"}),
-    (None, {"label": "Audio File"}),
-    (None, {"label": "Original Transcription"}),
-    (None, {"label": "Corrected Transcription"}),
-    (None, {"label": "Age"}),
-    (None, {"label": "Native Paiwan Speaker?"}),
-    (None, {"value": "Save Correction"}),
-    (None, {"label": "Save Status"}),
-    (None, {"value": "Download ZIP File"}),
-    ("", {"value": ""}),
-)
-def toggle_language(switch: bool):
-    """Return gr.update objects in correct order for all UI components."""
-    labels = zh_TW_labels if switch else en_labels
-    updates = []
-    for comp_text, kwargs in labels:
-        # Markdown components expect value update; others use kwargs
-        if comp_text is not None and "value" not in kwargs:
-            kwargs["value"] = comp_text
-        updates.append(gr.update(**kwargs))
-    return tuple(updates)
-# ──────────────────────────────────────────────────────────────────────────────
 # Interface
-# ──────────────────────────────────────────────────────────────────────────────
 with gr.Blocks() as demo:
     lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
-    # Component stubs (order matters!)
     title = gr.Markdown()
     step1 = gr.Markdown()
-    step2 = gr.Markdown()
-    step3 = gr.Markdown()
-    step4 = gr.Markdown()
-    audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
-    original_text = gr.Textbox(interactive=False, lines=5)
-    corrected_text = gr.Textbox(interactive=True, lines=5)
-    age_input = gr.Slider(minimum=0, maximum=100, step=1, value=25)
-    native_speaker_input = gr.Checkbox(value=True)
-    save_button = gr.Button()
-    save_status = gr.Textbox(interactive=False)
-    download_button = gr.Button()
-    status_msg = gr.Markdown(visible=False)
-    components = [
-        title, step1, step2, step3, step4,
-        audio_input, original_text, corrected_text,
-        age_input, native_speaker_input,
-        save_button, save_status, download_button,
-        status_msg,
-    ]
-    # Initialise text in English
-    initial_updates = toggle_language(False)
-    for comp, upd in zip(components, initial_updates):
-        comp.update(**upd)
-    # Language toggle
-    lang_switch.change(toggle_language, inputs=lang_switch, outputs=components)
-    # Auto‑transcribe when audio added
-    audio_input.change(show_processing, inputs=audio_input, outputs=status_msg)
     audio_input.change(
-        transcribe_both_and_clear,
         inputs=audio_input,
-        outputs=[original_text, corrected_text, status_msg],
     )
-    # Save & download actions
     save_button.click(
         store_correction,
         inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
-        outputs=save_status,
     )
     download_button.click(
         prepare_download,
         inputs=[audio_input, original_text, corrected_text],
-        outputs=download_output,
     )
 demo.launch()

 import tempfile
 import uuid
+# LOCAL INITIALIZATION - ONLY USE ON YOUR OWN DEVICE
+'''
 os.chdir(os.path.dirname(os.path.abspath(__file__)))
 cred = credentials.Certificate("serviceAccountKey.json")
+'''
 # Deployed Initialization
+firebase_config = json.loads(os.environ.get('firebase_creds'))
 cred = credentials.Certificate(firebase_config)
 firebase_admin.initialize_app(cred, {
 processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
 model = AutoModelForCTC.from_pretrained(MODEL_NAME)
def transcribe(audio_file):
    """Run speech recognition on *audio_file* and return the decoded text.

    The audio is loaded at 16000 Hz, passed through the module-level
    ``processor`` and ``model``, decoded greedily (argmax over the logits),
    and every "[UNK]" marker is stripped from the result.

    Any exception is caught and reported as an error-message string, so the
    caller always receives a ``str`` and never an exception.
    """
    target_rate = 16000
    try:
        waveform, _ = librosa.load(audio_file, sr=target_rate)
        features = processor(waveform, sampling_rate=target_rate, return_tensors="pt")
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(features.input_values).logits
        best_ids = torch.argmax(logits, dim=-1)
        decoded = processor.batch_decode(best_ids)
        return decoded[0].replace("[UNK]", "")
    except Exception as e:
        return f"處理文件錯誤: {e}"
def transcribe_with_status(audio_file):
    """Transcribe *audio_file* and return values for the three UI outputs.

    Returns a 3-tuple ``(original_text, corrected_text, status)``. The same
    transcription is returned twice so that the editable "corrected" box
    starts out as a copy of the read-only "original" box.

    When *audio_file* is ``None`` (nothing uploaded, or the audio was
    cleared) both texts are empty and the status asks the user to upload
    audio first.
    """
    if audio_file is None:
        return "", "", "請先上傳音訊 (Please upload audio first)"
    # NOTE: a plain function can only report the final state; an intermediate
    # "processing" message would have to be emitted by a separate event (or a
    # generator), so no unused placeholder status is kept here.
    transcription = transcribe(audio_file)
    return transcription, transcription, "完成！(Done!)"
def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
    """Persist a user's correction, and the audio if present, to Firebase.

    If *audio_file* names an existing file, its duration and size are
    recorded, the file is uploaded to the storage ``bucket`` under
    ``audio/pai/<uuid>.wav`` and a signed URL valid for one hour is stored
    with the record. The record is then added to the
    ``paiwan_transcriptions`` collection.

    Returns a human-readable status string; any exception is caught and
    reported in that string rather than raised.
    """
    try:
        metadata = {}
        signed_url = None

        if audio_file and os.path.exists(audio_file):
            samples, sample_rate = librosa.load(audio_file, sr=44100)
            metadata = {
                'duration': librosa.get_duration(y=samples, sr=sample_rate),
                'file_size': os.path.getsize(audio_file),
            }

            # A random UUID keeps uploads from overwriting each other.
            blob = bucket.blob(f"audio/pai/{uuid.uuid4()}.wav")
            blob.upload_from_filename(audio_file)
            # Signed download URL valid for 1 hour (adjust expiration as needed).
            signed_url = blob.generate_signed_url(expiration=timedelta(hours=1))

        transcription_info = {
            'original_text': original_transcription,
            'corrected_text': corrected_transcription,
            'language': 'pai',
        }
        audio_data = {
            'audio_metadata': metadata,
            'audio_file_url': signed_url,
        }
        user_info = {
            'native_paiwan_speaker': native_speaker,
            'age': age,
        }
        record = {
            'transcription_info': transcription_info,
            'audio_data': audio_data,
            'user_info': user_info,
            'timestamp': datetime.now().isoformat(),
            'model_name': MODEL_NAME,
        }

        # One collection per language.
        db.collection('paiwan_transcriptions').add(record)
        return "校正保存成功! (Correction saved successfully!)"
    except Exception as e:
        return f"保存失败: {e} (Error saving correction: {e})"
def prepare_download(audio_file, original_transcription, corrected_transcription):
    """Bundle the audio and both transcriptions into a ZIP file.

    Args:
        audio_file: Path to the uploaded/recorded audio, or ``None``/empty
            when no audio is available.
        original_transcription: Text produced by the model (``None`` is
            treated as an empty string).
        corrected_transcription: Text edited by the user (``None`` is
            treated as an empty string).

    Returns:
        Path of a newly created temporary ``.zip`` file, or ``None`` when
        there is no audio file. The archive contains ``audio.wav`` (only if
        the audio path exists), ``original_transcription.txt`` and
        ``corrected_transcription.txt`` (both UTF-8).
    """
    if not audio_file:
        return None

    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_zip.close()  # only the reserved name is needed; ZipFile reopens it

    with zipfile.ZipFile(tmp_zip.name, "w") as zf:
        if os.path.exists(audio_file):
            zf.write(audio_file, arcname="audio.wav")
        # Write the texts straight into the archive. Going through fixed-name
        # scratch files in the working directory would collide between
        # concurrent requests and fail when the cwd is not writable.
        zf.writestr("original_transcription.txt", original_transcription or "")
        zf.writestr("corrected_transcription.txt", corrected_transcription or "")

    return tmp_zip.name
# UI texts, in the order the interface unpacks them:
# title, step 1-4 headings, audio label, transcribe button, original /
# corrected textbox labels, age label, native-speaker label, save button,
# save-status label, download button, default status message.
_UI_TEXT_ZH_TW = (
    "排灣語自動語音識別逐字稿與修正系統",
    "步驟 1：音訊上傳與逐字稿",
    "步驟 2：審閱與編輯逐字稿",
    "步驟 3：使用者資訊",
    "步驟 4：儲存與下載",
    "音訊輸入",
    "產生逐字稿",
    "原始逐字稿",
    "更正逐字稿",
    "年齡",
    "母語排灣語使用者?",
    "儲存",
    "儲存狀態",
    "下載 ZIP 檔案",
    "處理中，請稍候…",
)
_UI_TEXT_EN = (
    "Paiwan ASR Transcription & Correction System",
    "Step 1: Audio Upload & Transcription",
    "Step 2: Review & Edit Transcription",
    "Step 3: User Information",
    "Step 4: Save & Download",
    "Audio Input",
    "Generate Transcript",
    "Original Transcription",
    "Corrected Transcription",
    "Age",
    "Native Paiwan Speaker?",
    "Save",
    "Save Status",
    "Download ZIP File",
    "Processing, please wait…",
)


def toggle_language(switch):
    """Return the 15 UI strings for the selected language.

    A truthy *switch* selects Traditional Chinese, a falsy one English. The
    result is a tuple of plain strings in a fixed order (see the comment
    above the tables).
    """
    return _UI_TEXT_ZH_TW if switch else _UI_TEXT_EN
 # Interface
 with gr.Blocks() as demo:
     lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")
     title = gr.Markdown()
     step1 = gr.Markdown()
+    with gr.Row():
+        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
+    status_message = gr.Markdown(visible=False)
+    step2 = gr.Markdown()
+    with gr.Row():
+        transcribe_button = gr.Button()
+    original_text = gr.Textbox(interactive=False, lines=5)
+    corrected_text = gr.Textbox(interactive=True, lines=5)
+    step3 = gr.Markdown()
+    with gr.Row():
+        age_input = gr.Slider(minimum=0, maximum=100, step=1, value=25)
+        native_speaker_input = gr.Checkbox(value=True)
+    step4 = gr.Markdown()
+    with gr.Row():
+        save_button = gr.Button()
+        save_status = gr.Textbox(interactive=False)
+    with gr.Row():
+        download_button = gr.Button()
+        download_output = gr.File()
+    # Initialize UI texts according to default language (English)
+    (title_text, step1_text, step2_text, step3_text, step4_text,
+     audio_label, transcribe_label, orig_label, corr_label,
+     age_label, native_label, save_label, save_status_label,
+     download_label, default_status) = toggle_language(False)
+    title.value = title_text
+    step1.value = step1_text
+    step2.value = step2_text
+    step3.value = step3_text
+    step4.value = step4_text
+    audio_input.label = audio_label
+    transcribe_button.value = transcribe_label
+    original_text.label = orig_label
+    corrected_text.label = corr_label
+    age_input.label = age_label
+    native_speaker_input.label = native_label
+    save_button.value = save_label
+    save_status.label = save_status_label
+    download_button.value = download_label
+    status_message.value = default_status
+    # Language switcher
+    lang_switch.change(
+        toggle_language,
+        inputs=lang_switch,
+        outputs=[title, step1, step2, step3, step4, audio_input, transcribe_button,
+                 original_text, corrected_text, age_input, native_speaker_input,
+                 save_button, save_status, download_button, status_message]
+    )
+    # Automatic transcription on audio upload
     audio_input.change(
+        transcribe_with_status,
+        inputs=audio_input,
+        outputs=[original_text, corrected_text, status_message]
+    )
+    # Manual transcription button (optional)
+    transcribe_button.click(
+        transcribe_with_status,
         inputs=audio_input,
+        outputs=[original_text, corrected_text, status_message]
     )
     save_button.click(
         store_correction,
         inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
+        outputs=save_status
     )
     download_button.click(
         prepare_download,
         inputs=[audio_input, original_text, corrected_text],
+        outputs=download_output
     )
 demo.launch()