File size: 7,590 Bytes
5543320
 
 
 
 
 
 
 
 
2a0048a
a090817
5543320
763f8fd
658ecdc
40d6177
5543320
 
 
 
 
 
 
 
 
 
 
 
40d6177
5543320
 
 
 
 
 
40d6177
5543320
 
2a0048a
5543320
2a0048a
 
5543320
a090817
5543320
2a0048a
 
 
 
 
a090817
 
5543320
 
2a0048a
 
 
b422101
a090817
 
 
 
 
5543320
a090817
40d6177
5543320
40d6177
5543320
 
 
 
40d6177
a090817
 
 
5543320
 
40d6177
5543320
 
 
 
 
40d6177
5543320
 
 
 
 
a090817
5543320
40d6177
2a0048a
40d6177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a0048a
 
40d6177
a090817
b422101
40d6177
b422101
40d6177
 
 
 
 
 
a090817
 
b422101
40d6177
b422101
40d6177
 
 
 
 
 
 
 
 
 
 
 
a090817
b422101
40d6177
b422101
40d6177
 
 
 
 
 
 
 
 
 
 
a090817
b422101
40d6177
a090817
40d6177
 
 
 
 
 
a090817
40d6177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a090817
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import json
import os
import tempfile
import time
import zipfile
from datetime import datetime

import firebase_admin
import gradio as gr
import librosa
import torch
from firebase_admin import credentials, firestore
from transformers import Wav2Vec2Processor, AutoModelForCTC

# Initialize Firebase.
# The service-account key is read from the `firebase_creds` environment
# variable as JSON text (not a file path) and parsed into a dict.
_firebase_creds_json = os.environ.get('firebase_creds')
if not _firebase_creds_json:
    # Fail with an actionable message instead of the opaque TypeError that
    # json.loads(None) raises when the variable is missing.
    raise RuntimeError(
        "Environment variable 'firebase_creds' is not set; it must contain "
        "the Firebase service-account key as a JSON string."
    )
firebase_config = json.loads(_firebase_creds_json)
cred = credentials.Certificate(firebase_config)  # built from the parsed key dict
firebase_admin.initialize_app(cred)
db = firestore.client()

# Load the ASR model and processor.
# Both are created once at import time from the same checkpoint name and are
# reused by transcribe() for every request; MODEL_NAME is also stored with
# each saved correction.
MODEL_NAME = "eleferrand/xlsr53_Amis"
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = AutoModelForCTC.from_pretrained(MODEL_NAME)

def transcribe(audio_file):
    """Run the ASR model on one audio file and return the decoded text.

    The audio is loaded with librosa at 16 kHz, fed through the module-level
    ``processor`` and ``model``, and decoded by taking the arg-max token at
    each step. Every "[UNK]" marker is removed from the decoded string.

    Args:
        audio_file: Audio file location handed to ``librosa.load``.

    Returns:
        The transcription string. If any step raises, an error message
        starting with "处理文件错误: " is returned instead of raising.
    """
    try:
        waveform, _ = librosa.load(audio_file, sr=16000)
        features = processor(waveform, sampling_rate=16000, return_tensors="pt")

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            output = model(features.input_values)
        token_ids = torch.argmax(output.logits, dim=-1)

        decoded = processor.batch_decode(token_ids)
        return decoded[0].replace("[UNK]", "")
    except Exception as err:
        return f"处理文件错误: {err}"

def transcribe_both(audio_file):
    """Transcribe an audio file and report how long the transcription took.

    The text is returned twice so that one call can fill both the read-only
    "original" textbox and the editable "corrected" textbox.

    Args:
        audio_file: Audio file path, forwarded unchanged to ``transcribe``.

    Returns:
        A tuple ``(original_text, corrected_text, processing_time)``. Both
        texts are the same string; ``processing_time`` is the elapsed time in
        seconds as a float.
    """
    # perf_counter() is monotonic, so the measurement cannot be skewed (or go
    # negative) if the wall clock is adjusted mid-run, unlike datetime.now().
    started = time.perf_counter()
    transcription = transcribe(audio_file)
    processing_time = time.perf_counter() - started
    return transcription, transcription, processing_time

def store_correction(original_transcription, corrected_transcription, audio_file, processing_time, age, native_speaker):
    """Save one transcription/correction pair to the 'transcriptions' collection.

    When ``audio_file`` is truthy and exists on disk, the audio is loaded at
    16 kHz to record its duration together with the file size in bytes;
    otherwise the audio metadata is left empty.

    Args:
        original_transcription: Text produced by the model.
        corrected_transcription: Text after the user's edits.
        audio_file: Path of the audio the texts belong to; may be falsy.
        processing_time: Transcription time stored as-is with the record.
        age: Age reported by the user.
        native_speaker: Whether the user reports being a native Amis speaker.

    Returns:
        A status message: the success text when the record was added, or a
        message starting with "保存失败: " that embeds the exception otherwise.
    """
    try:
        if audio_file and os.path.exists(audio_file):
            samples, sample_rate = librosa.load(audio_file, sr=16000)
            audio_metadata = {
                'duration': librosa.get_duration(y=samples, sr=sample_rate),
                'file_size': os.path.getsize(audio_file),
            }
        else:
            audio_metadata = {}

        record = {
            'original_text': original_transcription,
            'corrected_text': corrected_transcription,
            'timestamp': datetime.now().isoformat(),
            'processing_time': processing_time,
            'audio_metadata': audio_metadata,
            'audio_url': None,
            'model_name': MODEL_NAME,
            'user_info': {'native_amis_speaker': native_speaker, 'age': age},
        }
        db.collection('transcriptions').add(record)
    except Exception as err:
        return f"保存失败: {err} (Error saving correction: {err})"
    return "校正保存成功! (Correction saved successfully!)"

def prepare_download(audio_file, original_transcription, corrected_transcription):
    """Bundle the audio and both transcriptions into a ZIP file for download.

    Args:
        audio_file: Path of the audio file, or None when no audio is loaded.
        original_transcription: Model output text; None is written as an
            empty file.
        corrected_transcription: User-edited text; None is written as an
            empty file.

    Returns:
        Path of the created ZIP file, or None when ``audio_file`` is None.
        The archive contains "original_transcription.txt" and
        "corrected_transcription.txt" (UTF-8 encoded), plus "audio.wav" when
        ``audio_file`` exists on disk.

    Raises:
        Any exception raised while writing the archive; the partially written
        temporary ZIP is removed before the exception propagates.
    """
    if audio_file is None:
        return None

    # delete=False: the archive has to outlive this call so it can be served.
    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_zip.close()
    try:
        with zipfile.ZipFile(tmp_zip.name, "w") as zf:
            if os.path.exists(audio_file):
                # NOTE(review): the entry is always named "audio.wav", whatever
                # the source file's extension is - confirm this is intended.
                zf.write(audio_file, arcname="audio.wav")

            # writestr() puts the text straight into the archive (str is
            # encoded as UTF-8). The previous version staged fixed-name
            # scratch files in the working directory, which concurrent
            # requests could overwrite and which leaked on errors.
            zf.writestr("original_transcription.txt", original_transcription or "")
            zf.writestr("corrected_transcription.txt", corrected_transcription or "")
    except Exception:
        # Do not leave a half-written archive behind.
        try:
            os.remove(tmp_zip.name)
        except OSError:
            pass
        raise
    return tmp_zip.name

# UI layout.
# One centered column holding a title and four numbered step sections. The
# inline CSS styles the classes used below: "container", "section" and
# "button-row" are attached through elem_classes, "header" through the <h1>
# tag in the title markdown.
with gr.Blocks(css="""
    .container { 
        max-width: 800px; 
        margin: auto; 
        padding: 20px; 
        font-family: Arial, sans-serif;
    }
    .header { 
        text-align: center; 
        margin-bottom: 30px; 
    }
    .section { 
        margin-bottom: 30px; 
        padding: 15px; 
        border: 1px solid #ddd; 
        border-radius: 8px; 
        background-color: #f9f9f9;
    }
    .section h3 {
        margin-top: 0;
        margin-bottom: 15px;
        text-align: center;
    }
    .button-row {
        display: flex;
        justify-content: center;
        gap: 10px;
        flex-wrap: wrap;
    }
    @media (max-width: 600px) {
      .gradio-row { 
          flex-direction: column; 
      }
    }
""") as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("<h1 class='header'>阿美語轉錄與修正系統 (ASR Correction System)</h1>")

        # Step 1: audio input (upload or microphone) and the transcribe trigger.
        with gr.Column(elem_classes="section"):
            gr.Markdown("### 步驟 1:音訊上傳與轉錄(Audio Upload & Transcription)")
            with gr.Row():
                # type="filepath": callbacks receive a path string, which is
                # what transcribe/store_correction/prepare_download expect.
                audio_input = gr.Audio(
                    sources=["upload", "microphone"],
                    type="filepath",
                    label="音訊輸入 (Audio Input)"
                )
                transcribe_button = gr.Button("轉錄音訊 (Transcribe Audio)", variant="primary")
            # Carries the processing time returned by transcribe_both until
            # the user saves a correction.
            proc_time_state = gr.State()

        # Step 2: read-only model output next to an editable copy.
        with gr.Column(elem_classes="section"):
            gr.Markdown("### 步驟 2:審閱與編輯轉錄 (Review & Edit Transcription)")
            with gr.Row():
                original_text = gr.Textbox(
                    label="原始轉錄 (Original Transcription)",
                    interactive=False,
                    lines=5,
                    placeholder="謄本將在此出現... (Transcription will appear here...)"
                )
                corrected_text = gr.Textbox(
                    label="更正轉錄 (Corrected Transcription)",
                    interactive=True,
                    lines=5,
                    placeholder="在此編輯轉錄... (Edit transcription here...)"
                )

        # Step 3: user details stored with each saved correction.
        with gr.Column(elem_classes="section"):
            gr.Markdown("### 步驟 3:使用者資訊 (User Information)")
            with gr.Row():
                age_input = gr.Slider(
                    minimum=0,
                    maximum=100,
                    step=1,
                    label="年齡 (Age)",
                    value=25
                )
                native_speaker_input = gr.Checkbox(
                    label="以阿美語為母語? (Native Amis Speaker?)",
                    value=True
                )

        # Step 4: save the correction and/or download everything as a ZIP.
        with gr.Column(elem_classes="section"):
            gr.Markdown("### 步驟 4:儲存與下載 (Save & Download)")
            with gr.Row(elem_classes="button-row"):
                save_button = gr.Button("儲存更正 (Save Correction)", variant="primary")
                save_status = gr.Textbox(
                    label="儲存狀態 (Save Status)",
                    interactive=False,
                    placeholder="狀態訊息會出現在這裡... (Status messages will appear here...)"
                )
            with gr.Row(elem_classes="button-row"):
                download_button = gr.Button("下載 ZIP 檔案 (Download ZIP)")
                download_output = gr.File(label="下載 ZIP 檔案  (Download ZIP)")

        # Event wiring.
        # Transcribing fills both textboxes with the same text and stores the
        # processing time in proc_time_state.
        transcribe_button.click(
            fn=transcribe_both,
            inputs=audio_input,
            outputs=[original_text, corrected_text, proc_time_state]
        )

        # The inputs list is positional and must match store_correction's
        # parameter order.
        save_button.click(
            fn=store_correction,
            inputs=[original_text, corrected_text, audio_input, proc_time_state, age_input, native_speaker_input],
            outputs=save_status
        )

        # prepare_download returns a ZIP path (or None), shown in the File output.
        download_button.click(
            fn=prepare_download,
            inputs=[audio_input, original_text, corrected_text],
            outputs=download_output
        )

# Start the Gradio server for the `demo` interface. share=True asks Gradio to
# also create a temporary public share link. This runs at import time because
# there is no `if __name__ == "__main__"` guard.
demo.launch(share=True)