semakoc committed on
Commit
0f2e342
·
verified ·
1 Parent(s): 4b7868e

Create app

Browse files
Files changed (1) hide show
  1. app +183 -0
app ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import librosa
4
+ from transformers import Wav2Vec2Processor, AutoModelForCTC
5
+ import zipfile
6
+ import os
7
+ import firebase_admin
8
+ from firebase_admin import credentials, firestore
9
+ from datetime import datetime
10
+ import json
11
+ import tempfile
12
+
13
# Initialize Firebase
# The service-account JSON is read from the `firebase_creds` environment
# variable. Fail with an explicit message when it is missing or empty instead
# of letting json.loads() raise an opaque TypeError / JSONDecodeError.
_firebase_creds_raw = os.environ.get('firebase_creds')
if not _firebase_creds_raw:
    raise RuntimeError(
        "Environment variable 'firebase_creds' is not set; it must contain "
        "the Firebase service-account credentials as a JSON string."
    )
firebase_config = json.loads(_firebase_creds_raw)
cred = credentials.Certificate(firebase_config)
firebase_admin.initialize_app(cred)
db = firestore.client()

# Load the ASR model and processor (both come from the same checkpoint name)
MODEL_NAME = "eleferrand/xlsr53_Amis"
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = AutoModelForCTC.from_pretrained(MODEL_NAME)
23
+
24
def transcribe(audio_file):
    """Transcribe an audio file with the module-level CTC model.

    Args:
        audio_file: Path of the audio file to transcribe. It is loaded at a
            16 kHz sampling rate before being passed to the processor.

    Returns:
        The decoded transcription with every "[UNK]" token removed. When no
        file is given, or when loading/inference fails, a message starting
        with "處理文件錯誤:" is returned instead of raising.
    """
    # Guard first: without a path there is nothing to load, and passing None
    # on to librosa would only surface an unhelpful low-level error message.
    if not audio_file:
        return "處理文件錯誤: 未提供音訊檔案 (no audio file provided)"

    try:
        audio, _ = librosa.load(audio_file, sr=16000)
        input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_values).logits
        # Greedy decoding: take the highest-scoring token id at each frame.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription.replace("[UNK]", "")
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"處理文件錯誤: {e}"
36
+
37
def transcribe_both(audio_file):
    """Transcribe once and return the text twice together with the elapsed time.

    Args:
        audio_file: Path of the audio file, forwarded unchanged to
            ``transcribe``.

    Returns:
        A 3-tuple ``(transcription, transcription, processing_time)``. The
        same text appears twice; ``processing_time`` is the number of seconds
        the ``transcribe`` call took, as a float.
    """
    # Local import keeps this block self-contained. perf_counter() is
    # monotonic, so the measured interval cannot be distorted by system clock
    # adjustments the way a datetime.now() difference can.
    import time

    started = time.perf_counter()
    transcription = transcribe(audio_file)
    processing_time = time.perf_counter() - started
    return transcription, transcription, processing_time
42
+
43
def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
    """Save a transcription correction to the 'transcriptions' collection.

    Args:
        original_transcription: Stored under 'original_text'.
        corrected_transcription: Stored under 'corrected_text'.
        audio_file: Path of the audio file. When it is set and exists on
            disk, its duration and size in bytes are stored under
            'audio_metadata'; otherwise that field is an empty dict.
        age: Stored under 'user_info' -> 'age'.
        native_speaker: Stored under 'user_info' -> 'native_amis_speaker'.

    Returns:
        A success message, or a failure message that embeds the exception
        text when anything in the process raises.
    """
    try:
        metadata = {}
        if audio_file and os.path.exists(audio_file):
            samples, sample_rate = librosa.load(audio_file, sr=16000)
            metadata = {
                'duration': librosa.get_duration(y=samples, sr=sample_rate),
                'file_size': os.path.getsize(audio_file),
            }

        record = {
            'original_text': original_transcription,
            'corrected_text': corrected_transcription,
            'timestamp': datetime.now().isoformat(),
            'audio_metadata': metadata,
            'model_name': MODEL_NAME,
            'user_info': {
                'native_amis_speaker': native_speaker,
                'age': age,
            },
        }
        db.collection('transcriptions').add(record)
    except Exception as e:
        # UI boundary: the failure is reported as status text, not raised.
        return f"保存失败: {e} (Error saving correction: {e})"
    return "校正保存成功! (Correction saved successfully!)"
67
+
68
def prepare_download(audio_file, original_transcription, corrected_transcription):
    """Bundle the audio and both transcriptions into a temporary ZIP archive.

    Args:
        audio_file: Path of the audio file, or None.
        original_transcription: Text written to "original_transcription.txt".
            None is treated as an empty string.
        corrected_transcription: Text written to
            "corrected_transcription.txt". None is treated as an empty string.

    Returns:
        Path of the created ZIP file, or None when ``audio_file`` is None.
        The archive is created with ``delete=False``, so it is not removed
        automatically.
    """
    if audio_file is None:
        return None

    # Reserve a unique path, then close the handle right away so ZipFile can
    # reopen the file by name.
    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_zip.close()

    with zipfile.ZipFile(tmp_zip.name, "w") as zf:
        if os.path.exists(audio_file):
            # The audio is always stored under this fixed member name.
            zf.write(audio_file, arcname="audio.wav")

        # writestr() encodes str data as UTF-8 and writes it straight into the
        # archive. This avoids fixed-name scratch files in the current working
        # directory, which would collide between concurrent requests and fail
        # when that directory is not writable.
        zf.writestr("original_transcription.txt", original_transcription or "")
        zf.writestr("corrected_transcription.txt", corrected_transcription or "")
    return tmp_zip.name
90
+
91
def toggle_language(switch):
    """Return the UI strings for the selected language.

    Args:
        switch: Truthy selects Traditional Chinese, falsy selects English.

    Returns:
        A 14-tuple of strings in this fixed order: title, the four step
        headings, audio input label, transcribe button text, original and
        corrected transcription labels, age label, native-speaker label,
        save button text, save status label, download button text.
    """
    traditional_chinese = (
        "阿美語轉錄與修正系統",
        "步驟 1:音訊上傳與轉錄",
        "步驟 2:審閱與編輯轉錄",
        "步驟 3:使用者資訊",
        "步驟 4:儲存與下載",
        "音訊輸入", "轉錄音訊",
        "原始轉錄", "更正轉錄",
        "年齡", "以阿美語為母語?",
        "儲存更正", "儲存狀態",
        "下載 ZIP 檔案",
    )
    english = (
        "Amis ASR Transcription & Correction System",
        "Step 1: Audio Upload & Transcription",
        "Step 2: Review & Edit Transcription",
        "Step 3: User Information",
        "Step 4: Save & Download",
        "Audio Input", "Transcribe Audio",
        "Original Transcription", "Corrected Transcription",
        "Age", "Native Amis Speaker?",
        "Save Correction", "Save Status",
        "Download ZIP File",
    )
    return traditional_chinese if switch else english
119
+
120
# Interface
def _language_updates(switch):
    """Turn the strings from toggle_language() into per-component updates.

    Markdown headings and buttons display their text as the component value,
    so they receive ``value=`` updates. Every other component only has its
    label changed; sending the raw string as the value would overwrite the
    transcription text boxes and hand a label string to the audio, slider
    and checkbox inputs.
    """
    (title_txt, step1_txt, step2_txt, step3_txt, step4_txt,
     audio_lbl, transcribe_txt,
     original_lbl, corrected_lbl,
     age_lbl, native_lbl,
     save_txt, status_lbl,
     download_txt) = toggle_language(switch)
    # Order must match the `outputs` list of lang_switch.change below.
    return (
        gr.update(value=title_txt),
        gr.update(value=step1_txt),
        gr.update(value=step2_txt),
        gr.update(value=step3_txt),
        gr.update(value=step4_txt),
        gr.update(label=audio_lbl),
        gr.update(value=transcribe_txt),
        gr.update(label=original_lbl),
        gr.update(label=corrected_lbl),
        gr.update(label=age_lbl),
        gr.update(label=native_lbl),
        gr.update(value=save_txt),
        gr.update(label=status_lbl),
        gr.update(value=download_txt),
    )


def _transcribe_for_ui(audio_path):
    """Forward only the two text results of transcribe_both() to the UI.

    transcribe_both() also returns the processing time as a third value,
    which has no matching output component on the transcribe button.
    """
    return transcribe_both(audio_path)[:2]


with gr.Blocks() as demo:
    lang_switch = gr.Checkbox(label="切換到繁體中文 (Switch to Traditional Chinese)")

    title = gr.Markdown("Amis ASR Transcription & Correction System")
    step1 = gr.Markdown("Step 1: Audio Upload & Transcription")

    # Audio input and playback
    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input")

    step2 = gr.Markdown("Step 2: Review & Edit Transcription")
    # Transcribe button in its own row, below the audio input
    with gr.Row():
        transcribe_button = gr.Button("Transcribe Audio")

    original_text = gr.Textbox(label="Original Transcription", interactive=False, lines=5)
    corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True, lines=5)

    step3 = gr.Markdown("Step 3: User Information")

    with gr.Row():
        age_input = gr.Slider(minimum=0, maximum=100, step=1, label="Age", value=25)
        native_speaker_input = gr.Checkbox(label="Native Amis Speaker?", value=True)

    step4 = gr.Markdown("Step 4: Save & Download")

    with gr.Row():
        save_button = gr.Button("Save Correction")
        save_status = gr.Textbox(label="Save Status", interactive=False)

    with gr.Row():
        download_button = gr.Button("Download ZIP File")
        download_output = gr.File()

    # Toggle language dynamically
    lang_switch.change(
        _language_updates,
        inputs=lang_switch,
        outputs=[title, step1, step2, step3, step4, audio_input, transcribe_button,
                 original_text, corrected_text, age_input, native_speaker_input,
                 save_button, save_status, download_button]
    )

    # The same transcription fills both boxes; the second one is editable.
    transcribe_button.click(
        _transcribe_for_ui,
        inputs=audio_input,
        outputs=[original_text, corrected_text]
    )

    save_button.click(
        store_correction,
        inputs=[original_text, corrected_text, audio_input, age_input, native_speaker_input],
        outputs=save_status
    )

    download_button.click(
        prepare_download,
        inputs=[audio_input, original_text, corrected_text],
        outputs=download_output
    )

demo.launch()