import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2Processor, AutoModelForCTC
import zipfile
import os
import firebase_admin
from firebase_admin import credentials, firestore
from datetime import datetime
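
# Note (assumption, not part of the original file): on a hosted Space the Firebase
# service-account key is usually supplied as a secret rather than committed to the
# repo. One common pattern is to write the secret to disk before the Certificate()
# call below, e.g. with a hypothetical FIREBASE_CREDENTIALS environment variable:
#
#     with open('firebase_credentials.json', 'w') as f:
#         f.write(os.environ['FIREBASE_CREDENTIALS'])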

# Initialize Firebase
cred = credentials.Certificate('firebase_credentials.json')  # Your Firebase JSON key file
firebase_admin.initialize_app(cred)
db = firestore.client()

# Load the ASR model and processor
MODEL_NAME = "eleferrand/xlsr53_Amis"
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = AutoModelForCTC.from_pretrained(MODEL_NAME)
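
# Optional (assumption, not in the original code): put the model in inference mode
# so training-only layers such as dropout are disabled during decoding.
model.eval()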


def transcribe(audio_file):
    """
    Transcribes the audio file using the loaded ASR model.
    Returns the transcription string.
    """
    try:
        # Load and resample the audio to 16 kHz
        audio, rate = librosa.load(audio_file, sr=16000)

        # Prepare the input tensor for the model
        input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values

        # Get model predictions (logits) and decode to text
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]

        return transcription.replace("[UNK]", "")
    except Exception as e:
        return f"Error processing file: {e}"


def transcribe_both(audio_file):
    """
    Calls the transcribe function and returns the transcription
    for both the original (read-only) and the corrected (editable) textboxes.
    """
    transcription = transcribe(audio_file)
    return transcription, transcription


def store_correction(original_transcription, corrected_transcription):
    """
    Stores the original and corrected transcriptions in Firestore.
    """
    try:
        correction_data = {
            'original_text': original_transcription,
            'corrected_text': corrected_transcription,
            'timestamp': datetime.now().isoformat()
        }
        db.collection('transcription_corrections').add(correction_data)
        return "✅ Correction saved successfully!"
    except Exception as e:
        return f"⚠️ Error saving correction: {e}"


def prepare_download(audio_file, original_transcription, corrected_transcription):
    """
    Prepares a ZIP file containing:
      - the uploaded audio file (saved as audio.wav)
      - a text file with the original transcription
      - a text file with the corrected transcription
    Returns the path to the ZIP file, or None if no audio was provided.
    """
    if audio_file is None:
        return None

    zip_filename = "results.zip"
    with zipfile.ZipFile(zip_filename, "w") as zf:
        # Add the audio file (stored as audio.wav inside the zip)
        if os.path.exists(audio_file):
            zf.write(audio_file, arcname="audio.wav")
        else:
            print("Audio file not found:", audio_file)

        # Create and add the original transcription file
        orig_txt = "original_transcription.txt"
        with open(orig_txt, "w", encoding="utf-8") as f:
            f.write(original_transcription)
        zf.write(orig_txt, arcname="original_transcription.txt")
        os.remove(orig_txt)

        # Create and add the corrected transcription file
        corr_txt = "corrected_transcription.txt"
        with open(corr_txt, "w", encoding="utf-8") as f:
            f.write(corrected_transcription)
        zf.write(corr_txt, arcname="corrected_transcription.txt")
        os.remove(corr_txt)

    return zip_filename
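
# Note (assumption, not part of the original app): because "results.zip" is a fixed
# name in the working directory, two users downloading at the same time could
# overwrite each other's archive. A per-request temporary path would avoid this, e.g.:
#
#     import tempfile
#     fd, zip_filename = tempfile.mkstemp(suffix=".zip")
#     os.close(fd)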


# Build the Gradio Blocks interface
with gr.Blocks() as demo:
    gr.Markdown("# ASR Demo with Editable Transcription, Firestore Storage, and Download")

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
        transcribe_button = gr.Button("Transcribe Audio")

    with gr.Row():
        # The original transcription is displayed (non-editable)
        original_text = gr.Textbox(label="Original Transcription", interactive=False)
        # The corrected transcription is pre-filled with the original, but remains editable.
        corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True)

    save_button = gr.Button("Save Correction to Database")
    save_status = gr.Textbox(label="Save Status", interactive=False)
    download_button = gr.Button("Download Results (ZIP)")
    download_output = gr.File(label="Download ZIP")

    # When the transcribe button is clicked, update both textboxes with the transcription.
    transcribe_button.click(
        fn=transcribe_both,
        inputs=audio_input,
        outputs=[original_text, corrected_text]
    )

    # When the "Save Correction" button is clicked, store the corrected transcription in Firestore.
    save_button.click(
        fn=store_correction,
        inputs=[original_text, corrected_text],
        outputs=save_status
    )

    # When the download button is clicked, package the audio file and both transcriptions into a zip.
    download_button.click(
        fn=prepare_download,
        inputs=[audio_input, original_text, corrected_text],
        outputs=download_output
    )

# Launch the demo
demo.launch(share=True)
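
# Packages used above (assumption: unpinned names for a requirements.txt):
# gradio, torch, librosa, transformers, firebase-admin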