import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2Processor, AutoModelForCTC
import zipfile
import os
import firebase_admin
from firebase_admin import credentials, firestore
from datetime import datetime

# Initialize Firebase
cred = credentials.Certificate('firebase_credentials.json')  # Your Firebase JSON key file
firebase_admin.initialize_app(cred)
db = firestore.client()

# Load the ASR model and processor
MODEL_NAME = "eleferrand/xlsr53_Amis"
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = AutoModelForCTC.from_pretrained(MODEL_NAME)


def transcribe(audio_file):
    """
    Transcribes the audio file using the loaded ASR model.
    Returns the transcription string.
    """
    try:
        # Load and resample the audio to 16 kHz
        audio, _ = librosa.load(audio_file, sr=16000)
        # Prepare the input tensor for the model
        input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
        # Get model predictions (logits) and decode to text
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription.replace("[UNK]", "")
    except Exception as e:
        return f"Error processing file: {e}"


def transcribe_both(audio_file):
    """
    Calls the transcribe function and returns the transcription for both
    the original (read-only) and the corrected (editable) textboxes.
    """
    transcription = transcribe(audio_file)
    return transcription, transcription


def store_correction(original_transcription, corrected_transcription):
    """
    Stores the original and corrected transcription in Firestore.
    """
    try:
        correction_data = {
            'original_text': original_transcription,
            'corrected_text': corrected_transcription,
            'timestamp': datetime.now().isoformat()
        }
        db.collection('transcription_corrections').add(correction_data)
        return "✅ Correction saved successfully!"
    except Exception as e:
        return f"⚠️ Error saving correction: {e}"

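# Optional helper (a minimal sketch, not wired into the UI below): reads back
# recently saved corrections from the same 'transcription_corrections'
# collection, e.g. to review edits. The function name and the client-side
# sort are illustrative assumptions; sorting locally avoids needing a
# Firestore order_by/index on 'timestamp'.
def fetch_recent_corrections(limit=10):
    """
    Returns up to `limit` correction documents as dicts, newest first,
    using the ISO timestamps written by store_correction.
    """
    docs = db.collection('transcription_corrections').stream()
    rows = [doc.to_dict() for doc in docs]
    rows.sort(key=lambda r: r.get('timestamp', ''), reverse=True)
    return rows[:limit]
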
""" if audio_file is None: return None zip_filename = "results.zip" with zipfile.ZipFile(zip_filename, "w") as zf: # Add the audio file (saved as audio.wav in the zip) if os.path.exists(audio_file): zf.write(audio_file, arcname="audio.wav") else: print("Audio file not found:", audio_file) # Create and add the original transcription file orig_txt = "original_transcription.txt" with open(orig_txt, "w", encoding="utf-8") as f: f.write(original_transcription) zf.write(orig_txt, arcname="original_transcription.txt") os.remove(orig_txt) # Create and add the corrected transcription file corr_txt = "corrected_transcription.txt" with open(corr_txt, "w", encoding="utf-8") as f: f.write(corrected_transcription) zf.write(corr_txt, arcname="corrected_transcription.txt") os.remove(corr_txt) return zip_filename # Build the Gradio Blocks interface with gr.Blocks() as demo: gr.Markdown("# ASR Demo with Editable Transcription, Firestore Storage, and Download") with gr.Row(): audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio") transcribe_button = gr.Button("Transcribe Audio") with gr.Row(): # The original transcription is displayed (non-editable) original_text = gr.Textbox(label="Original Transcription", interactive=False) # The corrected transcription is pre-filled with the original, but remains editable. corrected_text = gr.Textbox(label="Corrected Transcription", interactive=True) save_button = gr.Button("Save Correction to Database") save_status = gr.Textbox(label="Save Status", interactive=False) download_button = gr.Button("Download Results (ZIP)") download_output = gr.File(label="Download ZIP") # When the transcribe button is clicked, update both textboxes with the transcription. transcribe_button.click( fn=transcribe_both, inputs=audio_input, outputs=[original_text, corrected_text] ) # When the "Save Correction" button is clicked, store the corrected transcription in Firestore. save_button.click( fn=store_correction, inputs=[original_text, corrected_text], outputs=save_status ) # When the download button is clicked, package the audio file and both transcriptions into a zip. download_button.click( fn=prepare_download, inputs=[audio_input, original_text, corrected_text], outputs=download_output ) # Launch the demo demo.launch(share=True)