Spaces:

FormosonBankDemos
/

Amis_ASR_transcription

Sleeping

App Files Community

semakoc

hunterschep committed on Apr 10

Commit

031ac03

verified ·

1 Parent(s): daefd88

Update storage to be more robust (#10)

Browse files

- Update storage to be more robust (dc1678e598c264723d900b4f722a205ff2939ebf)

Co-authored-by: Hunter S <[email protected]>

Files changed (1) hide show

app.py +24 -11

app.py CHANGED Viewed

@@ -53,26 +53,34 @@ def transcribe_both(audio_file):
 def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
     try:
         audio_metadata = {}
-        audio_file_url = None
-        # If an audio file is provided, upload it to Firebase Storage
         if audio_file and os.path.exists(audio_file):
             audio, sr = librosa.load(audio_file, sr=44100)
             duration = librosa.get_duration(y=audio, sr=sr)
             file_size = os.path.getsize(audio_file)
             audio_metadata = {'duration': duration, 'file_size': file_size}
-            # Generate a unique identifier for the audio file
-            unique_id = str(uuid.uuid4())
             destination_path = f"audio/{lang}/{unique_id}.wav"
-            # Create a blob and upload the file
             blob = bucket.blob(destination_path)
             blob.upload_from_filename(audio_file)
-            # Generate a signed download URL valid for 1 hour (adjust expiration as needed)
-            audio_file_url = blob.generate_signed_url(expiration=timedelta(hours=1))
         combined_data = {
             'transcription_info': {
                 'original_text': original_transcription,
@@ -81,7 +89,9 @@ def store_correction(original_transcription, corrected_transcription, audio_file
             },
             'audio_data': {
                 'audio_metadata': audio_metadata,
-                'audio_file_url': audio_file_url,
             },
             'user_info': {
                 'native_amis_speaker': native_speaker,
@@ -90,8 +100,11 @@ def store_correction(original_transcription, corrected_transcription, audio_file
             'timestamp': datetime.now().isoformat(),
             'model_name': MODEL_NAME
         }
-        # Save data to a collection for that language
-        db.collection('amis_transcriptions').add(combined_data)
         return "校正保存成功! (Correction saved successfully!)"
     except Exception as e:
         return f"保存失败: {e} (Error saving correction: {e})"

 def store_correction(original_transcription, corrected_transcription, audio_file, age, native_speaker):
     try:
         audio_metadata = {}
+        audio_ref = None  # This will store our storage reference
+        # Generate a unique identifier that will be shared between storage and Firestore
+        unique_id = str(uuid.uuid4())
         if audio_file and os.path.exists(audio_file):
+            # Process audio metadata
             audio, sr = librosa.load(audio_file, sr=44100)
             duration = librosa.get_duration(y=audio, sr=sr)
             file_size = os.path.getsize(audio_file)
             audio_metadata = {'duration': duration, 'file_size': file_size}
+            # Create storage path using UUID
             destination_path = f"audio/{lang}/{unique_id}.wav"
+            # Upload to Firebase Storage
             blob = bucket.blob(destination_path)
             blob.upload_from_filename(audio_file)
+            # Get permanent reference to the file (not temporary URL)
+            audio_ref = destination_path
+            # Optional: Store both the permanent path and temporary URL
+            audio_file_url = blob.generate_signed_url(timedelta(hours=1))
+        else:
+            audio_file_url = None
+        # Create document data with explicit audio reference
         combined_data = {
             'transcription_info': {
                 'original_text': original_transcription,
             },
             'audio_data': {
                 'audio_metadata': audio_metadata,
+                'storage_path': audio_ref,  # Permanent reference
+                'audio_url': audio_file_url,  # Temporary URL
+                'file_id': unique_id         # Explicit unique ID
             },
             'user_info': {
                 'native_amis_speaker': native_speaker,
             'timestamp': datetime.now().isoformat(),
             'model_name': MODEL_NAME
         }
+        # Create document with UUID as ID instead of auto-generated ID
+        doc_ref = db.collection('amis_transcriptions').document(unique_id)
+        doc_ref.set(combined_data)
         return "校正保存成功! (Correction saved successfully!)"
     except Exception as e:
         return f"保存失败: {e} (Error saving correction: {e})"