NLPV committed on
Commit 9a34dcd · verified · 1 Parent(s): b589aed

Update app.py

Files changed (1)
  1. app.py +17 -25
app.py CHANGED
@@ -18,33 +18,29 @@ def check_directory(path):
 
 check_directory(TRANSCRIPTS_FOLDER)
 
-def transcribe_and_translate(audio_file, selected_language, model_type="base"):
+def live_transcribe_and_translate(stream, selected_language, model_type="base"):
     """
-    Transcribe audio using Whisper and translate it into English if required.
+    Transcribe live audio using Whisper and translate it into English if required.
 
-    :param audio_file: Path to the uploaded audio file
+    :param stream: Stream of live audio data
     :param selected_language: Language code for transcription
     :param model_type: Whisper model type (default is 'base')
     :return: Transcription and translation
     """
-    if not audio_file:
-        return "No audio file uploaded."
-
     try:
         # Load the Whisper model based on user selection
         model = whisper.load_model(model_type, device=DEVICE)
     except Exception as e:
         return f"Failed to load Whisper model ({model_type}): {e}"
+
+    # Prepare audio processor
+    audio_processor = whisper.audio.AudioProcessor(model, streaming=True)
 
-    try:
-        # Transcribe with the user-selected language
-        result = model.transcribe(audio_file, language=selected_language, verbose=False)
-
-        # Save the transcription with timestamps
-        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{os.path.basename(audio_file)}_transcript.txt")
-
-        translated_text = []
-        with open(transcript_file, 'w', encoding='utf-8') as text_file:
+    translated_text = []
+    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, 'live_transcript.txt')
+    with open(transcript_file, 'w', encoding='utf-8') as text_file:
+        for chunk in stream:
+            result = audio_processor.transcribe(chunk, return_timestamps=True)
             for segment in result['segments']:
                 start_time = segment['start']
                 end_time = segment['end']
@@ -54,23 +50,19 @@ def transcribe_and_translate(audio_file, selected_language, model_type="base"):
                 text_en = GoogleTranslator(source='auto', target='en').translate(text)
                 translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
-
-        # Return the transcription and translation
-        return "\n".join(translated_text) if translated_text else result['text']
-
-    except Exception as e:
-        return f"Failed to process the audio file: {e}"
+
+    return "\n".join(translated_text) if translated_text else "Live transcription completed."
 
 # Define the Gradio interface
 interface = gr.Interface(
-    fn=transcribe_and_translate,
+    fn=live_transcribe_and_translate,
     inputs=[
-        gr.Audio(type="filepath", label="Upload Audio"),
-        gr.Dropdown(label="Select Language", choices=["nl","en"], value="mai"),
+        gr.Audio(source="microphone", type="stream", streaming=True, label="Start Recording"),
+        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
     ],
     outputs="text",
-    title="Transcription and Translation"
+    title="Live Transcription and Translation"
 )
 
 if __name__ == '__main__':
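
Note for anyone trying this change locally: the stock openai-whisper package does not expose a streaming whisper.audio.AudioProcessor; its documented entry point is model.transcribe, which accepts a file path or a 16 kHz mono float32 array, and Gradio's microphone component with streaming=True delivers (sample_rate, numpy_array) chunks to the callback. A minimal sketch of the same live loop built only on those documented APIs is below; the helper name stream_transcribe and the constant BUFFER_SECONDS are illustrative assumptions, not code from this commit.

    import numpy as np
    import whisper
    from deep_translator import GoogleTranslator

    BUFFER_SECONDS = 5  # hypothetical: transcribe once this much audio has accumulated
    model = whisper.load_model("base")
    buffer = np.zeros(0, dtype=np.float32)

    def stream_transcribe(chunk, selected_language):
        # gr.Audio(streaming=True, type="numpy") yields (sample_rate, int16 ndarray) chunks
        global buffer
        sample_rate, data = chunk
        data = data.astype(np.float32) / 32768.0  # int16 PCM -> float32 in [-1, 1]
        if data.ndim > 1:
            data = data.mean(axis=1)              # downmix stereo to mono
        buffer = np.concatenate([buffer, data])
        if len(buffer) < sample_rate * BUFFER_SECONDS:
            return ""                             # keep buffering until enough audio arrives
        # model.transcribe accepts a float32 array but expects 16 kHz audio,
        # so resample first if the browser records at another rate
        result = model.transcribe(buffer, language=selected_language, verbose=False)
        buffer = np.zeros(0, dtype=np.float32)
        return GoogleTranslator(source='auto', target='en').translate(result['text'])

Wired into Gradio 3.x this would look roughly like gr.Interface(fn=stream_transcribe, inputs=[gr.Audio(source="microphone", type="numpy", streaming=True), ...], outputs="text", live=True), since streaming inputs require a live interface; Gradio 4.x replaces source="microphone" with sources=["microphone"].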