NLPV committed
Commit 46e7915 · verified · 1 Parent(s): 9d2f7b5

Update BihariVernacular.py

Files changed (1)
  1. BihariVernacular.py +102 -102
BihariVernacular.py CHANGED
@@ -1,102 +1,102 @@
- # -*- coding: utf-8 -*-
- """
- Created on Fri Nov 22 14:30:42 2024
-
- @author: CentERdata
- """
-
- # -*- coding: utf-8 -*-
- """
- Created on Mon Dec 9 16:43:31 2024
-
- @author: Pradeep Kumar
- """
- import whisper
- import torch
- import os
- import gradio as gr
- from deep_translator import GoogleTranslator
-
- # Check if NVIDIA GPU is available
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
- # Directories for transcripts
- BASE_DIR = os.getcwd()
- TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
-
- # Ensure transcripts directory exists
- def check_directory(path):
-     if not os.path.exists(path):
-         os.makedirs(path)
-
- check_directory(TRANSCRIPTS_FOLDER)
-
- def transcribe_and_translate(audio_file, selected_language, model_type="base"):
-     """
-     Transcribe audio using Whisper and translate it into English if required.
-
-     :param audio_file: Path to the uploaded audio file
-     :param selected_language: Language code for transcription
-     :param model_type: Whisper model type (default is 'base')
-     :return: Transcription and translation
-     """
-     temp_audio_path = os.path.join(BASE_DIR, audio_file.name)
-
-     # Save the uploaded file to a temporary location
-     with open(temp_audio_path, "wb") as f:
-         f.write(audio_file.read())
-
-     try:
-         # Load the Whisper model based on user selection
-         model = whisper.load_model(model_type, device=DEVICE)
-     except Exception as e:
-         return f"Failed to load Whisper model ({model_type}): {e}"
-
-     try:
-         # Transcribe with the user-selected language
-         if selected_language:
-             result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)
-         else:
-             return "Language selection is required."
-
-         # Save the transcription with timestamps
-         transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{audio_file.name}_transcript.txt")
-
-         translated_text = []
-         with open(transcript_file, 'w', encoding='utf-8') as text_file:
-             for segment in result['segments']:
-                 start_time = segment['start']
-                 end_time = segment['end']
-                 text = segment['text']
-                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
-                 if selected_language in ['mai', 'mag', 'bho']:
-                     text_en = GoogleTranslator(source='auto', target='en').translate(text)
-                     translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
-                     text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
-
-         # Return the transcription and translation
-         return "\n".join(translated_text) if translated_text else result['text']
-
-     except Exception as e:
-         return f"Failed to process the audio file: {e}"
-
-     finally:
-         # Clean up temporary audio file
-         if os.path.exists(temp_audio_path):
-             os.remove(temp_audio_path)
-
- # Define the Gradio interface
- interface = gr.Interface(
-     fn=transcribe_and_translate,
-     inputs=[
-         gr.Audio(source="upload", type="file", label="Upload Audio"),
-         gr.Dropdown(label="Select Language", choices=["mai", "mag", "bho", "en"], value="mai"),
-         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
-     ],
-     outputs="text",
-     title="Maithili, Maghi, and Bhojpuri Transcription and Translation"
- )
-
- if __name__ == '__main__':
-     # Launch the Gradio interface
-     interface.launch()
+ # -*- coding: utf-8 -*-
+ """
+ Created on Fri Nov 22 14:30:42 2024
+
+ @author: Pradeep Kumar
+ """
+
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Dec 9 16:43:31 2024
+
+ @author: Pradeep Kumar
+ """
+ import whisper
+ import torch
+ import os
+ import gradio as gr
+ from deep_translator import GoogleTranslator
+
+ # Check if NVIDIA GPU is available
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Directories for transcripts
+ BASE_DIR = os.getcwd()
+ TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
+
+ # Ensure transcripts directory exists
+ def check_directory(path):
+     if not os.path.exists(path):
+         os.makedirs(path)
+
+ check_directory(TRANSCRIPTS_FOLDER)
+
+ def transcribe_and_translate(audio_file, selected_language, model_type="base"):
+     """
+     Transcribe audio using Whisper and translate it into English if required.
+
+     :param audio_file: Path to the uploaded audio file
+     :param selected_language: Language code for transcription
+     :param model_type: Whisper model type (default is 'base')
+     :return: Transcription and translation
+     """
+     temp_audio_path = os.path.join(BASE_DIR, audio_file.name)
+
+     # Save the uploaded file to a temporary location
+     with open(temp_audio_path, "wb") as f:
+         f.write(audio_file.read())
+
+     try:
+         # Load the Whisper model based on user selection
+         model = whisper.load_model(model_type, device=DEVICE)
+     except Exception as e:
+         return f"Failed to load Whisper model ({model_type}): {e}"
+
+     try:
+         # Transcribe with the user-selected language
+         if selected_language:
+             result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)
+         else:
+             return "Language selection is required."
+
+         # Save the transcription with timestamps
+         transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{audio_file.name}_transcript.txt")
+
+         translated_text = []
+         with open(transcript_file, 'w', encoding='utf-8') as text_file:
+             for segment in result['segments']:
+                 start_time = segment['start']
+                 end_time = segment['end']
+                 text = segment['text']
+                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
+                 if selected_language in ['mai', 'mag', 'bho']:
+                     text_en = GoogleTranslator(source='auto', target='en').translate(text)
+                     translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
+                     text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
+
+         # Return the transcription and translation
+         return "\n".join(translated_text) if translated_text else result['text']
+
+     except Exception as e:
+         return f"Failed to process the audio file: {e}"
+
+     finally:
+         # Clean up temporary audio file
+         if os.path.exists(temp_audio_path):
+             os.remove(temp_audio_path)
+
+ # Define the Gradio interface
+ interface = gr.Interface(
+     fn=transcribe_and_translate,
+     inputs=[
+         gr.Audio(source="upload", type="file", label="Upload Audio"),
+         gr.Dropdown(label="Select Language", choices=["mai", "mag", "bho", "en"], value="mai"),
+         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
+     ],
+     outputs="text",
+     title="Maithili, Maghi, and Bhojpuri Transcription and Translation"
+ )
+
+ if __name__ == '__main__':
+     # Launch the Gradio interface
+     interface.launch()
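A few review notes on the updated file, beyond the authorship fix in the header docstring.

check_directory re-implements what the standard library already provides. os.makedirs with exist_ok=True is the idiomatic one-liner and also avoids the race between the exists() check and the makedirs() call:

    import os

    # Equivalent to check_directory(TRANSCRIPTS_FOLDER), minus the race
    # between os.path.exists() and os.makedirs() if two processes start at once.
    os.makedirs(TRANSCRIPTS_FOLDER, exist_ok=True)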
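temp_audio_path = os.path.join(BASE_DIR, audio_file.name) is fragile: with type="file", Gradio hands over a temp-file object whose .name is usually an absolute path, and os.path.join silently discards BASE_DIR when its second argument is absolute, so the "copy" is written over Gradio's own temp file. A minimal sketch of a safer version, assuming only that .name may be absolute:

    import os

    # os.path.join("/app", "/tmp/audio.wav") returns "/tmp/audio.wav", so
    # join on the basename instead; this also keeps the transcript filename
    # from embedding a full path.
    safe_name = os.path.basename(audio_file.name)
    temp_audio_path = os.path.join(BASE_DIR, safe_name)
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{safe_name}_transcript.txt")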
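openai-whisper validates the language= argument against its built-in language table, and Maithili (mai), Magahi (mag), and Bhojpuri (bho) are not in it, so model.transcribe(..., language="mai") raises instead of transcribing. One possible workaround, sketched under the assumption that Hindi is an acceptable decoding proxy for these languages (an assumption about quality, not a tested claim):

    from whisper.tokenizer import LANGUAGES  # the codes Whisper accepts

    # Hypothetical fallback map: decode unsupported Bihari language codes as
    # Hindi, keeping the original code for the translation branch. Whether
    # Hindi decoding is good enough for Maithili/Magahi/Bhojpuri audio is an
    # open question that needs testing.
    WHISPER_FALLBACK = {"mai": "hi", "mag": "hi", "bho": "hi"}

    def to_whisper_language(code):
        return code if code in LANGUAGES else WHISPER_FALLBACK.get(code, "hi")

    # result = model.transcribe(temp_audio_path,
    #                           language=to_whisper_language(selected_language),
    #                           verbose=False)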
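In the segment loop, a new GoogleTranslator is constructed for every segment. Hoisting it above the loop is a small, safe improvement:

    translator = GoogleTranslator(source='auto', target='en')  # build once
    # ... then inside the loop:
    text_en = translator.translate(text)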
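Finally, gr.Audio(source="upload", type="file") is Gradio 3.x API: type="file" was deprecated there and removed in Gradio 4.x, which also renamed source to sources. With type="filepath" the handler receives a plain path string, so the manual read()/write step in transcribe_and_translate disappears. A sketch assuming Gradio 4.x:

    import gradio as gr

    # The component now delivers a path string managed by Gradio, so the
    # handler can pass it straight to Whisper.
    audio_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")

    def transcribe_and_translate(audio_path, selected_language, model_type="base"):
        # audio_path is already a file on disk; no manual save needed.
        ...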