NLPV committed
Commit 46e7915 · verified · 1 Parent(s): 9d2f7b5

Update BihariVernacular.py

Files changed (1)
  1. BihariVernacular.py +102 -102
BihariVernacular.py CHANGED
@@ -1,102 +1,102 @@
- # -*- coding: utf-8 -*-
- """
- Created on Fri Nov 22 14:30:42 2024
-
- @author: CentERdata
- """
-
- # -*- coding: utf-8 -*-
- """
- Created on Mon Dec 9 16:43:31 2024
-
- @author: Pradeep Kumar
- """
- import whisper
- import torch
- import os
- import gradio as gr
- from deep_translator import GoogleTranslator
-
- # Check if NVIDIA GPU is available
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
- # Directories for transcripts
- BASE_DIR = os.getcwd()
- TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
-
- # Ensure transcripts directory exists
- def check_directory(path):
-     if not os.path.exists(path):
-         os.makedirs(path)
-
- check_directory(TRANSCRIPTS_FOLDER)
-
- def transcribe_and_translate(audio_file, selected_language, model_type="base"):
-     """
-     Transcribe audio using Whisper and translate it into English if required.
-
-     :param audio_file: Path to the uploaded audio file
-     :param selected_language: Language code for transcription
-     :param model_type: Whisper model type (default is 'base')
-     :return: Transcription and translation
-     """
-     temp_audio_path = os.path.join(BASE_DIR, audio_file.name)
-
-     # Save the uploaded file to a temporary location
-     with open(temp_audio_path, "wb") as f:
-         f.write(audio_file.read())
-
-     try:
-         # Load the Whisper model based on user selection
-         model = whisper.load_model(model_type, device=DEVICE)
-     except Exception as e:
-         return f"Failed to load Whisper model ({model_type}): {e}"
-
-     try:
-         # Transcribe with the user-selected language
-         if selected_language:
-             result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)
-         else:
-             return "Language selection is required."
-
-         # Save the transcription with timestamps
-         transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{audio_file.name}_transcript.txt")
-
-         translated_text = []
-         with open(transcript_file, 'w', encoding='utf-8') as text_file:
-             for segment in result['segments']:
-                 start_time = segment['start']
-                 end_time = segment['end']
-                 text = segment['text']
-                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
-                 if selected_language in ['mai', 'mag', 'bho']:
-                     text_en = GoogleTranslator(source='auto', target='en').translate(text)
-                     translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
-                     text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
-
-         # Return the transcription and translation
-         return "\n".join(translated_text) if translated_text else result['text']
-
-     except Exception as e:
-         return f"Failed to process the audio file: {e}"
-
-     finally:
-         # Clean up temporary audio file
-         if os.path.exists(temp_audio_path):
-             os.remove(temp_audio_path)
-
- # Define the Gradio interface
- interface = gr.Interface(
-     fn=transcribe_and_translate,
-     inputs=[
-         gr.Audio(source="upload", type="file", label="Upload Audio"),
-         gr.Dropdown(label="Select Language", choices=["mai", "mag", "bho", "en"], value="mai"),
-         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
-     ],
-     outputs="text",
-     title="Maithili, Maghi, and Bhojpuri Transcription and Translation"
- )
-
- if __name__ == '__main__':
-     # Launch the Gradio interface
-     interface.launch()
+ # -*- coding: utf-8 -*-
+ """
+ Created on Fri Nov 22 14:30:42 2024
+
+ @author: Pradeep Kumar
+ """
+
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Dec 9 16:43:31 2024
+
+ @author: Pradeep Kumar
+ """
+ import whisper
+ import torch
+ import os
+ import gradio as gr
+ from deep_translator import GoogleTranslator
+
+ # Check if NVIDIA GPU is available
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Directories for transcripts
+ BASE_DIR = os.getcwd()
+ TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
+
+ # Ensure transcripts directory exists
+ def check_directory(path):
+     if not os.path.exists(path):
+         os.makedirs(path)
+
+ check_directory(TRANSCRIPTS_FOLDER)
+
+ def transcribe_and_translate(audio_file, selected_language, model_type="base"):
+     """
+     Transcribe audio using Whisper and translate it into English if required.
+
+     :param audio_file: Path to the uploaded audio file
+     :param selected_language: Language code for transcription
+     :param model_type: Whisper model type (default is 'base')
+     :return: Transcription and translation
+     """
+     temp_audio_path = os.path.join(BASE_DIR, audio_file.name)
+
+     # Save the uploaded file to a temporary location
+     with open(temp_audio_path, "wb") as f:
+         f.write(audio_file.read())
+
+     try:
+         # Load the Whisper model based on user selection
+         model = whisper.load_model(model_type, device=DEVICE)
+     except Exception as e:
+         return f"Failed to load Whisper model ({model_type}): {e}"
+
+     try:
+         # Transcribe with the user-selected language
+         if selected_language:
+             result = model.transcribe(temp_audio_path, language=selected_language, verbose=False)
+         else:
+             return "Language selection is required."
+
+         # Save the transcription with timestamps
+         transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{audio_file.name}_transcript.txt")
+
+         translated_text = []
+         with open(transcript_file, 'w', encoding='utf-8') as text_file:
+             for segment in result['segments']:
+                 start_time = segment['start']
+                 end_time = segment['end']
+                 text = segment['text']
+                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
+                 if selected_language in ['mai', 'mag', 'bho']:
+                     text_en = GoogleTranslator(source='auto', target='en').translate(text)
+                     translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
+                     text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
+
+         # Return the transcription and translation
+         return "\n".join(translated_text) if translated_text else result['text']
+
+     except Exception as e:
+         return f"Failed to process the audio file: {e}"
+
+     finally:
+         # Clean up temporary audio file
+         if os.path.exists(temp_audio_path):
+             os.remove(temp_audio_path)
+
+ # Define the Gradio interface
+ interface = gr.Interface(
+     fn=transcribe_and_translate,
+     inputs=[
+         gr.Audio(source="upload", type="file", label="Upload Audio"),
+         gr.Dropdown(label="Select Language", choices=["mai", "mag", "bho", "en"], value="mai"),
+         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
+     ],
+     outputs="text",
+     title="Maithili, Maghi, and Bhojpuri Transcription and Translation"
+ )
+
+ if __name__ == '__main__':
+     # Launch the Gradio interface
+     interface.launch()
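A few review notes on the updated file, beyond the authorship fix in the header docstring.

check_directory re-implements what the standard library already provides. os.makedirs with exist_ok=True is the idiomatic one-liner and also avoids the race between the exists() check and the makedirs() call:

    import os

    # Equivalent to check_directory(TRANSCRIPTS_FOLDER), minus the race
    # between os.path.exists() and os.makedirs() if two processes start at once.
    os.makedirs(TRANSCRIPTS_FOLDER, exist_ok=True)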
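temp_audio_path = os.path.join(BASE_DIR, audio_file.name) is fragile: with type="file", Gradio hands over a temp-file object whose .name is usually an absolute path, and os.path.join silently discards BASE_DIR when its second argument is absolute, so the "copy" is written over Gradio's own temp file. A minimal sketch of a safer version, assuming only that .name may be absolute:

    import os

    # os.path.join("/app", "/tmp/audio.wav") returns "/tmp/audio.wav", so
    # join on the basename instead; this also keeps the transcript filename
    # from embedding a full path.
    safe_name = os.path.basename(audio_file.name)
    temp_audio_path = os.path.join(BASE_DIR, safe_name)
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{safe_name}_transcript.txt")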
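openai-whisper validates the language= argument against its built-in language table, and Maithili (mai), Magahi (mag), and Bhojpuri (bho) are not in it, so model.transcribe(..., language="mai") raises instead of transcribing. One possible workaround, sketched under the assumption that Hindi is an acceptable decoding proxy for these languages (an assumption about quality, not a tested claim):

    from whisper.tokenizer import LANGUAGES  # the codes Whisper accepts

    # Hypothetical fallback map: decode unsupported Bihari language codes as
    # Hindi, keeping the original code for the translation branch. Whether
    # Hindi decoding is good enough for Maithili/Magahi/Bhojpuri audio is an
    # open question that needs testing.
    WHISPER_FALLBACK = {"mai": "hi", "mag": "hi", "bho": "hi"}

    def to_whisper_language(code):
        return code if code in LANGUAGES else WHISPER_FALLBACK.get(code, "hi")

    # result = model.transcribe(temp_audio_path,
    #                           language=to_whisper_language(selected_language),
    #                           verbose=False)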
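In the segment loop, a new GoogleTranslator is constructed for every segment. Hoisting it above the loop is a small, safe improvement:

    translator = GoogleTranslator(source='auto', target='en')  # build once
    # ... then inside the loop:
    text_en = translator.translate(text)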
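Finally, gr.Audio(source="upload", type="file") is Gradio 3.x API: type="file" was deprecated there and removed in Gradio 4.x, which also renamed source to sources. With type="filepath" the handler receives a plain path string, so the manual read()/write step in transcribe_and_translate disappears. A sketch assuming Gradio 4.x:

    import gradio as gr

    # The component now delivers a path string managed by Gradio, so the
    # handler can pass it straight to Whisper.
    audio_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio")

    def transcribe_and_translate(audio_path, selected_language, model_type="base"):
        # audio_path is already a file on disk; no manual save needed.
        ...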