NLPV committed on
Commit 34a09fd · verified · 1 Parent(s): 537e080

Update app.py

Files changed (1)
  1. app.py +35 -92
app.py CHANGED
@@ -1,56 +1,12 @@
- # -*- coding: utf-8 -*-
- """
- Created on Mon Dec 9 16:43:31 2024
-
- @author: Pradeep Kumar
- """
- import whisper
  import torch
  import os
- from flask import Flask, request, abort, jsonify, render_template
+ import gradio as gr
  from deep_translator import GoogleTranslator
-
-
- #%%
-
- import subprocess
-
- # List of packages to check versions for
- packages = ["whisper", "torch", "os", "flask", "deep-translator"]
-
- # Dictionary to store versions
- package_versions = {}
-
- for package in packages:
-     try:
-         # Run pip show to get version info
-         result = subprocess.run(
-             ["pip", "show", package],
-             stdout=subprocess.PIPE,
-             stderr=subprocess.PIPE,
-             text=True
-         )
-         if result.returncode == 0:
-             # Parse the version from the output
-             for line in result.stdout.splitlines():
-                 if line.startswith("Version:"):
-                     package_versions[package] = line.split(":", 1)[1].strip()
-         else:
-             package_versions[package] = "Not Installed"
-     except Exception as e:
-         package_versions[package] = f"Error: {str(e)}"
-
- package_versions
-
-
- #%%
+ import whisper

  # Check if NVIDIA GPU is available
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

- # Initialize Flask app
- app = Flask(__name__)
-
  # Directories for transcripts
  BASE_DIR = os.getcwd()
  TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, 'transcripts')
@@ -62,74 +18,61 @@ def check_directory(path):

  check_directory(TRANSCRIPTS_FOLDER)

- @app.route('/')
- def upload_page():
-     """
-     Render the upload page for audio file submission.
-     """
-     return render_template('upload.html')
-
- @app.route('/process_audio', methods=['POST'])
- def process_audio():
+ def transcribe_and_translate(audio_file, selected_language, model_type="base"):
      """
-     Process audio directly from the destination using Whisper.
+     Transcribe audio using Whisper and translate it into English if required.
+
+     :param audio_file: Path to the uploaded audio file
+     :param selected_language: Language code for transcription
+     :param model_type: Whisper model type (default is 'base')
+     :return: Transcription and translation
      """
-     if 'audio_file' not in request.files:
-         return abort(400, "No file part in the request.")
-
-     audio_file = request.files['audio_file']
-     selected_language = request.form.get('language', None)
-     model_type = request.form.get('model_type', "base")
-
-     if not audio_file or audio_file.filename == '':
-         return abort(400, "No file selected for upload.")
-
-     # Save the uploaded file to a temporary location
-     temp_audio_path = os.path.join(BASE_DIR, audio_file.filename)
-     audio_file.save(temp_audio_path)
+     if not audio_file:
+         return "No audio file uploaded."

      try:
          # Load the Whisper model based on user selection
          model = whisper.load_model(model_type, device=DEVICE)
      except Exception as e:
-         return jsonify({"error": f"Failed to load Whisper model ({model_type}): {e}"}), 500
+         return f"Failed to load Whisper model ({model_type}): {e}"

      try:
          # Transcribe with the user-selected language
-         if selected_language:
-             result = model.transcribe(temp_audio_path,fp16=False, language=selected_language, verbose=False)
-         else:
-             return abort(400, "Language selection is required.")
+         result = model.transcribe(audio_file, language=selected_language, verbose=False)

          # Save the transcription with timestamps
-         transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{audio_file.filename}_transcript.txt")
-
+         transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{os.path.basename(audio_file)}_transcript.txt")
+
+         translated_text = []
          with open(transcript_file, 'w', encoding='utf-8') as text_file:
              for segment in result['segments']:
                  start_time = segment['start']
                  end_time = segment['end']
                  text = segment['text']
                  text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
-                 if selected_language == 'nl':
+                 if selected_language in ['bn', 'mag', 'bho', 'mai']:
                      text_en = GoogleTranslator(source='auto', target='en').translate(text)
+                     translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                      text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

-         # Return the transcription metadata
-         return jsonify({
-             "message": "Transcription successful!",
-             "transcript_path": transcript_file,
-             "transcription_preview": result['text']
-         })
+         # Return the transcription and translation
+         return "\n".join(translated_text) if translated_text else result['text']

      except Exception as e:
-         return jsonify({"error": f"Failed to process the audio file: {e}"}), 500
-
-     finally:
-         # Clean up temporary audio file
-         if os.path.exists(temp_audio_path):
-             os.remove(temp_audio_path)
-
+         return f"Failed to process the audio file: {e}"
+
+ # Define the Gradio interface
+ interface = gr.Interface(
+     fn=transcribe_and_translate,
+     inputs=[
+         gr.Audio(type="filepath", label="Upload Audio"),
+         gr.Dropdown(label="Select Language", choices=["bn", "mag", "bho", "mai", "en"], value="mai"),
+         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
+     ],
+     outputs="text",
+     title="Maithili, Maghi, and Bhojpuri Transcription and Translation"
+ )

  if __name__ == '__main__':
-     # Launch the Gradio interface
+     # Launch the Gradio interface
+     interface.launch()
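
A quick way to sanity-check the refactored function before launching the UI is to call transcribe_and_translate directly, bypassing Gradio. The sketch below is not part of the commit: it assumes the dependencies (torch, openai-whisper, gradio, deep-translator) are installed, that app.py is importable from the working directory, and that sample.wav is a hypothetical short clip you supply yourself; "bn" (Bengali) is used because it is both a dropdown choice and a language Whisper recognizes.

# Minimal smoke test (a sketch, not part of the commit).
# Assumes app.py is on the import path; "sample.wav" is a hypothetical local clip.
from app import transcribe_and_translate

# "bn" is one of the dropdown choices; "tiny" is the smallest Whisper model.
print(transcribe_and_translate("sample.wav", selected_language="bn", model_type="tiny"))

Importing app builds the gr.Interface object but does not start a server: interface.launch() runs only under the if __name__ == '__main__': guard.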