NLPV committed on
Commit 9a34dcd · verified · 1 Parent(s): b589aed

Update app.py

Files changed (1)
  1. app.py +17 -25
app.py CHANGED
@@ -18,33 +18,29 @@ def check_directory(path):
 
 check_directory(TRANSCRIPTS_FOLDER)
 
-def transcribe_and_translate(audio_file, selected_language, model_type="base"):
+def live_transcribe_and_translate(stream, selected_language, model_type="base"):
     """
-    Transcribe audio using Whisper and translate it into English if required.
+    Transcribe live audio using Whisper and translate it into English if required.
 
-    :param audio_file: Path to the uploaded audio file
+    :param stream: Stream of live audio data
     :param selected_language: Language code for transcription
     :param model_type: Whisper model type (default is 'base')
     :return: Transcription and translation
     """
-    if not audio_file:
-        return "No audio file uploaded."
-
     try:
         # Load the Whisper model based on user selection
         model = whisper.load_model(model_type, device=DEVICE)
     except Exception as e:
         return f"Failed to load Whisper model ({model_type}): {e}"
+
+    # Prepare audio processor
+    audio_processor = whisper.audio.AudioProcessor(model, streaming=True)
 
-    try:
-        # Transcribe with the user-selected language
-        result = model.transcribe(audio_file, language=selected_language, verbose=False)
-
-        # Save the transcription with timestamps
-        transcript_file = os.path.join(TRANSCRIPTS_FOLDER, f"{os.path.basename(audio_file)}_transcript.txt")
-
-        translated_text = []
-        with open(transcript_file, 'w', encoding='utf-8') as text_file:
+    translated_text = []
+    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, 'live_transcript.txt')
+    with open(transcript_file, 'w', encoding='utf-8') as text_file:
+        for chunk in stream:
+            result = audio_processor.transcribe(chunk, return_timestamps=True)
             for segment in result['segments']:
                 start_time = segment['start']
                 end_time = segment['end']
@@ -54,23 +50,19 @@ def transcribe_and_translate(audio_file, selected_language, model_type="base"):
                 text_en = GoogleTranslator(source='auto', target='en').translate(text)
                 translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                 text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")
-
-        # Return the transcription and translation
-        return "\n".join(translated_text) if translated_text else result['text']
-
-    except Exception as e:
-        return f"Failed to process the audio file: {e}"
+
+    return "\n".join(translated_text) if translated_text else "Live transcription completed."
 
 # Define the Gradio interface
 interface = gr.Interface(
-    fn=transcribe_and_translate,
+    fn=live_transcribe_and_translate,
     inputs=[
-        gr.Audio(type="filepath", label="Upload Audio"),
-        gr.Dropdown(label="Select Language", choices=["nl","en"], value="mai"),
+        gr.Audio(source="microphone", type="stream", streaming=True, label="Start Recording"),
+        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
         gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base")
     ],
     outputs="text",
-    title="Transcription and Translation"
+    title="Live Transcription and Translation"
 )
 
 if __name__ == '__main__':
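
Note for anyone trying this change locally: the stock openai-whisper package does not expose a streaming whisper.audio.AudioProcessor; its documented entry point is model.transcribe, which accepts a file path or a 16 kHz mono float32 array, and Gradio's microphone component with streaming=True delivers (sample_rate, numpy_array) chunks to the callback. A minimal sketch of the same live loop built only on those documented APIs is below; the helper name stream_transcribe and the constant BUFFER_SECONDS are illustrative assumptions, not code from this commit.

    import numpy as np
    import whisper
    from deep_translator import GoogleTranslator

    BUFFER_SECONDS = 5  # hypothetical: transcribe once this much audio has accumulated
    model = whisper.load_model("base")
    buffer = np.zeros(0, dtype=np.float32)

    def stream_transcribe(chunk, selected_language):
        # gr.Audio(streaming=True, type="numpy") yields (sample_rate, int16 ndarray) chunks
        global buffer
        sample_rate, data = chunk
        data = data.astype(np.float32) / 32768.0  # int16 PCM -> float32 in [-1, 1]
        if data.ndim > 1:
            data = data.mean(axis=1)              # downmix stereo to mono
        buffer = np.concatenate([buffer, data])
        if len(buffer) < sample_rate * BUFFER_SECONDS:
            return ""                             # keep buffering until enough audio arrives
        # model.transcribe accepts a float32 array but expects 16 kHz audio,
        # so resample first if the browser records at another rate
        result = model.transcribe(buffer, language=selected_language, verbose=False)
        buffer = np.zeros(0, dtype=np.float32)
        return GoogleTranslator(source='auto', target='en').translate(result['text'])

Wired into Gradio 3.x this would look roughly like gr.Interface(fn=stream_transcribe, inputs=[gr.Audio(source="microphone", type="numpy", streaming=True), ...], outputs="text", live=True), since streaming inputs require a live interface; Gradio 4.x replaces source="microphone" with sources=["microphone"].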