ranggaaldosas committed on
Commit
54fe872
·
1 Parent(s): c38911e

feat: fixing audio sample bugs

Browse files
Files changed (1) hide show
  1. app.py +17 -15
app.py CHANGED
@@ -6,28 +6,29 @@ import torch
6
  from transformers import WhisperForConditionalGeneration, WhisperProcessor
7
 
8
  hf_token = os.getenv("hf_token")
9
-
10
  if hf_token is None:
11
  raise ValueError(
12
  "Hugging Face token not found. Please set the 'hf_token' environment variable."
13
  )
14
 
15
  processor = WhisperProcessor.from_pretrained(
16
- "openai/whisper-small",
17
- language="Indonesian",
18
- task="transcribe",
19
- token=hf_token,
20
  )
21
  model = WhisperForConditionalGeneration.from_pretrained(
22
  "avalonai/whisper-small-jv", token=hf_token
23
  )
24
 
25
 
26
- def transcribe(audio):
27
- if audio is None:
28
- return "No audio file provided. Please upload an audio file."
 
 
 
 
 
29
  try:
30
- audio, sampling_rate = librosa.load(audio, sr=16000)
31
  except Exception as e:
32
  return f"Failed to load audio: {str(e)}"
33
 
@@ -44,18 +45,19 @@ def transcribe(audio):
44
  audio_samples = [
45
  os.path.join("audio_sample", f)
46
  for f in os.listdir("audio_sample")
47
- if f.endswith((".wav", ".mp3"))
48
  ]
 
 
 
 
49
  audio_input = gr.Audio(
50
- sources="microphone",
51
- type="filepath",
52
- label="Upload Audio or Select a Sample",
53
- choices=audio_samples,
54
  )
55
 
56
  iface = gr.Interface(
57
  fn=transcribe,
58
- inputs=audio_input,
59
  outputs="text",
60
  title="Speech-to-text on Javanese Language Demo",
61
  description="Ini adalah platform untuk pengujian model speech-to-text pada bahasa Jawa oleh Avalon AI. Silahkan coba dengan mengucapkan kalimat atau memilih salah satu sample audio.",
 
6
  from transformers import WhisperForConditionalGeneration, WhisperProcessor
7
 
8
  hf_token = os.getenv("hf_token")
 
9
  if hf_token is None:
10
  raise ValueError(
11
  "Hugging Face token not found. Please set the 'hf_token' environment variable."
12
  )
13
 
14
  processor = WhisperProcessor.from_pretrained(
15
+ "openai/whisper-small", language="Indonesian", task="transcribe", token=hf_token
 
 
 
16
  )
17
  model = WhisperForConditionalGeneration.from_pretrained(
18
  "avalonai/whisper-small-jv", token=hf_token
19
  )
20
 
21
 
22
+ def transcribe(audio_choice, audio_file):
23
+ if audio_file is not None:
24
+ audio_path = audio_file
25
+ elif audio_choice is not None:
26
+ audio_path = audio_choice
27
+ else:
28
+ return "No audio file provided. Please upload an audio file or select a sample."
29
+
30
  try:
31
+ audio, sampling_rate = librosa.load(audio_path, sr=16000)
32
  except Exception as e:
33
  return f"Failed to load audio: {str(e)}"
34
 
 
45
  audio_samples = [
46
  os.path.join("audio_sample", f)
47
  for f in os.listdir("audio_sample")
48
+ if f.endswith((".wav", ".mp3", ".m4a"))
49
  ]
50
+
51
+ audio_choice = gr.Dropdown(
52
+ label="Select a Sample Audio", choices=audio_samples, default=None
53
+ )
54
  audio_input = gr.Audio(
55
+ sources="microphone", type="filepath", label="Upload Audio or Use Microphone"
 
 
 
56
  )
57
 
58
  iface = gr.Interface(
59
  fn=transcribe,
60
+ inputs=[audio_choice, audio_input],
61
  outputs="text",
62
  title="Speech-to-text on Javanese Language Demo",
63
  description="Ini adalah platform untuk pengujian model speech-to-text pada bahasa Jawa oleh Avalon AI. Silahkan coba dengan mengucapkan kalimat atau memilih salah satu sample audio.",