AkitoP committed on
Commit
ed09f01
·
1 Parent(s): 0dfaf3b
Files changed (1) hide show
  1. app.py +3 -11
app.py CHANGED
@@ -27,20 +27,13 @@ pipe = pipeline(
27
 
28
 
29
  @spaces.GPU
30
- def transcribe(audio: str, model: str) -> tuple[str, float]:
31
- if not audio:
32
- return "No audio file", 0
33
  filename = Path(audio).name
34
- logger.info(f"Model: {model}")
35
- logger.info(f"Audio: {filename}")
36
  # Read and resample audio to 16kHz
37
  y, sr = librosa.load(audio, mono=True, sr=16000)
38
  # Get duration of audio
39
- duration = librosa.get_duration(y=y, sr=sr)
40
- logger.info(f"Duration: {duration:.2f}s")
41
- start_time = time.time()
42
  result = pipe(y, generate_kwargs=generate_kwargs)["text"]
43
- end_time = time.time()
44
  return result
45
 
46
 
@@ -53,9 +46,8 @@ A Whisper model fine-tuned to transcribe Japanese speech into Katakana with pitc
53
  with gr.Blocks() as app:
54
  gr.Markdown(initial_md)
55
  audio = gr.Audio(type="filepath")
56
- transcript = gr.Button("Transcribe with Galgame-Whisper (WIP)")
57
  output = gr.Textbox(label="Result")
58
- transcript.click(transcribe(audio=audio, model="AkitoP/whisper-large-v3-japense-phone_accent"), outputs=[output])
59
 
60
 
61
  # app.load(warmup, inputs=[], outputs=[warmup_result], queue=True)
 
27
 
28
 
29
  @spaces.GPU
30
+ def transcribe(audio: str) -> tuple[str, float]:
 
 
31
  filename = Path(audio).name
 
 
32
  # Read and resample audio to 16kHz
33
  y, sr = librosa.load(audio, mono=True, sr=16000)
34
  # Get duration of audio
 
 
 
35
  result = pipe(y, generate_kwargs=generate_kwargs)["text"]
36
+ print(result)
37
  return result
38
 
39
 
 
46
  with gr.Blocks() as app:
47
  gr.Markdown(initial_md)
48
  audio = gr.Audio(type="filepath")
 
49
  output = gr.Textbox(label="Result")
50
+ transcript.click(transcribe(audio=audio),inputs=[audio], outputs=[output])
51
 
52
 
53
  # app.load(warmup, inputs=[], outputs=[warmup_result], queue=True)