saattrupdan committed on
Commit
d087544
·
1 Parent(s): 8cd000d

chore: Add logging

Browse files
Files changed (1) hide show
  1. app.py +19 -0
app.py CHANGED
@@ -1,5 +1,6 @@
1
  """Røst ASR demo."""
2
 
 
3
  import os
4
  import warnings
5
 
@@ -11,6 +12,13 @@ from punctfix import PunctFixer
11
  from transformers import pipeline
12
  from dotenv import load_dotenv
13
 
 
 
 
 
 
 
 
14
  load_dotenv()
15
 
16
  warnings.filterwarnings("ignore", category=FutureWarning)
@@ -33,6 +41,7 @@ send the audio to the model for transcription. You can also upload an audio file
33
  pressing the {icon} button.
34
  """
35
 
 
36
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
37
  transcriber = pipeline(
38
  task="automatic-speech-recognition",
@@ -40,8 +49,12 @@ transcriber = pipeline(
40
  device=device,
41
  token=os.getenv("HUGGINGFACE_HUB_TOKEN", True),
42
  )
 
 
43
  transcription_fixer = PunctFixer(language="da", device=device)
44
 
 
 
45
  def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
46
  """Transcribe the audio.
47
 
@@ -56,12 +69,18 @@ def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
56
  if audio.ndim > 1:
57
  audio = np.mean(audio, axis=1)
58
  audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
 
 
59
  transcription = transcriber(inputs=audio)
60
  if not isinstance(transcription, dict):
61
  return ""
 
 
62
  cleaned_transcription = transcription_fixer.punctuate(
63
  text=transcription["text"]
64
  )
 
 
65
  return cleaned_transcription
66
 
67
  demo = gr.Interface(
 
1
  """Røst ASR demo."""
2
 
3
+ import logging
4
  import os
5
  import warnings
6
 
 
12
  from transformers import pipeline
13
  from dotenv import load_dotenv
14
 
15
+ logging.basicConfig(
16
+ level=logging.INFO,
17
+ format="%(asctime)s ⋅ %(name)s ⋅ %(message)s",
18
+ datefmt="%Y-%m-%d %H:%M:%S",
19
+ )
20
+ logger = logging.getLogger("roest-asr-demo")
21
+
22
  load_dotenv()
23
 
24
  warnings.filterwarnings("ignore", category=FutureWarning)
 
41
  pressing the {icon} button.
42
  """
43
 
44
+ logger.info("Loading the ASR model...")
45
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
46
  transcriber = pipeline(
47
  task="automatic-speech-recognition",
 
49
  device=device,
50
  token=os.getenv("HUGGINGFACE_HUB_TOKEN", True),
51
  )
52
+
53
+ logger.info("Loading the punctuation fixer model...")
54
  transcription_fixer = PunctFixer(language="da", device=device)
55
 
56
+ logger.info("Models loaded, ready to transcribe audio.")
57
+
58
  def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
59
  """Transcribe the audio.
60
 
 
69
  if audio.ndim > 1:
70
  audio = np.mean(audio, axis=1)
71
  audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
72
+
73
+ logger.info(f"Transcribing audio clip of {len(audio) / 16_000:.2f} seconds...")
74
  transcription = transcriber(inputs=audio)
75
  if not isinstance(transcription, dict):
76
  return ""
77
+
78
+ logger.info(f"Raw transcription is {transcription['text']!r}. Cleaning it up...")
79
  cleaned_transcription = transcription_fixer.punctuate(
80
  text=transcription["text"]
81
  )
82
+
83
+ logger.info(f"Final transcription: {cleaned_transcription!r}")
84
  return cleaned_transcription
85
 
86
  demo = gr.Interface(