unijoh committed on
Commit 68195d5
1 Parent(s): a90990e

Update asr.py

Files changed (1)
  1. asr.py +36 -23
asr.py CHANGED
@@ -1,33 +1,46 @@
 import librosa
 from transformers import Wav2Vec2ForCTC, AutoProcessor
 import torch
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
 
 ASR_SAMPLING_RATE = 16_000
 
 MODEL_ID = "facebook/wav2vec2-large-960h-lv60-self"
 
-processor = AutoProcessor.from_pretrained(MODEL_ID)
-model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+try:
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+    logging.info("ASR model and processor loaded successfully.")
+except Exception as e:
+    logging.error(f"Error loading ASR model or processor: {e}")
 
 def transcribe(audio):
-    if audio is None:
-        return "ERROR: You have to either use the microphone or upload an audio file"
-
-    audio_samples = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)[0]
-    inputs = processor(audio_samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt")
-
-    # Set language ID for Faroese
-    language_id = 'fao'  # ISO 639-3 code for Faroese
-    processor.tokenizer.set_lang(language_id)
-
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.to(device)
-    inputs = inputs.to(device)
-
-    with torch.no_grad():
-        outputs = model(**inputs).logits
-
-    ids = torch.argmax(outputs, dim=-1)[0]
-    transcription = processor.decode(ids)
-
-    return transcription
+    try:
+        if audio is None:
+            return "ERROR: You have to either use the microphone or upload an audio file"
+
+        audio_samples = librosa.load(audio, sr=ASR_SAMPLING_RATE, mono=True)[0]
+        inputs = processor(audio_samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt")
+
+        # Set language ID for Faroese
+        language_id = 'fao'  # ISO 639-3 code for Faroese
+        processor.tokenizer.set_lang(language_id)
+
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+        inputs = inputs.to(device)
+
+        with torch.no_grad():
+            outputs = model(**inputs).logits
+
+        ids = torch.argmax(outputs, dim=-1)[0]
+        transcription = processor.decode(ids)
+
+        logging.info("Transcription completed successfully.")
+        return transcription
+    except Exception as e:
+        logging.error(f"Error during transcription: {e}")
+        return "ERROR"
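For reference: "facebook/wav2vec2-large-960h-lv60-self" is an English-only checkpoint, and its CTC tokenizer does not expose a set_lang() method, so the Faroese-specific lines kept in this diff would most likely raise an AttributeError at runtime (and be swallowed by the new try/except, returning "ERROR"). Selecting a language by ISO 639-3 code such as "fao" matches the multilingual MMS checkpoints in transformers, where the calls are Wav2Vec2CTCTokenizer.set_target_lang() and Wav2Vec2ForCTC.load_adapter(). Below is a minimal sketch of that pattern; it assumes the checkpoint facebook/mms-1b-all and assumes a Faroese ("fao") adapter is available for it, neither of which comes from this repo.

# Hedged sketch: MMS-style language switching for Faroese ASR.
# Assumptions (not taken from asr.py): "facebook/mms-1b-all" is the intended
# multilingual checkpoint and it ships a "fao" adapter.
import librosa
import torch
from transformers import Wav2Vec2ForCTC, AutoProcessor

ASR_SAMPLING_RATE = 16_000
MMS_MODEL_ID = "facebook/mms-1b-all"  # assumed model ID, not the one in asr.py

processor = AutoProcessor.from_pretrained(MMS_MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MMS_MODEL_ID)

# MMS language switching: swap in the Faroese vocabulary and adapter weights.
processor.tokenizer.set_target_lang("fao")
model.load_adapter("fao")

def transcribe(audio_path):
    # Resample to the model's expected rate and run greedy CTC decoding.
    samples = librosa.load(audio_path, sr=ASR_SAMPLING_RATE, mono=True)[0]
    inputs = processor(samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    ids = torch.argmax(logits, dim=-1)[0]
    return processor.decode(ids)

If the English 960h checkpoint really is the intended model, the simpler route is to drop the set_lang lines entirely, since that checkpoint transcribes English only.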