WajeehAzeemX committed on
Commit
8bd741b
·
verified ·
1 Parent(s): e101185

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -16,14 +16,13 @@ model_id = "WajeehAzeemX/whisper-smal-ar-testing-kale-5000"
16
  model = WhisperForConditionalGeneration.from_pretrained(
17
  model_id
18
  )
19
- processor = WhisperProcessor.from_pretrained(model_id, attn_implementation="sdpa")
20
- model.config.forced_decoder_ids = None
21
- forced_decoder_ids = processor.get_decoder_prompt_ids(language="Arabic", task="transcribe")
22
  pipe = pipeline(
23
  "automatic-speech-recognition",
24
  model=model,
25
  tokenizer=processor.tokenizer,
26
  feature_extractor=processor.feature_extractor,
 
27
  )
28
 
29
 
@@ -39,7 +38,7 @@ async def transcribe_audio(request: Request):
39
  # Process the audio array
40
  input_features = processor(audio_array, sampling_rate=sampling_rate, return_tensors="pt").input_features
41
  # Generate token ids
42
- predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
43
  # Decode token ids to text
44
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
45
  # Print the transcription
 
16
  model = WhisperForConditionalGeneration.from_pretrained(
17
  model_id
18
  )
19
+ processor = WhisperProcessor.from_pretrained(model_id)
 
 
20
  pipe = pipeline(
21
  "automatic-speech-recognition",
22
  model=model,
23
  tokenizer=processor.tokenizer,
24
  feature_extractor=processor.feature_extractor,
25
+ generate_kwargs = {"task":"transcribe", "language":"<|ar|>"}
26
  )
27
 
28
 
 
38
  # Process the audio array
39
  input_features = processor(audio_array, sampling_rate=sampling_rate, return_tensors="pt").input_features
40
  # Generate token ids
41
+ predicted_ids = model.generate(input_features)
42
  # Decode token ids to text
43
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
44
  # Print the transcription