WajeehAzeemX committed · Commit adb9315 · verified · 1 Parent(s): 1860ffd

Update app.py

Files changed (1):
  1. app.py +9 -36
app.py CHANGED
@@ -1,33 +1,11 @@
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from datasets import load_dataset
 from fastapi import FastAPI, Request, HTTPException
-from transformers import pipeline
-import io
 import librosa
-from transformers import WhisperForConditionalGeneration, WhisperProcessor
-
+import io
+whisper = pipeline("automatic-speech-recognition", "whitefox123/whisper-small-ar2", torch_dtype=torch.float16, device="cpu")
 app = FastAPI()
-# Device configuration
-# Load the model and processor
-model_id = "whitefox123/whisper-small-ar2"
-model = WhisperForConditionalGeneration.from_pretrained(
-    model_id
-)
-import torch
-
-processor = WhisperProcessor.from_pretrained('whitefox123/whisper-small-ar2')
-model.config.forced_decoder_ids = None
-forced_decoder_ids = processor.get_decoder_prompt_ids(language="Arabic", task="transcribe")
-model.generation_config.cache_implementation = "static"
-from transformers import GenerationConfig, WhisperForConditionalGeneration
-generation_config = GenerationConfig.from_pretrained("openai/whisper-small") # if you are using a multilingual model
-model.generation_config = generation_config
-
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model=model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-
-)
 
 @app.post("/transcribe/")
 async def transcribe_audio(request: Request):
@@ -41,18 +19,13 @@ async def transcribe_audio(request: Request):
         # Load the audio file using pydub
         audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
 
-        # Process the audio array
-        input_features = processor(audio_array, sampling_rate=sampling_rate, return_tensors="pt").input_features
-
-        # Generate token ids
-        predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids, return_timestamps=True)
-
         # Decode token ids to text
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+        transcription = whisper(audio_array)
+
 
         # Print the transcription
-        print(transcription[0]) # Display the transcription
+        print(transcription['text']) # Display the transcription
 
-        return {"transcription": transcription[0]}
+        return {"transcription": transcription['text']}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
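
Note on behavior: the removed setup forced Arabic decoding through processor.get_decoder_prompt_ids(language="Arabic", task="transcribe"), while the new pipeline() call passes no language, so Whisper falls back to language auto-detection. If forcing Arabic is still wanted, a minimal sketch (assuming a transformers version whose ASR pipeline forwards generate_kwargs to model.generate(); "sample.wav" is a placeholder):

import librosa
from transformers import pipeline

# Same one-liner as the commit, but without torch_dtype=torch.float16:
# half-precision inference is generally not supported on CPU, so the
# default float32 is assumed here.
whisper = pipeline(
    "automatic-speech-recognition",
    "whitefox123/whisper-small-ar2",
    device="cpu",
)

# 16 kHz matches the endpoint's librosa.load(..., sr=16000) call.
audio_array, _ = librosa.load("sample.wav", sr=16000)

# generate_kwargs stands in for the removed forced_decoder_ids setup.
result = whisper(audio_array, generate_kwargs={"language": "arabic", "task": "transcribe"})
print(result["text"])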
 
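For quick testing of the updated endpoint, a minimal client sketch. This assumes the unchanged lines elided from the second hunk read the raw request body into the audio_file buffer that librosa.load() consumes; the URL and file name are placeholders:

import requests

# POST raw audio bytes and read back the JSON field returned by transcribe_audio().
with open("sample.wav", "rb") as f:
    resp = requests.post("http://localhost:8000/transcribe/", data=f.read())

resp.raise_for_status()
print(resp.json()["transcription"])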