WajeehAzeemX committed on
Commit
1006f74
·
verified ·
1 Parent(s): 64e4b92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -9
app.py CHANGED
@@ -1,11 +1,33 @@
1
- import torch
2
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
- from datasets import load_dataset
4
  from fastapi import FastAPI, Request, HTTPException
5
- import librosa
6
  import io
7
- whisper = pipeline("automatic-speech-recognition", "WajeehAzeemX/openai-whispersmall-finetuned-2000", torch_dtype=torch.float16, device="cpu")
 
 
8
  app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  @app.post("/transcribe/")
11
  async def transcribe_audio(request: Request):
@@ -19,13 +41,18 @@ async def transcribe_audio(request: Request):
19
  # Load the audio file using librosa (resampled to 16 kHz)
20
  audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
21
 
22
- # Decode token ids to text
23
- transcription = whisper(audio_array)
24
 
 
 
 
 
 
25
 
26
  # Print the transcription
27
- print(transcription['text']) # Display the transcription
28
 
29
- return {"transcription": transcription['text']}
30
  except Exception as e:
31
  raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
1
  from fastapi import FastAPI, Request, HTTPException
2
+ from transformers import pipeline
3
  import io
4
+ import librosa
5
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
6
+
7
  app = FastAPI()
8
+ # Device configuration
9
+ # Load the model and processor
10
+ model_id = "WajeehAzeemX/openai-whispersmall-finetuned-2000"
11
+ model = WhisperForConditionalGeneration.from_pretrained(
12
+ model_id
13
+ )
14
+ import torch
15
+
16
+ processor = WhisperProcessor.from_pretrained('WajeehAzeemX/openai-whispersmall-finetuned-2000')
17
+ model.config.forced_decoder_ids = None
18
+ forced_decoder_ids = processor.get_decoder_prompt_ids(language="Arabic", task="transcribe")
19
+ model.generation_config.cache_implementation = "static"
20
+ from transformers import GenerationConfig, WhisperForConditionalGeneration
21
+ generation_config = GenerationConfig.from_pretrained("openai/whisper-small") # if you are using a multilingual model
22
+ model.generation_config = generation_config
23
+
24
+ pipe = pipeline(
25
+ "automatic-speech-recognition",
26
+ model=model,
27
+ tokenizer=processor.tokenizer,
28
+ feature_extractor=processor.feature_extractor,
29
+
30
+ )
31
 
32
  @app.post("/transcribe/")
33
  async def transcribe_audio(request: Request):
 
41
  # Load the audio file using librosa (resampled to 16 kHz)
42
  audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
43
 
44
+ # Process the audio array
45
+ input_features = processor(audio_array, sampling_rate=sampling_rate, return_tensors="pt").input_features
46
 
47
+ # Generate token ids
48
+ predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids, return_timestamps=True)
49
+
50
+ # Decode token ids to text
51
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
52
 
53
  # Print the transcription
54
+ print(transcription[0]) # Display the transcription
55
 
56
+ return {"transcription": transcription[0]}
57
  except Exception as e:
58
  raise HTTPException(status_code=500, detail=str(e))