Spaces:
Sleeping
Sleeping
File size: 2,021 Bytes
2debb03 1006f74 adb9315 1006f74 eae5b83 1006f74 fecc0bf 1006f74 fecc0bf 8f81f58 7f3077c 1006f74 6370944 2debb03 e942829 2debb03 e942829 7ea0248 e942829 1006f74 adb9315 1006f74 8f81f58 1006f74 e942829 eae5b83 1006f74 e942829 1006f74 2debb03 e942829 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import io

import librosa
from fastapi import FastAPI, HTTPException, Request
from transformers import (
    GenerationConfig,
    WhisperForConditionalGeneration,
    WhisperProcessor,
    pipeline,
)

app = FastAPI()

# Fine-tuned Whisper-small checkpoint for Arabic speech recognition.
model_id = "WajeehAzeemX/whisper-smal-ar-testing-kale-5000"

# Load model and processor from the same checkpoint (model_id reused so the
# two can never drift apart).
model = WhisperForConditionalGeneration.from_pretrained(model_id)
processor = WhisperProcessor.from_pretrained(model_id)

# Force every request to be decoded as an Arabic transcription rather than
# letting the model auto-detect language/task.
forced_decoder_ids = processor.get_decoder_prompt_ids(
    language="arabic", task="transcribe"
)

# Borrow the generation config from the multilingual base model — the
# fine-tuned checkpoint may not ship a complete one of its own.
generation_config = GenerationConfig.from_pretrained("openai/whisper-small")
model.generation_config = generation_config

# ASR pipeline built from the same model/processor. NOTE(review): the
# /transcribe/ endpoint calls model.generate directly and never uses `pipe`;
# kept for backward compatibility, but consider removing if truly unused.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
@app.post("/transcribe/")
async def transcribe_audio(request: Request):
    """Transcribe Arabic speech sent as raw audio bytes in the request body.

    The client POSTs the audio file's raw bytes directly (no multipart
    form). Any format librosa can decode is accepted.

    Returns:
        dict: ``{"transcription": <str>}`` with the decoded text.

    Raises:
        HTTPException: 500 with the underlying error message on any failure
            (unreadable audio, model error, etc.).
    """
    try:
        # Read the raw binary payload and wrap it in a file-like object.
        audio_data = await request.body()
        audio_file = io.BytesIO(audio_data)
        # Decode and resample to 16 kHz — the rate Whisper's feature
        # extractor expects.
        audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
        # Convert the waveform to log-mel input features.
        input_features = processor(
            audio_array, sampling_rate=sampling_rate, return_tensors="pt"
        ).input_features
        # Generate token ids, forcing Arabic transcription.
        predicted_ids = model.generate(
            input_features, forced_decoder_ids=forced_decoder_ids
        )
        # Decode token ids to text, dropping special tokens.
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
        print(transcription[0])  # log the result for debugging
        return {"transcription": transcription[0]}
    except Exception as e:
        # Boundary handler: surface any failure to the client as a 500.
        raise HTTPException(status_code=500, detail=str(e))