# Spaces: Sleeping  (hosting-page status banner captured with this file; not program code)
import asyncio
import io

import librosa
from fastapi import FastAPI, HTTPException, Request
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
import torch
from transformers import GenerationConfig, WhisperForConditionalGeneration

# ASGI application object; request handlers are attached to this instance.
app = FastAPI()

# NOTE: no explicit device placement (``.to(...)``) is performed in this section,
# so the model stays wherever ``from_pretrained`` puts it by default.

# Load the model and processor from the same checkpoint. The id is defined once
# so the model weights and the tokenizer/feature extractor cannot drift apart.
model_id = "WajeehAzeemX/whisper-smal-ar-testing-kale-5000"
model = WhisperForConditionalGeneration.from_pretrained(model_id)
processor = WhisperProcessor.from_pretrained(model_id)

# Decoder prompt ids requesting Arabic transcription; passed to ``generate``.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="arabic", task="transcribe")

# Replace the checkpoint's generation config with the one published for
# "openai/whisper-small".
generation_config = GenerationConfig.from_pretrained("openai/whisper-small")  # if you are using a multilingual model
model.generation_config = generation_config

# Ready-made ASR pipeline built from the same model and processor parts.
# NOTE(review): ``pipe`` is not referenced in the visible part of this file —
# confirm whether it is still needed before removing it.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler in this file, so as written it is not registered on ``app``. Confirm
# the intended path/method and register it.
def _transcribe_bytes(audio_data: bytes) -> str:
    """Decode raw audio bytes and return the first decoded transcription string."""
    # librosa reads from a file-like object; sr=16000 requests 16 kHz output.
    audio_array, sampling_rate = librosa.load(io.BytesIO(audio_data), sr=16000)
    # Turn the waveform into the model's input features (PyTorch tensors).
    input_features = processor(
        audio_array, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    # Generate token ids using the module-level decoder prompt ids.
    predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
    # Decode token ids to text; a single input yields a single result.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


async def transcribe_audio(request: Request):
    """Transcribe the audio sent as the raw request body.

    Args:
        request: Incoming request whose body is the binary audio payload.

    Returns:
        A dict of the form ``{"transcription": <text>}``.

    Raises:
        HTTPException: 400 if the request body is empty; 500 (with the
            underlying error message as detail) if reading, decoding or
            inference fails.
    """
    try:
        # Read binary data from the request.
        audio_data = await request.body()
        if not audio_data:
            # Reject obviously unusable input up front instead of surfacing a
            # decoder failure as a server error.
            raise HTTPException(
                status_code=400,
                detail="Request body is empty; expected raw audio bytes.",
            )
        # Decoding and model inference are blocking, CPU-bound calls; run them
        # in the default executor so the event loop is not stalled meanwhile.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(None, _transcribe_bytes, audio_data)
        # Print the transcription.
        print(text)
        return {"transcription": text}
    except HTTPException:
        # Preserve deliberate HTTP errors (such as the 400 above) unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e