from fastapi import FastAPI, Request, HTTPException
from transformers import (
    GenerationConfig,
    WhisperForConditionalGeneration,
    WhisperProcessor,
    pipeline,
)
import io
import librosa
import torch

app = FastAPI()

# Device configuration: run on GPU when one is available, otherwise CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the fine-tuned model and its processor
model_id = "WajeehAzeemX/whisper-smal-ar-testing-kale-5000"
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
processor = WhisperProcessor.from_pretrained(model_id)

# Force decoding to Arabic transcription
forced_decoder_ids = processor.get_decoder_prompt_ids(language="arabic", task="transcribe")

# The fine-tuned checkpoint may not ship a generation config, so reuse the one
# from the multilingual base model
generation_config = GenerationConfig.from_pretrained("openai/whisper-small")
model.generation_config = generation_config

# An ASR pipeline assembled from the same components; note that the endpoint
# below calls model.generate directly rather than going through it
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
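
# A minimal usage sketch for the pipeline above (not exercised by the endpoint):
# it can transcribe an audio file directly, assuming ffmpeg is available for
# decoding and "sample.wav" is a hypothetical local file:
#   result = pipe("sample.wav")
#   print(result["text"])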

@app.post("/transcribe/")
async def transcribe_audio(request: Request):
    try:
        # Read binary audio data from the request body
        audio_data = await request.body()

        # Convert the binary data to a file-like object
        audio_file = io.BytesIO(audio_data)

        # Decode the audio with librosa, resampling to Whisper's expected 16 kHz
        audio_array, sampling_rate = librosa.load(audio_file, sr=16000)

        # Extract log-mel input features and move them to the model's device
        input_features = processor(
            audio_array, sampling_rate=sampling_rate, return_tensors="pt"
        ).input_features.to(device)

        # Generate token ids
        predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)

        # Decode token ids to text
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

        # Log the transcription for debugging
        print(transcription[0])

        return {"transcription": transcription[0]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
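
# A minimal run/call sketch, assuming uvicorn is installed and this file is
# saved as app.py (both assumptions, not stated in the original):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request (hypothetical audio file name):
#   curl -X POST --data-binary @sample.wav http://localhost:8000/transcribe/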