|
from fastapi import FastAPI, UploadFile, File |
|
from fastapi.responses import JSONResponse |
|
from pathlib import Path |
|
import os |
|
from gector import GecBERTModel |
|
from faster_whisper import WhisperModel, BatchedInferencePipeline |
|
from transformers.models.whisper.english_normalizer import BasicTextNormalizer |
|
from text_processing.inverse_normalize import InverseNormalizer |
|
import shutil |
|
import uvicorn |
|
|
|
|
|
# ASGI application object that the route decorators attach to.
app = FastAPI()

# Directory that contains this file, as a POSIX-style string.
# The GECToR vocabulary and weights are resolved relative to it.
current_dir = Path(__file__).parent.as_posix()

# Inverse text normalizer constructed with the 'vi' language code.
inverse_normalizer = InverseNormalizer('vi')

# Speech recognition model plus the batched pipeline wrapped around it.
# NOTE(review): "pho_distill_q8" is not joined with current_dir, so if it is
# a local directory it is resolved against the working directory — confirm.
whisper_model = WhisperModel(
    "pho_distill_q8",
    device="cuda",
    compute_type="auto",
)
batched_model = BatchedInferencePipeline(
    model=whisper_model,
    use_vad_model=True,
    chunk_length=20,
)

# Correction model applied to the normalized transcriptions.
gector_model = GecBERTModel(
    vocab_path=os.path.join(current_dir, "gector/vocabulary"),
    model_paths=[os.path.join(current_dir, "gector/Model_GECTOR")],
    split_chunk=True,
)

# Basic text normalizer applied to each segment before inverse normalization.
normalizer = BasicTextNormalizer()
|
|
|
@app.post("/transcriptions")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the corrected text.

    The upload is written to a temporary file and transcribed with the
    batched Whisper pipeline (language "vi", batch size 16). Each segment's
    text is passed through ``normalizer`` and then
    ``inverse_normalizer.inverse_normalize``, and the resulting list is run
    through ``gector_model``.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        A JSON response ``{"text": ...}`` where the value is the corrected
        segments joined by single spaces (an empty string when no segments
        were produced).
    """
    # Function-scope import: the module's import block does not provide it.
    import tempfile

    # The client-supplied filename is untrusted, so it is never used as a
    # path. Only a purely alphanumeric extension is kept as the temp suffix.
    suffix = Path(file.filename or "").suffix
    if not suffix[1:].isalnum():
        suffix = ""

    # mkstemp yields a unique path, so concurrent uploads that share a
    # filename cannot overwrite each other's data.
    fd, temp_name = tempfile.mkstemp(prefix="temp_", suffix=suffix)
    temp_file_path = Path(temp_name)
    try:
        with os.fdopen(fd, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # NOTE(review): this handler is `async` but everything below is
        # synchronous, so it runs on the event loop thread — confirm that
        # serialising requests this way is intended.
        segments, _info = batched_model.transcribe(
            str(temp_file_path), language="vi", batch_size=16
        )
        # Consume the segments before the file is removed so this does not
        # depend on when the pipeline actually reads the audio.
        transcriptions = [segment.text for segment in segments]
    finally:
        # Always clean up, including when copying or transcription fails.
        os.remove(temp_file_path)

    normalized_transcriptions = [
        inverse_normalizer.inverse_normalize(normalizer(text))
        for text in transcriptions
    ]
    if not normalized_transcriptions:
        # Nothing was transcribed; skip the correction model entirely.
        return JSONResponse({"text": ""})

    corrected_texts = gector_model(normalized_transcriptions)
    return JSONResponse({"text": ' '.join(corrected_texts)})
|
|
|
|
|
if __name__ == "__main__":
    # Pass the application object rather than the "api:app" import string.
    # With the string, uvicorn imports a module named `api` in addition to
    # this already-executing `__main__` module, which runs the module-level
    # model loading a second time and breaks if the file is not api.py.
    uvicorn.run(app, host="0.0.0.0", port=8000)