import fasttext
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np

# Location of the pre-trained language identification model on disk.
MODEL_PATH = "/app/lid.176.bin"

# Prefix that fastText puts in front of every predicted label.
LABEL_PREFIX = "__label__"

app = FastAPI(
    title="Language Detection API",
    description="Language detection API using FastText v0.9.2 and lid.176.bin model",
    version="1.0.0"
)

# Load the language identification model
# Model: lid.176.bin (v1.0)
# - Trained on Wikipedia, Tatoeba and SETimes
# - Supports 176 languages
# - Uses character n-grams (minn=3, maxn=6 by default)
# - Vector dimension: 16
model = fasttext.load_model(MODEL_PATH)

# Monkey patch the model's predict method so that both returned sequences
# (labels and probabilities) are NumPy arrays.
# NOTE(review): if the intent is compatibility with NumPy 2.x, be aware that
# this wrapper still delegates to the original predict, so any array
# conversion done inside fastText itself is unaffected -- confirm against the
# installed fastText / NumPy versions.
original_predict = model.predict


def safe_predict(text, k=-1, threshold=0.0):
    """Run the original ``predict`` and return its results as NumPy arrays.

    Args:
        text: Input passed through unchanged to the original ``predict``.
        k: Number of labels to return; ``-1`` (the default here) asks for all.
        threshold: Minimum probability passed through to the original method.

    Returns:
        A ``(labels, probabilities)`` tuple, each converted with ``np.asarray``.
    """
    labels, probs = original_predict(text, k, threshold)
    return np.asarray(labels), np.asarray(probs)


model.predict = safe_predict


class TextRequest(BaseModel):
    # Text whose language should be detected.
    text: str


class PredictionResponse(BaseModel):
    # Predicted label with the "__label__" prefix removed.
    language: str
    # Probability reported by the model for that label.
    confidence: float


@app.post("/detect", response_model=PredictionResponse)
async def detect_language(request: TextRequest) -> PredictionResponse:
    """Detect the most likely language of the submitted text."""
    # fastText predicts one line at a time and rejects embedded newlines, so
    # fold multi-line input into a single line before predicting.
    text = request.text.replace("\r", " ").replace("\n", " ")

    try:
        # Only the best label is used, so ask for k=1 instead of all labels.
        labels, probabilities = model.predict(text, k=1)
    except Exception as e:
        # Boundary handler: report model failures as a server error.
        raise HTTPException(status_code=500, detail=str(e)) from e

    # The model may return no label at all; report that explicitly instead of
    # failing with an IndexError below.
    if len(labels) == 0 or len(probabilities) == 0:
        raise HTTPException(
            status_code=422,
            detail="Could not detect a language for the given text.",
        )

    return PredictionResponse(
        language=str(labels[0]).replace(LABEL_PREFIX, ""),
        confidence=float(probabilities[0]),
    )


@app.get("/")
async def root():
    """Return a short status message pointing at the API documentation."""
    return {"message": "Language Detection API is running. Use /docs for the API documentation."}