Spaces:

samdo20
/

language-detection-api

Runtime error

App Files Files Community

Nanny7 committed on Jan 31

Commit

979c7a7

0 Parent(s):

Initial commit

Browse files

Files changed (6) hide show

Dockerfile +30 -0
README.md +40 -0
Spacefile +4 -0
app.py +55 -0
create_space.py +16 -0
requirements.txt +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,30 @@

+FROM python:3.9-slim
+WORKDIR /app
+# Install the C/C++ toolchain (needed to build fastText) plus wget and git
+RUN apt-get update && \
+    apt-get install -y build-essential wget git && \
+    rm -rf /var/lib/apt/lists/*
+# Clone and install fastText v0.9.2 (stable release).
+# A shallow clone is enough to build the tagged release; the pip cache is
+# disabled and the source tree is deleted in the same layer so neither ends
+# up in the final image.
+RUN git clone --depth 1 --branch v0.9.2 https://github.com/facebookresearch/fastText.git && \
+    cd fastText && \
+    pip install --no-cache-dir . && \
+    cd .. && \
+    rm -rf fastText
+# Download the language identification model (v1.0) into /app,
+# where app.py loads it from /app/lid.176.bin
+# Model details: https://fasttext.cc/docs/en/language-identification.html
+RUN wget https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
+# Copy requirements and install dependencies (no pip cache kept in the image)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY app.py .
+# Expose port
+EXPOSE 8000
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

README.md ADDED Viewed

	@@ -0,0 +1,40 @@

+# Language Detection API
+This is a FastAPI application that provides language detection capabilities using Facebook's FastText model.
+## Features
+- Language detection for 176 different languages
+- High accuracy using FastText's pre-trained model (lid.176.bin)
+- Simple REST API interface
+- Docker containerized
+## API Endpoints
+### GET /
+Health check endpoint that confirms the API is running.
+### POST /detect
+Detects the language of the provided text.
+Request body:
+```json
+{
+    "text": "Your text here"
+}
+```
+Response:
+```json
+{
+    "language": "en",
+    "confidence": 0.976
+}
+```
+## Technical Details
+- Built with FastAPI and Python 3.9
+- Uses FastText v0.9.2
+- Containerized with Docker
+- Hosted on Hugging Face Spaces

Spacefile ADDED Viewed

	@@ -0,0 +1,4 @@

+# Spacefile Docs: https://huggingface.co/docs/hub/spaces-config-reference
+# NOTE(review): the reference linked above describes Space configuration via the
+# YAML block at the top of README.md (where the port key is `app_port`) — confirm
+# that this separate file is actually read by Hugging Face Spaces.
+title: Language Detection API
+sdk: docker
+port: 8000

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import fasttext
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import numpy as np
+# FastAPI application instance; the route decorators below register on it and
+# the Dockerfile's uvicorn command serves it as "app:app".
+app = FastAPI(
+    title="Language Detection API",
+    description="Language detection API using FastText v0.9.2 and lid.176.bin model",
+    version="1.0.0"
+)
+# Load the language identification model once at import time.
+# The path matches where the Dockerfile downloads the file (WORKDIR /app).
+# Model: lid.176.bin (v1.0)
+# - Trained on Wikipedia, Tatoeba and SETimes
+# - Supports 176 languages
+# NOTE(review): the original notes also stated "character n-grams (minn=3, maxn=6
+# by default)" and "vector dimension: 16"; neither can be verified from this file —
+# confirm against the model's own arguments before relying on them.
+model = fasttext.load_model("/app/lid.176.bin")
+# Monkey patch: replace model.predict with a wrapper that passes both returned
+# sequences (labels, probabilities) through np.asarray.
+# The wrapper still delegates to the original predict, so any error raised
+# inside fastText's own predict is NOT avoided by this patch.
+# NOTE(review): the wrapper's default k=-1 presumably asks fastText for every
+# label rather than only the best one — confirm against fastText's predict docs.
+original_predict = model.predict
+def safe_predict(text, k=-1, threshold=0.0):
+    labels, probs = original_predict(text, k, threshold)
+    return np.asarray(labels), np.asarray(probs)
+model.predict = safe_predict
+# Request body accepted by POST /detect.
+class TextRequest(BaseModel):
+    # Text whose language should be detected
+    text: str
+# Response body returned by POST /detect.
+class PredictionResponse(BaseModel):
+    # Top predicted label with its "__label__" prefix removed
+    language: str
+    # Score the model reported for that label, converted to float
+    confidence: float
+@app.post("/detect", response_model=PredictionResponse)
+async def detect_language(request: TextRequest):
+    try:
+        # Get prediction
+        predictions = model.predict(request.text)
+        # Extract language and confidence
+        language = predictions[0][0].replace("__label__", "")
+        confidence = float(predictions[1][0])
+        return PredictionResponse(
+            language=language,
+            confidence=confidence
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/")
+async def root():
+    return {"message": "Language Detection API is running. Use /docs for the API documentation."}

create_space.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from huggingface_hub import HfApi
+import os
+# Initialize the Hugging Face API client
+api = HfApi()
+# Create a new Space
+space_name = "language-detection-api"
+api.create_repo(
+    repo_id=space_name,
+    repo_type="space",
+    space_sdk="docker",
+    private=False
+)
+print(f"Space created successfully: https://huggingface.co/spaces/{os.getenv('HUGGING_FACE_HUB_TOKEN').split('/')[0]}/{space_name}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi==0.104.1
+uvicorn==0.24.0
+python-multipart==0.0.6
+numpy==1.24.3
+scipy==1.10.1