Commit 979c7a7 · 0 parent(s)

Initial commit
Files changed (6):
  1. Dockerfile +30 -0
  2. README.md +40 -0
  3. Spacefile +4 -0
  4. app.py +55 -0
  5. create_space.py +16 -0
  6. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ # Install build tools, wget and git
+ RUN apt-get update && \
+     apt-get install -y build-essential wget git && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Clone and install fastText v0.9.2 (stable release)
+ RUN git clone --branch v0.9.2 https://github.com/facebookresearch/fastText.git && \
+     cd fastText && \
+     pip install .
+
+ # Download the language identification model (v1.0)
+ # Model details: https://fasttext.cc/docs/en/language-identification.html
+ RUN wget https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
+
+ # Copy requirements and install dependencies
+ COPY requirements.txt .
+ RUN pip install -r requirements.txt
+
+ # Copy application code
+ COPY app.py .
+
+ # Expose port
+ EXPOSE 8000
+
+ # Run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
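
To sanity-check the image after these steps (the compiled fastText install and the model download), a quick check can be run inside the built container. This is a minimal sketch under the assumptions above; the script is illustrative and is not part of this commit or the image:

```python
# Hypothetical smoke test, run inside the built container (the script itself
# is not copied into the image by this Dockerfile). Verifies that fastText
# imports and that the downloaded model loads and predicts.
import os
import fasttext

# The Dockerfile downloads the model into /app, which is the WORKDIR.
assert os.path.exists("/app/lid.176.bin"), "lid.176.bin is missing"

model = fasttext.load_model("/app/lid.176.bin")
labels, probs = model.predict("hello world", k=1)
print(labels[0], float(probs[0]))  # expected: "__label__en" with a high probability
```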
README.md ADDED
@@ -0,0 +1,40 @@
+ # Language Detection API
+
+ This is a FastAPI application that provides language detection capabilities using Facebook's FastText model.
+
+ ## Features
+
+ - Language detection for 176 different languages
+ - High accuracy using FastText's pre-trained model (lid.176.bin)
+ - Simple REST API interface
+ - Docker containerized
+
+ ## API Endpoints
+
+ ### GET /
+ Health check endpoint that confirms the API is running.
+
+ ### POST /detect
+ Detects the language of the provided text.
+
+ Request body:
+ ```json
+ {
+     "text": "Your text here"
+ }
+ ```
+
+ Response:
+ ```json
+ {
+     "language": "en",
+     "confidence": 0.976
+ }
+ ```
+
+ ## Technical Details
+
+ - Built with FastAPI and Python 3.9
+ - Uses FastText v0.9.2
+ - Containerized with Docker
+ - Hosted on Hugging Face Spaces
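
For reference, the documented /detect contract can be exercised with any HTTP client once the container is running. A minimal sketch, assuming the service is reachable on localhost:8000 and that the `requests` package is available on the client side (it is not listed in requirements.txt, since the server does not need it):

```python
# Client-side sketch: assumes the API container is running on localhost:8000
# and that `requests` is installed in the client environment.
import requests

resp = requests.post(
    "http://localhost:8000/detect",
    json={"text": "Bonjour tout le monde"},
    timeout=10,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"language": "fr", "confidence": 0.98}
```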
Spacefile ADDED
@@ -0,0 +1,4 @@
+ # Spacefile Docs: https://huggingface.co/docs/hub/spaces-config-reference
+ title: Language Detection API
+ sdk: docker
+ port: 8000
app.py ADDED
@@ -0,0 +1,55 @@
+ import fasttext
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import numpy as np
+
+ app = FastAPI(
+     title="Language Detection API",
+     description="Language detection API using FastText v0.9.2 and lid.176.bin model",
+     version="1.0.0"
+ )
+
+ # Load the language identification model
+ # Model: lid.176.bin (v1.0)
+ # - Trained on Wikipedia, Tatoeba and SETimes
+ # - Supports 176 languages
+ # - Uses character n-grams (minn=3, maxn=6 by default)
+ # - Vector dimension: 16
+ model = fasttext.load_model("/app/lid.176.bin")
+
+ # Monkey patch fastText's predict method to use np.asarray
+ # This is needed because FastText's native predict method returns a tuple of lists,
+ # but we need numpy arrays for better performance and compatibility
+ original_predict = model.predict
+ def safe_predict(text, k=-1, threshold=0.0):
+     labels, probs = original_predict(text, k, threshold)
+     return np.asarray(labels), np.asarray(probs)
+ model.predict = safe_predict
+
+ class TextRequest(BaseModel):
+     text: str
+
+ class PredictionResponse(BaseModel):
+     language: str
+     confidence: float
+
+ @app.post("/detect", response_model=PredictionResponse)
+ async def detect_language(request: TextRequest):
+     try:
+         # Get prediction
+         predictions = model.predict(request.text)
+
+         # Extract language and confidence
+         language = predictions[0][0].replace("__label__", "")
+         confidence = float(predictions[1][0])
+
+         return PredictionResponse(
+             language=language,
+             confidence=confidence
+         )
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/")
+ async def root():
+     return {"message": "Language Detection API is running. Use /docs for the API documentation."}
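
The two endpoints can also be exercised in-process with FastAPI's TestClient, without starting uvicorn. A minimal sketch, assuming it runs where /app/lid.176.bin exists (for example inside the container, since importing app loads the model at import time) and that `httpx` is installed, which TestClient needs and which is not in requirements.txt:

```python
# In-process test sketch. Assumptions: the model is present at /app/lid.176.bin
# (app.py loads it at import time) and httpx is available for TestClient.
from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

# Health check endpoint
assert client.get("/").status_code == 200

# Detection endpoint returns the documented {"language", "confidence"} shape
resp = client.post("/detect", json={"text": "Hola, ¿cómo estás?"})
print(resp.status_code, resp.json())
```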
create_space.py ADDED
@@ -0,0 +1,16 @@
+ from huggingface_hub import HfApi
+
+ # Initialize the Hugging Face API client
+ api = HfApi()
+
+ # Create a new Space
+ space_name = "language-detection-api"
+ api.create_repo(
+     repo_id=space_name,
+     repo_type="space",
+     space_sdk="docker",
+     private=False
+ )
+
+ username = api.whoami()["name"]  # the authenticated account that owns the new Space
+ print(f"Space created successfully: https://huggingface.co/spaces/{username}/{space_name}")
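
HfApi() with no arguments relies on credentials already configured in the environment (for example via `huggingface-cli login`). A minimal sketch of passing a token explicitly and confirming the account that will own the Space; the HF_TOKEN variable name is an assumption, not something this commit defines:

```python
# Sketch: authenticate explicitly before creating the Space.
# HF_TOKEN is an assumed environment variable name, not defined by this commit.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
print(api.whoami()["name"])  # the account under which the Space will be created
```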
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi==0.104.1
+ uvicorn==0.24.0
+ python-multipart==0.0.6
+ numpy==1.24.3
+ scipy==1.10.1