pollitoconpapass committed
Commit 528f18a · 1 Parent(s): 6505ee1

Add application file

Files changed (4):
  1. Dockerfile +34 -0
  2. README.md +2 -2
  3. app.py +85 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,34 @@
+# Use the official Python base image
+FROM python:3.9-slim
+
+# Set environment variables for predictable container behavior
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# Set a working directory in the container
+WORKDIR /app
+
+# Install system dependencies required for SentencePiece
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    libprotobuf-dev \
+    protobuf-compiler \
+    libsentencepiece-dev \
+    libsentencepiece0 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the requirements file
+COPY requirements.txt /app/
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the app source code into the container
+COPY app.py /app/
+
+# Expose the port the app runs on
+EXPOSE 7860
+
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
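Building and serving this image locally should follow the standard Docker workflow; a minimal sketch, assuming an arbitrary image tag (qnia-translator is not part of the commit):

    docker build -t qnia-translator .
    docker run -p 7860:7860 qnia-translator

Port 7860 here matches the EXPOSE instruction and the uvicorn settings above.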
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 title: QnIA Translator API
-emoji: 📊
-colorFrom: gray
+emoji: 🦙🤖
+colorFrom: purple
 colorTo: indigo
 sdk: docker
 pinned: false
app.py ADDED
@@ -0,0 +1,85 @@
+import time
+import uvicorn
+from typing import Dict
+from fastapi import FastAPI
+from transformers import NllbTokenizer, AutoModelForSeq2SeqLM
+
+app = FastAPI()
+
+def fix_tokenizer(tokenizer, new_lang='quz_Latn'):
+    """
+    Add a new language token to the tokenizer vocabulary and update language mappings.
+    """
+    # First ensure we're working with an NLLB tokenizer
+    if not hasattr(tokenizer, 'sp_model'):
+        raise ValueError("This function expects an NLLB tokenizer")
+
+    # Add the new language token if it's not already present
+    if new_lang not in tokenizer.additional_special_tokens:
+        tokenizer.add_special_tokens({
+            'additional_special_tokens': [new_lang]
+        })
+
+    # Initialize lang_code_to_id if it doesn't exist
+    if not hasattr(tokenizer, 'lang_code_to_id'):
+        tokenizer.lang_code_to_id = {}
+
+    # Add the new language to the lang_code_to_id mapping
+    if new_lang not in tokenizer.lang_code_to_id:
+        # Get the ID for the new language token
+        new_lang_id = tokenizer.convert_tokens_to_ids(new_lang)
+        tokenizer.lang_code_to_id[new_lang] = new_lang_id
+
+    # Initialize id_to_lang_code if it doesn't exist
+    if not hasattr(tokenizer, 'id_to_lang_code'):
+        tokenizer.id_to_lang_code = {}
+
+    # Update the reverse mapping
+    tokenizer.id_to_lang_code[tokenizer.lang_code_to_id[new_lang]] = new_lang
+
+    return tokenizer
+
+
+MODEL_URL = "pollitoconpapass/QnIA-translation-model"
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_URL)
+tokenizer = NllbTokenizer.from_pretrained(MODEL_URL)
+fix_tokenizer(tokenizer)
+
+
+# === HEALTH CHECK ===
+@app.get("/_health")
+async def health_check():
+    return {'status': 'ok'}
+
+
+# === TRANSLATION ===
+@app.post("/qnia-translate")
+async def translate(data: Dict, a=32, b=3, max_input_length=1024, num_beams=4):
+    start = time.time()
+
+    text = data['text']
+    src_lang = data['src_lang']
+    tgt_lang = data['tgt_lang']
+
+    tokenizer.src_lang = src_lang
+    tokenizer.tgt_lang = tgt_lang
+    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length)
+
+    result = model.generate(
+        **inputs.to(model.device),
+        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
+        max_new_tokens=int(a + b * inputs.input_ids.shape[1]),
+        num_beams=num_beams,
+    )
+
+    translation = tokenizer.batch_decode(result, skip_special_tokens=True)
+    translation = translation[0]
+
+    end = time.time()
+    print(f"\nTime: {end - start}")
+
+    return {'translation': translation}
+
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
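For reference, a client call against the /qnia-translate endpoint might look like the sketch below. The payload keys mirror what translate() reads (text, src_lang, tgt_lang); the example sentence and the eng_Latn source code are illustrative assumptions, and requests is not among this commit's pinned dependencies:

    import requests

    # Hypothetical client; assumes the server from app.py is running on localhost:7860.
    resp = requests.post(
        "http://localhost:7860/qnia-translate",
        json={
            "text": "Hello, how are you?",  # illustrative input, not from the commit
            "src_lang": "eng_Latn",         # NLLB code for English (assumed source language)
            "tgt_lang": "quz_Latn",         # the Quechua code registered by fix_tokenizer
        },
    )
    print(resp.json()["translation"])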
requirements.txt ADDED
@@ -0,0 +1,5 @@
+fastapi==0.115.6
+sentencepiece==0.2.0
+torch==2.4.0
+transformers==4.46.1
+uvicorn==0.34.0