Commit
·
e9fa8d8
1
Parent(s):
806388a
Add application file
Browse files- .gitignore +2 -0
- Dockerfile +27 -0
- app.py +38 -0
- functions.py +38 -0
- requirements.txt +6 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
__pycache__
|
2 |
+
.env
|
Dockerfile
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Usa la imagen python:3.12-slim para evitar problemas con dependencias C
|
2 |
+
FROM python:3.12-slim
|
3 |
+
|
4 |
+
# Configura el directorio de trabajo
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Copia solo requirements.txt para cachear la instalación de dependencias
|
8 |
+
COPY requirements.txt .
|
9 |
+
|
10 |
+
# Instala dependencias del sistema para compatibilidad con Milvus y Transformers
|
11 |
+
RUN apt-get update && apt-get install -y \
|
12 |
+
gcc \
|
13 |
+
libpq-dev \
|
14 |
+
build-essential \
|
15 |
+
&& rm -rf /var/lib/apt/lists/*
|
16 |
+
|
17 |
+
# Instala dependencias de Python
|
18 |
+
RUN pip install --no-cache-dir --upgrade pip \
|
19 |
+
&& pip install --no-cache-dir -r requirements.txt
|
20 |
+
|
21 |
+
# Copia el resto del código de la app
|
22 |
+
COPY . .
|
23 |
+
|
24 |
+
EXPOSE 7860
|
25 |
+
|
26 |
+
# Define el comando de arranque de la API
|
27 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi.security import HTTPBearer
|
2 |
+
from fastapi import FastAPI, HTTPException, Depends
|
3 |
+
from functions import retrieve_similar_sentence
|
4 |
+
|
5 |
+
app = FastAPI()
|
6 |
+
security = HTTPBearer()
|
7 |
+
|
8 |
+
@app.post("/translate")
def translate_sentence(data: dict, token: str = Depends(security)):
    """Return the closest stored sentence pairs for the submitted sentence.

    Args:
        data: Request body. Must contain non-empty ``"sentence"`` and
            ``"source_language"`` entries.
        token: Value produced by the ``security`` dependency. Its
            ``credentials`` attribute is forwarded to
            ``retrieve_similar_sentence`` as the API key.
            NOTE(review): the ``str`` annotation does not match that usage;
            it is kept unchanged so the signature stays as it was.

    Returns:
        A dict keyed by ``"top_match"``, ``"2nd_match"``, ``"3rd_match"`` and
        ``"4th_match"`` (in ranking order), each holding the
        ``"source_sentence"`` and ``"target_sentence"`` of one result. When
        fewer than four results come back, only the labels that have a
        result are present.

    Raises:
        HTTPException: 400 when ``sentence`` or ``source_language`` is
            missing or empty; 500 when retrieval or response building fails.
    """
    # Response labels, in the order results are returned by the retriever.
    match_labels = ("top_match", "2nd_match", "3rd_match", "4th_match")

    # Validate outside the try block so the 400 is not caught by the generic
    # handler below and turned into a 500. ``.get`` makes an absent key a
    # client error instead of a KeyError.
    sentence = data.get("sentence")
    source_language = data.get("source_language")
    if not sentence or not source_language:
        raise HTTPException(status_code=400, detail="Missing data in the request body")

    try:
        api_key = token.credentials
        results = retrieve_similar_sentence(sentence, source_language, api_key)
        # zip() stops at the shorter sequence, so a short result list no
        # longer raises IndexError.
        return {
            label: {
                "source_sentence": match["source_sentence"],
                "target_sentence": match["target_sentence"],
            }
            for label, match in zip(match_labels, results)
        }
    except HTTPException:
        # Preserve any deliberate HTTP error with its own status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
functions.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
from pinecone import Pinecone
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
|
8 |
+
# === LOAD GNRL DATA ===
|
9 |
+
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
10 |
+
model = SentenceTransformer(EMBEDDING_MODEL)
|
11 |
+
|
12 |
+
|
13 |
+
# === RETRIEVAL FUNCTION FROM PINECONE ===
|
14 |
+
def retrieve_similar_sentence(query_sentence, source_language, api_key):
    """Query a Pinecone index for the stored sentences closest to the input.

    Args:
        query_sentence: Text that is encoded with the module-level ``model``
            and used as the query vector.
        source_language: ``"es"`` selects ``spa-quz-translation-index``; any
            other value selects ``quz-spa-translation-index``.
        api_key: Key used to build the Pinecone client for this call.

    Returns:
        A list with one dict per returned match (the query asks for at most
        four), each holding ``"source_sentence"``, ``"target_sentence"`` and
        ``"score"``.
    """
    if source_language == "es":
        target_index = "spa-quz-translation-index"
    else:
        target_index = "quz-spa-translation-index"

    client = Pinecone(api_key=api_key)
    pinecone_index = client.Index(target_index)

    # The index is queried with a plain list of floats, hence .tolist().
    embedding = model.encode(query_sentence).tolist()
    query_result = pinecone_index.query(
        vector=embedding,
        top_k=4,
        include_metadata=True,
    )

    def _as_record(hit):
        # Flatten one match into the dict shape returned to the caller.
        meta, similarity = hit['metadata'], hit['score']
        return {
            "source_sentence": meta["source_sentence"],
            "target_sentence": meta["target_sentence"],
            "score": similarity,
        }

    return [_as_record(hit) for hit in query_result['matches']]
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi==0.95.1
|
2 |
+
pymilvus==2.4.8
|
3 |
+
sentence_transformers==2.3.1
|
4 |
+
uvicorn==0.22.0
|
5 |
+
pinecone==5.3.1
|
6 |
+
pinecone-client==5.0.1
|