pollitoconpapass committed on
Commit
e9fa8d8
1 Parent(s): 806388a

Add application file

Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +27 -0
  3. app.py +42 -0
  4. functions.py +38 -0
  5. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__
+ .env
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ # Use the slim image to avoid issues with C dependencies
+ FROM python:3.12-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Copy only requirements.txt first so the dependency install layer is cached
+ COPY requirements.txt .
+
+ # Install system dependencies for compatibility with Milvus and Transformers
+ RUN apt-get update && apt-get install -y \
+     gcc \
+     libpq-dev \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade pip \
+     && pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the app code
+ COPY . .
+
+ EXPOSE 7860
+
+ # Define the API startup command
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,42 @@
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+ from fastapi import FastAPI, HTTPException, Depends
+ from functions import retrieve_similar_sentence
+
+ app = FastAPI()
+ security = HTTPBearer()
+
+ @app.post("/translate")
+ def translate_sentence(data: dict, token: HTTPAuthorizationCredentials = Depends(security)):
+     try:
+         api_key = token.credentials
+         # .get() lets missing keys reach the 400 check below instead of raising KeyError
+         sentence = data.get("sentence")
+         source_language = data.get("source_language")
+         if not sentence or not source_language:
+             raise HTTPException(status_code=400, detail="Missing data in the request body")
+
+         results = retrieve_similar_sentence(sentence, source_language, api_key)
+         return {
+             "top_match": {
+                 "source_sentence": results[0]["source_sentence"],
+                 "target_sentence": results[0]["target_sentence"]
+             },
+             "2nd_match": {
+                 "source_sentence": results[1]["source_sentence"],
+                 "target_sentence": results[1]["target_sentence"]
+             },
+             "3rd_match": {
+                 "source_sentence": results[2]["source_sentence"],
+                 "target_sentence": results[2]["target_sentence"]
+             },
+             "4th_match": {
+                 "source_sentence": results[3]["source_sentence"],
+                 "target_sentence": results[3]["target_sentence"]
+             }
+         }
+
+     except HTTPException:
+         # Re-raise explicit HTTP errors (like the 400 above) instead of converting them to 500
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
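A minimal client sketch for the endpoint above, assuming the API runs locally on port 7860; the URL and the API-key placeholder are illustrative, and the bearer token is expected to carry the caller's Pinecone API key:

    import requests

    # Hypothetical values: adjust the host and supply a real Pinecone API key
    API_URL = "http://localhost:7860/translate"
    PINECONE_API_KEY = "YOUR_PINECONE_API_KEY"

    response = requests.post(
        API_URL,
        json={"sentence": "Hola, ¿cómo estás?", "source_language": "es"},
        headers={"Authorization": f"Bearer {PINECONE_API_KEY}"},
    )
    response.raise_for_status()

    # The response holds the four nearest matches, keyed
    # "top_match", "2nd_match", "3rd_match" and "4th_match"
    print(response.json()["top_match"])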
functions.py ADDED
@@ -0,0 +1,38 @@
+ import os
+ from dotenv import load_dotenv
+ from pinecone import Pinecone
+ from sentence_transformers import SentenceTransformer
+
+ load_dotenv()
+
+ # === LOAD GENERAL DATA ===
+ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+ model = SentenceTransformer(EMBEDDING_MODEL)
+
+
+ # === RETRIEVAL FUNCTION FROM PINECONE ===
+ def retrieve_similar_sentence(query_sentence, source_language, api_key):
+     pc = Pinecone(api_key=api_key)
+     index_name = "spa-quz-translation-index" if source_language == "es" else "quz-spa-translation-index"
+     index = pc.Index(index_name)
+
+     query_embedding = model.encode(query_sentence).tolist()
+
+     response = index.query(
+         vector=query_embedding,
+         top_k=4,
+         include_metadata=True
+     )
+
+     results = []
+     for match in response["matches"]:
+         metadata = match["metadata"]
+         score = match["score"]
+
+         results.append({
+             "source_sentence": metadata["source_sentence"],
+             "target_sentence": metadata["target_sentence"],
+             "score": score
+         })
+
+     return results
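retrieve_similar_sentence assumes each Pinecone index already contains all-MiniLM-L6-v2 embeddings (384 dimensions) whose metadata carries source_sentence and target_sentence fields. A sketch of how such an index could be populated; the sentence pairs and id scheme are illustrative, not part of this commit:

    import os
    from pinecone import Pinecone
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
    index = pc.Index("spa-quz-translation-index")

    # Illustrative Spanish -> Quechua pairs; real data would come from a parallel corpus
    pairs = [
        ("Buenos días", "Allin p'unchay"),
        ("Gracias", "Sulpayki"),
    ]

    # Each vector stores the source-side embedding plus both sentences as metadata,
    # matching the fields retrieve_similar_sentence reads back
    index.upsert(vectors=[
        {
            "id": f"pair-{i}",
            "values": model.encode(source).tolist(),
            "metadata": {"source_sentence": source, "target_sentence": target},
        }
        for i, (source, target) in enumerate(pairs)
    ])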
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi==0.95.1
+ pymilvus==2.4.8
+ sentence_transformers==2.3.1
+ uvicorn==0.22.0
+ pinecone==5.3.1
+ python-dotenv==1.0.1