cuzco-quechua-rag-api / functions.py
pollitoconpapass's picture
Add application file
e9fa8d8
import os
from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
# Read key/value pairs from a local .env file (if present) into the process
# environment before anything else runs.
load_dotenv()
# === LOAD GENERAL DATA ===
# Identifier of the sentence-transformers model used to embed query sentences.
# NOTE(review): presumably the same model that was used to populate the
# Pinecone indexes queried below -- confirm before changing it.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Instantiated once at import time and reused by retrieve_similar_sentence.
model = SentenceTransformer(EMBEDDING_MODEL)
# === RETRIEVAL FUNCTION FROM PINECONE ===
def retrieve_similar_sentence(
    query_sentence: str,
    source_language: str,
    api_key: str,
    *,
    top_k: int = 4,
) -> list:
    """Return the stored sentence pairs most similar to *query_sentence*.

    The query is embedded with the module-level ``model`` and sent to one of
    two Pinecone indexes, chosen by *source_language*.

    Args:
        query_sentence: Sentence to embed and search for.
        source_language: ``"es"`` selects ``"spa-quz-translation-index"``;
            any other value selects ``"quz-spa-translation-index"``.
        api_key: Pinecone API key used to build the client for this call.
        top_k: Maximum number of matches requested from the index.
            Defaults to 4, the value that was previously hard-coded.

    Returns:
        A list with one dict per match, in the order the index returned
        them, each holding ``"source_sentence"``, ``"target_sentence"``
        and ``"score"``.

    Note:
        Every match's metadata is expected to contain both
        ``"source_sentence"`` and ``"target_sentence"``; a missing key
        propagates as a lookup error rather than being skipped.
    """
    pc = Pinecone(api_key=api_key)

    # Only "es" routes to the Spanish-source index; everything else falls
    # through to the Quechua-source index.
    index_name = (
        "spa-quz-translation-index"
        if source_language == "es"
        else "quz-spa-translation-index"
    )
    index = pc.Index(index_name)

    # The encoder's output is converted with .tolist() so the query receives
    # a plain Python list.
    query_embedding = model.encode(query_sentence).tolist()
    response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "source_sentence": match["metadata"]["source_sentence"],
            "target_sentence": match["metadata"]["target_sentence"],
            "score": match["score"],
        }
        for match in response["matches"]
    ]