import os

from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer

load_dotenv()

# === LOAD GENERAL DATA ===
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Loaded once at import time so every query reuses the same model instance.
model = SentenceTransformer(EMBEDDING_MODEL)

# Pinecone index queried for each translation direction.
# NOTE(review): the names suggest Spanish ("spa") <-> Quechua ("quz") -- confirm.
_SPA_TO_QUZ_INDEX = "spa-quz-translation-index"
_QUZ_TO_SPA_INDEX = "quz-spa-translation-index"
_DEFAULT_TOP_K = 4


# === RETRIEVAL FUNCTION FROM PINECONE ===
def retrieve_similar_sentence(
    query_sentence, source_language, api_key, *, top_k=_DEFAULT_TOP_K
):
    """Return the stored sentence pairs most similar to *query_sentence*.

    The query is embedded with the module-level ``model`` and sent to a
    Pinecone index chosen from ``source_language``.

    Args:
        query_sentence: Text to embed and search for.
        source_language: ``"es"`` selects ``spa-quz-translation-index``;
            any other value selects ``quz-spa-translation-index``.
        api_key: Pinecone API key used to create the client.
        top_k: Number of nearest matches to request (default 4).

    Returns:
        A list of dicts, one per match in the order Pinecone returned them,
        each with the keys ``"source_sentence"``, ``"target_sentence"`` and
        ``"score"``.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
        KeyError: If a match's metadata lacks ``"source_sentence"`` or
            ``"target_sentence"``.
    """
    if top_k < 1:
        # Fail before any network call rather than sending an invalid query.
        raise ValueError(f"top_k must be >= 1, got {top_k!r}")

    index_name = (
        _SPA_TO_QUZ_INDEX if source_language == "es" else _QUZ_TO_SPA_INDEX
    )
    index = Pinecone(api_key=api_key).Index(index_name)

    # Pinecone expects a plain list of floats, hence the .tolist() conversion.
    query_embedding = model.encode(query_sentence).tolist()
    response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "source_sentence": match["metadata"]["source_sentence"],
            "target_sentence": match["metadata"]["target_sentence"],
            "score": match["score"],
        }
        for match in response["matches"]
    ]