File size: 1,052 Bytes
e9fa8d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import os
from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
# Load variables from a local .env file into the process environment
# (python-dotenv).
# NOTE(review): nothing visible in this file reads those variables directly;
# presumably callers use them to obtain the Pinecone API key -- confirm.
load_dotenv()
# === LOAD GENERAL DATA ===
# Identifier of the sentence-transformers model used to embed query sentences.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Instantiated once, at import time, and reused by every retrieval call below.
# NOTE(review): the Pinecone indexes are presumably populated with embeddings
# from this same model; verify before changing EMBEDDING_MODEL.
model = SentenceTransformer(EMBEDDING_MODEL)
# === RETRIEVAL FUNCTION FROM PINECONE ===
def retrieve_similar_sentence(query_sentence, source_language, api_key, *, top_k=4):
    """Return the stored translation pairs closest to ``query_sentence``.

    The sentence is embedded with the module-level ``model`` and the
    resulting vector is used to query one of two Pinecone indexes.

    Args:
        query_sentence: Text to embed and search for.
        source_language: Selects the index. ``"es"`` queries
            ``"spa-quz-translation-index"``; any other value queries
            ``"quz-spa-translation-index"``.
        api_key: Pinecone API key used to build the client for this call.
        top_k: Maximum number of matches to request from the index.
            Keyword-only; defaults to 4, the value that was previously
            hard-coded.

    Returns:
        A list of dicts, one per match in the order the index returned
        them, each with the keys ``"source_sentence"``,
        ``"target_sentence"`` (both read from the match metadata) and
        ``"score"`` (the score reported for that match).

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
        KeyError: If a match's metadata lacks ``"source_sentence"`` or
            ``"target_sentence"``.
    """
    # Fail fast on a nonsensical request instead of paying for an embedding
    # and a network round trip first.
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k!r}")

    pc = Pinecone(api_key=api_key)

    # Only "es" selects the Spanish-source index; every other value falls
    # through to the Quechua-source index.
    if source_language == "es":
        index_name = "spa-quz-translation-index"
    else:
        index_name = "quz-spa-translation-index"
    index = pc.Index(index_name)

    # The encoder output is converted to a plain Python list before it is
    # handed to the Pinecone client.
    query_embedding = model.encode(query_sentence).tolist()
    response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "source_sentence": match["metadata"]["source_sentence"],
            "target_sentence": match["metadata"]["target_sentence"],
            "score": match["score"],
        }
        for match in response["matches"]
    ]
|