|
import os |
|
from dotenv import load_dotenv |
|
from pinecone import Pinecone |
|
from sentence_transformers import SentenceTransformer |
|
|
|
# Load variables from a local .env file (if any) into the process environment
# before anything else at module level runs.
load_dotenv()

# Name of the sentence-transformers checkpoint used to embed query sentences.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Instantiated once at import time so every retrieval call reuses the same
# encoder instead of reloading it per query.
model = SentenceTransformer(EMBEDDING_MODEL)
|
|
|
|
|
|
|
def retrieve_similar_sentence(query_sentence, source_language, api_key, *, top_k=4):
    """Return the stored sentence pairs most similar to *query_sentence*.

    The query is embedded with the module-level ``model`` and used to search
    one of two Pinecone indexes, chosen by ``source_language``.

    Args:
        query_sentence: Text to embed and search for.
        source_language: ``"es"`` selects ``spa-quz-translation-index``; any
            other value selects ``quz-spa-translation-index``.
        api_key: Pinecone API key used to create the client.
        top_k: Maximum number of matches to request from the index.
            Keyword-only; defaults to 4, the previously hard-coded value.

    Returns:
        A list of dicts, one per match in the order returned by the index,
        each with the keys ``"source_sentence"``, ``"target_sentence"`` and
        ``"score"``.

    Raises:
        KeyError: If a match's metadata lacks ``"source_sentence"`` or
            ``"target_sentence"``.
    """
    pc = Pinecone(api_key=api_key)

    # Anything that is not explicitly "es" falls through to the quz->spa index.
    index_name = (
        "spa-quz-translation-index"
        if source_language == "es"
        else "quz-spa-translation-index"
    )
    index = pc.Index(index_name)

    # Convert the encoder output to a plain list before handing it to the
    # Pinecone client.
    query_embedding = model.encode(query_sentence).tolist()

    response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

    return [
        {
            "source_sentence": match["metadata"]["source_sentence"],
            "target_sentence": match["metadata"]["target_sentence"],
            "score": match["score"],
        }
        for match in response["matches"]
    ]
|
|