File size: 1,052 Bytes
e9fa8d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer

# Load variables from a local .env file (if one exists) into the process
# environment. This runs before the model is constructed below, so any such
# variables are already set at that point.
load_dotenv()

# === LOAD GENERAL DATA (EMBEDDING MODEL) ===
# The sentence-embedding model is instantiated once, at import time, and the
# same instance is reused by every retrieval call in this module.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL)


# === RETRIEVAL FUNCTION FROM PINECONE ===
def retrieve_similar_sentence(query_sentence, source_language, api_key, top_k=4):
    """Return the stored sentence pairs most similar to ``query_sentence``.

    The query is embedded with the module-level ``model`` and sent to one of
    two Pinecone indexes, chosen by ``source_language``.

    Args:
        query_sentence: Sentence to embed and search for.
        source_language: ``"es"`` selects ``spa-quz-translation-index``;
            any other value selects ``quz-spa-translation-index``.
        api_key: Pinecone API key used to create the client for this call.
        top_k: Maximum number of matches to request from the index.
            Defaults to 4, the value that was previously hard-coded.

    Returns:
        A list of dicts, one per match and in the order the query returned
        them, each with the keys ``"source_sentence"``,
        ``"target_sentence"`` and ``"score"``.

    Raises:
        KeyError: If a match carries no ``source_sentence`` or
            ``target_sentence`` metadata field (assuming dict-like matches).
    """
    pc = Pinecone(api_key=api_key)
    index_name = (
        "spa-quz-translation-index"
        if source_language == "es"
        else "quz-spa-translation-index"
    )
    index = pc.Index(index_name)

    # Pinecone expects a plain list of floats, hence the .tolist() conversion.
    query_embedding = model.encode(query_sentence).tolist()

    response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )

    # Keep only the fields callers consume, dropping the rest of each match.
    return [
        {
            "source_sentence": match["metadata"]["source_sentence"],
            "target_sentence": match["metadata"]["target_sentence"],
            "score": match["score"],
        }
        for match in response["matches"]
    ]