Spaces:
Sleeping
Sleeping
File size: 3,290 Bytes
5ecde30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import os
import time

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
# Wall-clock reference; every progress message below reports seconds elapsed
# since this point.
start_time = time.time()

# Load the prebuilt FAISS index from disk.
index_path = "embeddings/multilingual-e5-small_vector_db.index"
# Alternative index (all-MiniLM-L6-v2 embeddings):
#index_path = "embeddings/all-MiniLM-L6-v2_vector_db.index"
try:
    index = faiss.read_index(index_path)
except Exception as e:
    print(f"Error loading FAISS index: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    # The search step needs `index`; continuing would only fail later with an
    # unrelated NameError, so surface the real error here.
    raise
else:
    print(f"FAISS index loaded successfully from {index_path} - Time passed: {time.time() - start_time:.2f} seconds")
# Load the sentence-embedding model used to encode queries.
# local_files_only=True is passed through to SentenceTransformer so the model
# is expected to be available locally already.
try:
    model = SentenceTransformer('intfloat/multilingual-e5-small', local_files_only=True)
    # model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
except Exception as e:
    print(f"Error loading model: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    # Encoding is impossible without the model; re-raise instead of letting
    # the script stumble into a NameError on `model` further down.
    raise
else:
    print(f"Model loaded successfully - Time passed: {time.time() - start_time:.2f} seconds")
# Example query to classify. Kept as a list so the result can be indexed per
# query (the lookup below reads row 0).
new_text = ["Cat am de plata"]
print(f'The text is: {new_text} - Time passed: {time.time() - start_time:.2f} seconds')

# Generate embeddings for the query text.
# NOTE(review): the multilingual-e5 model card recommends prefixing inputs
# with "query: " / "passage: ". Left unchanged here because the prefix must
# match whatever was used when the index was built - confirm against the
# indexing script.
try:
    new_embeddings = model.encode(new_text)
except Exception as e:
    print(f"Error generating embeddings: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    # Nothing to search with; stop with the real cause.
    raise
else:
    print(f"Generated embeddings for new text: - Time passed: {time.time() - start_time:.2f} seconds")

# FAISS expects float32, C-contiguous input; do the conversion in one copy.
try:
    new_embeddings = np.array(new_embeddings, dtype='float32', order='C')
except Exception as e:
    print(f"Error converting embeddings to float32: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    raise
else:
    print(f"Converted new embeddings to float32: - Time passed: {time.time() - start_time:.2f} seconds")
# Perform the nearest-neighbour search against the loaded index.
k = 5  # Number of nearest neighbors to retrieve
try:
    # D: distances, I: indices - one row per query, k columns each.
    D, I = index.search(new_embeddings, k)
except Exception as e:
    print(f"Error performing similarity search: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    # D and I are required by the reporting step; do not continue without them.
    raise
else:
    print(f"Similarity search results: Indices - {I}, Distances - {D} - Time passed: {time.time() - start_time:.2f} seconds")
# Load the CSV file whose rows are looked up by the indices FAISS returns.
# The default is the original developer-machine path; set INTENTS_CSV_PATH to
# point at the file on any other host without editing the script.
csv_file_path = os.environ.get(
    "INTENTS_CSV_PATH",
    r'C:\Users\serban.tica\Documents\tobi_llm_intent_recognition\data\Pager_Intents_Cleaned.csv',
)
try:
    data = pd.read_csv(csv_file_path)
except Exception as e:
    print(f"Error loading CSV file: {e}")
    # The intent lookup needs `data`; fail here with the real I/O error.
    raise
else:
    print(f"CSV file loaded successfully from {csv_file_path}")
# Map the best match of the first (only) query back to its CSV row and report
# the intent with a confidence score.
intents = data['intent'].tolist()
best_idx = int(I[0][0])

# FAISS fills missing results with label -1, which as a Python list index
# would silently select the LAST row. Reject that, and any index beyond the
# CSV (index and CSV out of sync), explicitly.
if not 0 <= best_idx < len(intents):
    raise IndexError(
        f"FAISS returned row index {best_idx}, but the intents CSV has "
        f"{len(intents)} rows"
    )

intent = intents[best_idx]
distance = D[0][0]
# Squash the distance into a score that is 1.0 at distance 0 and decreases as
# the distance grows.
# NOTE(review): assumes a non-negative distance (e.g. an L2 index). If the
# index uses inner product, D is already a similarity - confirm the metric.
similarity = 1 / (1 + distance)
print(f"Intenția identificată: {intent}")
print(f"Nivel de încredere: {similarity:.4f}- Time passed: {time.time() - start_time:.2f} seconds")