Update app.py
app.py CHANGED
@@ -47,13 +47,6 @@ def load_data():
 
 embeddings, patent_numbers, metadata, texts = load_data()
 
-# Normalize embeddings for cosine similarity
-embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
-
-# Create FAISS index for cosine similarity
-index = faiss.IndexFlatIP(embeddings.shape[1])
-index.add(embeddings)
-
 # Load BERT model for encoding search queries
 try:
     bert_model = AutoModel.from_pretrained('anferico/bert-for-patents')
@@ -66,6 +59,20 @@ except Exception as e:
     print("Falling back to a general-purpose model.")
     model = SentenceTransformer('all-MiniLM-L6-v2')
 
+# Check if the embedding dimensions match
+if embeddings.shape[1] != model.get_sentence_embedding_dimension():
+    print("Embedding dimensions do not match. Rebuilding FAISS index.")
+    # Rebuild embeddings using the new model
+    embeddings = np.array([model.encode(text) for text in texts])
+    embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+# Normalize embeddings for cosine similarity
+embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+# Create FAISS index for cosine similarity
+index = faiss.IndexFlatIP(embeddings.shape[1])
+index.add(embeddings)
+
 # Create TF-IDF vectorizer
 tfidf_vectorizer = TfidfVectorizer(stop_words='english')
 tfidf_matrix = tfidf_vectorizer.fit_transform(texts)
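The change above moves the FAISS setup after model loading: document embeddings are L2-normalized and added to an inner-product index, so a search amounts to encoding the query the same way, normalizing it, and calling index.search. The sketch below (not part of this commit) illustrates that flow, assuming the SentenceTransformer `model` is the active encoder and reusing `index` and `patent_numbers` from app.py; `query` and `top_k` are illustrative placeholders.

import numpy as np

query = "method for aligning semiconductor wafers"  # hypothetical query text
top_k = 5

# Encode the query with the same model used for the document embeddings and
# L2-normalize it, so inner-product search over normalized vectors behaves
# like cosine similarity.
query_vec = model.encode(query)
query_vec = query_vec / np.linalg.norm(query_vec)

# FAISS expects a 2D float32 array of shape (n_queries, dim).
scores, ids = index.search(np.asarray([query_vec], dtype="float32"), top_k)

for score, i in zip(scores[0], ids[0]):
    print(patent_numbers[i], round(float(score), 4))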