Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,8 @@ import numpy as np
|
|
3 |
import h5py
|
4 |
import faiss
|
5 |
import json
|
6 |
-
from
|
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
import re
|
@@ -54,7 +55,16 @@ index = faiss.IndexFlatIP(embeddings.shape[1])
|
|
54 |
index.add(embeddings)
|
55 |
|
56 |
# Load BERT model for encoding search queries
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# Create TF-IDF vectorizer
|
60 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
|
|
3 |
import h5py
|
4 |
import faiss
|
5 |
import json
|
6 |
+
from transformers import AutoModel, AutoTokenizer
|
7 |
+
from sentence_transformers import SentenceTransformer, models
|
8 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
import re
|
|
|
55 |
index.add(embeddings)
|
56 |
|
57 |
# Load BERT model for encoding search queries
|
58 |
+
# Build the query encoder as a SentenceTransformer pipeline: a transformer
# module followed by a pooling module (library-default pooling settings).
try:
    # models.Transformer takes a model name or path and loads the Hugging Face
    # model and tokenizer itself. Passing already-instantiated objects (as the
    # previous code did) raises inside the constructor, which silently sent
    # every start-up down the fallback branch below.
    word_embedding_model = models.Transformer('anferico/bert-for-patents')
    # Keep the module-level names the earlier code defined, in case other
    # parts of the file use them; these alias the objects that were just
    # loaded, so nothing is loaded twice.
    bert_model = word_embedding_model.auto_model
    tokenizer = word_embedding_model.tokenizer
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
except Exception as e:
    # Deliberate best-effort: report the failure and keep the app usable
    # with a general-purpose encoder instead of crashing at start-up.
    print(f"Error loading anferico/bert-for-patents: {e}")
    print("Falling back to a general-purpose model.")
    model = SentenceTransformer('all-MiniLM-L6-v2')

# Encoded queries are searched against `index`, so the encoder's output width
# has to equal the index dimensionality. Warn early (without aborting) when
# the loaded model cannot produce vectors the index will accept.
if model.get_sentence_embedding_dimension() != index.d:
    print(
        f"Warning: query encoder dimension ({model.get_sentence_embedding_dimension()}) "
        f"does not match FAISS index dimension ({index.d}); semantic search will fail."
    )
|
68 |
|
69 |
# Create TF-IDF vectorizer
|
70 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|