Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,8 @@ import numpy as np
|
|
3 |
import h5py
|
4 |
import faiss
|
5 |
import json
|
6 |
-
from
|
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
import re
|
@@ -48,7 +49,11 @@ embeddings, patent_numbers, metadata, texts = load_data()
|
|
48 |
|
49 |
# Load BERT model for encoding search queries
|
50 |
try:
|
51 |
-
|
|
|
|
|
|
|
|
|
52 |
except Exception as e:
|
53 |
print(f"Error loading anferico/bert-for-patents: {e}")
|
54 |
print("Falling back to a general-purpose model.")
|
|
|
3 |
import h5py
|
4 |
import faiss
|
5 |
import json
|
6 |
+
from transformers import AutoTokenizer, AutoModel
|
7 |
+
from sentence_transformers import SentenceTransformer, models
|
8 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
import re
|
|
|
49 |
|
50 |
# Load BERT model for encoding search queries
|
51 |
try:
|
52 |
+
tokenizer = AutoTokenizer.from_pretrained('anferico/bert-for-patents')
|
53 |
+
bert_model = AutoModel.from_pretrained('anferico/bert-for-patents')
|
54 |
+
# models.Transformer takes the checkpoint id as its first argument and loads the tokenizer and weights itself; it has no model_name/tokenizer/model keywords, so the old call raised TypeError and always triggered the fallback.
word_embedding_model = models.Transformer('anferico/bert-for-patents')
|
55 |
+
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
|
56 |
+
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
|
57 |
except Exception as e:
|
58 |
print(f"Error loading anferico/bert-for-patents: {e}")
|
59 |
print("Falling back to a general-purpose model.")
|