bhlewis committed on
Commit
113456b
1 Parent(s): 072fc9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -3,7 +3,8 @@ import numpy as np
3
  import h5py
4
  import faiss
5
  import json
6
- from sentence_transformers import SentenceTransformer
 
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
  import re
@@ -54,7 +55,16 @@ index = faiss.IndexFlatIP(embeddings.shape[1])
54
  index.add(embeddings)
55
 
56
  # Load BERT model for encoding search queries
57
- model = SentenceTransformer('anferico/bert-for-patents')
 
 
 
 
 
 
 
 
 
58
 
59
  # Create TF-IDF vectorizer
60
  tfidf_vectorizer = TfidfVectorizer(stop_words='english')
 
3
  import h5py
4
  import faiss
5
  import json
6
+ from transformers import AutoModel, AutoTokenizer
7
+ from sentence_transformers import SentenceTransformer, models
8
  from sklearn.feature_extraction.text import TfidfVectorizer
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  import re
 
55
  index.add(embeddings)
56
 
57
  # Load BERT model for encoding search queries
58
# Build the query encoder from 'anferico/bert-for-patents'.
# models.Transformer expects a model name or path and loads both the weights
# and the tokenizer itself, so the hub id is passed directly instead of
# pre-loaded AutoModel/AutoTokenizer objects (passing those raises, which
# previously forced the fallback below on every start-up).
try:
    word_embedding_model = models.Transformer('anferico/bert-for-patents')
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
except Exception as e:
    # Deliberate best-effort: keep the app starting even when the patent
    # model cannot be downloaded or constructed.
    print(f"Error loading anferico/bert-for-patents: {e}")
    print("Falling back to a general-purpose model.")
    model = SentenceTransformer('all-MiniLM-L6-v2')

# The FAISS index above is sized from embeddings.shape[1]; a query encoder
# with a different output width cannot be searched against it, so surface
# the mismatch at start-up instead of at the first query.
if model.get_sentence_embedding_dimension() != embeddings.shape[1]:
    print(
        "Warning: query encoder dimension "
        f"{model.get_sentence_embedding_dimension()} does not match index "
        f"dimension {embeddings.shape[1]}; semantic search will not work."
    )
68
 
69
  # Create TF-IDF vectorizer
70
  tfidf_vectorizer = TfidfVectorizer(stop_words='english')