bhlewis committed on
Commit
c498c82
·
verified ·
1 Parent(s): 74523b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -8,7 +8,7 @@ from sentence_transformers import SentenceTransformer
8
  def load_data():
9
  with h5py.File('patent_embeddings.h5', 'r') as f:
10
  embeddings = f['embeddings'][:]
11
- patent_numbers = f['patent_numbers'][:]
12
 
13
  metadata = {}
14
  with open('patent_metadata.jsonl', 'r') as f:
@@ -17,6 +17,8 @@ def load_data():
17
  metadata[data['patent_number']] = data
18
 
19
  print(f"Embedding shape: {embeddings.shape}")
 
 
20
  return embeddings, patent_numbers, metadata
21
 
22
  embeddings, patent_numbers, metadata = load_data()
@@ -56,6 +58,9 @@ def search(query, top_k=5):
56
  results = []
57
  for i, idx in enumerate(indices[0]):
58
  patent_number = patent_numbers[idx]
 
 
 
59
  patent_data = metadata[patent_number]
60
  result = f"Patent Number: {patent_number}\n"
61
  result += f"Abstract: {patent_data['abstract'][:200]}...\n"
 
8
  def load_data():
9
  with h5py.File('patent_embeddings.h5', 'r') as f:
10
  embeddings = f['embeddings'][:]
11
+ patent_numbers = [pn.decode('utf-8') for pn in f['patent_numbers'][:]]
12
 
13
  metadata = {}
14
  with open('patent_metadata.jsonl', 'r') as f:
 
17
  metadata[data['patent_number']] = data
18
 
19
  print(f"Embedding shape: {embeddings.shape}")
20
+ print(f"Number of patent numbers: {len(patent_numbers)}")
21
+ print(f"Number of metadata entries: {len(metadata)}")
22
  return embeddings, patent_numbers, metadata
23
 
24
  embeddings, patent_numbers, metadata = load_data()
 
58
  results = []
59
  for i, idx in enumerate(indices[0]):
60
  patent_number = patent_numbers[idx]
61
+ if patent_number not in metadata:
62
+ print(f"Warning: Patent number {patent_number} not found in metadata")
63
+ continue
64
  patent_data = metadata[patent_number]
65
  result = f"Patent Number: {patent_number}\n"
66
  result += f"Abstract: {patent_data['abstract'][:200]}...\n"