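Update app.py: decode patent numbers to UTF-8 strings, print patent-number and metadata counts, and skip search results whose patent number is missing from metadata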
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from sentence_transformers import SentenceTransformer
|
|
8 |
def load_data():
|
9 |
with h5py.File('patent_embeddings.h5', 'r') as f:
|
10 |
embeddings = f['embeddings'][:]
|
11 |
-
patent_numbers = f['patent_numbers'][:]
|
12 |
|
13 |
metadata = {}
|
14 |
with open('patent_metadata.jsonl', 'r') as f:
|
@@ -17,6 +17,8 @@ def load_data():
|
|
17 |
metadata[data['patent_number']] = data
|
18 |
|
19 |
print(f"Embedding shape: {embeddings.shape}")
|
|
|
|
|
20 |
return embeddings, patent_numbers, metadata
|
21 |
|
22 |
embeddings, patent_numbers, metadata = load_data()
|
@@ -56,6 +58,9 @@ def search(query, top_k=5):
|
|
56 |
results = []
|
57 |
for i, idx in enumerate(indices[0]):
|
58 |
patent_number = patent_numbers[idx]
|
|
|
|
|
|
|
59 |
patent_data = metadata[patent_number]
|
60 |
result = f"Patent Number: {patent_number}\n"
|
61 |
result += f"Abstract: {patent_data['abstract'][:200]}...\n"
|
|
|
8 |
def load_data():
|
9 |
with h5py.File('patent_embeddings.h5', 'r') as f:
|
10 |
embeddings = f['embeddings'][:]
|
11 |
+
patent_numbers = [pn.decode('utf-8') for pn in f['patent_numbers'][:]]
|
12 |
|
13 |
metadata = {}
|
14 |
with open('patent_metadata.jsonl', 'r') as f:
|
|
|
17 |
metadata[data['patent_number']] = data
|
18 |
|
19 |
print(f"Embedding shape: {embeddings.shape}")
|
20 |
+
print(f"Number of patent numbers: {len(patent_numbers)}")
|
21 |
+
print(f"Number of metadata entries: {len(metadata)}")
|
22 |
return embeddings, patent_numbers, metadata
|
23 |
|
24 |
embeddings, patent_numbers, metadata = load_data()
|
|
|
58 |
results = []
|
59 |
for i, idx in enumerate(indices[0]):
|
60 |
patent_number = patent_numbers[idx]
|
61 |
+
if patent_number not in metadata:
|
62 |
+
print(f"Warning: Patent number {patent_number} not found in metadata")
|
63 |
+
continue
|
64 |
patent_data = metadata[patent_number]
|
65 |
result = f"Patent Number: {patent_number}\n"
|
66 |
result += f"Abstract: {patent_data['abstract'][:200]}...\n"
|