Update app.py
Browse files
app.py
CHANGED
@@ -10,13 +10,13 @@ import faiss
|
|
10 |
st.set_page_config(page_title='KRISSBERT UMLS Linker', layout='wide')
|
11 |
st.title('🧬 KRISSBERT + UMLS Entity Linker (Local FAISS)')
|
12 |
|
13 |
-
#
|
14 |
METADATA_PATH = 'umls_metadata.json'
|
15 |
EMBED_PATH = 'umls_embeddings.npy'
|
16 |
INDEX_PATH = 'umls_index.faiss'
|
17 |
MODEL_NAME = 'microsoft/BiomedNLP-KRISSBERT-PubMed-UMLS-EL'
|
18 |
|
19 |
-
# Load model & tokenizer
|
20 |
@st.cache_resource
|
21 |
def load_model():
|
22 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
@@ -26,7 +26,7 @@ def load_model():
|
|
26 |
|
27 |
tokenizer, model = load_model()
|
28 |
|
29 |
-
# Load UMLS FAISS index
|
30 |
@st.cache_resource
|
31 |
def load_umls_index():
|
32 |
meta = json.load(open(METADATA_PATH, 'r'))
|
@@ -36,7 +36,7 @@ def load_umls_index():
|
|
36 |
|
37 |
faiss_index, umls_meta = load_umls_index()
|
38 |
|
39 |
-
# Embed text
|
40 |
@st.cache_resource
|
41 |
def embed_text(text, _tokenizer, _model):
|
42 |
inputs = _tokenizer(text, return_tensors='pt', truncation=True, padding=True)
|
@@ -45,7 +45,7 @@ def embed_text(text, _tokenizer, _model):
|
|
45 |
emb = outputs.last_hidden_state[:, 0, :].squeeze().cpu().numpy()
|
46 |
return emb / np.linalg.norm(emb)
|
47 |
|
48 |
-
# UI: examples
|
49 |
st.markdown('Enter a biomedical sentence to link entities via local UMLS FAISS index and KRISSBERT:')
|
50 |
examples = [
|
51 |
'The patient was administered metformin for type 2 diabetes.',
|
@@ -75,9 +75,9 @@ if st.button('🔗 Link Entities'):
|
|
75 |
if results:
|
76 |
st.success('Top UMLS candidates:')
|
77 |
for item in results:
|
78 |
-
st.markdown(
|
79 |
if item['definition']:
|
80 |
-
st.markdown(
|
81 |
-
st.markdown(
|
82 |
else:
|
83 |
-
st.info('No matches found in UMLS index.')
|
|
|
10 |
st.set_page_config(page_title='KRISSBERT UMLS Linker', layout='wide')
|
11 |
st.title('🧬 KRISSBERT + UMLS Entity Linker (Local FAISS)')
|
12 |
|
13 |
+
# Paths & model name
|
14 |
METADATA_PATH = 'umls_metadata.json'
|
15 |
EMBED_PATH = 'umls_embeddings.npy'
|
16 |
INDEX_PATH = 'umls_index.faiss'
|
17 |
MODEL_NAME = 'microsoft/BiomedNLP-KRISSBERT-PubMed-UMLS-EL'
|
18 |
|
19 |
+
# 1️⃣ Load model & tokenizer
|
20 |
@st.cache_resource
|
21 |
def load_model():
|
22 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
26 |
|
27 |
tokenizer, model = load_model()
|
28 |
|
29 |
+
# 2️⃣ Load UMLS FAISS index & metadata
|
30 |
@st.cache_resource
|
31 |
def load_umls_index():
|
32 |
meta = json.load(open(METADATA_PATH, 'r'))
|
|
|
36 |
|
37 |
faiss_index, umls_meta = load_umls_index()
|
38 |
|
39 |
+
# 3️⃣ Embed text (the leading underscores on _tokenizer and _model tell Streamlit's cache not to hash those arguments, which avoids unhashable-parameter errors)
|
40 |
@st.cache_resource
|
41 |
def embed_text(text, _tokenizer, _model):
|
42 |
inputs = _tokenizer(text, return_tensors='pt', truncation=True, padding=True)
|
|
|
45 |
emb = outputs.last_hidden_state[:, 0, :].squeeze().cpu().numpy()
|
46 |
return emb / np.linalg.norm(emb)
|
47 |
|
48 |
+
# UI: examples + input
|
49 |
st.markdown('Enter a biomedical sentence to link entities via local UMLS FAISS index and KRISSBERT:')
|
50 |
examples = [
|
51 |
'The patient was administered metformin for type 2 diabetes.',
|
|
|
75 |
if results:
|
76 |
st.success('Top UMLS candidates:')
|
77 |
for item in results:
|
78 |
+
st.markdown(f"**{item['name']}** (CUI: `{item['cui']}`)")
|
79 |
if item['definition']:
|
80 |
+
st.markdown(f"> {item['definition']}\n")
|
81 |
+
st.markdown(f"_Source: {item['source']}_\n---")
|
82 |
else:
|
83 |
+
st.info('No matches found in UMLS index.')
|