Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
from transformers import pipeline
|
|
|
4 |
|
5 |
# Load chatbot menggunakan InferenceClient
|
6 |
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
|
@@ -8,16 +9,20 @@ client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
|
|
8 |
# Load model Named Entity Recognition (NER)
|
9 |
ner_pipeline = pipeline("ner", model="d4data/biomedical-ner-all")
|
10 |
|
|
|
|
|
|
|
11 |
# Fungsi untuk ekstraksi entitas medis dari teks
|
12 |
def extract_entities(text):
|
13 |
entities = ner_pipeline(text)
|
14 |
-
|
15 |
merged_entities = []
|
16 |
current_word = ""
|
17 |
current_entity = None
|
18 |
-
|
19 |
for ent in entities:
|
20 |
word = ent["word"]
|
|
|
21 |
|
22 |
# Merge a subword token into the previous word: the check below matches any token starting with "#" (WordPiece continuation pieces are written with a "##" prefix)
|
23 |
if word.startswith("#"):
|
@@ -27,16 +32,17 @@ def extract_entities(text):
|
|
27 |
merged_entities.append({"word": current_word, "entity": current_entity}) # Simpan kata sebelumnya
|
28 |
|
29 |
current_word = word # Mulai kata baru
|
30 |
-
current_entity =
|
31 |
-
|
32 |
if current_word and current_entity: # Tambahkan kata terakhir yang sudah digabung
|
33 |
merged_entities.append({"word": current_word, "entity": current_entity})
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
|
38 |
# Fungsi untuk highlight teks dan menampilkan daftar entitas yang dikenali
|
39 |
-
import re
|
40 |
def highlight_text(text, entities):
|
41 |
entities = sorted(entities, key=lambda x: text.lower().find(x["word"].lower()), reverse=True) # Urutkan dari belakang
|
42 |
|
@@ -59,8 +65,6 @@ def highlight_text(text, entities):
|
|
59 |
|
60 |
return text + "<br><br>" + entity_list
|
61 |
|
62 |
-
|
63 |
-
|
64 |
# Fungsi chatbot dengan NER
|
65 |
def chat_with_ner(message, history):
|
66 |
entities = extract_entities(message)
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
from transformers import pipeline
|
4 |
+
import re
|
5 |
|
6 |
# Load chatbot menggunakan InferenceClient
|
7 |
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
|
|
|
9 |
# Load model Named Entity Recognition (NER)
|
10 |
ner_pipeline = pipeline("ner", model="d4data/biomedical-ner-all")
|
11 |
|
12 |
+
# Entitas yang dianggap penting
|
13 |
+
important_entities = {"Disease_disorder", "Sign_symptom", "Diagnostic_procedure", "Therapeutic_procedure", "Medication", "Dosage"}
|
14 |
+
|
15 |
# Fungsi untuk ekstraksi entitas medis dari teks
|
16 |
def extract_entities(text):
|
17 |
entities = ner_pipeline(text)
|
18 |
+
|
19 |
merged_entities = []
|
20 |
current_word = ""
|
21 |
current_entity = None
|
22 |
+
|
23 |
for ent in entities:
|
24 |
word = ent["word"]
|
25 |
+
entity_type = ent["entity"].split("-")[-1] # Ambil tipe entitas tanpa B- atau I-
|
26 |
|
27 |
# Merge a subword token into the previous word: the check below matches any token starting with "#" (WordPiece continuation pieces are written with a "##" prefix)
|
28 |
if word.startswith("#"):
|
|
|
32 |
merged_entities.append({"word": current_word, "entity": current_entity}) # Simpan kata sebelumnya
|
33 |
|
34 |
current_word = word # Mulai kata baru
|
35 |
+
current_entity = entity_type # Simpan tipe entity
|
36 |
+
|
37 |
if current_word and current_entity: # Tambahkan kata terakhir yang sudah digabung
|
38 |
merged_entities.append({"word": current_word, "entity": current_entity})
|
39 |
+
|
40 |
+
# Filter hanya entitas yang relevan
|
41 |
+
filtered_entities = [ent for ent in merged_entities if ent["entity"] in important_entities]
|
42 |
+
|
43 |
+
return filtered_entities
|
44 |
|
45 |
# Fungsi untuk highlight teks dan menampilkan daftar entitas yang dikenali
|
|
|
46 |
def highlight_text(text, entities):
|
47 |
entities = sorted(entities, key=lambda x: text.lower().find(x["word"].lower()), reverse=True) # Urutkan dari belakang
|
48 |
|
|
|
65 |
|
66 |
return text + "<br><br>" + entity_list
|
67 |
|
|
|
|
|
68 |
# Fungsi chatbot dengan NER
|
69 |
def chat_with_ner(message, history):
|
70 |
entities = extract_entities(message)
|