Spaces:
Runtime error
Runtime error
Commit
·
3cf2a36
1
Parent(s):
58ef0b0
Update app.py
Browse files
app.py
CHANGED
@@ -5,74 +5,75 @@ from transformers import pipeline
|
|
5 |
model = pipeline("ner", model="/ner-app/mendobert/", tokenizer="indolem/indobert-base-uncased")
|
6 |
basemodel = pipeline("ner", model="/ner-app/base-model/", tokenizer="indolem/indobert-base-uncased")
|
7 |
|
8 |
-
|
9 |
-
ner_results2 = basemodel(text)
|
10 |
-
|
11 |
-
|
12 |
-
# MendoBERT
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
if result["word"].startswith("##"):
|
19 |
-
formatted_results[-1]["end"] = end
|
20 |
-
formatted_results[-1]["word"]+= result["word"].replace("##", "")
|
21 |
-
else:
|
22 |
-
formatted_results.append({
|
23 |
-
'start': result["start"],
|
24 |
-
'end': end,
|
25 |
-
'entity': result["entity"],
|
26 |
-
'index': result["index"],
|
27 |
-
'score': result["score"],
|
28 |
-
'word': result["word"]})
|
29 |
|
30 |
-
for result in formatted_results:
|
31 |
-
if result["entity"].startswith("LABEL_0"):
|
32 |
-
result["entity"] = "O"
|
33 |
-
elif result["entity"].startswith("LABEL_1"):
|
34 |
-
result["entity"] = "B"
|
35 |
-
elif result["entity"].startswith("LABEL_2"):
|
36 |
-
result["entity"] = "I"
|
37 |
-
|
38 |
-
mendo =[]
|
39 |
-
for result in formatted_results:
|
40 |
-
if not result["entity"].startswith("O"):
|
41 |
-
mendo.append(f"""Entity: {result["entity"]}, Start:{result["start"]}, End:{result["end"]}, word:{text[result["start"]:result["end"]]}""")
|
42 |
-
|
43 |
-
# Base Model
|
44 |
-
|
45 |
-
formatted_results = []
|
46 |
-
for result in ner_results2:
|
47 |
-
end = result["start"]+len(result["word"].replace("##", ""))
|
48 |
-
|
49 |
-
if result["word"].startswith("##"):
|
50 |
-
formatted_results[-1]["end"] = end
|
51 |
-
formatted_results[-1]["word"]+= result["word"].replace("##", "")
|
52 |
-
else:
|
53 |
-
formatted_results.append({
|
54 |
-
'start': result["start"],
|
55 |
-
'end': end,
|
56 |
-
'entity': result["entity"],
|
57 |
-
'index': result["index"],
|
58 |
-
'score': result["score"],
|
59 |
-
'word': result["word"]})
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
-
if text:
|
77 |
st.json(base)
|
78 |
-
st.json(mendo)
|
|
|
|
5 |
# Token-classification pipelines for the two models being compared.
# Both are paired with the same IndoBERT tokenizer.
model = pipeline("ner", model="/ner-app/mendobert/", tokenizer="indolem/indobert-base-uncased")
basemodel = pipeline("ner", model="/ner-app/base-model/", tokenizer="indolem/indobert-base-uncased")

# Ordered (label prefix, short tag) pairs. Prefix matching and first-match-wins
# order are kept so the mapping behaves exactly like an if/elif chain.
_LABEL_PREFIX_TAGS = (
    ("LABEL_0", "O"),
    ("LABEL_1", "B"),
    ("LABEL_2", "I"),
)


def _merge_subwords(ner_results):
    """Join "##" continuation pieces onto the word entry that precedes them.

    Args:
        ner_results: iterable of dicts with at least the keys "word",
            "start", "entity", "index" and "score".

    Returns:
        A list of new dicts (one per merged word) with the keys "start",
        "end", "entity", "index", "score" and "word". The entity, index and
        score of a merged word are those of its first piece.
    """
    merged = []
    for result in ner_results:
        piece = result["word"]
        stripped = piece.replace("##", "")
        # End offset is derived from the start offset plus the visible length
        # of the piece.
        # NOTE(review): the pipeline output may already carry an "end"
        # offset; confirm and prefer it if so.
        end = result["start"] + len(stripped)

        # A continuation piece with nothing before it cannot be merged;
        # treat it as a word of its own instead of indexing an empty list.
        if piece.startswith("##") and merged:
            merged[-1]["end"] = end
            merged[-1]["word"] += stripped
        else:
            merged.append({
                'start': result["start"],
                'end': end,
                'entity': result["entity"],
                'index': result["index"],
                'score': result["score"],
                'word': piece,
            })
    return merged


def _short_tag(label):
    """Map a raw label to its short tag; unknown labels pass through."""
    for prefix, tag in _LABEL_PREFIX_TAGS:
        if label.startswith(prefix):
            return tag
    return label


def _format_entities(ner_results, text):
    """Return one display string per merged word whose tag does not start with "O".

    Args:
        ner_results: raw token-level output of one of the pipelines.
        text: the string that was passed to the pipeline; the reported word
            is sliced out of it using the merged start/end offsets.
    """
    lines = []
    for entity in _merge_subwords(ner_results):
        tag = _short_tag(entity["entity"])
        if tag.startswith("O"):
            continue
        start, end = entity["start"], entity["end"]
        lines.append(f"Entity: {tag}, Start:{start}, End:{end}, word:{text[start:end]}")
    return lines


text = st.text_area('enter some text: ')

if text:
    ner_results = model(text)
    ner_results2 = basemodel(text)

    # MendoBERT
    mendo = _format_entities(ner_results, text)

    # Base Model
    base = _format_entities(ner_results2, text)

    st.json(base)
    st.json(mendo)
|
79 |
+
|