Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#The libraries used
|
2 |
+
import gradio as gr
|
3 |
+
import pandas as pd
|
4 |
+
from transformers import pipeline
|
5 |
+
|
6 |
+
|
7 |
+
# Load the Hugging Face NER pipeline once at import time so every request reuses it.
# `aggregation_strategy="simple"` merges sub-word tokens into whole entities; it is
# the supported replacement for the deprecated `grouped_entities=True` flag.
ner = pipeline(
    'ner',
    model='FacebookAI/xlm-roberta-large-finetuned-conll03-english',
    aggregation_strategy='simple',
)
|
10 |
+
|
11 |
+
#Converting the NER output into a DataFrame:
|
12 |
+
|
13 |
+
def entities_to_df(text):
    """Run the NER pipeline on ``text`` and return its entities as a DataFrame.

    Each row describes one entity with the columns ``Entity``, ``Type``,
    ``Score`` (rounded to 4 decimals), ``Start``, ``End`` and ``Sentence``
    (the full input text, repeated on every row).

    The returned frame always carries these six columns, even when the text
    is blank or the model finds nothing, so callers can index ``df['Score']``
    and friends without hitting a ``KeyError``.
    """
    columns = ["Entity", "Type", "Score", "Start", "End", "Sentence"]

    # Blank input cannot contain entities; skip the model call entirely.
    if not text or not text.strip():
        return pd.DataFrame(columns=columns)

    rows = [
        {
            "Entity": entity['word'],
            "Type": entity['entity_group'],
            "Score": float(entity['score']),
            "Start": entity['start'],
            "End": entity['end'],
            "Sentence": text,
        }
        for entity in ner(text)
    ]

    # Passing the columns explicitly keeps the schema stable for an empty result.
    df = pd.DataFrame(rows, columns=columns)

    # Rounding is done on the column (not per value) so the displayed score is
    # really limited to 4 decimals; an empty column has nothing to round.
    if rows:
        df['Score'] = df['Score'].round(4)

    return df
|
34 |
+
|
35 |
+
|
36 |
+
#a function to highlight the entities of the DataFrame using HTML
|
37 |
+
def highlight_entities(text):
    """Return ``text`` as an HTML fragment with each detected entity highlighted.

    Entities come from ``entities_to_df(text)``. Every entity is rendered as an
    inline red ``<div>`` containing the entity string followed by its type in
    parentheses; the text between entities is copied through unchanged. The
    whole result is wrapped in an outer ``<div>``.

    All text taken from the input or from the model output is HTML-escaped, so
    characters such as ``<`` or ``&`` typed by the user are displayed literally
    instead of being interpreted as markup.
    """
    from html import escape  # local import keeps this function self-contained

    df = entities_to_df(text)
    pieces = []
    last_idx = 0

    # Walk the entities in the order the DataFrame lists them.
    for _, entity in df.iterrows():
        # Plain text between the previous entity and this one.
        pieces.append(escape(text[last_idx:entity['Start']]))

        # The entity itself, highlighted in red, with its type (PER, ORG, LOC or MISC).
        pieces.append(
            "<div style='background-color: red; display: inline;'>"
            f"{escape(str(entity['Entity']))} ({escape(str(entity['Type']))})</div>"
        )

        # Continue right after the current entity.
        last_idx = entity['End']

    # Whatever follows the last entity (or the whole text if there were none).
    pieces.append(escape(text[last_idx:]))

    return f"<div>{''.join(pieces)}</div>"
|
59 |
+
|
60 |
+
# The last function which will combine the two previous functions and will be used in the interface
|
61 |
+
def NER_output(text):
    """Build both interface outputs for ``text``.

    Returns a 2-tuple: the highlighted HTML from ``highlight_entities`` first,
    then the entity table from ``entities_to_df``.
    """
    # The tuple is evaluated left to right: HTML first, then the DataFrame.
    return highlight_entities(text), entities_to_df(text)
|
65 |
+
|
66 |
+
#a default value that will be used in the gradio interface input
|
67 |
+
default_value = "J.K. Rowling wrote the Harry Potter series, which was published by Bloomsbury Publishing."

# Gradio interface: one text input wired to NER_output.
demo = gr.Interface(
    fn=NER_output,
    inputs=gr.Textbox(label="Enter text:", lines=6, value=default_value),
    # NER_output returns (html, df), so there are two output components:
    # first a gr.HTML for the highlighted text, then a gr.Dataframe for the table.
    outputs=[
        gr.HTML(label="Entity Visualization"),
        gr.Dataframe(label="Entities in DataFrame format"),
    ],
)


demo.launch()
|