import random
from gliner import GLiNER
import gradio as gr
from datasets import load_dataset

# Load the subset dataset from Hugging Face Hub
subset_dataset = load_dataset("TheBritishLibrary/blbooks", split="train", streaming=True, trust_remote_code=True)
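# streaming=True returns an IterableDataset: records are fetched lazily rather than downloading the full corpus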

# Load the GLiNER model
model = GLiNER.from_pretrained("max-long/textile_machines_3_oct", trust_remote_code=True)

# Define the NER function
def ner(text: str, labels: str, threshold: float, nested_ner: bool):
    labels = [label.strip() for label in labels.split(",")]
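    # flat_ner=True keeps only non-overlapping spans; the "Nested NER" checkbox inverts it to allow nested entities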
    entities = model.predict_entities(text, labels, flat_ner=not nested_ner, threshold=threshold)
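    # Each GLiNER prediction is a dict with "text", "label", "start", "end", and "score" keys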
    
    # Filter for "textile machinery" entities
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in entities
        if ent["label"].lower() == "textile machinery"
    ]
    
    # gr.HighlightedText renders the highlights itself from a dict of the original
    # text plus entity spans, so no manual HTML markup is needed here
    return {"text": text, "entities": textile_entities}, textile_entities

# Build Gradio interface
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        """
        # Textile Machinery Entity Recognition Demo
        This demo selects a random text snippet from the British Library's books dataset and identifies "textile machinery" entities using a fine-tuned GLiNER model.
        """
    )
    
    with gr.Accordion("How to run this model locally", open=False):
        gr.Markdown(
            """
            ## Installation
            To use this model, you must install the GLiNER Python library:
            ```
            pip install gliner
            ```
         
            ## Usage
            Once the library is installed, import the GLiNER class, load this model with `GLiNER.from_pretrained`, and predict entities with `predict_entities`.
            """
        )
        gr.Code(
            '''
from gliner import GLiNER
model = GLiNER.from_pretrained("max-long/textile_machines_3_oct")
text = "Your sample text here."
labels = ["textile machinery"]
entities = model.predict_entities(text, labels)
for entity in entities:
    print(entity["text"], "=>", entity["label"])
            ''',
            language="python",
        )
        gr.Code(
            """
Textile Machine 1 => textile machinery
Textile Machine 2 => textile machinery
            """,
            label="Expected output",
        )
    
    input_text = gr.Textbox(
        value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
        label="Text input",
        placeholder="Enter your text here",
        lines=5
    )
    
    with gr.Row():
        labels = gr.Textbox(
            value="textile machinery",
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=0.3,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )
        nested_ner = gr.Checkbox(
            value=False,
            label="Nested NER",
            info="Allow for nested NER?",
            scale=0,
        )
    
    output = gr.HighlightedText(label="Predicted Entities")
    output_json = gr.JSON(label="Entities")
    
    submit_btn = gr.Button("Analyze Random Snippet")
    refresh_btn = gr.Button("Get New Snippet")
    
    # Fetch a new random snippet from the streaming dataset
    def get_new_snippet():
        # Reshuffling the stream with a fresh seed makes each click return a different record;
        # streaming keeps memory low, but filling the shuffle buffer adds some latency
        try:
            shuffled = subset_dataset.shuffle(seed=random.randint(0, 2**32 - 1), buffer_size=100)
            return next(iter(shuffled))["text"]
        except StopIteration:
            return "No more snippets available."
    
    refresh_btn.click(fn=get_new_snippet, outputs=input_text)
    
    submit_btn.click(
        fn=ner,
        inputs=[input_text, labels, threshold, nested_ner],
        outputs=[output, output_json]
    )
    
    examples = [
        [
            "However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
            "textile machinery",
            0.3,
            False,
        ],
        # Add more examples as needed
    ]
    
    gr.Examples(
        examples=examples,
        inputs=[input_text, labels, threshold, nested_ner],
        outputs=[output, output_json],
        fn=ner,
        label="Examples",
        cache_examples=True,
    )

demo.queue()
demo.launch(debug=True)