|
"""gr.HighlightedText() component.""" |
|
|
|
from __future__ import annotations |
|
|
|
from collections.abc import Callable, Sequence |
|
from typing import TYPE_CHECKING, Any, Union |
|
|
|
from gradio_client.documentation import document |
|
|
|
from gradio.components.base import Component |
|
from gradio.data_classes import GradioModel, GradioRootModel |
|
from gradio.events import Events |
|
|
|
if TYPE_CHECKING: |
|
from gradio.components import Timer |
|
|
|
|
|
class HighlightedToken(GradioModel):
    # A single span of text together with its optional label.

    # The substring of the text that this span covers.
    token: str

    # Category (str) or confidence (float) attached to the span;
    # None is the default and is what postprocess assigns to unlabelled text.
    class_or_confidence: Union[str, float, None] = None
|
|
|
|
|
class HighlightedTextData(GradioRootModel):
    # Root model whose payload is the ordered list of highlighted spans.

    # Spans in display order; each entry is a HighlightedToken.
    root: list[HighlightedToken]
|
|
|
|
|
@document()
class HighlightedText(Component):
    """
    Displays text that contains spans that are highlighted by category or numerical value.

    Demos: diff_texts
    Guides: named-entity-recognition
    """

    data_model = HighlightedTextData
    EVENTS = [Events.change, Events.select]

    def __init__(
        self,
        value: list[tuple[str, str | float | None]] | dict | Callable | None = None,
        *,
        color_map: dict[str, str] | None = None,
        show_legend: bool = False,
        show_inline_category: bool = True,
        combine_adjacent: bool = False,
        adjacent_separator: str = "",
        label: str | None = None,
        every: Timer | float | None = None,
        inputs: Component | Sequence[Component] | set[Component] | None = None,
        show_label: bool | None = None,
        container: bool = True,
        scale: int | None = None,
        min_width: int = 160,
        visible: bool = True,
        elem_id: str | None = None,
        elem_classes: list[str] | str | None = None,
        render: bool = True,
        key: int | str | None = None,
        interactive: bool | None = None,
    ):
        """
        Parameters:
            value: Default value to show. If callable, the function will be called whenever the app loads to set the initial value of the component.
            color_map: A dictionary mapping labels to colors. The colors may be specified as hex codes or by their names. For example: {"person": "red", "location": "#FFEE22"}
            show_legend: Whether to show span categories in a separate legend or inline.
            show_inline_category: If False, will not display the span category label. Only applies if show_legend=False and interactive=False.
            combine_adjacent: If True, will merge the labels of adjacent tokens belonging to the same category.
            adjacent_separator: Specifies the separator to be used between tokens if combine_adjacent is True.
            label: The label for this component. Appears above the component and is also used as the header if there is a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.
            every: Continuously calls `value` to recalculate it if `value` is a function (has no effect otherwise). Can provide a Timer whose tick resets `value`, or a float that provides the regular interval for the reset Timer.
            inputs: Components that are used as inputs to calculate `value` if `value` is a function (has no effect otherwise). `value` is recalculated any time the inputs change.
            show_label: If True, will display label.
            container: If True, will place the component in a container - providing some extra padding around the border.
            scale: Relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True.
            min_width: Minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first.
            visible: If False, component will be hidden.
            elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.
            elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.
            render: If False, component will not be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later.
            key: If assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.
            interactive: If True, the component will be editable, and allow user to select spans of text and label them.
        """
        # Component-specific options are stored before delegating to the base
        # initializer, exactly as in the original ordering; postprocess reads
        # combine_adjacent and adjacent_separator from the instance.
        self.adjacent_separator = adjacent_separator
        self.combine_adjacent = combine_adjacent
        self.show_inline_category = show_inline_category
        self.show_legend = show_legend
        self.color_map = color_map
        super().__init__(
            value=value,
            label=label,
            show_label=show_label,
            every=every,
            inputs=inputs,
            container=container,
            scale=scale,
            min_width=min_width,
            visible=visible,
            interactive=interactive,
            elem_id=elem_id,
            elem_classes=elem_classes,
            render=render,
            key=key,
        )

    def example_payload(self) -> Any:
        """Return a sample payload: a list of dicts with "token" and "class_or_confidence" keys."""
        sample_pairs = (("The", None), ("quick", "adj"))
        return [
            {"token": word, "class_or_confidence": tag} for word, tag in sample_pairs
        ]

    def example_value(self) -> Any:
        """Return a sample value: a list of (word, category) tuples."""
        sample_value = [
            ("The", None),
            ("quick", "adj"),
            ("brown", "adj"),
            ("fox", "noun"),
        ]
        return sample_value

    def preprocess(
        self, payload: HighlightedTextData | None
    ) -> list[tuple[str, str | float | None]] | None:
        """
        Parameters:
            payload: An instance of HighlightedTextData
        Returns:
            Passes the value as a list of tuples as a `list[tuple]` into the function. Each `tuple` consists of a `str` substring of the text (so the entire text is included) and `str | float | None` label, which is the category or confidence of that substring.
        """
        # NOTE(review): the result is whatever `model_dump()` produces for the
        # root model; presumably that is a list of dicts rather than tuples --
        # confirm against the annotated return type before relying on it.
        return None if payload is None else payload.model_dump()

    def postprocess(
        self, value: list[tuple[str, str | float | None]] | dict | None
    ) -> HighlightedTextData | None:
        """
        Parameters:
            value: Expects a list of (word, category) tuples, or a dictionary of two keys: "text", and "entities", which itself is a list of dictionaries, each of which has the keys: "entity" (or "entity_group"), "start", and "end"
        Returns:
            An instance of HighlightedTextData
        """
        if value is None:
            return None

        if isinstance(value, dict):
            # Dictionary form: slice the text into alternating unlabelled and
            # labelled segments using the entity offsets.
            try:
                full_text = value["text"]
                spans = value["entities"]
            except KeyError as missing_key:
                raise ValueError(
                    "Expected a dictionary with keys 'text' and 'entities' "
                    "for the value of the HighlightedText component."
                ) from missing_key

            if len(spans) == 0:
                value = [(full_text, None)]
            else:
                segments = []
                cursor = 0
                for span in sorted(spans, key=lambda item: item["start"]):
                    start = span["start"]
                    # Text between the previous entity and this one (may be empty).
                    segments.append((full_text[cursor:start], None))
                    span_label = span.get("entity") or span.get("entity_group")
                    cursor = span["end"]
                    segments.append((full_text[start:cursor], span_label))
                # Whatever follows the last entity (may be empty).
                segments.append((full_text[cursor:], None))
                value = segments

        if not self.combine_adjacent:
            tokens = value
        else:
            tokens = []
            pending_text, pending_label = None, None
            for piece, piece_label in value:
                if pending_text is None:
                    # First item starts the running segment.
                    pending_text, pending_label = piece, piece_label
                elif piece_label == pending_label:
                    # Same category as the running segment: extend it.
                    pending_text += self.adjacent_separator + piece
                elif piece:
                    # Category changed on a non-empty piece: flush and restart.
                    tokens.append((pending_text, pending_label))
                    pending_text, pending_label = piece, piece_label
                # Otherwise the piece is empty with a different category and is
                # dropped; the dictionary branch above can produce such items.
            if pending_text is not None:
                tokens.append((pending_text, pending_label))

        return HighlightedTextData(
            root=[
                HighlightedToken(token=item[0], class_or_confidence=item[1])
                for item in tokens
            ]
        )
|
|