gsarti committed on
Commit
55479e9
·
1 Parent(s): af91a42

Add list <-> tagged text utilities to HTB class

Browse files
src/backend/gradio_highlightedtextbox/highlightedtextbox.py CHANGED
@@ -1,14 +1,14 @@
1
  from __future__ import annotations
2
 
3
- from typing import Any, Callable, List, Tuple
4
 
5
- from gradio.data_classes import GradioRootModel
6
  from gradio.components.base import FormComponent
 
7
  from gradio.events import Events
8
 
9
 
10
  class HighlightedTextData(GradioRootModel):
11
- root: List[Tuple[str, str | None]]
12
 
13
 
14
  class HighlightedTextbox(FormComponent):
@@ -16,12 +16,12 @@ class HighlightedTextbox(FormComponent):
16
  Creates a textarea for user to enter string input or display string output where some
17
  elements are highlighted.
18
  Preprocessing: passes a list of tuples as a {List[Tuple[str, float | str | None]]]} into the function. If no labels are provided, the text will be displayed as a single span.
19
- Postprocessing: expects a {List[Tuple[str, float | str]]]} consisting of spans of text and their associated labels, or a {Dict} with two keys:
20
- (1) "text" whose value is the complete text, and
21
- (2) "highlights", which is a list of dictionaries, each of which have the keys:
22
- "highlight_type" (consisting of the highlight label),
23
- "start" (the character index where the label starts), and
24
- "end" (the character index where the label ends).
25
  Highlights should not overlap.
26
  """
27
 
@@ -114,15 +114,15 @@ class HighlightedTextbox(FormComponent):
114
  every=every,
115
  )
116
 
117
- def preprocess(self, payload: str | None) -> str | None:
118
- return None if payload is None else str(payload)
119
 
120
  def postprocess(
121
- self, y: HighlightedTextData | dict | None
122
- ) -> HighlightedTextData | None:
123
  """
124
  Parameters:
125
- y: List of (word, category) tuples, or a dictionary of two keys: "text", and "highlights", which itself is
126
  a list of dictionaries, each of which have the keys: "highlight_type", "start", and "end"
127
  Returns:
128
  List of (word, category) tuples
@@ -178,3 +178,94 @@ class HighlightedTextbox(FormComponent):
178
 
179
  def example_inputs(self) -> Any:
180
  return [("Hello", None), ("world", "highlight")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ from typing import Any, Callable
4
 
 
5
  from gradio.components.base import FormComponent
6
+ from gradio.data_classes import GradioRootModel
7
  from gradio.events import Events
8
 
9
 
10
  class HighlightedTextData(GradioRootModel):
11
+ root: list[tuple[str, str | None]]
12
 
13
 
14
  class HighlightedTextbox(FormComponent):
 
16
  Creates a textarea for user to enter string input or display string output where some
17
  elements are highlighted.
18
  Preprocessing: passes a list of tuples as a {List[Tuple[str, float | str | None]]]} into the function. If no labels are provided, the text will be displayed as a single span.
19
+ Postprocessing: expects a {List[Tuple[str, float | str]]]} consisting of spans of text and their associated labels, or a {Dict} with two keys:
20
+ (1) "text" whose value is the complete text, and
21
+ (2) "highlights", which is a list of dictionaries, each of which have the keys:
22
+ "highlight_type" (consisting of the highlight label),
23
+ "start" (the character index where the label starts), and
24
+ "end" (the character index where the label ends).
25
  Highlights should not overlap.
26
  """
27
 
 
114
  every=every,
115
  )
116
 
117
+ def preprocess(self, payload: HighlightedTextData) -> dict:
118
+ return {"id": self.elem_id, "data": payload.root}
119
 
120
  def postprocess(
121
+ self, y: list[tuple[str, str | None]] | dict | None
122
+ ) -> list[tuple[str, str | None]] | None:
123
  """
124
  Parameters:
125
+ y: List of (word, category) tuples, or a dictionary of two keys: "text", and "highlights", which itself is
126
  a list of dictionaries, each of which have the keys: "highlight_type", "start", and "end"
127
  Returns:
128
  List of (word, category) tuples
 
178
 
179
  def example_inputs(self) -> Any:
180
  return [("Hello", None), ("world", "highlight")]
181
+
182
+ @classmethod
183
+ def tagged_text_to_tuples(
184
+ cls, text: str, tag_id: str, tag_open: str = "<h>", tag_close: str = "</h>"
185
+ ) -> list[tuple[str, str | None]]:
186
+ """Parse a text containing tags into a list of tuples in the format accepted by HighlightedTextbox.
187
+
188
+ E.g. Hello <h>world</h>! -> [("Hello", None), ("world", <TAG_ID>), ("!", None)]
189
+
190
+ Args:
191
+ text (`str`):
192
+ Text containing tags that needs to be parsed.
193
+ tag_id (`str`):
194
+ Label to use for the second element of the tuple.
195
+ tag_open (`str`, *optional*, defaults to "<h>"):
196
+ Tag used to mark the beginning of a highlighted section.
197
+ tag_close (`str`, *optional*, defaults to "</h>"):
198
+ Tag used to mark the end of a highlighted section.
199
+
200
+ Raises:
201
+ `ValueError`: Number of open tags does not match number of closed tags.
202
+
203
+ Returns:
204
+ `list[tuple[str, str | None]]`: List of tuples in the format accepted by HighlightedTextbox.
205
+ """
206
+ # Check that the text is well-formed (i.e. no nested or empty tags)
207
+ num_tags = text.count(tag_open)
208
+ if num_tags != text.count(tag_close):
209
+ raise ValueError(
210
+ f"Number of open tags ({tag_open}) does not match number of closed tags ({tag_close})."
211
+ )
212
+ elif num_tags == 0:
213
+ return [(text, None)]
214
+ elif num_tags > 0:
215
+ out = []
216
+ pre_tag_text = text[: text.index(tag_open)]
217
+ if pre_tag_text:
218
+ out += [(pre_tag_text.strip(), None)]
219
+
220
+ tag_text = text[
221
+ text.index(tag_open) + len(tag_open) : text.index(tag_close)
222
+ ]
223
+ out += [(tag_text.strip(), tag_id)]
224
+ if num_tags > 1:
225
+ remaining_text = text[text.index(tag_close) + len(tag_close) :]
226
+ out += cls.tagged_text_to_tuples(
227
+ remaining_text,
228
+ tag_id=tag_id,
229
+ tag_open=tag_open,
230
+ tag_close=tag_close,
231
+ )
232
+ else:
233
+ post_tag_text = text[text.index(tag_close) + len(tag_close) :]
234
+ if post_tag_text:
235
+ out += [(post_tag_text, None)]
236
+ return out
237
+
238
+ @staticmethod
239
+ def tuples_to_tagged_text(
240
+ tuples: list[tuple[str, str | None]],
241
+ tag_ids: str | list[str] = [],
242
+ tag_open: str = "<h>",
243
+ tag_close: str = "</h>",
244
+ ) -> str:
245
+ """Convert a list of tuples in the format accepted by HighlightedTextbox into a text containing tags.
246
+
247
+ E.g. [("Hello", None), ("world", <TAG_ID>), ("!", None)] -> Hello <h>world</h>!
248
+
249
+ Args:
250
+ tuples (`list[tuple[str, str | None]]`):
251
+ List of tuples in the format accepted by HighlightedTextbox.
252
+ tag_ids (`str` | `list[str]`):
253
+ Label(s) to select for the second element of the tuple. All other labels will be ignored
254
+ (i.e. replaced with None)
255
+ tag_open (`str`, *optional*, defaults to "<h>"):
256
+ Tag used to mark the beginning of a highlighted section.
257
+ tag_close (`str`, *optional*, defaults to "</h>"):
258
+ Tag used to mark the end of a highlighted section.
259
+
260
+ Returns:
261
+ `str`: Text containing tags.
262
+ """
263
+ if isinstance(tag_ids, str):
264
+ tag_ids = [tag_ids]
265
+ out = ""
266
+ for text, tag_id in tuples:
267
+ if tag_id in tag_ids:
268
+ out += f" {tag_open}{text.strip()}{tag_close} "
269
+ else:
270
+ out += text.strip()
271
+ return out.strip()
src/backend/gradio_highlightedtextbox/highlightedtextbox.pyi CHANGED
@@ -12,12 +12,12 @@ class HighlightedTextbox(FormComponent):
12
  Creates a textarea for user to enter string input or display string output where some
13
  elements are highlighted.
14
  Preprocessing: passes a list of tuples as a {List[Tuple[str, float | str | None]]]} into the function. If no labels are provided, the text will be displayed as a single span.
15
- Postprocessing: expects a {List[Tuple[str, float | str]]]} consisting of spans of text and their associated labels, or a {Dict} with two keys:
16
- (1) "text" whose value is the complete text, and
17
- (2) "highlights", which is a list of dictionaries, each of which have the keys:
18
- "highlight_type" (consisting of the highlight label),
19
- "start" (the character index where the label starts), and
20
- "end" (the character index where the label ends).
21
  Highlights should not overlap.
22
  """
23
 
@@ -110,15 +110,15 @@ class HighlightedTextbox(FormComponent):
110
  every=every,
111
  )
112
 
113
- def preprocess(self, payload: str | None) -> str | None:
114
- return None if payload is None else str(payload)
115
 
116
  def postprocess(
117
- self, y: HighlightedTextData | dict | None
118
- ) -> HighlightedTextData | None:
119
  """
120
  Parameters:
121
- y: List of (word, category) tuples, or a dictionary of two keys: "text", and "highlights", which itself is
122
  a list of dictionaries, each of which have the keys: "highlight_type", "start", and "end"
123
  Returns:
124
  List of (word, category) tuples
@@ -175,6 +175,97 @@ class HighlightedTextbox(FormComponent):
175
  def example_inputs(self) -> Any:
176
  return [("Hello", None), ("world", "highlight")]
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  def change(self,
180
  fn: Callable | None,
 
12
  Creates a textarea for user to enter string input or display string output where some
13
  elements are highlighted.
14
  Preprocessing: passes a list of tuples as a {List[Tuple[str, float | str | None]]]} into the function. If no labels are provided, the text will be displayed as a single span.
15
+ Postprocessing: expects a {List[Tuple[str, float | str]]]} consisting of spans of text and their associated labels, or a {Dict} with two keys:
16
+ (1) "text" whose value is the complete text, and
17
+ (2) "highlights", which is a list of dictionaries, each of which have the keys:
18
+ "highlight_type" (consisting of the highlight label),
19
+ "start" (the character index where the label starts), and
20
+ "end" (the character index where the label ends).
21
  Highlights should not overlap.
22
  """
23
 
 
110
  every=every,
111
  )
112
 
113
+ def preprocess(self, payload: HighlightedTextData) -> dict:
114
+ return {"id": self.elem_id, "data": payload.root}
115
 
116
  def postprocess(
117
+ self, y: list[tuple[str, str | None]] | dict | None
118
+ ) -> list[tuple[str, str | None]] | None:
119
  """
120
  Parameters:
121
+ y: List of (word, category) tuples, or a dictionary of two keys: "text", and "highlights", which itself is
122
  a list of dictionaries, each of which have the keys: "highlight_type", "start", and "end"
123
  Returns:
124
  List of (word, category) tuples
 
175
  def example_inputs(self) -> Any:
176
  return [("Hello", None), ("world", "highlight")]
177
 
178
+ @classmethod
179
+ def tagged_text_to_tuples(
180
+ cls, text: str, tag_id: str, tag_open: str = "<h>", tag_close: str = "</h>"
181
+ ) -> list[tuple[str, str | None]]:
182
+ """Parse a text containing tags into a list of tuples in the format accepted by HighlightedTextbox.
183
+
184
+ E.g. Hello <h>world</h>! -> [("Hello", None), ("world", <TAG_ID>), ("!", None)]
185
+
186
+ Args:
187
+ text (`str`):
188
+ Text containing tags that needs to be parsed.
189
+ tag_id (`str`):
190
+ Label to use for the second element of the tuple.
191
+ tag_open (`str`, *optional*, defaults to "<h>"):
192
+ Tag used to mark the beginning of a highlighted section.
193
+ tag_close (`str`, *optional*, defaults to "</h>"):
194
+ Tag used to mark the end of a highlighted section.
195
+
196
+ Raises:
197
+ `ValueError`: Number of open tags does not match number of closed tags.
198
+
199
+ Returns:
200
+ `list[tuple[str, str | None]]`: List of tuples in the format accepted by HighlightedTextbox.
201
+ """
202
+ # Check that the text is well-formed (i.e. no nested or empty tags)
203
+ num_tags = text.count(tag_open)
204
+ if num_tags != text.count(tag_close):
205
+ raise ValueError(
206
+ f"Number of open tags ({tag_open}) does not match number of closed tags ({tag_close})."
207
+ )
208
+ elif num_tags == 0:
209
+ return [(text, None)]
210
+ elif num_tags > 0:
211
+ out = []
212
+ pre_tag_text = text[: text.index(tag_open)]
213
+ if pre_tag_text:
214
+ out += [(pre_tag_text.strip(), None)]
215
+
216
+ tag_text = text[
217
+ text.index(tag_open) + len(tag_open) : text.index(tag_close)
218
+ ]
219
+ out += [(tag_text.strip(), tag_id)]
220
+ if num_tags > 1:
221
+ remaining_text = text[text.index(tag_close) + len(tag_close) :]
222
+ out += cls.tagged_text_to_tuples(
223
+ remaining_text,
224
+ tag_id=tag_id,
225
+ tag_open=tag_open,
226
+ tag_close=tag_close,
227
+ )
228
+ else:
229
+ post_tag_text = text[text.index(tag_close) + len(tag_close) :]
230
+ if post_tag_text:
231
+ out += [(post_tag_text, None)]
232
+ return out
233
+
234
+ @staticmethod
235
+ def tuples_to_tagged_text(
236
+ tuples: list[tuple[str, str | None]],
237
+ tag_ids: str | list[str] = [],
238
+ tag_open: str = "<h>",
239
+ tag_close: str = "</h>",
240
+ ) -> str:
241
+ """Convert a list of tuples in the format accepted by HighlightedTextbox into a text containing tags.
242
+
243
+ E.g. [("Hello", None), ("world", <TAG_ID>), ("!", None)] -> Hello <h>world</h>!
244
+
245
+ Args:
246
+ tuples (`list[tuple[str, str | None]]`):
247
+ List of tuples in the format accepted by HighlightedTextbox.
248
+ tag_ids (`str` | `list[str]`):
249
+ Label(s) to select for the second element of the tuple. All other labels will be ignored
250
+ (i.e. replaced with None)
251
+ tag_open (`str`, *optional*, defaults to "<h>"):
252
+ Tag used to mark the beginning of a highlighted section.
253
+ tag_close (`str`, *optional*, defaults to "</h>"):
254
+ Tag used to mark the end of a highlighted section.
255
+
256
+ Returns:
257
+ `str`: Text containing tags.
258
+ """
259
+ if isinstance(tag_ids, str):
260
+ tag_ids = [tag_ids]
261
+ out = ""
262
+ for text, tag_id in tuples:
263
+ if tag_id in tag_ids:
264
+ out += f" {tag_open}{text.strip()}{tag_close} "
265
+ else:
266
+ out += text.strip()
267
+ return out.strip()
268
+
269
 
270
  def change(self,
271
  fn: Callable | None,
src/demo/app.py CHANGED
@@ -1,20 +1,66 @@
1
  import gradio as gr
2
  from gradio_highlightedtextbox import HighlightedTextbox
3
 
4
- def set_highlighted():
5
- return HighlightedTextbox(
6
- value=[("Non è qualcosa di cui vergognarsi: non è diverso dalle paure e", None), ("odie", "Potential issue"), ("personali", None), ("di altre cose", "Potential issue"), ("che", None), ("molta gente ha", "Potential issue"), (".", None)],
7
- interactive=True, label="Output", show_legend=True, show_label=False, legend_label="Test:", show_legend_label=True
 
 
8
  )
9
 
 
10
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  with gr.Row():
12
- gr.Textbox(" It is not something to be ashamed of: it is no different from the personal fears and dislikes of other things that very many people have.", interactive=False)
 
 
 
 
 
 
13
  high = HighlightedTextbox(
14
- interactive=True, label="Input", show_legend=True, show_label=False, legend_label="Legend:", show_legend_label=True
 
 
 
 
 
 
15
  )
16
  button = gr.Button("Submit")
17
- button.click(fn=set_highlighted, inputs=[], outputs=high)
18
-
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- demo.launch()
 
1
  import gradio as gr
2
  from gradio_highlightedtextbox import HighlightedTextbox
3
 
4
+
5
def convert_tagged_text_to_highlighted_text(
    tagged_text: str, tag_id: str, tag_open: str, tag_close: str
) -> list[tuple[str, str | None]]:
    """Turn a tagged string into the (text, label) tuples shown by HighlightedTextbox.

    Thin adapter used as the Gradio click handler: it forwards the four textbox
    values unchanged to `HighlightedTextbox.tagged_text_to_tuples`.
    """
    spans = HighlightedTextbox.tagged_text_to_tuples(
        tagged_text,
        tag_id,
        tag_open=tag_open,
        tag_close=tag_close,
    )
    return spans
11
 
12
+
13
  with gr.Blocks() as demo:
14
+ tag_id = gr.Textbox(
15
+ "Potential issue",
16
+ label="Tag ID",
17
+ show_label=True,
18
+ info="Insert a tag ID to use in the highlighted textbox.",
19
+ )
20
+ tag_open = gr.Textbox(
21
+ "<h>",
22
+ label="Tag open",
23
+ show_label=True,
24
+ info="Insert a tag to mark the beginning of a highlighted section.",
25
+ )
26
+ tag_close = gr.Textbox(
27
+ "</h>",
28
+ label="Tag close",
29
+ show_label=True,
30
+ info="Insert a tag to mark the end of a highlighted section.",
31
+ )
32
  with gr.Row():
33
+ tagged = gr.Textbox(
34
+ "It is not something to be ashamed of: it is no different from the <h>personal fears</h> and <h>dislikes</h> of other things that <h>very many people</h> have.",
35
+ interactive=True,
36
+ label="Input",
37
+ show_label=True,
38
+ info="Insert a text with <h>...</h> tags to mark spans that will be highlighted.",
39
+ )
40
  high = HighlightedTextbox(
41
+ interactive=True,
42
+ label="Output",
43
+ info="Highlighted textbox.",
44
+ show_legend=True,
45
+ show_label=True,
46
+ legend_label="Legend:",
47
+ show_legend_label=True,
48
  )
49
  button = gr.Button("Submit")
50
+ button.click(
51
+ fn=convert_tagged_text_to_highlighted_text,
52
+ inputs=[tagged, tag_id, tag_open, tag_close],
53
+ outputs=high,
54
+ )
55
+ # Initialization does not work
56
+ high = HighlightedTextbox(
57
+ convert_tagged_text_to_highlighted_text(
58
+ tagged.value, tag_id.value, tag_open.value, tag_close.value
59
+ ),
60
+ interactive=True,
61
+ label="Does not work",
62
+ show_label=True,
63
+ )
64
+
65
 
66
+ demo.launch()