Tonic committed on
Commit c1a6fc6 • 1 Parent(s): 8bffd4f

add description

Files changed (1)
  1. app.py +9 -20
app.py CHANGED
@@ -8,16 +8,6 @@ from globe import title, description, joinus, model_name, placeholder, modelinfo
 tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
 model = DebertaV2ForTokenClassification.from_pretrained(model_name)
 
-# # Define id2label based on config.json
-#
-# id2label = {
-#     0: "author", 1: "bibliography", 2: "caption", 3: "contact",
-#     4: "date", 5: "dialog", 6: "footnote", 7: "keywords",
-#     8: "math", 9: "paratext", 10: "separator", 11: "table",
-#     12: "text", 13: "title"
-# }
-
-
 color_map = {
     "author": "blue", "bibliography": "purple", "caption": "orange",
     "contact": "cyan", "date": "green", "dialog": "yellow",
@@ -42,15 +32,15 @@ def segment_text(input_text):
     segments = []
     current_word = ""
     for token, label_id in zip(tokens_decoded, predictions):
-        if token.startswith("▁"):  # handling wordpieces, specific to some tokenizers
+        if token.startswith("▁"):  # handle wordpieces
             if current_word:
-                segments.append((current_word, id2label[label_id]))
-            current_word = token.replace("▁", "")  # new word
+                segments.append((current_word, id2label[str(label_id)]))
+            current_word = token.replace("▁", "")  # start a new word
         else:
             current_word += token  # append subword part to current word
 
     if current_word:
-        segments.append((current_word, id2label[label_id]))
+        segments.append((current_word, id2label[str(label_id)]))
 
     return segments
 
@@ -58,18 +48,17 @@ with gr.Blocks() as demo:
     with gr.Row():
         gr.Markdown(title)
     with gr.Row():
-        with gr.Column(scale=1):
-            with gr.Group():
-                gr.Markdown(description)
-            with gr.Accordion(label="Join Us", open=False):
-                gr.Markdown(joinus)
+        with gr.Group():
+            gr.Markdown(description)
+    with gr.Row():
         with gr.Column(scale=1):
             with gr.Row():
                 with gr.Group():
                     gr.Markdown(modelinfor1)
                 with gr.Group():
                     gr.Markdown(modelinfor2)
-
+            with gr.Accordion(label="Join Us", open=False):
+                gr.Markdown(joinus)
     with gr.Row():
         input_text = gr.Textbox(label="Enter your text here👇🏻", lines=5, placeholder=placeholder)
         output_text = gr.HighlightedText(label=" PLeIAs/✂️📜 Segment Text", color_map=color_map, combine_adjacent=True, show_inline_category=True, show_legend=True)
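On the id2label[str(label_id)] change: the diff does not show where id2label is defined, but the str() lookup suggests the label map is read from the checkpoint's raw config.json, whose JSON object keys are strings. The sketch below only illustrates that reading; it is not the Space's actual loading code, and the repo id is an assumption.

import json
from huggingface_hub import hf_hub_download

# Assumed repo id for illustration; the app imports model_name from globe.
model_name = "PleIAs/Segmentext"

# json.load keeps the JSON keys as strings, e.g.
# {"0": "author", "1": "bibliography", ..., "12": "text", "13": "title"},
# so lookups need str(label_id), matching the change above.
config_path = hf_hub_download(model_name, "config.json")
with open(config_path, encoding="utf-8") as f:
    id2label = json.load(f)["id2label"]

print(id2label[str(12)])  # -> "text"

By contrast, the id2label built by transformers on model.config uses integer keys, which is what the removed commented-out dict mirrored.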
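segment_text returns a list of (word, label) tuples, a format gr.HighlightedText accepts directly; combine_adjacent=True then merges neighbouring words that share a label into one highlighted span. A minimal, self-contained sketch of that wiring with a dummy segmenter in place of the model (the stand-in function and its labelling are illustrative only):

import gradio as gr

# Subset of the app's color_map, copied from the diff above.
color_map = {
    "author": "blue", "bibliography": "purple", "caption": "orange",
    "contact": "cyan", "date": "green", "dialog": "yellow",
}

def fake_segment_text(input_text: str):
    # Stand-in for the real segment_text: tag every word as "dialog"
    # just to show the (word, label) tuple format end to end.
    return [(word, "dialog") for word in input_text.split()]

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Enter your text here", lines=5)
    out = gr.HighlightedText(label="Segments", color_map=color_map,
                             combine_adjacent=True, show_legend=True)
    inp.submit(fake_segment_text, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()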