AlGe committed on
Commit
a8497b9
·
verified ·
1 Parent(s): 45ba383

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -48
app.py CHANGED
@@ -12,98 +12,76 @@ import spaces
12
  import torch
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline
14
  import os
 
15
  import colorsys
16
  import matplotlib.pyplot as plt
17
 
18
  def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
19
  hex_color = hex_color.lstrip('#')
20
  return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
21
-
22
- def rgb_to_hex(rgb_color: tuple[int, int, int]) -> str:
23
- return "#{:02x}{:02x}{:02x}".format(*rgb_color)
24
-
25
- def adjust_brightness(rgb_color: tuple[int, int, int], factor: float) -> tuple[int, int, int]:
26
- hsv_color = colorsys.rgb_to_hsv(*[v / 255.0 for v in rgb_color])
27
- new_v = max(0, min(hsv_color[2] * factor, 1))
28
- new_rgb = colorsys.hsv_to_rgb(hsv_color[0], hsv_color[1], new_v)
29
  return tuple(int(v * 255) for v in new_rgb)
30
 
31
  monochrome = Monochrome()
32
 
33
  auth_token = os.environ['HF_TOKEN']
34
 
 
35
  tokenizer_bin = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
36
  model_bin = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
37
  tokenizer_bin.model_max_length = 512
38
  pipe_bin = pipeline("ner", model=model_bin, tokenizer=tokenizer_bin)
39
 
 
40
  tokenizer_ext = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
41
  model_ext = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
42
  tokenizer_ext.model_max_length = 512
43
  pipe_ext = pipeline("ner", model=model_ext, tokenizer=tokenizer_ext)
44
 
 
45
  model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
46
  tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
47
 
48
  model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
49
 
50
  def process_ner(text: str, pipeline) -> dict:
 
51
  output = pipeline(text)
52
  entities = []
53
  current_entity = None
54
 
55
- for token in output:
56
- entity_type = token['entity'][2:]
57
- entity_prefix = token['entity'][:1]
58
-
59
- if current_entity is None or entity_type != current_entity['entity'] or (entity_prefix == 'B' and entity_type == current_entity['entity']):
60
- if current_entity is not None:
61
- entities.append(current_entity)
62
- current_entity = {
63
- "entity": entity_type,
64
- "start": token['start'],
65
- "end": token['end'],
66
- "score": token['score']
67
- }
68
- else:
69
- current_entity['end'] = token['end']
70
- current_entity['score'] = max(current_entity['score'], token['score'])
71
-
72
- if current_entity is not None:
73
- entities.append(current_entity)
74
-
75
  return {"text": text, "entities": entities}
76
 
77
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
78
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
79
 
80
  with torch.no_grad():
81
- outputs1 = model1(**inputs1)
82
- outputs2 = model2(**inputs1)
83
-
84
- prediction1 = outputs1[0].item()
85
  prediction2 = outputs2[0].item()
86
  score = prediction1 / (prediction2 + prediction1)
87
 
88
  return f"{round(prediction1, 1)}", f"{round(prediction2, 1)}", f"{round(score, 2)}"
89
-
90
- def generate_charts(ner_output_bin: dict, ner_output_ext: dict, internal_count: float, external_count: float, score: float) -> Tuple[plt.Figure, plt.Figure]:
 
91
  entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
 
 
92
  entity_counts_ext = {entity: entities_ext.count(entity) for entity in set(entities_ext)}
93
 
94
- pie_labels = list(entity_counts_ext.keys())
95
- pie_sizes = list(entity_counts_ext.values())
 
 
 
96
 
97
  fig1, ax1 = plt.subplots()
98
- ax1.pie(pie_sizes, labels=pie_labels, autopct='%1.1f%%', startangle=90)
99
  ax1.axis('equal')
100
 
101
  fig2, ax2 = plt.subplots()
102
- bars = ['Internal Detail Count', 'External Detail Count', 'Binary Classification Score']
103
- values = [internal_count, external_count, float(score)]
104
- ax2.bar(bars, values)
105
- ax2.set_ylabel('Count/Score')
106
- ax2.set_title('Internal vs External Details and Classification Score')
107
 
108
  return fig1, fig2
109
 
@@ -111,18 +89,21 @@ def generate_charts(ner_output_bin: dict, ner_output_ext: dict, internal_count:
111
  def all(text: str):
112
  ner_output_bin = process_ner(text, pipe_bin)
113
  ner_output_ext = process_ner(text, pipe_ext)
114
- internal_count, external_count, score = process_classification(text, model1, model2, tokenizer1)
115
 
116
- pie_chart, bar_chart = generate_charts(ner_output_bin, ner_output_ext, float(internal_count), float(external_count), score)
117
 
118
  return (ner_output_bin, ner_output_ext,
119
- internal_count, external_count, score,
120
  pie_chart, bar_chart)
121
 
 
 
122
  examples = [
123
  ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
124
  ]
125
 
 
126
  iface = gr.Interface(
127
  fn=all,
128
  inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
@@ -148,8 +129,8 @@ iface = gr.Interface(
148
  gr.Label(label="Internal Detail Count"),
149
  gr.Label(label="External Detail Count"),
150
  gr.Label(label="Approximated Internal Detail Ratio"),
151
- gr.Plot(label="Extended Sequence Classification Pie Chart"),
152
- gr.Plot(label="Internal vs External Details and Classification Score Bar Chart")
153
  ],
154
  title="Scoring Demo",
155
  description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",
@@ -157,4 +138,4 @@ iface = gr.Interface(
157
  theme=monochrome
158
  )
159
 
160
- iface.launch()
 
12
  import torch
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline
14
  import os
15
+
16
  import colorsys
17
  import matplotlib.pyplot as plt
18
 
19
  def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
20
  hex_color = hex_color.lstrip('#')
21
  return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
 
 
 
 
 
 
 
 
22
  return tuple(int(v * 255) for v in new_rgb)
23
 
24
  monochrome = Monochrome()
25
 
26
  auth_token = os.environ['HF_TOKEN']
27
 
28
+
29
  tokenizer_bin = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
30
  model_bin = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
31
  tokenizer_bin.model_max_length = 512
32
  pipe_bin = pipeline("ner", model=model_bin, tokenizer=tokenizer_bin)
33
 
34
+
35
  tokenizer_ext = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
36
  model_ext = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
37
  tokenizer_ext.model_max_length = 512
38
  pipe_ext = pipeline("ner", model=model_ext, tokenizer=tokenizer_ext)
39
 
40
+
41
  model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
42
  tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
43
 
44
  model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
45
 
46
  def process_ner(text: str, pipeline) -> dict:
47
+
48
  output = pipeline(text)
49
  entities = []
50
  current_entity = None
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return {"text": text, "entities": entities}
53
 
54
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
55
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
56
 
57
  with torch.no_grad():
 
 
 
 
58
  prediction2 = outputs2[0].item()
59
  score = prediction1 / (prediction2 + prediction1)
60
 
61
  return f"{round(prediction1, 1)}", f"{round(prediction2, 1)}", f"{round(score, 2)}"
62
+
63
+ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[plt.Figure, plt.Figure]:
64
+ entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
65
  entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
66
+
67
+ entity_counts_bin = {entity: entities_bin.count(entity) for entity in set(entities_bin)}
68
  entity_counts_ext = {entity: entities_ext.count(entity) for entity in set(entities_ext)}
69
 
70
+
71
+ pie_labels_bin = list(entity_counts_bin.keys())
72
+ pie_sizes_bin = list(entity_counts_bin.values())
73
+ pie_labels_ext = list(entity_counts_ext.keys())
74
+ pie_sizes_ext = list(entity_counts_ext.values())
75
 
76
  fig1, ax1 = plt.subplots()
77
+ ax1.pie(pie_sizes_ext, labels=pie_labels_ext, autopct='%1.1f%%', startangle=90)
78
  ax1.axis('equal')
79
 
80
  fig2, ax2 = plt.subplots()
81
+ ax2.bar(pie_labels_bin, pie_sizes_bin)
82
+ ax2.set_ylabel('Count')
83
+ ax2.set_xlabel('Entity Type')
84
+ ax2.set_title('Entity Counts')
 
85
 
86
  return fig1, fig2
87
 
 
89
  def all(text: str):
90
  ner_output_bin = process_ner(text, pipe_bin)
91
  ner_output_ext = process_ner(text, pipe_ext)
92
+ classification_output = process_classification(text, model1, model2, tokenizer1)
93
 
94
+ pie_chart, bar_chart = generate_charts(ner_output_bin, ner_output_ext)
95
 
96
  return (ner_output_bin, ner_output_ext,
97
+ classification_output[0], classification_output[1], classification_output[2],
98
  pie_chart, bar_chart)
99
 
100
+
101
+
102
  examples = [
103
  ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
104
  ]
105
 
106
+
107
  iface = gr.Interface(
108
  fn=all,
109
  inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
 
129
  gr.Label(label="Internal Detail Count"),
130
  gr.Label(label="External Detail Count"),
131
  gr.Label(label="Approximated Internal Detail Ratio"),
132
+ gr.Plot(label="Entity Distribution Pie Chart"),
133
+ gr.Plot(label="Entity Count Bar Chart")
134
  ],
135
  title="Scoring Demo",
136
  description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",
 
138
  theme=monochrome
139
  )
140
 
141
+ iface.launch()