AlGe committed on
Commit
1a3dfac
·
verified ·
1 Parent(s): 9b345c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -75,6 +75,8 @@ def process_ner(text: str, pipeline) -> dict:
75
 
76
  return {"text": text, "entities": entities}
77
 
 
 
78
  def generate_charts(ner_output_bin: dict) -> Tuple[go.Figure, np.ndarray]:
79
  entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
80
 
@@ -83,7 +85,6 @@ def generate_charts(ner_output_bin: dict) -> Tuple[go.Figure, np.ndarray]:
83
  bin_labels = list(entity_counts_bin.keys())
84
  bin_sizes = list(entity_counts_bin.values())
85
 
86
-
87
  bin_color_map = {
88
  "External": "#6ad5bc",
89
  "Internal": "#ee8bac"
@@ -91,7 +92,6 @@ def generate_charts(ner_output_bin: dict) -> Tuple[go.Figure, np.ndarray]:
91
 
92
  bin_colors = [bin_color_map.get(label, "#FFFFFF") for label in bin_labels]
93
 
94
-
95
  # Create bar chart for binary classification
96
  fig2 = go.Figure(data=[go.Bar(x=bin_labels, y=bin_sizes, marker=dict(color=bin_colors))])
97
  fig2.update_layout(
@@ -103,14 +103,22 @@ def generate_charts(ner_output_bin: dict) -> Tuple[go.Figure, np.ndarray]:
103
  )
104
 
105
  # Generate word cloud
106
- wordcloud_image = generate_wordcloud(ner_output_bin['entities'], bin_color_map, "dh3.img")
107
 
108
  return fig2, wordcloud_image
109
 
110
  def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_path: str) -> np.ndarray:
111
-
112
- image_path = os.path.join(os.path.dirname(__file__), file_path)
 
113
 
 
 
 
 
 
 
 
114
  mask_image = np.array(Image.open(image_path))
115
 
116
  token_texts = []
@@ -119,14 +127,12 @@ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_pat
119
 
120
  for entity in entities:
121
  for token in entity['tokens']:
122
- # Remove any leading non-alphanumeric characters
123
  cleaned_token = re.sub(r'^\W+', '', token)
124
  token_texts.append(cleaned_token)
125
  token_scores.append(entity['score'])
126
  token_types.append(entity['entity'])
127
  print(f"{cleaned_token} ({entity['entity']}): {entity['score']}")
128
 
129
- # Create a dictionary for word cloud
130
  word_freq = {text: score for text, score in zip(token_texts, token_scores)}
131
 
132
  def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
@@ -135,13 +141,11 @@ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_pat
135
 
136
  wordcloud = WordCloud(width=800, height=400, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(word_freq)
137
 
138
- # Convert to image array
139
  plt.figure(figsize=(10, 5))
140
  plt.imshow(wordcloud, interpolation='bilinear')
141
  plt.axis('off')
142
  plt.tight_layout(pad=0)
143
 
144
- # Convert plt to numpy array
145
  plt_image = plt.gcf()
146
  plt_image.canvas.draw()
147
  image_array = np.frombuffer(plt_image.canvas.tostring_rgb(), dtype=np.uint8)
 
75
 
76
  return {"text": text, "entities": entities}
77
 
78
+ import os
79
+
80
  def generate_charts(ner_output_bin: dict) -> Tuple[go.Figure, np.ndarray]:
81
  entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
82
 
 
85
  bin_labels = list(entity_counts_bin.keys())
86
  bin_sizes = list(entity_counts_bin.values())
87
 
 
88
  bin_color_map = {
89
  "External": "#6ad5bc",
90
  "Internal": "#ee8bac"
 
92
 
93
  bin_colors = [bin_color_map.get(label, "#FFFFFF") for label in bin_labels]
94
 
 
95
  # Create bar chart for binary classification
96
  fig2 = go.Figure(data=[go.Bar(x=bin_labels, y=bin_sizes, marker=dict(color=bin_colors))])
97
  fig2.update_layout(
 
103
  )
104
 
105
  # Generate word cloud
106
+ wordcloud_image = generate_wordcloud(ner_output_bin['entities'], bin_color_map, "dh3.png")
107
 
108
  return fig2, wordcloud_image
109
 
110
  def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_path: str) -> np.ndarray:
111
+ # Construct the absolute path
112
+ base_path = os.path.dirname(os.path.abspath(__file__))
113
+ image_path = os.path.join(base_path, file_path)
114
 
115
+ # Debugging statement to print the image path
116
+ print(f"Image path: {image_path}")
117
+
118
+ # Check if the file exists
119
+ if not os.path.exists(image_path):
120
+ raise FileNotFoundError(f"Mask image file not found: {image_path}")
121
+
122
  mask_image = np.array(Image.open(image_path))
123
 
124
  token_texts = []
 
127
 
128
  for entity in entities:
129
  for token in entity['tokens']:
 
130
  cleaned_token = re.sub(r'^\W+', '', token)
131
  token_texts.append(cleaned_token)
132
  token_scores.append(entity['score'])
133
  token_types.append(entity['entity'])
134
  print(f"{cleaned_token} ({entity['entity']}): {entity['score']}")
135
 
 
136
  word_freq = {text: score for text, score in zip(token_texts, token_scores)}
137
 
138
  def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
 
141
 
142
  wordcloud = WordCloud(width=800, height=400, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(word_freq)
143
 
 
144
  plt.figure(figsize=(10, 5))
145
  plt.imshow(wordcloud, interpolation='bilinear')
146
  plt.axis('off')
147
  plt.tight_layout(pad=0)
148
 
 
149
  plt_image = plt.gcf()
150
  plt_image.canvas.draw()
151
  image_array = np.frombuffer(plt_image.canvas.tostring_rgb(), dtype=np.uint8)