Update app.py
Browse files
app.py
CHANGED
@@ -122,22 +122,21 @@ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_pat
|
|
122 |
mask_image = np.array(Image.open(image_path))
|
123 |
mask_height, mask_width = mask_image.shape[:2]
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
|
129 |
for entity in entities:
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
print(f"{cleaned_token} ({entity['entity']}): {entity['score']}")
|
136 |
|
137 |
-
word_freq = {text: score for text, score in zip(
|
138 |
|
139 |
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
|
140 |
-
entity_type = next((t for t, w in zip(
|
141 |
return color_map.get(entity_type, "#FFFFFF")
|
142 |
|
143 |
wordcloud = WordCloud(width=mask_width, height=mask_height, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(word_freq)
|
|
|
122 |
mask_image = np.array(Image.open(image_path))
|
123 |
mask_height, mask_width = mask_image.shape[:2]
|
124 |
|
125 |
+
entity_texts = []
|
126 |
+
entity_scores = []
|
127 |
+
entity_types = []
|
128 |
|
129 |
for entity in entities:
|
130 |
+
cleaned_entity = re.sub(r'^\W+', '', ' '.join(entity['tokens']))
|
131 |
+
entity_texts.append(cleaned_entity)
|
132 |
+
entity_scores.append(np.mean([token['score'] for token in entity['tokens']]))
|
133 |
+
entity_types.append(entity['entity'])
|
134 |
+
print(f"{cleaned_entity} ({entity['entity']}): {np.mean([token['score'] for token in entity['tokens']])}")
|
|
|
135 |
|
136 |
+
word_freq = {text: score for text, score in zip(entity_texts, entity_scores)}
|
137 |
|
138 |
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
|
139 |
+
entity_type = next((t for t, w in zip(entity_types, entity_texts) if w == word), None)
|
140 |
return color_map.get(entity_type, "#FFFFFF")
|
141 |
|
142 |
wordcloud = WordCloud(width=mask_width, height=mask_height, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(word_freq)
|