Update app.py
Browse files
app.py
CHANGED
@@ -111,35 +111,45 @@ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_pat
|
|
111 |
# Construct the absolute path
|
112 |
base_path = os.path.dirname(os.path.abspath(__file__))
|
113 |
image_path = os.path.join(base_path, file_path)
|
114 |
-
|
115 |
-
# Debugging statement to print the image path
|
116 |
-
print(f"Image path: {image_path}")
|
117 |
-
|
118 |
-
# Check if the file exists
|
119 |
if not os.path.exists(image_path):
|
120 |
raise FileNotFoundError(f"Mask image file not found: {image_path}")
|
121 |
|
122 |
mask_image = np.array(Image.open(image_path))
|
123 |
mask_height, mask_width = mask_image.shape[:2]
|
124 |
-
|
125 |
-
|
126 |
-
entity_scores = []
|
127 |
-
entity_types = []
|
128 |
|
129 |
for entity in entities:
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
-
|
|
|
137 |
|
138 |
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
|
139 |
-
entity_type =
|
140 |
return color_map.get(entity_type, "#FFFFFF")
|
141 |
|
142 |
-
wordcloud = WordCloud(width=mask_width, height=mask_height, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(
|
143 |
|
144 |
plt.figure(figsize=(mask_width/100, mask_height/100))
|
145 |
plt.imshow(wordcloud, interpolation='bilinear')
|
|
|
111 |
# Construct the absolute path
|
112 |
base_path = os.path.dirname(os.path.abspath(__file__))
|
113 |
image_path = os.path.join(base_path, file_path)
|
|
|
|
|
|
|
|
|
|
|
114 |
if not os.path.exists(image_path):
|
115 |
raise FileNotFoundError(f"Mask image file not found: {image_path}")
|
116 |
|
117 |
mask_image = np.array(Image.open(image_path))
|
118 |
mask_height, mask_width = mask_image.shape[:2]
|
119 |
+
|
120 |
+
word_details = []
|
|
|
|
|
121 |
|
122 |
for entity in entities:
|
123 |
+
for token in entity['tokens']:
|
124 |
+
# Process each token
|
125 |
+
token_text = token.replace("▁", " ").strip()
|
126 |
+
if token_text: # Ensure token is not empty
|
127 |
+
word_details.append({
|
128 |
+
'text': token_text,
|
129 |
+
'score': entity.get('average_score', 0.5),
|
130 |
+
'entity': entity['entity']
|
131 |
+
})
|
132 |
+
|
133 |
+
# Calculate word frequency weighted by score
|
134 |
+
word_freq = {}
|
135 |
+
for detail in word_details:
|
136 |
+
if detail['text'] in word_freq:
|
137 |
+
word_freq[detail['text']]['score'] += detail['score']
|
138 |
+
word_freq[detail['text']]['count'] += 1
|
139 |
+
else:
|
140 |
+
word_freq[detail['text']] = {'score': detail['score'], 'count': 1, 'entity': detail['entity']}
|
141 |
+
|
142 |
+
# Average the scores and prepare final frequency dictionary
|
143 |
+
final_word_freq = {word: details['score'] / details['count'] for word, details in word_freq.items()}
|
144 |
|
145 |
+
# Prepare entity type mapping for color function
|
146 |
+
word_to_entity = {word: details['entity'] for word, details in word_freq.items()}
|
147 |
|
148 |
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
|
149 |
+
entity_type = word_to_entity.get(word, None)
|
150 |
return color_map.get(entity_type, "#FFFFFF")
|
151 |
|
152 |
+
wordcloud = WordCloud(width=mask_width, height=mask_height, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(final_word_freq)
|
153 |
|
154 |
plt.figure(figsize=(mask_width/100, mask_height/100))
|
155 |
plt.imshow(wordcloud, interpolation='bilinear')
|