AlGe committed on
Commit
6bafdbf
·
verified ·
1 Parent(s): 0e57a26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -111,35 +111,45 @@ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str], file_pat
111
  # Construct the absolute path
112
  base_path = os.path.dirname(os.path.abspath(__file__))
113
  image_path = os.path.join(base_path, file_path)
114
-
115
- # Debugging statement to print the image path
116
- print(f"Image path: {image_path}")
117
-
118
- # Check if the file exists
119
  if not os.path.exists(image_path):
120
  raise FileNotFoundError(f"Mask image file not found: {image_path}")
121
 
122
  mask_image = np.array(Image.open(image_path))
123
  mask_height, mask_width = mask_image.shape[:2]
124
-
125
- entity_texts = []
126
- entity_scores = []
127
- entity_types = []
128
 
129
  for entity in entities:
130
- cleaned_entity = re.sub(r'^\W+', '', ' '.join(entity['tokens']))
131
- entity_texts.append(cleaned_entity)
132
- entity_scores.append(np.mean(entity['scores']))
133
- entity_types.append(entity['entity'])
134
- print(f"{cleaned_entity} ({entity['entity']}): {np.mean(entity['scores'])}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- word_freq = {text: score for text, score in zip(entity_texts, entity_scores)}
 
137
 
138
  def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
139
- entity_type = next((t for t, w in zip(entity_types, entity_texts) if w == word), None)
140
  return color_map.get(entity_type, "#FFFFFF")
141
 
142
- wordcloud = WordCloud(width=mask_width, height=mask_height, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(word_freq)
143
 
144
  plt.figure(figsize=(mask_width/100, mask_height/100))
145
  plt.imshow(wordcloud, interpolation='bilinear')
 
111
  # Construct the absolute path
112
  base_path = os.path.dirname(os.path.abspath(__file__))
113
  image_path = os.path.join(base_path, file_path)
 
 
 
 
 
114
  if not os.path.exists(image_path):
115
  raise FileNotFoundError(f"Mask image file not found: {image_path}")
116
 
117
  mask_image = np.array(Image.open(image_path))
118
  mask_height, mask_width = mask_image.shape[:2]
119
+
120
+ word_details = []
 
 
121
 
122
  for entity in entities:
123
+ for token in entity['tokens']:
124
+ # Process each token
125
+ token_text = token.replace("▁", " ").strip()
126
+ if token_text: # Ensure token is not empty
127
+ word_details.append({
128
+ 'text': token_text,
129
+ 'score': entity.get('average_score', 0.5),
130
+ 'entity': entity['entity']
131
+ })
132
+
133
+ # Calculate word frequency weighted by score
134
+ word_freq = {}
135
+ for detail in word_details:
136
+ if detail['text'] in word_freq:
137
+ word_freq[detail['text']]['score'] += detail['score']
138
+ word_freq[detail['text']]['count'] += 1
139
+ else:
140
+ word_freq[detail['text']] = {'score': detail['score'], 'count': 1, 'entity': detail['entity']}
141
+
142
+ # Average the scores and prepare final frequency dictionary
143
+ final_word_freq = {word: details['score'] / details['count'] for word, details in word_freq.items()}
144
 
145
+ # Prepare entity type mapping for color function
146
+ word_to_entity = {word: details['entity'] for word, details in word_freq.items()}
147
 
148
  def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
149
+ entity_type = word_to_entity.get(word, None)
150
  return color_map.get(entity_type, "#FFFFFF")
151
 
152
+ wordcloud = WordCloud(width=mask_width, height=mask_height, background_color='#121212', mask=mask_image, color_func=color_func).generate_from_frequencies(final_word_freq)
153
 
154
  plt.figure(figsize=(mask_width/100, mask_height/100))
155
  plt.imshow(wordcloud, interpolation='bilinear')