# modules/text.py
# -*- coding: utf-8 -*-
#
# PROJECT:      CognitiveEDA v5.0 - The QuantumLeap Intelligence Platform
#
# DESCRIPTION:  Specialized module for basic text analysis, focused on generating
#               a word cloud visualization from a text-heavy column.

import base64
import html
import io
import logging

import pandas as pd

try:
    from wordcloud import WordCloud
except ImportError:
    # Keep this module importable when the optional dependency is missing;
    # generate_word_cloud() reports the problem when it is actually called.
    WordCloud = None

# Inline style shared by every status/error message returned to the UI.
_MESSAGE_STYLE = "text-align:center; padding:20px;"


def _message_html(body: str) -> str:
    """Wrap an already-escaped HTML fragment in the centered message box."""
    return f'<div style="{_MESSAGE_STYLE}"><p>{body}</p></div>'


def generate_word_cloud(df: pd.DataFrame, text_col: str) -> str:
    """
    Generates a word cloud from a text column and returns it as an HTML string.

    All non-null values of the column are cast to ``str`` and joined into one
    corpus, rendered as an 800x400 word cloud, encoded as a Base64 PNG and
    embedded in an ``<img>`` tag via a ``data:`` URI.

    The column name and any exception text are HTML-escaped before being
    interpolated into the returned markup, because they originate from the
    loaded dataset and must not be interpreted as HTML.

    Args:
        df: The input DataFrame.
        text_col: The name of the column containing text data.

    Returns:
        An HTML string containing the word cloud image, or an HTML status /
        error message when no column is selected, the column does not exist,
        the column holds no text, the ``wordcloud`` package is unavailable,
        or generation fails. Failures inside generation are logged and
        reported in the returned HTML rather than raised.
    """
    # 1. Input validation
    if not text_col:
        return _message_html("Select a text column to generate a word cloud.")

    # Escaped once; reused wherever the column name is placed into markup.
    safe_col = html.escape(str(text_col), quote=True)

    if text_col not in df.columns:
        return _message_html(
            f"<strong>Error:</strong> Column '{safe_col}' not found in the dataset."
        )

    try:
        logging.info("Generating word cloud for column '%s'", text_col)

        # 2. Text corpus preparation: join all non-null entries into one string.
        text_corpus = " ".join(df[text_col].dropna().astype(str))
        if not text_corpus.strip():
            logging.warning(
                "Column '%s' contains no text data to generate a cloud.", text_col
            )
            return _message_html(
                "No text data available in this column to generate a cloud."
            )

        if WordCloud is None:
            logging.error(
                "Cannot generate word cloud for column '%s': "
                "the 'wordcloud' package is not installed.",
                text_col,
            )
            return _message_html(
                "❌ <strong>Error:</strong> Could not generate word cloud. "
                "The 'wordcloud' package is not installed."
            )

        # 3. Word cloud generation
        cloud = WordCloud(
            width=800,
            height=400,
            background_color="white",
            colormap="viridis",
            max_words=150,
            collocations=False,  # Avoids generating two-word phrases
        ).generate(text_corpus)

        # 4. Image encoding: PNG bytes -> Base64 text for a data URI.
        with io.BytesIO() as buf:
            cloud.to_image().save(buf, format="png")
            img_str = base64.b64encode(buf.getvalue()).decode("utf-8")

        # 5. HTML output
        # max-width/height:auto make the image responsive to container width.
        return (
            '<div style="text-align:center;">'
            f'<img src="data:image/png;base64,{img_str}" '
            f'alt="Word Cloud for {safe_col}" '
            'style="max-width:100%; height:auto;">'
            "</div>"
        )

    except Exception as e:
        # UI boundary: log the full traceback, then report instead of raising.
        logging.error(
            "Word cloud generation failed for column '%s': %s",
            text_col,
            e,
            exc_info=True,
        )
        error_msg = f"Could not generate word cloud. An unexpected error occurred: {e}"
        return _message_html(f"❌ <strong>Error:</strong> {html.escape(error_msg)}")