Spaces:

AIEcosystem
/

Scandinavian-JSON-Entity-Finder

Sleeping

App Files Files Community

Maria Tsilimos committed on Jul 17

Commit

190b8c6

unverified ·

1 Parent(s): 2ec60f4

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -84

app.py CHANGED Viewed

@@ -11,11 +11,11 @@ from comet_ml import Experiment
 import re
 import numpy as np
 import json
-from cryptography.fernet import Fernet
 st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 COMET_API_KEY = os.environ.get("COMET_API_KEY")
 COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
 COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
@@ -24,35 +24,51 @@ comet_initialized = False
 if COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME:
     comet_initialized = True
 if 'file_upload_attempts' not in st.session_state:
     st.session_state['file_upload_attempts'] = 0
 if 'encrypted_extracted_text' not in st.session_state:
     st.session_state['encrypted_extracted_text'] = None
 if 'json_dataframe' not in st.session_state:
     st.session_state['json_dataframe'] = None
-max_attempts = 10
 @st.cache_resource
 def load_ner_model():
     try:
-        return pipeline("token-classification",
-                        model="saattrupdan/nbailab-base-ner-scandi",
-                        aggregation_strategy="max", ignore_labels=["O"],
-                        stride=128)
     except Exception as e:
         st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
         st.stop()
 @st.cache_resource
 def load_encryption_key():
     """
@@ -65,20 +81,25 @@ def load_encryption_key():
         key_str = os.environ.get("FERNET_KEY")
         if not key_str:
             raise ValueError("FERNET_KEY environment variable not set. Cannot perform encryption/decryption.")
         # Fernet key must be bytes, so encode the string
         key_bytes = key_str.encode('utf-8')
         return Fernet(key_bytes)
     except ValueError as ve:
-        st.error(f"Configuration Error: {ve}. Please ensure the 'FERNET_KEY' environment variable is set securely in your deployment environment (e.g., Hugging Face Spaces secrets, Render environment variables) or in a local .env file for development.")
-        st.stop() # Stop the app if the key is not found, as security is compromised
     except Exception as e:
         st.error(f"An unexpected error occurred while loading encryption key: {e}. Please check your key format and environment settings.")
         st.stop()
-# Initialize the Fernet cipher instance
 fernet = load_encryption_key()
 def encrypt_text(text_content: str) -> bytes:
     """
     Encrypts a string using the loaded Fernet cipher.
@@ -86,6 +107,7 @@ def encrypt_text(text_content: str) -> bytes:
     """
     return fernet.encrypt(text_content.encode('utf-8'))
 def decrypt_text(encrypted_bytes: bytes) -> str | None:
     """
     Decrypts bytes using the loaded Fernet cipher.
@@ -108,23 +130,17 @@ expander.write('''
    miscellaneous”). Results are presented in an easy-to-read table, visualized in
    an interactive tree map, pie chart, and bar chart, and are available for
    download along with a Glossary of tags.
    **How to Use:** Upload your JSON file. Then, click the 'Results' button
    to extract and tag entities in your text data.
    **Usage Limits:** You can request results up to 10 times.
    **Language settings:** Please check and adjust the language settings in
    your computer, so the Danish, Swedish, Norwegian, Icelandic and Faroese
    characters are handled properly in your downloaded file.
    **Customization:** To change the app's background color to white or
    black, click the three-dot menu on the right-hand side of your app, go to
    Settings and then Choose app theme, colors and fonts.
    **Technical issues:** If your connection times out, please refresh the
    page or reopen the app's URL.
    For any errors or inquiries, please contact us at [email protected]
 ''')
@@ -134,11 +150,10 @@ with st.sidebar:
                     "extracting and tagging entities in text data. Entities can be persons, "
                     "organizations, locations, countries, products, events etc.")
     st.subheader("Related NER Web Apps", divider="orange")
-    st.link_button("MediDoc Entity Finder",
-                   " https://nlpblogs.com/shop/named-entity-recognition-ner/medidoc-entity-finder/",
                    type="primary")
 uploaded_file = st.file_uploader("Choose a JSON file", type=["json"])
 # Initialize text for the current run outside the if uploaded_file block
@@ -149,7 +164,7 @@ if uploaded_file is not None:
     try:
         # Read the content as bytes first, then decode for JSON parsing
         file_contents_bytes = uploaded_file.read()
         # Reset the file pointer after reading, so json.load can read from the beginning
         uploaded_file.seek(0)
         dados = json.load(uploaded_file)
@@ -157,19 +172,17 @@ if uploaded_file is not None:
         # Attempt to convert JSON to DataFrame and extract text
         try:
             st.session_state['json_dataframe'] = pd.DataFrame(dados)
             # Concatenate all content into a single string for NER
             df_string_representation = st.session_state['json_dataframe'].to_string(index=False, header=False)
             # Simple regex to remove non-alphanumeric characters but keep spaces and periods
             text_content = re.sub(r'[^\w\s.]', '', df_string_representation)
             # Remove the specific string "Empty DataFrame Columns" if it appears due to conversion
             text_content = text_content.replace("Empty DataFrame Columns", "").strip()
-            current_run_text = text_content # Set text for current run
-            if not current_run_text.strip(): # Check if text is effectively empty
                 st.warning("No meaningful text could be extracted from the JSON DataFrame for analysis.")
-                current_run_text = None # Reset to None if empty
         except ValueError:
             # If direct conversion to DataFrame fails, try to extract strings directly from JSON structure
             st.info("JSON data could not be directly converted to a simple DataFrame for display. Attempting to extract text directly.")
@@ -196,7 +209,6 @@ if uploaded_file is not None:
                         for sub_val in value:
                             if isinstance(sub_val, str):
                                 extracted_texts_list.append(sub_val)
             if extracted_texts_list:
                 current_run_text = " ".join(extracted_texts_list).strip()
             else:
@@ -208,15 +220,13 @@ if uploaded_file is not None:
             encrypted_text_bytes = encrypt_text(current_run_text)
             st.session_state['encrypted_extracted_text'] = encrypted_text_bytes
             # Optionally clear the unencrypted version from session state if you only want the encrypted one
-            # st.session_state['extracted_text_for_ner'] = None
             st.success("JSON file uploaded successfully. File content encrypted and secured. Due to security protocols, the file content is hidden.")
             st.divider()
         else:
             st.session_state['encrypted_extracted_text'] = None
             # st.session_state['extracted_text_for_ner'] = None
             st.error("Could not extract meaningful text from the uploaded JSON file.")
     except json.JSONDecodeError as e:
         st.error(f"JSON Decode Error: {e}")
         st.error("Please ensure the uploaded file contains valid JSON data.")
@@ -227,10 +237,9 @@ if uploaded_file is not None:
         st.session_state['encrypted_extracted_text'] = None
         st.session_state['json_dataframe'] = None
 # --- Results Button and Processing Logic ---
 if st.button("Results"):
-    start_time = time.time()
     if not comet_initialized:
         st.warning("Comet ML not initialized. Check environment variables if you wish to log data.")
@@ -242,7 +251,7 @@ if st.button("Results"):
     text_for_ner = None
     if st.session_state['encrypted_extracted_text'] is not None:
         text_for_ner = decrypt_text(st.session_state['encrypted_extracted_text'])
     if text_for_ner is None or not text_for_ner.strip():
         st.warning("No extractable text content available for analysis. Please upload a valid JSON file.")
         st.stop()
@@ -251,19 +260,25 @@ if st.button("Results"):
     with st.spinner("Analyzing text...", show_time=True):
         model = load_ner_model()
-        text_entities = model(text_for_ner) # Use the decrypted text
         df = pd.DataFrame(text_entities)
         if 'word' in df.columns:
             # Ensure 'word' column is string type before applying regex
-            if df['word'].dtype == 'object':
-                pattern = r'[^\w\s]' # Regex to remove non-alphanumeric characters but keep spaces and periods
                 df['word'] = df['word'].astype(str).replace(pattern, '', regex=True)
             else:
                 st.warning("The 'word' column is not of string type; skipping character cleaning.")
         else:
             st.error("The 'word' column does not exist in the DataFrame. Cannot perform cleaning.")
-            st.stop() # Stop execution if the column is missing
         # Replace empty strings with 'Unknown' and drop rows with NaN after cleaning
         df = df.replace('', 'Unknown').dropna()
@@ -272,6 +287,11 @@ if st.button("Results"):
             st.warning("No entities were extracted from the uploaded text.")
             st.stop()
         if comet_initialized:
             experiment = Experiment(
                 api_key=COMET_API_KEY,
@@ -280,8 +300,11 @@ if st.button("Results"):
             )
             experiment.log_parameter("input_text_length", len(text_for_ner))
             experiment.log_table("predicted_entities", df)
         # --- Display Results ---
         properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
         df_styled = df.style.set_properties(**properties)
         st.dataframe(df_styled, use_container_width=True)
@@ -298,89 +321,90 @@ if st.button("Results"):
             '**start**': ['index of the start of the corresponding entity']
             '**end**': ['index of the end of the corresponding entity']
             ''')
-        entity_groups = {"PER": "person",
-                         "LOC": "location",
-                         "ORG": "organization",
-                         "MISC": "miscellaneous",
-                        }
-        st.subheader("Grouped entities", divider = "orange")
-        # Convert entity_groups dictionary to a list of (key, title) tuples
-        entity_items = list(entity_groups.items())
-        # Define how many tabs per row
-        tabs_per_row = 5
-        # Loop through the entity items in chunks
-        for i in range(0, len(entity_items), tabs_per_row):
-            current_row_entities = entity_items[i : i + tabs_per_row]
-            tab_titles = [item[1] for item in current_row_entities]
-            tabs = st.tabs(tab_titles)
-            for j, (entity_group_key, tab_title) in enumerate(current_row_entities):
                 with tabs[j]:
-                    if entity_group_key in df["entity_group"].unique():
-                        df_filtered = df[df["entity_group"] == entity_group_key]
                         st.dataframe(df_filtered, use_container_width=True)
                     else:
-                        st.info(f"No '{tab_title}' entities found in the text.")
                         # Display an empty DataFrame for consistency if no entities are found
                         st.dataframe(pd.DataFrame({
-                            'entity_group': [entity_group_key],
                             'score': [np.nan],
                             'word': [np.nan],
                             'start': [np.nan],
-                            'end': [np.nan]
                         }), hide_index=True)
         st.divider()
         # --- Visualizations ---
         st.subheader("Tree map", divider="orange")
-        fig_treemap = px.treemap(df, path=[px.Constant("all"), 'word',
-                                         'entity_group'],
-                                 values='score', color='entity_group')
         fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
         st.plotly_chart(fig_treemap)
         if comet_initialized:
             experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
-        value_counts1 = df['entity_group'].value_counts()
-        final_df_counts = value_counts1.reset_index().rename(columns={"index": "entity_group"})
         col1, col2 = st.columns(2)
         with col1:
             st.subheader("Pie Chart", divider="orange")
-            fig_pie = px.pie(final_df_counts, values='count', names='entity_group',
-                             hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted labels')
             fig_pie.update_traces(textposition='inside', textinfo='percent+label')
             st.plotly_chart(fig_pie)
             if comet_initialized:
-                experiment.log_figure(figure=fig_pie, figure_name="label_pie_chart")
         with col2:
             st.subheader("Bar Chart", divider="orange")
-            fig_bar = px.bar(final_df_counts, x="count", y="entity_group", color="entity_group", text_auto=True,
-                             title='Occurrences of predicted labels')
             st.plotly_chart(fig_bar)
             if comet_initialized:
-                experiment.log_figure(figure=fig_bar, figure_name="label_bar_chart")
         # --- Downloadable Content ---
         dfa = pd.DataFrame(
             data={
-                'Column Name': ['word', 'entity_group','score', 'start', 'end'],
                 'Description': [
                     'entity extracted from your text data',
                     'label (tag) assigned to a given extracted entity',
                     'accuracy score; how accurately a tag has been assigned to a given entity',
                     'index of the start of the corresponding entity',
                     'index of the end of the corresponding entity',
                 ]
             }
         )
         buf = io.BytesIO()
         with zipfile.ZipFile(buf, "w") as myzip:
             myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
@@ -402,7 +426,10 @@ if st.button("Results"):
         st.divider()
         if comet_initialized:
             experiment.end()
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
 st.write(f"Number of times you requested results: **{st.session_state['file_upload_attempts']}/{max_attempts}**")

 import re
 import numpy as np
 import json
+from cryptography.fernet import Fernet
 st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
+# --- Configuration ---
 COMET_API_KEY = os.environ.get("COMET_API_KEY")
 COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
 COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
 if COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME:
     comet_initialized = True
+# --- Initialize session state ---
 if 'file_upload_attempts' not in st.session_state:
     st.session_state['file_upload_attempts'] = 0
 if 'encrypted_extracted_text' not in st.session_state:
     st.session_state['encrypted_extracted_text'] = None
 if 'json_dataframe' not in st.session_state:
     st.session_state['json_dataframe'] = None
+max_attempts = 10
+# Define the categories and their associated entity labels
+ENTITY_LABELS_CATEGORIZED = {
+    "Persons": ["PER"],
+    "Locations": ["LOC"],
+    "Organizations": ["ORG"],
+    "Miscellaneous": ["MISC"],
+}
+# Create a mapping from each specific entity label to its category
+LABEL_TO_CATEGORY_MAP = {
+    label: category for category, labels in ENTITY_LABELS_CATEGORIZED.items() for label in labels
+}
 @st.cache_resource
 def load_ner_model():
+    """
+    Loads the pre-trained NER model ("saattrupdan/nbailab-base-ner-scandi") and caches it.
+    This model is specifically trained for Scandinavian languages.
+    """
     try:
+        return pipeline(
+            "token-classification",
+            model="saattrupdan/nbailab-base-ner-scandi",
+            aggregation_strategy="max",
+            ignore_labels=["O"],
+            stride=128
+        )
     except Exception as e:
         st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
         st.stop()
 @st.cache_resource
 def load_encryption_key():
     """
         key_str = os.environ.get("FERNET_KEY")
         if not key_str:
             raise ValueError("FERNET_KEY environment variable not set. Cannot perform encryption/decryption.")
         # Fernet key must be bytes, so encode the string
         key_bytes = key_str.encode('utf-8')
         return Fernet(key_bytes)
     except ValueError as ve:
+        st.error(
+            f"Configuration Error: {ve}. Please ensure the 'FERNET_KEY' environment variable is set securely "
+            "in your deployment environment (e.g., Hugging Face Spaces secrets, Render environment variables) "
+            "or in a local .env file for development."
+        )
+        st.stop()  # Stop the app if the key is not found, as security is compromised
     except Exception as e:
         st.error(f"An unexpected error occurred while loading encryption key: {e}. Please check your key format and environment settings.")
         st.stop()
+# Initialize the Fernet cipher instance globally (cached)
 fernet = load_encryption_key()
 def encrypt_text(text_content: str) -> bytes:
     """
     Encrypts a string using the loaded Fernet cipher.
     """
     return fernet.encrypt(text_content.encode('utf-8'))
 def decrypt_text(encrypted_bytes: bytes) -> str | None:
     """
     Decrypts bytes using the loaded Fernet cipher.
    miscellaneous”). Results are presented in an easy-to-read table, visualized in
    an interactive tree map, pie chart, and bar chart, and are available for
    download along with a Glossary of tags.
    **How to Use:** Upload your JSON file. Then, click the 'Results' button
    to extract and tag entities in your text data.
    **Usage Limits:** You can request results up to 10 times.
    **Language settings:** Please check and adjust the language settings in
    your computer, so the Danish, Swedish, Norwegian, Icelandic and Faroese
    characters are handled properly in your downloaded file.
    **Customization:** To change the app's background color to white or
    black, click the three-dot menu on the right-hand side of your app, go to
    Settings and then Choose app theme, colors and fonts.
    **Technical issues:** If your connection times out, please refresh the
    page or reopen the app's URL.
    For any errors or inquiries, please contact us at [email protected]
 ''')
                     "extracting and tagging entities in text data. Entities can be persons, "
                     "organizations, locations, countries, products, events etc.")
     st.subheader("Related NER Web Apps", divider="orange")
+    st.link_button("Multilingual PDF & DOCX Entity Finder",
+                   "https://nlpblogs.com/shop/named-entity-recognition-ner/multilingual-pdf-docx-entity-finder/",
                    type="primary")
 uploaded_file = st.file_uploader("Choose a JSON file", type=["json"])
 # Initialize text for the current run outside the if uploaded_file block
     try:
         # Read the content as bytes first, then decode for JSON parsing
         file_contents_bytes = uploaded_file.read()
         # Reset the file pointer after reading, so json.load can read from the beginning
         uploaded_file.seek(0)
         dados = json.load(uploaded_file)
         # Attempt to convert JSON to DataFrame and extract text
         try:
             st.session_state['json_dataframe'] = pd.DataFrame(dados)
             # Concatenate all content into a single string for NER
             df_string_representation = st.session_state['json_dataframe'].to_string(index=False, header=False)
             # Simple regex to remove non-alphanumeric characters but keep spaces and periods
             text_content = re.sub(r'[^\w\s.]', '', df_string_representation)
             # Remove the specific string "Empty DataFrame Columns" if it appears due to conversion
             text_content = text_content.replace("Empty DataFrame Columns", "").strip()
+            current_run_text = text_content  # Set text for current run
+            if not current_run_text.strip():  # Check if text is effectively empty
                 st.warning("No meaningful text could be extracted from the JSON DataFrame for analysis.")
+                current_run_text = None  # Reset to None if empty
         except ValueError:
             # If direct conversion to DataFrame fails, try to extract strings directly from JSON structure
             st.info("JSON data could not be directly converted to a simple DataFrame for display. Attempting to extract text directly.")
                         for sub_val in value:
                             if isinstance(sub_val, str):
                                 extracted_texts_list.append(sub_val)
             if extracted_texts_list:
                 current_run_text = " ".join(extracted_texts_list).strip()
             else:
             encrypted_text_bytes = encrypt_text(current_run_text)
             st.session_state['encrypted_extracted_text'] = encrypted_text_bytes
             # Optionally clear the unencrypted version from session state if you only want the encrypted one
+            # st.session_state['extracted_text_for_ner'] = None
             st.success("JSON file uploaded successfully. File content encrypted and secured. Due to security protocols, the file content is hidden.")
             st.divider()
         else:
             st.session_state['encrypted_extracted_text'] = None
             # st.session_state['extracted_text_for_ner'] = None
             st.error("Could not extract meaningful text from the uploaded JSON file.")
     except json.JSONDecodeError as e:
         st.error(f"JSON Decode Error: {e}")
         st.error("Please ensure the uploaded file contains valid JSON data.")
         st.session_state['encrypted_extracted_text'] = None
         st.session_state['json_dataframe'] = None
 # --- Results Button and Processing Logic ---
 if st.button("Results"):
+    start_time_overall = time.time()  # Start time for overall processing
     if not comet_initialized:
         st.warning("Comet ML not initialized. Check environment variables if you wish to log data.")
     text_for_ner = None
     if st.session_state['encrypted_extracted_text'] is not None:
         text_for_ner = decrypt_text(st.session_state['encrypted_extracted_text'])
     if text_for_ner is None or not text_for_ner.strip():
         st.warning("No extractable text content available for analysis. Please upload a valid JSON file.")
         st.stop()
     with st.spinner("Analyzing text...", show_time=True):
         model = load_ner_model()
+        # Measure NER model processing time
+        start_time_ner = time.time()
+        text_entities = model(text_for_ner)  # Use the decrypted text
+        end_time_ner = time.time()
+        ner_processing_time = end_time_ner - start_time_ner
         df = pd.DataFrame(text_entities)
         if 'word' in df.columns:
             # Ensure 'word' column is string type before applying regex
+            if df['word'].dtype == 'object':
+                pattern = r'[^\w\s]'  # Regex to remove every character that is not a word character or whitespace (periods are removed too)
                 df['word'] = df['word'].astype(str).replace(pattern, '', regex=True)
             else:
                 st.warning("The 'word' column is not of string type; skipping character cleaning.")
         else:
             st.error("The 'word' column does not exist in the DataFrame. Cannot perform cleaning.")
+            st.stop()  # Stop execution if the column is missing
         # Replace empty strings with 'Unknown' and drop rows with NaN after cleaning
         df = df.replace('', 'Unknown').dropna()
             st.warning("No entities were extracted from the uploaded text.")
             st.stop()
+        # --- Add 'category' column to the DataFrame based on the grouped labels ---
+        df['category'] = df['entity_group'].map(LABEL_TO_CATEGORY_MAP)
+        # Handle cases where an entity_group might not have a category
+        df['category'] = df['category'].fillna('Uncategorized')
         if comet_initialized:
             experiment = Experiment(
                 api_key=COMET_API_KEY,
             )
             experiment.log_parameter("input_text_length", len(text_for_ner))
             experiment.log_table("predicted_entities", df)
+            experiment.log_metric("ner_processing_time_seconds", ner_processing_time)
         # --- Display Results ---
+        st.subheader("Extracted Entities", divider="rainbow")
         properties = {"border": "2px solid gray", "color": "blue", "font-size": "16px"}
         df_styled = df.style.set_properties(**properties)
         st.dataframe(df_styled, use_container_width=True)
             '**start**': ['index of the start of the corresponding entity']
             '**end**': ['index of the end of the corresponding entity']
+            '**category**': ['the broader category the entity belongs to']
             ''')
+        st.subheader("Grouped entities", divider="orange")
+        # Get unique categories and sort them for consistent tab order
+        unique_categories = sorted(df['category'].unique())
+        tabs_per_row = 4  # Adjust as needed for better layout
+        # Loop through categories in chunks to create rows of tabs
+        for i in range(0, len(unique_categories), tabs_per_row):
+            current_row_categories = unique_categories[i : i + tabs_per_row]
+            tabs = st.tabs(current_row_categories)
+            for j, category in enumerate(current_row_categories):
                 with tabs[j]:
+                    df_filtered = df[df["category"] == category]
+                    if not df_filtered.empty:
                         st.dataframe(df_filtered, use_container_width=True)
                     else:
+                        st.info(f"No '{category}' entities found in the text.")
                         # Display an empty DataFrame for consistency if no entities are found
                         st.dataframe(pd.DataFrame({
+                            'entity_group': [np.nan],
                             'score': [np.nan],
                             'word': [np.nan],
                             'start': [np.nan],
+                            'end': [np.nan],
+                            'category': [category]
                         }), hide_index=True)
         st.divider()
         # --- Visualizations ---
         st.subheader("Tree map", divider="orange")
+        fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'entity_group', 'word'],
+                                 values='score', color='category',
+                                 color_discrete_map={
+                                     'Persons': 'blue',
+                                     'Locations': 'green',
+                                     'Organizations': 'red',
+                                     'Miscellaneous': 'purple',
+                                     'Uncategorized': 'gray'
+                                 })
         fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
         st.plotly_chart(fig_treemap)
         if comet_initialized:
             experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
+        # Group by category to get per-category counts for the pie and bar charts
+        grouped_counts = df.groupby('category').size().reset_index(name='count')
         col1, col2 = st.columns(2)
         with col1:
             st.subheader("Pie Chart", divider="orange")
+            fig_pie = px.pie(grouped_counts, values='count', names='category',
+                             hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
             fig_pie.update_traces(textposition='inside', textinfo='percent+label')
             st.plotly_chart(fig_pie)
             if comet_initialized:
+                experiment.log_figure(figure=fig_pie, figure_name="category_pie_chart")
         with col2:
             st.subheader("Bar Chart", divider="orange")
+            fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True,
+                             title='Occurrences of predicted categories')
             st.plotly_chart(fig_bar)
             if comet_initialized:
+                experiment.log_figure(figure=fig_bar, figure_name="category_bar_chart")
         # --- Downloadable Content ---
         dfa = pd.DataFrame(
             data={
+                'Column Name': ['word', 'entity_group', 'score', 'start', 'end', 'category'],
                 'Description': [
                     'entity extracted from your text data',
                     'label (tag) assigned to a given extracted entity',
                     'accuracy score; how accurately a tag has been assigned to a given entity',
                     'index of the start of the corresponding entity',
                     'index of the end of the corresponding entity',
+                    'the broader category the entity belongs to',
                 ]
             }
         )
         buf = io.BytesIO()
         with zipfile.ZipFile(buf, "w") as myzip:
             myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
         st.divider()
         if comet_initialized:
             experiment.end()
+    end_time_overall = time.time()
+    elapsed_time_overall = end_time_overall - start_time_overall
+    st.info(f"Results processed in **{elapsed_time_overall:.2f} seconds**.")
 st.write(f"Number of times you requested results: **{st.session_state['file_upload_attempts']}/{max_attempts}**")