Spaces:

blazingbunny
/

google-entity-analysis

Running

App Files Community

blazingbunny committed 5 days ago

Commit

880bd77

verified ·

1 Parent(s): 2df8094

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -79

app.py CHANGED Viewed

@@ -2,12 +2,9 @@ import json
 import streamlit as st
 from google.oauth2 import service_account
 from google.cloud import language_v1
-import urllib.parse
-import urllib.request
 import pandas as pd
-# Function to query Google's Knowledge Graph API
 def query_knowledge_graph(entity_id):
     try:
         google_search_link = f"https://www.google.com/search?kgmid={entity_id}"
@@ -15,68 +12,59 @@ def query_knowledge_graph(entity_id):
     except Exception as e:
         st.write(f"An error occurred: {e}")
-# Function to count entities with 'mid' that contains '/g/' or '/m/' in their metadata
-def count_entities(entities):
-    count = 0
-    for entity in entities:
-        metadata = entity.metadata
-        if 'mid' in metadata and ('/g/' in metadata['mid'] or '/m/' in metadata['mid']):
-            count += 1
-    return count
-# Function to serialize entity metadata
 def serialize_entity_metadata(metadata):
     return {k: str(v) for k, v in metadata.items()}
-# Function to export entities as a JSON or CSV file, only exporting entities with 'mid' in their metadata
 def export_entities(entities):
     entity_list = []
     for entity in entities:
-        # Check if entity has 'mid' in its metadata and if it contains '/g/' or '/m/' in 'mid'
-        if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
-            entity_info = {
-                "Name": entity.name,
-                "Type": language_v1.Entity.Type(entity.type_).name,
-                "Salience Score": entity.salience,
-                "Metadata": serialize_entity_metadata(entity.metadata),
-                "Mentions": [mention.text.content for mention in entity.mentions]
-            }
-            entity_list.append(entity_info)
-    # If there are no entities to export, notify the user
     if not entity_list:
-        st.write("No entities with a valid 'mid' found to export.")
         return
-    # Convert to DataFrame for easier export as CSV
     df = pd.DataFrame(entity_list)
-    # Export as CSV
-    csv = df.to_csv(index=False)
-    st.download_button(label="Export Entities as CSV", data=csv, file_name="entities.csv", mime="text/csv")
-    # Export as JSON
     json_data = json.dumps(entity_list, indent=2)
     st.download_button(label="Export Entities as JSON", data=json_data, file_name="entities.json", mime="application/json")
-# Sidebar content
 st.sidebar.title("About This Tool")
-st.sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
-st.sidebar.markdown("### Step-by-Step Guide")
 st.sidebar.markdown("""
-1. **Open the Tool**: Navigate to the URL where the tool is hosted.
-2. **User Input**: Enter the text you want to analyze.
-3. **Analyze**: Click the 'Analyze' button.
-4. **View Results**: See the identified entities and their details.
-5. **Export Entities**: Export the entities as JSON or CSV.
 """)
-# Header and intro
 st.title("Google Cloud NLP Entity Analyzer")
-st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
-st.write("Entity salience scores are always relative to the analyzed text.")
-def sample_analyze_entities(text_content):
     service_account_info = json.loads(st.secrets["google_nlp"])
     credentials = service_account.Credentials.from_service_account_info(
         service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
@@ -87,50 +75,46 @@ def sample_analyze_entities(text_content):
     encoding_type = language_v1.EncodingType.UTF8
     response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
-    # Count the entities with 'mid' and either '/g/' or '/m/' in their metadata
-    entity_count = count_entities(response.entities)
-    if entity_count == 0:
-        st.markdown(f"# We found {len(response.entities)} entities - but found no Google Entities")
-        st.write("---")
-    elif entity_count == 1:
-        st.markdown(f"# We found {len(response.entities)} entities - and found 1 Google Entity")
-        st.write("---")
     else:
-        st.markdown(f"# We found {len(response.entities)} entities - and found {entity_count} Google Entities")
-        st.write("---")
-    for i, entity in enumerate(response.entities):
-        st.write(f"Entity {i+1} of {len(response.entities)}")
-        st.write(f"Name: {entity.name}")
-        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
-        st.write(f"Salience Score: {entity.salience}")
         if entity.metadata:
-            st.write("Metadata:")
-            st.write(entity.metadata)
             if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
-                entity_id = entity.metadata['mid']
-                query_knowledge_graph(entity_id)
         if entity.mentions:
-            mention_count = len(entity.mentions)
-            plural = "s" if mention_count > 1 else ""
-            st.write(f"Mentions: {mention_count} mention{plural}")
-            st.write("Raw Array:")
-            st.write(entity.mentions)
         st.write("---")
-    # Add the export functionality
-    export_entities(response.entities)
-# User input for text analysis
 user_input = st.text_area("Enter text to analyze")
 if st.button("Analyze"):
-    if user_input:
-        sample_analyze_entities(user_input)

 import streamlit as st
 from google.oauth2 import service_account
 from google.cloud import language_v1
 import pandas as pd
+# Function to generate Google Search link for MID
 def query_knowledge_graph(entity_id):
     try:
         google_search_link = f"https://www.google.com/search?kgmid={entity_id}"
     except Exception as e:
         st.write(f"An error occurred: {e}")
# Function to serialize metadata
def serialize_entity_metadata(metadata):
    """Return a plain ``dict`` copy of *metadata* with every value stringified.

    The result only contains ``str`` values, so it can be handed straight to
    ``json.dumps`` or placed in a DataFrame cell.

    Args:
        metadata: A mapping exposing ``.items()``; may be empty or ``None``.

    Returns:
        A new ``dict`` with the same keys and ``str(value)`` for each value.
        An empty ``dict`` is returned when *metadata* is empty or ``None``.
    """
    # Guard so callers do not have to pre-check for missing metadata.
    if not metadata:
        return {}
    return {key: str(value) for key, value in metadata.items()}
# Count Google Entities (those with /g/ or /m/ mids)
def count_google_entities(entities):
    """Count the entities whose metadata carries a ``/g/`` or ``/m/`` ``mid``.

    An entity is counted when its ``metadata`` mapping has a ``'mid'`` entry
    whose text contains ``'/g/'`` or ``'/m/'``. Entities with no metadata, no
    ``'mid'`` key, or an empty ``mid`` are skipped instead of raising.

    Args:
        entities: Iterable of objects exposing a ``metadata`` mapping; may be
            empty or ``None``.

    Returns:
        The number of matching entities as an ``int``.
    """
    def _has_google_mid(entity):
        # Tolerate entities whose metadata is absent or None.
        metadata = getattr(entity, "metadata", None) or {}
        mid = str(metadata.get("mid") or "")
        return "/g/" in mid or "/m/" in mid

    return sum(1 for entity in entities or () if _has_google_mid(entity))
# Export all entities, regardless of mid
def export_entities(entities):
    """Render CSV and JSON download buttons covering every analysed entity.

    Each entity becomes one record with its name, type name, salience score,
    ``mid`` (empty string when absent), stringified metadata and the text of
    each mention. When there is nothing to export a short notice is written
    and no buttons are rendered.

    Args:
        entities: Iterable of entity objects exposing ``name``, ``type_``,
            ``salience``, ``metadata`` and ``mentions``.

    Returns:
        None. Output is produced through Streamlit widgets only.
    """
    entity_list = []
    for entity in entities:
        metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {}
        entity_list.append({
            "Name": entity.name,
            "Type": language_v1.Entity.Type(entity.type_).name,
            "Salience Score": entity.salience,
            "MID": metadata.get("mid", ""),
            "Metadata": metadata,
            "Mentions": [mention.text.content for mention in entity.mentions],
        })

    if not entity_list:
        st.write("No entities found to export.")
        return

    # CSV cells are flat text. Left as dict/list objects, pandas would write
    # their Python repr (single-quoted), which other tools cannot parse, so
    # the nested columns are JSON-encoded for the CSV export only.
    csv_rows = [
        {
            **row,
            "Metadata": json.dumps(row["Metadata"], ensure_ascii=False),
            "Mentions": json.dumps(row["Mentions"], ensure_ascii=False),
        }
        for row in entity_list
    ]
    csv_data = pd.DataFrame(csv_rows).to_csv(index=False)
    st.download_button(label="Export Entities as CSV", data=csv_data, file_name="entities.csv", mime="text/csv")

    # The JSON export keeps the nested structure as-is.
    json_data = json.dumps(entity_list, indent=2)
    st.download_button(label="Export Entities as JSON", data=json_data, file_name="entities.json", mime="application/json")
+# Sidebar
 st.sidebar.title("About This Tool")
+st.sidebar.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
+st.sidebar.markdown("### How to Use")
 st.sidebar.markdown("""
+1. **Enter text** in the box below.
+2. **Click Analyze** to detect entities.
+3. **Export** results to CSV or JSON.
 """)
+# Header
 st.title("Google Cloud NLP Entity Analyzer")
+st.write("Analyze text and extract all entities, including those without Google metadata (MID).")
+# NLP Analysis Logic
+def analyze_entities(text_content):
     service_account_info = json.loads(st.secrets["google_nlp"])
     credentials = service_account.Credentials.from_service_account_info(
         service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
     encoding_type = language_v1.EncodingType.UTF8
     response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
+    entities = response.entities
+    total_entities = len(entities)
+    google_entities = count_google_entities(entities)
+    if google_entities == 0:
+        st.markdown(f"### Found {total_entities} entities — no Google-linked (MID) entities found.")
     else:
+        st.markdown(f"### Found {total_entities} entities — {google_entities} Google-linked entities with MID.")
+    st.write("---")
+    for i, entity in enumerate(entities):
+        st.write(f"**Entity {i+1} of {total_entities}**")
+        st.write(f"**Name:** {entity.name}")
+        st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}")
+        st.write(f"**Salience Score:** {entity.salience:.4f}")
         if entity.metadata:
+            st.write("**Metadata:**")
+            st.json(entity.metadata)
             if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
+                query_knowledge_graph(entity.metadata['mid'])
+        else:
+            st.write("_No metadata available_")
         if entity.mentions:
+            st.write(f"**Mentions ({len(entity.mentions)}):**")
+            st.write([mention.text.content for mention in entity.mentions])
         st.write("---")
+    export_entities(entities)
+# Text Input
 user_input = st.text_area("Enter text to analyze")
 if st.button("Analyze"):
+    if user_input.strip():
+        analyze_entities(user_input)
+    else:
+        st.warning("Please enter some text before clicking Analyze.")