File size: 4,481 Bytes
564ce0c
569a26f
564ce0c
 
98b7999
8deafd3
880bd77
8deafd3
e5f77a9
aa6444a
 
e5f77a9
aa6444a
 
880bd77
82bcffd
 
 
880bd77
 
 
 
 
 
 
 
98b7999
 
 
880bd77
 
 
 
 
 
 
 
 
 
 
 
2df8094
880bd77
2df8094
880bd77
98b7999
880bd77
 
98b7999
 
 
880bd77
c077e58
880bd77
 
c077e58
880bd77
 
 
c077e58
3c9b732
880bd77
c077e58
880bd77
3fec030
880bd77
 
c077e58
 
 
 
170f624
c077e58
54723ff
c077e58
 
 
880bd77
676e72f
880bd77
 
 
 
 
676e72f
880bd77
676e72f
880bd77
676e72f
880bd77
 
 
 
 
a95b8e3
170f624
880bd77
 
 
676e72f
880bd77
 
 
a95b8e3
0a35ca9
880bd77
 
a95b8e3
170f624
f66f708
880bd77
98b7999
880bd77
c99e844
569a26f
c2b8ffb
880bd77
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import json
import streamlit as st
from google.oauth2 import service_account
from google.cloud import language_v1
import pandas as pd

# Render a clickable Google Search link for a Knowledge Graph entity ID (MID).
def query_knowledge_graph(entity_id):
    try:
        search_url = f"https://www.google.com/search?kgmid={entity_id}"
        st.markdown(f'[Open in Google Search]({search_url})', unsafe_allow_html=True)
    except Exception as e:
        # Best-effort UI element: surface the error on the page instead of crashing.
        st.write(f"An error occurred: {e}")

# Coerce every metadata value to a plain string so the mapping is JSON-serializable.
def serialize_entity_metadata(metadata):
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized

# Count entities that carry a Google Knowledge Graph MID (a '/g/' or '/m/' id).
def count_google_entities(entities):
    total = 0
    for entity in entities:
        if 'mid' not in entity.metadata:
            continue
        mid = entity.metadata['mid']
        if '/g/' in mid or '/m/' in mid:
            total += 1
    return total

# Offer CSV and JSON downloads covering every detected entity (MID or not).
def export_entities(entities):
    rows = []
    for entity in entities:
        metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {}
        rows.append({
            "Name": entity.name,
            "Type": language_v1.Entity.Type(entity.type_).name,
            "Salience Score": entity.salience,
            "MID": metadata.get('mid', ''),
            "Metadata": metadata,
            "Mentions": [mention.text.content for mention in entity.mentions],
        })

    # Nothing to offer if the API returned no entities.
    if not rows:
        st.write("No entities found to export.")
        return

    frame = pd.DataFrame(rows)
    st.download_button(label="Export Entities as CSV", data=frame.to_csv(index=False), file_name="entities.csv", mime="text/csv")

    st.download_button(label="Export Entities as JSON", data=json.dumps(rows, indent=2), file_name="entities.json", mime="application/json")

# Sidebar: static help text explaining what the tool does and how to use it.
st.sidebar.title("About This Tool")
st.sidebar.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
st.sidebar.markdown("### How to Use")
st.sidebar.markdown("""
1. **Enter text** in the box below.
2. **Click Analyze** to detect entities.
3. **Export** results to CSV or JSON.
""")

# Main page header shown above the input area.
st.title("Google Cloud NLP Entity Analyzer")
st.write("Analyze text and extract all entities, including those without Google metadata (MID).")

# NLP Analysis Logic
def analyze_entities(text_content, language="en"):
    """Run Google Cloud NL entity analysis on *text_content* and render the results.

    Parameters
    ----------
    text_content : str
        The raw text to analyze.
    language : str, optional
        Language code passed to the API document (default ``"en"``). Previously
        hard-coded; exposed as a parameter so non-English text can be analyzed
        without changing existing call sites.

    Side effects: reads service-account JSON from ``st.secrets["google_nlp"]``,
    calls the ``analyze_entities`` endpoint, and writes a per-entity report plus
    export buttons to the Streamlit page. Raises whatever the credentials or API
    call raises (no local error handling).
    """
    # Credentials come from Streamlit secrets as a JSON service-account blob.
    service_account_info = json.loads(st.secrets["google_nlp"])
    credentials = service_account.Credentials.from_service_account_info(
        service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    client = language_v1.LanguageServiceClient(credentials=credentials)
    document = {"content": text_content, "type_": language_v1.Document.Type.PLAIN_TEXT, "language": language}
    encoding_type = language_v1.EncodingType.UTF8

    response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
    entities = response.entities

    total_entities = len(entities)
    google_entities = count_google_entities(entities)

    # Summary line distinguishes Google-linked (MID) entities from the rest.
    if google_entities == 0:
        st.markdown(f"### Found {total_entities} entities — no Google-linked (MID) entities found.")
    else:
        st.markdown(f"### Found {total_entities} entities — {google_entities} Google-linked entities with MID.")

    st.write("---")

    # Per-entity detail cards.
    for i, entity in enumerate(entities):
        st.write(f"**Entity {i+1} of {total_entities}**")
        st.write(f"**Name:** {entity.name}")
        st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"**Salience Score:** {entity.salience:.4f}")

        if entity.metadata:
            st.write("**Metadata:**")
            st.json(entity.metadata)

            # Same MID predicate as count_google_entities: only /g/ and /m/ ids
            # are real Knowledge Graph identifiers worth linking.
            if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
                query_knowledge_graph(entity.metadata['mid'])
        else:
            st.write("_No metadata available_")

        if entity.mentions:
            st.write(f"**Mentions ({len(entity.mentions)}):**")
            st.write([mention.text.content for mention in entity.mentions])

        st.write("---")

    export_entities(entities)

# Text Input: free-form text area driving the analysis.
user_input = st.text_area("Enter text to analyze")

# Only hit the API when the button is pressed and the input is non-blank.
if st.button("Analyze"):
    if user_input.strip():
        analyze_entities(user_input)
    else:
        st.warning("Please enter some text before clicking Analyze.")