Spaces:

blazingbunny
/

google-entity-analysis

Running

File size: 3,793 Bytes

564ce0c
569a26f
564ce0c
 
8deafd3
 
 
 
 
e5f77a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564ce0c
0553e73
3fec030
 
 
 
0553e73
3fec030
 
 
c077e58
 
170f624
c077e58
 
 
170f624
 
 
c077e58
 
 
 
f63dcc6
8deafd3
3fec030
170f624
c077e58
 
 
 
170f624
c077e58
170f624
c077e58
 
 
3fec030
0553e73
3fec030
c077e58
0553e73
170f624
bc4e0d2
170f624
 
 
 
 
a95b8e3
170f624
d57d7e1
170f624
a95b8e3
 
2534d93
8deafd3
a95b8e3
0a35ca9
 
a95b8e3
 
170f624
f66f708
c2b8ffb
a95b8e3
569a26f
c2b8ffb
170f624

import json
import streamlit as st
from google.oauth2 import service_account
from google.cloud import language_v1
import urllib.parse
import urllib.request

# Function to query Google's Knowledge Graph API
def query_knowledge_graph(entity_id, timeout=10):
    """Look up ``entity_id`` in the Knowledge Graph Search API and render the hits.

    For every element of the response's ``itemListElement`` list, the
    result name and result score are written to the Streamlit page.

    Args:
        entity_id: Value sent as the ``ids`` query parameter (the caller in
            this file passes the ``mid`` taken from an entity's metadata).
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot block the app indefinitely.

    Returns:
        None. Any failure (network error, bad JSON, missing key) is reported
        on the page via ``st.write`` instead of being raised.
    """
    try:
        service_url = 'https://kgsearch.googleapis.com/v1/entities:search'
        params = {
            'ids': entity_id,
            'limit': 1,
            'indent': True,
            'key': st.secrets["kg_api"],
        }
        url = service_url + '?' + urllib.parse.urlencode(params)
        # Use the response as a context manager so the underlying connection
        # is closed even if reading or JSON decoding fails.
        with urllib.request.urlopen(url, timeout=timeout) as http_response:
            response = json.loads(http_response.read())
        for element in response['itemListElement']:
            st.write(f"Knowledge Graph name: {element['result']['name']}")
            st.write(f"Knowledge Graph score: {element['resultScore']}")
    except Exception as e:
        # Best-effort lookup: a failed enrichment must not abort the page.
        st.write(f"An error occurred while querying the Knowledge Graph: {e}")


# Count the entities whose metadata holds a 'mid' containing '/g/' or '/m/'
def count_entities(entities):
    """Return how many of ``entities`` carry a Knowledge-Graph-style ``mid``.

    An entity is counted when its ``metadata`` mapping has a ``'mid'`` key
    whose value contains the substring ``'/g/'`` or ``'/m/'``.
    """
    def _has_graph_mid(meta):
        # Guard first: only read meta['mid'] when the key is present.
        if 'mid' not in meta:
            return False
        return '/g/' in meta['mid'] or '/m/' in meta['mid']

    return sum(1 for item in entities if _has_graph_mid(item.metadata))

# Sidebar content: static help text rendered in the Streamlit sidebar.
# These calls run at module top level, i.e. each time the script executes.
st.sidebar.title("About This Tool")
st.sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
st.sidebar.markdown("### Step-by-Step Guide")
# The numbered guide is passed as one multi-line Markdown string.
st.sidebar.markdown("""
1. **Open the Tool**: Navigate to the URL where the tool is hosted.
2. **User Input**: Enter the text you want to analyze.
3. **Analyze**: Click the 'Analyze' button.
4. **View Results**: See the identified entities and their details.
""")

# Header and intro: page title followed by two explanatory lines in the main area.
st.title("Google Cloud NLP Entity Analyzer")
st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events")
st.write("Entity salience scores are always relative to the analysed text.")

def sample_analyze_entities(text_content):
    """Run Google Cloud NLP entity analysis on ``text_content`` and render it.

    Credentials are read as a JSON string from ``st.secrets["google_nlp"]``.
    The text is submitted as an English (``"en"``) plain-text document with
    UTF-8 encoding. For every returned entity the name, type, salience,
    metadata and mentions are written to the page; entities whose metadata
    ``mid`` contains ``'/g/'`` or ``'/m/'`` are additionally looked up via
    ``query_knowledge_graph``.

    Args:
        text_content: The text to analyze.

    Returns:
        None. If credentials cannot be loaded or the API call fails, the
        error is reported on the page via ``st.write`` and nothing else is
        rendered (same reporting style as ``query_knowledge_graph``).
    """
    try:
        service_account_info = json.loads(st.secrets["google_nlp"])
        credentials = service_account.Credentials.from_service_account_info(
            service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
        )

        client = language_v1.LanguageServiceClient(credentials=credentials)
        document = {"content": text_content, "type_": language_v1.Document.Type.PLAIN_TEXT, "language": "en"}
        encoding_type = language_v1.EncodingType.UTF8

        response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
    except Exception as e:
        # UI boundary: show a readable message instead of letting a secrets,
        # credential or API failure surface as an unhandled exception.
        st.write(f"An error occurred while analyzing entities: {e}")
        return

    entities = response.entities
    total = len(entities)  # loop-invariant; computed once

    # Count the entities with 'mid' and either '/g/' or '/m/' in their metadata
    entity_count = count_entities(entities)

    st.write(f"We found {total} entities - We found {entity_count} Google Entities")
    st.write("---")

    for position, entity in enumerate(entities, start=1):
        st.write(f"Entity {position} of {total}")
        st.write(f"Name: {entity.name}")
        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"Salience Score: {entity.salience}")

        if entity.metadata:
            st.write("Metadata:")
            st.write(entity.metadata)

            # Only entities with a '/g/' or '/m/' mid are looked up in the
            # Knowledge Graph (same predicate as count_entities).
            if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
                entity_id = entity.metadata['mid']
                query_knowledge_graph(entity_id)

        if entity.mentions:
            st.write("Mentions:")
            st.write(', '.join(mention.text.content for mention in entity.mentions))

        st.write("---")

# User input for text analysis
user_input = st.text_area("Enter text to analyze", max_chars=5000)

# Run the analysis only when the button is clicked. Empty or whitespace-only
# input gets an explicit warning instead of a silent no-op, and is never
# sent to the API.
if st.button("Analyze"):
    if user_input and user_input.strip():
        sample_analyze_entities(user_input)
    else:
        st.warning("Please enter some text to analyze.")