File size: 5,510 Bytes
564ce0c
569a26f
564ce0c
 
676e72f
 
98b7999
8deafd3
ccdf079
8deafd3
 
e5f77a9
aa6444a
 
e5f77a9
aa6444a
 
676e72f
 
 
 
 
 
 
 
3fec030
82bcffd
 
 
 
2df8094
98b7999
 
 
2df8094
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98b7999
 
 
 
 
 
 
 
 
 
 
 
c077e58
 
170f624
c077e58
 
 
170f624
 
 
98b7999
c077e58
3c9b732
c077e58
 
98b7999
 
3fec030
170f624
c077e58
 
 
 
170f624
c077e58
54723ff
c077e58
 
 
3fec030
676e72f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170f624
 
 
a95b8e3
170f624
d57d7e1
170f624
a95b8e3
676e72f
2534d93
8deafd3
a95b8e3
0a35ca9
c226cf4
 
 
2be4187
0ca0300
a95b8e3
170f624
f66f708
98b7999
676e72f
98b7999
c2b8ffb
c99e844
569a26f
c2b8ffb
170f624
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import json
import streamlit as st
from google.oauth2 import service_account
from google.cloud import language_v1
import urllib.parse
import urllib.request
import pandas as pd


# Function to show a Google Search link for a Knowledge Graph entity id
def query_knowledge_graph(entity_id):
    """Render an "Open in Google Search" link for *entity_id*.

    Despite its name, this function does not call any API: it only builds a
    ``kgmid`` search URL from the given id and writes it to the page as a
    markdown link. If anything raised while doing so, the error text is
    written to the page instead of propagating to the caller.
    """
    try:
        search_url = f"https://www.google.com/search?kgmid={entity_id}"
        link_markdown = f'[Open in Google Search]({search_url})'
        st.markdown(link_markdown, unsafe_allow_html=True)
    except Exception as exc:
        st.write(f"An error occurred: {exc}")

# Function to count entities whose metadata 'mid' contains '/g/' or '/m/'
def count_entities(entities):
    """Return how many of *entities* carry a Knowledge Graph style 'mid'.

    An entity is counted when its ``metadata`` mapping has a 'mid' key and
    that value contains the substring '/g/' or '/m/'.
    """
    def _has_graph_mid(meta):
        # Entities without a 'mid' entry never count.
        if 'mid' not in meta:
            return False
        mid = meta['mid']
        return '/g/' in mid or '/m/' in mid

    return sum(1 for item in entities if _has_graph_mid(item.metadata))

# Function to turn entity metadata into a plain dict of strings
def serialize_entity_metadata(metadata):
    """Return a new dict with the keys of *metadata* and each value passed through ``str``."""
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized

# Function to offer CSV and JSON downloads of the entities that have a '/g/' or '/m/' mid
def export_entities(entities):
    """Render CSV and JSON download buttons for the exportable *entities*.

    Only entities whose metadata has a 'mid' containing '/g/' or '/m/' are
    exported. Each exported record holds the entity name, its type name,
    salience score, stringified metadata and the text of every mention.
    When nothing qualifies, a notice is written and no buttons are shown.
    """
    def _is_exportable(meta):
        return 'mid' in meta and any(tag in meta['mid'] for tag in ('/g/', '/m/'))

    records = [
        {
            "Name": item.name,
            "Type": language_v1.Entity.Type(item.type_).name,
            "Salience Score": item.salience,
            "Metadata": serialize_entity_metadata(item.metadata),
            "Mentions": [mention.text.content for mention in item.mentions],
        }
        for item in entities
        if _is_exportable(item.metadata)
    ]

    # Nothing to export: tell the user and stop before rendering any button
    if len(records) == 0:
        st.write("No entities with a valid 'mid' found to export.")
        return

    # CSV download, built through a DataFrame
    csv_text = pd.DataFrame(records).to_csv(index=False)
    st.download_button(
        label="Export Entities as CSV",
        data=csv_text,
        file_name="entities.csv",
        mime="text/csv",
    )

    # JSON download, built from the same records
    json_text = json.dumps(records, indent=2)
    st.download_button(
        label="Export Entities as JSON",
        data=json_text,
        file_name="entities.json",
        mime="application/json",
    )

# Sidebar: short description of the tool plus a usage guide
with st.sidebar:
    st.title("About This Tool")
    st.markdown("This tool leverages Google's NLP technology for entity analysis.")
    st.markdown("### Step-by-Step Guide")
    st.markdown(
        "\n"
        "1. **Open the Tool**: Navigate to the URL where the tool is hosted.\n"
        "2. **User Input**: Enter the text you want to analyze.\n"
        "3. **Analyze**: Click the 'Analyze' button.\n"
        "4. **View Results**: See the identified entities and their details.\n"
        "5. **Export Entities**: Export the entities as JSON or CSV.\n"
    )

# Main page: title and introductory text
st.title("Google Cloud NLP Entity Analyzer")
st.write(
    "This tool analyzes text to identify entities such as people, locations, organizations, and events."
)
st.write("Entity salience scores are always relative to the analyzed text.")

def sample_analyze_entities(text_content):
    """Analyze *text_content* with Google Cloud NLP and render the entities.

    Credentials come from the ``google_nlp`` Streamlit secret, which is
    parsed as a JSON service-account document. The text is sent as an
    English plain-text document. The page then receives a summary headline,
    one section per returned entity (with a search link for entities that
    have a '/g/' or '/m/' mid) and finally the export buttons.
    """
    # Authenticated client built from the service-account JSON in secrets
    creds = service_account.Credentials.from_service_account_info(
        json.loads(st.secrets["google_nlp"]),
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    nlp_client = language_v1.LanguageServiceClient(credentials=creds)

    request = {
        "document": {
            "content": text_content,
            "type_": language_v1.Document.Type.PLAIN_TEXT,
            "language": "en",
        },
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = nlp_client.analyze_entities(request=request)

    found = response.entities
    total = len(found)

    # Number of entities with a 'mid' containing '/g/' or '/m/'
    entity_count = count_entities(found)

    # Pick the headline wording for zero, one or many Google entities
    if entity_count == 0:
        headline = f"# We found {total} entities - but found no Google Entities"
    elif entity_count == 1:
        headline = f"# We found {total} entities - and found 1 Google Entity"
    else:
        headline = f"# We found {total} entities - and found {entity_count} Google Entities"
    st.markdown(headline)
    st.write("---")

    for position, entity in enumerate(found, start=1):
        st.write(f"Entity {position} of {total}")
        st.write(f"Name: {entity.name}")
        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"Salience Score: {entity.salience}")

        meta = entity.metadata
        if meta:
            st.write("Metadata:")
            st.write(meta)

            # Offer a search link only for '/g/' or '/m/' mids
            if 'mid' in meta and ('/g/' in meta['mid'] or '/m/' in meta['mid']):
                query_knowledge_graph(meta['mid'])

        mentions = entity.mentions
        if mentions:
            how_many = len(mentions)
            suffix = "" if how_many <= 1 else "s"
            st.write(f"Mentions: {how_many} mention{suffix}")
            st.write("Raw Array:")
            st.write(mentions)

        st.write("---")

    # Download buttons for the exportable entities
    export_entities(found)

# Text box for the content to analyze
user_input = st.text_area("Enter text to analyze")

# Run the analysis only when the button was clicked and some text was entered
if st.button("Analyze") and user_input:
    sample_analyze_entities(user_input)