File size: 4,481 Bytes
564ce0c 569a26f 564ce0c 98b7999 8deafd3 880bd77 8deafd3 e5f77a9 aa6444a e5f77a9 aa6444a 880bd77 82bcffd 880bd77 98b7999 880bd77 2df8094 880bd77 2df8094 880bd77 98b7999 880bd77 98b7999 880bd77 c077e58 880bd77 c077e58 880bd77 c077e58 3c9b732 880bd77 c077e58 880bd77 3fec030 880bd77 c077e58 170f624 c077e58 54723ff c077e58 880bd77 676e72f 880bd77 676e72f 880bd77 676e72f 880bd77 676e72f 880bd77 a95b8e3 170f624 880bd77 676e72f 880bd77 a95b8e3 0a35ca9 880bd77 a95b8e3 170f624 f66f708 880bd77 98b7999 880bd77 c99e844 569a26f c2b8ffb 880bd77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import json
import streamlit as st
from google.oauth2 import service_account
from google.cloud import language_v1
import pandas as pd
# Function to generate Google Search link for MID
def query_knowledge_graph(entity_id):
    """Render a Google Search link for a Knowledge Graph machine ID (MID).

    Args:
        entity_id: The MID to look up, e.g. a value starting with ``/m/`` or
            ``/g/``. It is converted to ``str`` before being placed in the URL.

    Returns:
        None. A Markdown link is written to the Streamlit page; if building or
        rendering the link raises, the error text is written to the page
        instead.
    """
    # Imported here so this function does not rely on the file's import list.
    from urllib.parse import quote

    try:
        # Percent-encode the ID so characters such as spaces, "&" or ")"
        # cannot break the query string or the Markdown link. "/" is kept
        # literal, so ordinary MIDs produce exactly the same URL as before.
        encoded_id = quote(str(entity_id), safe="/")
        google_search_link = f"https://www.google.com/search?kgmid={encoded_id}"
        # A plain Markdown link needs no raw HTML, so unsafe_allow_html is
        # left at its default instead of being switched on.
        st.markdown(f"[Open in Google Search]({google_search_link})")
    except Exception as e:  # best-effort UI helper: report, do not abort the page
        st.write(f"An error occurred: {e}")
# Function to serialize metadata
def serialize_entity_metadata(metadata):
    """Return a new dict holding the items of *metadata* with values as strings.

    Keys are kept unchanged; each value is passed through ``str``.
    """
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized
# Count Google Entities (those with /g/ or /m/ mids)
def count_google_entities(entities):
    """Count the entities whose metadata carries a Google MID.

    An entity is counted when its ``metadata`` has a ``'mid'`` entry whose
    value contains ``/g/`` or ``/m/``.
    """
    tally = 0
    for entity in entities:
        entity_metadata = entity.metadata
        # Entities without a MID entry never count.
        if 'mid' not in entity_metadata:
            continue
        mid_value = entity_metadata['mid']
        if '/g/' in mid_value or '/m/' in mid_value:
            tally += 1
    return tally
# Export all entities, regardless of mid
def export_entities(entities):
    """Offer the analyzed entities as CSV and JSON downloads.

    One record is built per entity, whether or not it has a MID. If there are
    no entities, a short notice is written to the page and no download
    buttons are shown.

    Args:
        entities: Iterable of entity objects exposing ``name``, ``type_``,
            ``salience``, ``metadata`` and ``mentions``.

    Returns:
        None. The function only writes Streamlit widgets to the page.
    """
    entity_list = [_entity_to_export_record(entity) for entity in entities]
    if not entity_list:
        st.write("No entities found to export.")
        return

    # CSV cells are flat text. Encoding the nested values as JSON keeps them
    # machine-readable; letting pandas stringify them would emit Python
    # repr syntax (single quotes), which other tools cannot parse.
    csv_rows = [
        {
            **record,
            "Metadata": json.dumps(record["Metadata"], ensure_ascii=False),
            "Mentions": json.dumps(record["Mentions"], ensure_ascii=False),
        }
        for record in entity_list
    ]
    df = pd.DataFrame(csv_rows)
    st.download_button(
        label="Export Entities as CSV",
        data=df.to_csv(index=False),
        file_name="entities.csv",
        mime="text/csv",
    )

    # ensure_ascii=False keeps non-ASCII entity names readable in the file
    # instead of escaping them as \uXXXX sequences.
    json_data = json.dumps(entity_list, indent=2, ensure_ascii=False)
    st.download_button(
        label="Export Entities as JSON",
        data=json_data,
        file_name="entities.json",
        mime="application/json",
    )


def _entity_to_export_record(entity):
    """Flatten one entity into a JSON-serializable dict for export."""
    # Falsy metadata is exported as an empty dict so "MID" falls back to "".
    metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {}
    return {
        "Name": entity.name,
        "Type": language_v1.Entity.Type(entity.type_).name,
        "Salience Score": entity.salience,
        "MID": metadata.get('mid', ''),
        "Metadata": metadata,
        "Mentions": [mention.text.content for mention in entity.mentions],
    }
# Sidebar
with st.sidebar:
    # Elements created inside this context are placed in the sidebar.
    st.title("About This Tool")
    st.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
    st.markdown("### How to Use")
    st.markdown(
        "\n"
        "1. **Enter text** in the box below.\n"
        "2. **Click Analyze** to detect entities.\n"
        "3. **Export** results to CSV or JSON.\n"
    )
# Header
st.title("Google Cloud NLP Entity Analyzer")
st.write("Analyze text and extract all entities, including those without Google metadata (MID).")
# NLP Analysis Logic
def analyze_entities(text_content, language="en"):
    """Run entity analysis on *text_content* and render the results.

    The text is sent to the Cloud Natural Language client as a plain-text
    document. A summary heading, one section per entity, and the export
    buttons are then written to the Streamlit page.

    Args:
        text_content: The text to analyze.
        language: Language code sent with the document. Defaults to ``"en"``,
            the value this function previously hard-coded. Pass ``None`` (or
            an empty string) to leave the field out of the request so the
            service chooses the language itself.

    Returns:
        None. Errors from loading the secret or calling the API propagate to
        the caller unchanged.
    """
    client = language_v1.LanguageServiceClient(credentials=_load_nlp_credentials())

    document = {"content": text_content, "type_": language_v1.Document.Type.PLAIN_TEXT}
    if language:
        document["language"] = language
    response = client.analyze_entities(
        request={"document": document, "encoding_type": language_v1.EncodingType.UTF8}
    )

    entities = response.entities
    total_entities = len(entities)
    google_entities = count_google_entities(entities)

    # The separator below is an em dash; it had been corrupted to a stray
    # Greek beta by a character-encoding mix-up.
    if google_entities == 0:
        st.markdown(f"### Found {total_entities} entities — no Google-linked (MID) entities found.")
    else:
        st.markdown(
            f"### Found {total_entities} entities — {google_entities} Google-linked entities with MID."
        )
    st.write("---")

    for position, entity in enumerate(entities, start=1):
        _render_entity(entity, position, total_entities)

    export_entities(entities)


def _load_nlp_credentials():
    """Build service-account credentials from the ``google_nlp`` Streamlit secret."""
    raw_secret = st.secrets["google_nlp"]
    # Accept the secret either as a JSON string or as an already-parsed
    # mapping (for example a TOML table in the secrets file).
    if isinstance(raw_secret, (str, bytes)):
        service_account_info = json.loads(raw_secret)
    else:
        service_account_info = dict(raw_secret)
    return service_account.Credentials.from_service_account_info(
        service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )


def _render_entity(entity, position, total_entities):
    """Write one entity's name, type, salience, metadata and mentions to the page."""
    st.write(f"**Entity {position} of {total_entities}**")
    st.write(f"**Name:** {entity.name}")
    st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}")
    st.write(f"**Salience Score:** {entity.salience:.4f}")

    if entity.metadata:
        st.write("**Metadata:**")
        # Hand st.json a plain dict copy rather than the API's mapping object.
        metadata = dict(entity.metadata)
        st.json(metadata)
        # Only MIDs containing /g/ or /m/ get a Knowledge Graph search link.
        mid = metadata.get('mid', '')
        if '/g/' in mid or '/m/' in mid:
            query_knowledge_graph(mid)
    else:
        st.write("_No metadata available_")

    if entity.mentions:
        st.write(f"**Mentions ({len(entity.mentions)}):**")
        st.write([mention.text.content for mention in entity.mentions])
    st.write("---")
# Text Input
# The text box is drawn on every run; analysis happens only on a button click.
user_input = st.text_area("Enter text to analyze")
if st.button("Analyze"):
    # Whitespace-only input counts as empty.
    if not user_input.strip():
        st.warning("Please enter some text before clicking Analyze.")
    else:
        analyze_entities(user_input)
|