|
import json |
|
import streamlit as st |
|
from google.oauth2 import service_account |
|
from google.cloud import language_v1 |
|
import urllib.parse |
|
import urllib.request |
|
import pandas as pd |
|
|
|
|
|
def query_knowledge_graph(entity_id):
    """Render a link that opens a Knowledge Graph entity in Google Search.

    Args:
        entity_id: Knowledge Graph machine ID, taken by the caller in this
            file from an entity's ``mid`` metadata (values containing
            ``/m/`` or ``/g/``).

    Returns:
        None. The link is written to the Streamlit page. Any exception raised
        while building or rendering the link is reported on the page with
        ``st.write`` instead of propagating.
    """
    try:
        # Percent-encode the ID so characters such as spaces, '&', '#' or ')'
        # cannot corrupt the query string or the Markdown link syntax. '/' is
        # kept literal, so ordinary MIDs yield exactly the same URL as before.
        kgmid = urllib.parse.quote(str(entity_id), safe="/")
        google_search_link = f"https://www.google.com/search?kgmid={kgmid}"
        # A plain Markdown link does not need raw-HTML rendering enabled.
        st.markdown(f"[Open in Google Search]({google_search_link})")
    except Exception as e:
        st.write(f"An error occurred: {e}")
|
|
|
|
|
def count_entities(entities):
    """Count the entities that carry a Knowledge Graph style ``mid``.

    An entity is counted when its ``metadata`` mapping has a ``'mid'`` key
    whose value contains ``'/g/'`` or ``'/m/'``.

    Args:
        entities: Iterable of objects exposing a ``metadata`` mapping.

    Returns:
        The number of matching entities (``0`` for an empty iterable).
    """

    def _has_graph_mid(meta):
        # Entities without a 'mid' entry are never counted.
        if 'mid' not in meta:
            return False
        mid = meta['mid']
        return '/g/' in mid or '/m/' in mid

    return sum(1 for item in entities if _has_graph_mid(item.metadata))
|
|
|
|
|
def serialize_entity_metadata(metadata):
    """Return a plain ``dict`` copy of ``metadata`` with every value as a string.

    Args:
        metadata: Mapping exposing ``items()``.

    Returns:
        A new dict with the same keys, each value converted with ``str``.
    """
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized
|
|
|
|
|
def export_entities(entities):
    """Offer the given entities as CSV and JSON downloads on the page.

    Each entity becomes one record holding its name, type name, salience
    score, stringified metadata and the text of every mention. The same
    records feed both download buttons.

    Args:
        entities: Iterable of entity objects exposing ``name``, ``type_``,
            ``salience``, ``metadata`` and ``mentions``.
    """
    records = [
        {
            "Name": item.name,
            "Type": language_v1.Entity.Type(item.type_).name,
            "Salience Score": item.salience,
            "Metadata": serialize_entity_metadata(item.metadata),
            "Mentions": [occurrence.text.content for occurrence in item.mentions],
        }
        for item in entities
    ]

    # CSV export: one row per record, without the DataFrame index column.
    st.download_button(
        label="Export Entities as CSV",
        data=pd.DataFrame(records).to_csv(index=False),
        file_name="entities.csv",
        mime="text/csv",
    )

    # JSON export of the same records.
    st.download_button(
        label="Export Entities as JSON",
        data=json.dumps(records, indent=2),
        file_name="entities.json",
        mime="application/json",
    )
|
|
|
|
|
# Sidebar: short description of the tool followed by a usage walkthrough.
with st.sidebar:
    st.title("About This Tool")
    st.markdown("This tool leverages Google's NLP technology for entity analysis.")
    st.markdown("### Step-by-Step Guide")
    # Adjacent literals are joined at compile time into one Markdown list that
    # starts and ends with a newline.
    st.markdown(
        "\n"
        "1. **Open the Tool**: Navigate to the URL where the tool is hosted.\n"
        "2. **User Input**: Enter the text you want to analyze.\n"
        "3. **Analyze**: Click the 'Analyze' button.\n"
        "4. **View Results**: See the identified entities and their details.\n"
        "5. **Export Entities**: Export the entities as JSON or CSV.\n"
    )

# Main page header and introductory notes.
st.title("Google Cloud NLP Entity Analyzer")
st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
st.write("Entity salience scores are always relative to the analyzed text.")
|
|
|
def sample_analyze_entities(text_content):
    """Analyze ``text_content`` for entities and render the results on the page.

    Steps, in order:
      1. Build service-account credentials from the JSON string stored in the
         ``google_nlp`` Streamlit secret and create a language client.
      2. Request entity analysis of the text as English plain text (UTF-8).
      3. Write a heading with the total entity count and the number of
         entities counted by ``count_entities``.
      4. Write name, type, salience, metadata, a Google Search link (when the
         metadata has a ``/g/`` or ``/m/`` ``mid``) and mentions per entity.
      5. Add the CSV/JSON export buttons via ``export_entities``.

    Args:
        text_content: The text to analyze.
    """
    creds = service_account.Credentials.from_service_account_info(
        json.loads(st.secrets["google_nlp"]),
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    nlp_client = language_v1.LanguageServiceClient(credentials=creds)

    analysis_request = {
        "document": {
            "content": text_content,
            "type_": language_v1.Document.Type.PLAIN_TEXT,
            "language": "en",
        },
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    response = nlp_client.analyze_entities(request=analysis_request)

    found = response.entities
    total = len(found)
    graph_total = count_entities(found)

    # Choose the heading wording first; the heading and separator are then
    # written once for all three cases.
    if graph_total == 0:
        heading = f"# We found {total} entities - but found no Google Entities"
    elif graph_total == 1:
        heading = f"# We found {total} entities - and found 1 Google Entity"
    else:
        heading = f"# We found {total} entities - and found {graph_total} Google Entities"
    st.markdown(heading)
    st.write("---")

    for position, entity in enumerate(found, start=1):
        st.write(f"Entity {position} of {total}")
        st.write(f"Name: {entity.name}")
        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"Salience Score: {entity.salience}")

        meta = entity.metadata
        if meta:
            st.write("Metadata:")
            st.write(meta)

            # Only '/g/' or '/m/' style IDs get a Google Search link.
            if 'mid' in meta and ('/g/' in meta['mid'] or '/m/' in meta['mid']):
                query_knowledge_graph(meta['mid'])

        mentions = entity.mentions
        if mentions:
            how_many = len(mentions)
            suffix = "" if how_many <= 1 else "s"
            st.write(f"Mentions: {how_many} mention{suffix}")
            st.write("Raw Array:")
            st.write(mentions)

        # Separator between consecutive entities.
        st.write("---")

    export_entities(found)
|
|
|
|
|
# Main interaction: collect the text and run the analysis when requested.
user_input = st.text_area("Enter text to analyze")

if st.button("Analyze"):
    # Empty or whitespace-only input has nothing to analyze. Tell the user
    # instead of silently doing nothing or sending blank text to the API.
    if user_input and user_input.strip():
        sample_analyze_entities(user_input)
    else:
        st.warning("Please enter some text to analyze.")
|
|