Spaces:
Sleeping
Sleeping
File size: 5,510 Bytes
564ce0c 569a26f 564ce0c 676e72f 98b7999 8deafd3 ccdf079 8deafd3 e5f77a9 aa6444a e5f77a9 aa6444a 676e72f 3fec030 82bcffd 2df8094 98b7999 2df8094 98b7999 c077e58 170f624 c077e58 170f624 98b7999 c077e58 3c9b732 c077e58 98b7999 3fec030 170f624 c077e58 170f624 c077e58 54723ff c077e58 3fec030 676e72f 170f624 a95b8e3 170f624 d57d7e1 170f624 a95b8e3 676e72f 2534d93 8deafd3 a95b8e3 0a35ca9 c226cf4 2be4187 0ca0300 a95b8e3 170f624 f66f708 98b7999 676e72f 98b7999 c2b8ffb c99e844 569a26f c2b8ffb 170f624 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import json
import streamlit as st
from google.oauth2 import service_account
from google.cloud import language_v1
import urllib.parse
import urllib.request
import pandas as pd
def query_knowledge_graph(entity_id):
    """Render a Google Search link for a Knowledge Graph entity.

    Despite the name, no API request is made here: the function only builds a
    ``https://www.google.com/search?kgmid=...`` URL for the given ID and
    shows it as a Markdown link on the Streamlit page.

    Args:
        entity_id: Knowledge Graph machine ID taken from an entity's ``mid``
            metadata value (e.g. one containing ``/m/`` or ``/g/``).
    """
    try:
        # Percent-encode the ID so characters such as spaces or ")" cannot
        # break the query string or the Markdown link syntax. "/" stays
        # literal so ordinary IDs yield the same URL as before.
        encoded_id = urllib.parse.quote(str(entity_id), safe="/")
        google_search_link = f"https://www.google.com/search?kgmid={encoded_id}"
        # A plain Markdown link does not need raw-HTML rendering enabled.
        st.markdown(f"[Open in Google Search]({google_search_link})")
    except Exception as e:
        # Best-effort UI helper: report the problem on the page instead of
        # aborting the rest of the entity listing.
        st.write(f"An error occurred: {e}")
def count_entities(entities):
    """Return how many entities carry a Knowledge Graph style 'mid'.

    An entity is counted when its ``metadata`` mapping has a ``'mid'`` key
    whose value contains ``'/g/'`` or ``'/m/'``.
    """
    markers = ('/g/', '/m/')
    return sum(
        1
        for item in entities
        if 'mid' in item.metadata
        and any(marker in item.metadata['mid'] for marker in markers)
    )
def serialize_entity_metadata(metadata):
    """Return a plain dict copy of *metadata* with every value turned into a string."""
    serialized = {}
    for key, value in metadata.items():
        serialized[key] = str(value)
    return serialized
def export_entities(entities):
    """Offer CSV and JSON downloads for entities that have a usable 'mid'.

    Only entities whose metadata has a ``'mid'`` containing ``'/g/'`` or
    ``'/m/'`` are exported. When none qualify, a notice is written to the page
    and no download buttons are shown.
    """
    def _has_graph_id(item):
        meta = item.metadata
        return 'mid' in meta and ('/g/' in meta['mid'] or '/m/' in meta['mid'])

    rows = [
        {
            "Name": item.name,
            "Type": language_v1.Entity.Type(item.type_).name,
            "Salience Score": item.salience,
            "Metadata": serialize_entity_metadata(item.metadata),
            "Mentions": [mention.text.content for mention in item.mentions],
        }
        for item in entities
        if _has_graph_id(item)
    ]

    # Nothing qualified: tell the user and skip the download buttons.
    if not rows:
        st.write("No entities with a valid 'mid' found to export.")
        return

    # CSV goes through a DataFrame; JSON is dumped from the plain list.
    st.download_button(
        label="Export Entities as CSV",
        data=pd.DataFrame(rows).to_csv(index=False),
        file_name="entities.csv",
        mime="text/csv",
    )
    st.download_button(
        label="Export Entities as JSON",
        data=json.dumps(rows, indent=2),
        file_name="entities.json",
        mime="application/json",
    )
# Sidebar: short description of the tool followed by usage instructions
_sidebar = st.sidebar
_USAGE_STEPS = """
1. **Open the Tool**: Navigate to the URL where the tool is hosted.
2. **User Input**: Enter the text you want to analyze.
3. **Analyze**: Click the 'Analyze' button.
4. **View Results**: See the identified entities and their details.
5. **Export Entities**: Export the entities as JSON or CSV.
"""
_sidebar.title("About This Tool")
_sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
_sidebar.markdown("### Step-by-Step Guide")
_sidebar.markdown(_USAGE_STEPS)

# Main page: title and introductory text
st.title("Google Cloud NLP Entity Analyzer")
st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
st.write("Entity salience scores are always relative to the analyzed text.")
def sample_analyze_entities(text_content):
    """Run Google Cloud NLP entity analysis on *text_content* and render the results.

    Service-account credentials are loaded from the ``google_nlp`` Streamlit
    secret, which is parsed as JSON. The text is submitted as English plain
    text. A summary heading and one section per entity are written to the
    page, and the export buttons are shown at the end.

    Args:
        text_content: The text to analyze.
    """
    credentials = service_account.Credentials.from_service_account_info(
        json.loads(st.secrets["google_nlp"]),
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    client = language_v1.LanguageServiceClient(credentials=credentials)

    request = {
        "document": {
            "content": text_content,
            "type_": language_v1.Document.Type.PLAIN_TEXT,
            "language": "en",
        },
        "encoding_type": language_v1.EncodingType.UTF8,
    }
    found = client.analyze_entities(request=request).entities

    # Summary heading: total entities vs. those with a '/g/' or '/m/' mid.
    total = len(found)
    google_total = count_entities(found)
    if google_total == 0:
        heading = f"# We found {total} entities - but found no Google Entities"
    elif google_total == 1:
        heading = f"# We found {total} entities - and found 1 Google Entity"
    else:
        heading = f"# We found {total} entities - and found {google_total} Google Entities"
    st.markdown(heading)
    st.write("---")

    for position, entity in enumerate(found, start=1):
        st.write(f"Entity {position} of {total}")
        st.write(f"Name: {entity.name}")
        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
        st.write(f"Salience Score: {entity.salience}")

        metadata = entity.metadata
        if metadata:
            st.write("Metadata:")
            st.write(metadata)
            # Only mids containing '/g/' or '/m/' get a Google Search link.
            if 'mid' in metadata:
                mid = metadata['mid']
                if '/g/' in mid or '/m/' in mid:
                    query_knowledge_graph(mid)

        mentions = entity.mentions
        if mentions:
            n_mentions = len(mentions)
            suffix = "" if n_mentions <= 1 else "s"
            st.write(f"Mentions: {n_mentions} mention{suffix}")
            st.write("Raw Array:")
            st.write(mentions)

        st.write("---")

    # Download buttons are rendered once, after all entities are listed.
    export_entities(found)
# User input for text analysis
user_input = st.text_area("Enter text to analyze")
if st.button("Analyze"):
    # Blank or whitespace-only input has nothing to analyze: tell the user
    # rather than silently doing nothing or sending empty text to the API.
    # The text itself is passed through unmodified.
    if user_input.strip():
        sample_analyze_entities(user_input)
    else:
        st.warning("Please enter some text to analyze.")
|