Spaces:

blazingbunny
/

google-entity-analysis

Running

App Files Files Community

google-entity-analysis / app.py

blazingbunny

Update app.py

ad1dcf0 verified 10 months ago

raw

history blame

4.45 kB

	import json
	import streamlit as st
	from google.oauth2 import service_account
	from google.cloud import language_v1
	import pandas as pd

	# Function to render a Google Search link for a Knowledge Graph entity ID (no API call is made)
	def query_knowledge_graph(entity_id):
	    """Render an "Open in Google Search" link for a Knowledge Graph entity.

	    No API request is made here: the function only builds a Google Search
	    URL that carries ``entity_id`` in the ``kgmid`` query parameter and
	    writes it to the Streamlit page as a markdown link.

	    Args:
	        entity_id: Value of the entity's ``mid`` metadata field.

	    Returns:
	        None. The link (or an error message) is written to the page.
	    """
	    # Function-scope import so the block does not depend on a file-level
	    # import being added elsewhere.
	    from urllib.parse import quote

	    try:
	        # Percent-encode characters that could break out of the query
	        # string or the markdown link target. "/" stays literal, so ids
	        # made of slashes, letters, digits and "_.-~" yield the same URL
	        # as the unencoded form.
	        encoded_id = quote(str(entity_id), safe="/")
	        google_search_link = f"https://www.google.com/search?kgmid={encoded_id}"
	        # A plain markdown link needs no raw HTML, so unsafe_allow_html is
	        # deliberately left at its default.
	        st.markdown(f'[Open in Google Search]({google_search_link})')
	    except Exception as e:
	        # Kept from the original: report the problem on the page instead
	        # of raising.
	        st.write(f"An error occurred: {e}")

	# Function to filter entities with "mid" in their metadata
	def filter_entities_with_mid(entities):
	    """Return, in input order, the entities whose metadata has a 'mid' key.

	    Args:
	        entities: Iterable of objects exposing a ``metadata`` mapping.

	    Returns:
	        A new list holding only the entities with ``'mid'`` in ``metadata``.
	    """
	    kept = []
	    for candidate in entities:
	        if 'mid' in candidate.metadata:
	            kept.append(candidate)
	    return kept

	# Function to serialize entity metadata
	def serialize_entity_metadata(metadata):
	    """Copy ``metadata`` into a plain dict with every value passed through str().

	    Args:
	        metadata: Mapping-like object providing ``items()``.

	    Returns:
	        A new ``dict`` with the same keys and stringified values.
	    """
	    plain = {}
	    for key, value in metadata.items():
	        plain[key] = str(value)
	    return plain

	# Function to offer the entities as both CSV and JSON downloads
	def export_entities(entities):
	    """Add CSV and JSON download buttons for the given entities.

	    Each entity becomes one record with its name, type name, salience
	    score, stringified metadata and the text of every mention.

	    Args:
	        entities: Iterable of entity objects exposing ``name``, ``type_``,
	            ``salience``, ``metadata`` and ``mentions``.

	    Returns:
	        None. Two download buttons are written to the Streamlit page.
	    """
	    records = [
	        {
	            "Name": entity.name,
	            "Type": language_v1.Entity.Type(entity.type_).name,
	            "Salience Score": entity.salience,
	            "Metadata": serialize_entity_metadata(entity.metadata),
	            "Mentions": [mention.text.content for mention in entity.mentions],
	        }
	        for entity in entities
	    ]

	    # CSV: one row per record, without the DataFrame index column.
	    csv_payload = pd.DataFrame(records).to_csv(index=False)
	    st.download_button(
	        label="Export Entities as CSV",
	        data=csv_payload,
	        file_name="entities.csv",
	        mime="text/csv",
	    )

	    # JSON: the same records, pretty-printed with a two-space indent.
	    json_payload = json.dumps(records, indent=2)
	    st.download_button(
	        label="Export Entities as JSON",
	        data=json_payload,
	        file_name="entities.json",
	        mime="application/json",
	    )

	# Sidebar content: a title, a one-line description and a usage guide.
	st.sidebar.title("About This Tool")
	st.sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
	st.sidebar.markdown("### Step-by-Step Guide")
	# The guide is a single markdown string containing a numbered list.
	st.sidebar.markdown("""
	1. Open the Tool: Navigate to the URL where the tool is hosted.
	2. User Input: Enter the text you want to analyze.
	3. Analyze: Click the 'Analyze' button.
	4. View Results: See the identified entities and their details.
	5. Export Entities: Export the entities as JSON or CSV.
	""")

	# Main page header and introductory text.
	st.title("Google Cloud NLP Entity Analyzer")
	st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
	st.write("Entity salience scores are always relative to the analyzed text.")

	def sample_analyze_entities(text_content):
	    """Analyze ``text_content`` for entities and write the results to the page.

	    Credentials are built from the JSON stored under the ``google_nlp``
	    Streamlit secret. The text is sent as an English plain-text document
	    with UTF-8 encoding. Only entities whose metadata contains ``'mid'``
	    are shown, each followed by a Google Search link, and the same
	    entities are then offered for export.

	    Args:
	        text_content: The text to analyze.

	    Returns:
	        None. All output is written to the Streamlit page.
	    """
	    creds = service_account.Credentials.from_service_account_info(
	        json.loads(st.secrets["google_nlp"]),
	        scopes=["https://www.googleapis.com/auth/cloud-platform"],
	    )
	    nlp_client = language_v1.LanguageServiceClient(credentials=creds)

	    request = {
	        "document": {
	            "content": text_content,
	            "type_": language_v1.Document.Type.PLAIN_TEXT,
	            "language": "en",
	        },
	        "encoding_type": language_v1.EncodingType.UTF8,
	    }
	    response = nlp_client.analyze_entities(request=request)

	    # Keep only the entities that carry a "mid" in their metadata.
	    matched = filter_entities_with_mid(response.entities)
	    total = len(matched)

	    st.markdown(f"# We found {total} entities with 'mid' in their metadata")
	    st.write("---")

	    for position, entity in enumerate(matched, start=1):
	        type_name = language_v1.Entity.Type(entity.type_).name
	        st.write(f"Entity {position} of {total}")
	        st.write(f"Name: {entity.name}")
	        st.write(f"Type: {type_name}")
	        st.write(f"Salience Score: {entity.salience}")

	        if entity.metadata:
	            st.write("Metadata:")
	            st.write(entity.metadata)

	        if 'mid' in entity.metadata:
	            query_knowledge_graph(entity.metadata['mid'])

	        if entity.mentions:
	            count = len(entity.mentions)
	            suffix = "" if count <= 1 else "s"
	            st.write(f"Mentions: {count} mention{suffix}")
	            st.write("Raw Array:")
	            st.write(entity.mentions)

	        # Separator between consecutive entities.
	        st.write("---")

	    # Offer the displayed entities as CSV/JSON downloads.
	    export_entities(matched)

	# User input for text analysis
	# Text box holding the text to be analyzed.
	user_input = st.text_area("Enter text to analyze")

	# Run the analysis only when the button was clicked and the box is non-empty.
	if st.button("Analyze") and user_input:
	    sample_analyze_entities(user_input)