Spaces:

blazingbunny
/

google-entity-analysis

Running

App Files Files Community

google-entity-analysis / app.py

blazingbunny

Update app.py

880bd77 verified 6 days ago

raw

history blame

4.48 kB

	import json
	import streamlit as st
	from google.oauth2 import service_account
	from google.cloud import language_v1
	import pandas as pd

	# Function to generate Google Search link for MID
	def query_knowledge_graph(entity_id):
	    """Render an "Open in Google Search" link for a Knowledge Graph MID.

	    Args:
	        entity_id: Machine ID (for example a ``/m/...`` or ``/g/...``
	            value) sent to Google Search as the ``kgmid`` query parameter.

	    Returns:
	        None. The link is written to the Streamlit page. If rendering
	        raises, the error text is written to the page instead of
	        propagating.
	    """
	    from urllib.parse import quote

	    try:
	        # Percent-encode the id but keep "/" so ordinary MIDs stay readable;
	        # this also prevents characters such as ")" or spaces from breaking
	        # the Markdown link syntax below.
	        encoded_id = quote(str(entity_id), safe="/")
	        google_search_link = f"https://www.google.com/search?kgmid={encoded_id}"
	        # A Markdown link contains no raw HTML, so unsafe_allow_html is not
	        # requested.
	        st.markdown(f'[Open in Google Search]({google_search_link})')
	    except Exception as e:
	        st.write(f"An error occurred: {e}")

	# Function to serialize metadata
	def serialize_entity_metadata(metadata):
	    """Return a new dict holding the items of *metadata* with each value passed through ``str``.

	    Keys are kept as they are and appear in the same order as
	    ``metadata.items()`` yields them.
	    """
	    serialized = {}
	    for key, value in metadata.items():
	        serialized[key] = str(value)
	    return serialized

	# Count Google Entities (those with /g/ or /m/ mids)
	def count_google_entities(entities):
	    """Count the entities whose metadata has a ``mid`` containing ``/g/`` or ``/m/``.

	    Args:
	        entities: Iterable of objects exposing a ``metadata`` mapping.

	    Returns:
	        The number of matching entities (0 for an empty iterable).
	    """
	    linked = 0
	    for entity in entities:
	        metadata = entity.metadata
	        # Entities without a MID are never counted.
	        if 'mid' not in metadata:
	            continue
	        mid = metadata['mid']
	        if '/g/' in mid or '/m/' in mid:
	            linked += 1
	    return linked

	# Export all entities, regardless of mid
	def export_entities(entities):
	    """Offer every entity for download as CSV and as JSON.

	    Each entity becomes one record with the keys ``Name``, ``Type``,
	    ``Salience Score``, ``MID``, ``Metadata`` and ``Mentions``. Entities
	    without metadata get an empty ``Metadata`` dict and an empty ``MID``.

	    Args:
	        entities: Iterable of entity objects from the language API response.

	    Returns:
	        None. When there are no entities a notice is written to the page
	        and no download buttons are shown.
	    """
	    records = []
	    for entity in entities:
	        # Stringify metadata values so the record can be dumped as JSON.
	        meta = {}
	        if entity.metadata:
	            meta = serialize_entity_metadata(entity.metadata)

	        records.append({
	            "Name": entity.name,
	            "Type": language_v1.Entity.Type(entity.type_).name,
	            "Salience Score": entity.salience,
	            "MID": meta.get('mid', ''),
	            "Metadata": meta,
	            "Mentions": [mention.text.content for mention in entity.mentions],
	        })

	    if not records:
	        st.write("No entities found to export.")
	        return

	    csv_payload = pd.DataFrame(records).to_csv(index=False)
	    st.download_button(
	        label="Export Entities as CSV",
	        data=csv_payload,
	        file_name="entities.csv",
	        mime="text/csv",
	    )

	    json_payload = json.dumps(records, indent=2)
	    st.download_button(
	        label="Export Entities as JSON",
	        data=json_payload,
	        file_name="entities.json",
	        mime="application/json",
	    )

	# Sidebar
	# Everything created inside this context is placed in the sidebar.
	with st.sidebar:
	    st.title("About This Tool")
	    st.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
	    st.markdown("### How to Use")
	    st.markdown("""
	1. Enter text in the box below.
	2. Click Analyze to detect entities.
	3. Export results to CSV or JSON.
	""")

	# Main page heading and a one-line description of the tool
	st.title("Google Cloud NLP Entity Analyzer")
	st.write("Analyze text and extract all entities, including those without Google metadata (MID).")

	# NLP Analysis Logic
	def _build_language_client():
	    """Create a language API client from the ``google_nlp`` Streamlit secret."""
	    # The secret is stored as a JSON string holding the service-account info.
	    service_account_info = json.loads(st.secrets["google_nlp"])
	    credentials = service_account.Credentials.from_service_account_info(
	        service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
	    )
	    return language_v1.LanguageServiceClient(credentials=credentials)


	def _render_entity(entity, position, total_entities):
	    """Write one entity's name, type, salience, metadata and mentions to the page."""
	    st.write(f"Entity {position} of {total_entities}")
	    st.write(f"Name: {entity.name}")
	    st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
	    st.write(f"Salience Score: {entity.salience:.4f}")

	    if entity.metadata:
	        # Copy into a plain dict so st.json receives a JSON-serializable
	        # object rather than the API's map wrapper (export_entities likewise
	        # converts metadata before serializing it).
	        metadata = dict(entity.metadata)
	        st.write("Metadata:")
	        st.json(metadata)

	        # Only MIDs containing /g/ or /m/ get a Google Search link.
	        mid = metadata.get('mid', '')
	        if '/g/' in mid or '/m/' in mid:
	            query_knowledge_graph(mid)
	    else:
	        # NOTE(review): assumed to pair with the metadata check above; the
	        # nesting is inferred from the message text - confirm.
	        st.write("_No metadata available_")

	    if entity.mentions:
	        st.write(f"Mentions ({len(entity.mentions)}):")
	        st.write([mention.text.content for mention in entity.mentions])

	    st.write("---")


	def analyze_entities(text_content):
	    """Run entity analysis on *text_content* and render the results.

	    Sends the text to the language API as an English plain-text document,
	    writes a summary line and one section per entity, then adds the
	    CSV/JSON export buttons.

	    Args:
	        text_content: The text to analyze.

	    Returns:
	        None. All output is written to the Streamlit page.

	    Raises:
	        Nothing is caught here: errors from loading the secret, building
	        the credentials or calling the API propagate to the caller.
	    """
	    client = _build_language_client()
	    document = {
	        "content": text_content,
	        "type_": language_v1.Document.Type.PLAIN_TEXT,
	        "language": "en",
	    }
	    encoding_type = language_v1.EncodingType.UTF8

	    response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
	    entities = response.entities

	    total_entities = len(entities)
	    google_entities = count_google_entities(entities)

	    if google_entities == 0:
	        st.markdown(f"### Found {total_entities} entities — no Google-linked (MID) entities found.")
	    else:
	        st.markdown(f"### Found {total_entities} entities — {google_entities} Google-linked entities with MID.")

	    st.write("---")

	    for position, entity in enumerate(entities, start=1):
	        _render_entity(entity, position, total_entities)

	    export_entities(entities)

	# Text Input
	user_input = st.text_area("Enter text to analyze")

	# Run the analysis only on a button click, and only for non-blank input.
	if st.button("Analyze"):
	    if not user_input.strip():
	        st.warning("Please enter some text before clicking Analyze.")
	    else:
	        analyze_entities(user_input)