import json import streamlit as st from google.oauth2 import service_account from google.cloud import language_v1 import pandas as pd # Function to generate Google Search link for MID def query_knowledge_graph(entity_id): try: google_search_link = f"https://www.google.com/search?kgmid={entity_id}" st.markdown(f'[Open in Google Search]({google_search_link})', unsafe_allow_html=True) except Exception as e: st.write(f"An error occurred: {e}") # Function to serialize metadata def serialize_entity_metadata(metadata): return {k: str(v) for k, v in metadata.items()} # Count Google Entities (those with /g/ or /m/ mids) def count_google_entities(entities): return sum( 1 for entity in entities if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']) ) # Export all entities, regardless of mid def export_entities(entities): entity_list = [] for entity in entities: metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {} mid = metadata.get('mid', '') entity_info = { "Name": entity.name, "Type": language_v1.Entity.Type(entity.type_).name, "Salience Score": entity.salience, "MID": mid, "Metadata": metadata, "Mentions": [mention.text.content for mention in entity.mentions] } entity_list.append(entity_info) if not entity_list: st.write("No entities found to export.") return df = pd.DataFrame(entity_list) st.download_button(label="Export Entities as CSV", data=df.to_csv(index=False), file_name="entities.csv", mime="text/csv") json_data = json.dumps(entity_list, indent=2) st.download_button(label="Export Entities as JSON", data=json_data, file_name="entities.json", mime="application/json") # Sidebar st.sidebar.title("About This Tool") st.sidebar.markdown("This tool uses Google Cloud Natural Language API to identify entities.") st.sidebar.markdown("### How to Use") st.sidebar.markdown(""" 1. **Enter text** in the box below. 2. **Click Analyze** to detect entities. 3. **Export** results to CSV or JSON. """) # Header st.title("Google Cloud NLP Entity Analyzer") st.write("Analyze text and extract all entities, including those without Google metadata (MID).") # NLP Analysis Logic def analyze_entities(text_content): service_account_info = json.loads(st.secrets["google_nlp"]) credentials = service_account.Credentials.from_service_account_info( service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"] ) client = language_v1.LanguageServiceClient(credentials=credentials) document = {"content": text_content, "type_": language_v1.Document.Type.PLAIN_TEXT, "language": "en"} encoding_type = language_v1.EncodingType.UTF8 response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type}) entities = response.entities total_entities = len(entities) google_entities = count_google_entities(entities) if google_entities == 0: st.markdown(f"### Found {total_entities} entities — no Google-linked (MID) entities found.") else: st.markdown(f"### Found {total_entities} entities — {google_entities} Google-linked entities with MID.") st.write("---") for i, entity in enumerate(entities): st.write(f"**Entity {i+1} of {total_entities}**") st.write(f"**Name:** {entity.name}") st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}") st.write(f"**Salience Score:** {entity.salience:.4f}") if entity.metadata: st.write("**Metadata:**") st.json(entity.metadata) if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']): query_knowledge_graph(entity.metadata['mid']) else: st.write("_No metadata available_") if entity.mentions: st.write(f"**Mentions ({len(entity.mentions)}):**") st.write([mention.text.content for mention in entity.mentions]) st.write("---") export_entities(entities) # Text Input user_input = st.text_area("Enter text to analyze") if st.button("Analyze"): if user_input.strip(): analyze_entities(user_input) else: st.warning("Please enter some text before clicking Analyze.")