blazingbunny committed on
Commit
df9da30
·
verified ·
1 Parent(s): 880bd77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -50
app.py CHANGED
@@ -2,9 +2,12 @@ import json
2
  import streamlit as st
3
  from google.oauth2 import service_account
4
  from google.cloud import language_v1
 
 
5
  import pandas as pd
6
 
7
- # Function to generate Google Search link for MID
 
8
  def query_knowledge_graph(entity_id):
9
  try:
10
  google_search_link = f"https://www.google.com/search?kgmid={entity_id}"
@@ -12,28 +15,28 @@ def query_knowledge_graph(entity_id):
12
  except Exception as e:
13
  st.write(f"An error occurred: {e}")
14
 
15
- # Function to serialize metadata
 
 
 
 
 
 
 
 
 
16
def serialize_entity_metadata(metadata):
    """Return entity metadata as a plain dict with every value stringified.

    Args:
        metadata: Mapping-like object exposing ``items()``, or a falsy value
            (``None`` / empty mapping) when the entity has no metadata.

    Returns:
        A new ``dict`` with the original keys and ``str(value)`` for each
        value; an empty dict when there is no metadata.
    """
    # Guard here so callers do not have to special-case missing metadata.
    if not metadata:
        return {}
    return {key: str(value) for key, value in metadata.items()}
18
 
19
- # Count Google Entities (those with /g/ or /m/ mids)
20
def count_google_entities(entities):
    """Count the entities whose metadata carries a Google MID.

    An entity is counted when its metadata has a 'mid' value containing
    '/g/' or '/m/'.

    Args:
        entities: Iterable of objects exposing a mapping-like ``metadata``
            attribute.

    Returns:
        The number of matching entities; 0 for an empty iterable.
    """
    total = 0
    for entity in entities:
        # Entities with no metadata (None or empty) cannot have a MID.
        metadata = entity.metadata or {}
        # Look 'mid' up once; a missing or empty value matches nothing.
        mid = metadata.get('mid') or ''
        if '/g/' in mid or '/m/' in mid:
            total += 1
    return total
25
-
26
- # Export all entities, regardless of mid
27
  def export_entities(entities):
28
  entity_list = []
29
  for entity in entities:
30
  metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {}
31
- mid = metadata.get('mid', '')
32
  entity_info = {
33
  "Name": entity.name,
34
  "Type": language_v1.Entity.Type(entity.type_).name,
35
  "Salience Score": entity.salience,
36
- "MID": mid,
37
  "Metadata": metadata,
38
  "Mentions": [mention.text.content for mention in entity.mentions]
39
  }
@@ -44,27 +47,33 @@ def export_entities(entities):
44
  return
45
 
46
  df = pd.DataFrame(entity_list)
47
- st.download_button(label="Export Entities as CSV", data=df.to_csv(index=False), file_name="entities.csv", mime="text/csv")
48
 
 
 
 
 
 
49
  json_data = json.dumps(entity_list, indent=2)
50
  st.download_button(label="Export Entities as JSON", data=json_data, file_name="entities.json", mime="application/json")
51
 
52
- # Sidebar
53
  st.sidebar.title("About This Tool")
54
- st.sidebar.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
55
- st.sidebar.markdown("### How to Use")
56
  st.sidebar.markdown("""
57
- 1. **Enter text** in the box below.
58
- 2. **Click Analyze** to detect entities.
59
- 3. **Export** results to CSV or JSON.
 
 
60
  """)
61
 
62
- # Header
63
  st.title("Google Cloud NLP Entity Analyzer")
64
- st.write("Analyze text and extract all entities, including those without Google metadata (MID).")
 
65
 
66
- # NLP Analysis Logic
67
- def analyze_entities(text_content):
68
  service_account_info = json.loads(st.secrets["google_nlp"])
69
  credentials = service_account.Credentials.from_service_account_info(
70
  service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
@@ -75,46 +84,50 @@ def analyze_entities(text_content):
75
  encoding_type = language_v1.EncodingType.UTF8
76
 
77
  response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
78
- entities = response.entities
79
-
80
- total_entities = len(entities)
81
- google_entities = count_google_entities(entities)
82
 
83
- if google_entities == 0:
84
- st.markdown(f"### Found {total_entities} entities no Google-linked (MID) entities found.")
 
 
 
 
85
  else:
86
- st.markdown(f"### Found {total_entities} entities {google_entities} Google-linked entities with MID.")
 
87
 
88
- st.write("---")
89
 
90
- for i, entity in enumerate(entities):
91
- st.write(f"**Entity {i+1} of {total_entities}**")
92
- st.write(f"**Name:** {entity.name}")
93
- st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}")
94
- st.write(f"**Salience Score:** {entity.salience:.4f}")
95
 
96
  if entity.metadata:
97
- st.write("**Metadata:**")
98
- st.json(entity.metadata)
99
-
100
  if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
101
- query_knowledge_graph(entity.metadata['mid'])
102
- else:
103
- st.write("_No metadata available_")
104
 
105
  if entity.mentions:
106
- st.write(f"**Mentions ({len(entity.mentions)}):**")
107
- st.write([mention.text.content for mention in entity.mentions])
 
 
 
108
 
109
  st.write("---")
110
 
111
- export_entities(entities)
 
112
 
113
- # Text Input
114
  user_input = st.text_area("Enter text to analyze")
115
 
116
  if st.button("Analyze"):
117
- if user_input.strip():
118
- analyze_entities(user_input)
119
- else:
120
- st.warning("Please enter some text before clicking Analyze.")
 
2
  import streamlit as st
3
  from google.oauth2 import service_account
4
  from google.cloud import language_v1
5
+ import urllib.parse
6
+ import urllib.request
7
  import pandas as pd
8
 
9
+
10
+ # Function to generate a Google Search link for a Knowledge Graph MID
11
  def query_knowledge_graph(entity_id):
12
  try:
13
  google_search_link = f"https://www.google.com/search?kgmid={entity_id}"
 
15
  except Exception as e:
16
  st.write(f"An error occurred: {e}")
17
 
18
+ # Function to count entities whose metadata 'mid' contains '/g/' or '/m/'
19
def count_entities(entities):
    """Count the entities that carry a Google Knowledge Graph MID.

    An entity is counted when its metadata has a 'mid' value containing
    '/g/' or '/m/'.

    Args:
        entities: Iterable of objects exposing a mapping-like ``metadata``
            attribute.

    Returns:
        The number of matching entities; 0 for an empty iterable.
    """
    def has_google_mid(entity):
        # Entities with no metadata (None or empty) simply do not count.
        metadata = entity.metadata or {}
        # Read 'mid' once; a missing or empty value matches nothing.
        mid = metadata.get('mid') or ''
        return '/g/' in mid or '/m/' in mid

    return sum(1 for entity in entities if has_google_mid(entity))
26
+
27
+ # Function to serialize entity metadata
28
def serialize_entity_metadata(metadata):
    """Return entity metadata as a plain dict with every value stringified.

    Args:
        metadata: Mapping-like object exposing ``items()``, or a falsy value
            (``None`` / empty mapping) when the entity has no metadata.

    Returns:
        A new ``dict`` with the original keys and ``str(value)`` for each
        value; an empty dict when there is no metadata.
    """
    # Guard here so callers do not have to special-case missing metadata.
    if not metadata:
        return {}
    return {key: str(value) for key, value in metadata.items()}
30
 
31
+ # Function to export all entities, including those without metadata
 
 
 
 
 
 
 
32
  def export_entities(entities):
33
  entity_list = []
34
  for entity in entities:
35
  metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {}
 
36
  entity_info = {
37
  "Name": entity.name,
38
  "Type": language_v1.Entity.Type(entity.type_).name,
39
  "Salience Score": entity.salience,
 
40
  "Metadata": metadata,
41
  "Mentions": [mention.text.content for mention in entity.mentions]
42
  }
 
47
  return
48
 
49
  df = pd.DataFrame(entity_list)
 
50
 
51
+ # Export as CSV
52
+ csv = df.to_csv(index=False)
53
+ st.download_button(label="Export Entities as CSV", data=csv, file_name="entities.csv", mime="text/csv")
54
+
55
+ # Export as JSON
56
  json_data = json.dumps(entity_list, indent=2)
57
  st.download_button(label="Export Entities as JSON", data=json_data, file_name="entities.json", mime="application/json")
58
 
59
+ # Sidebar content
60
  st.sidebar.title("About This Tool")
61
+ st.sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
62
+ st.sidebar.markdown("### Step-by-Step Guide")
63
  st.sidebar.markdown("""
64
+ 1. **Open the Tool**: Navigate to the URL where the tool is hosted.
65
+ 2. **User Input**: Enter the text you want to analyze.
66
+ 3. **Analyze**: Click the 'Analyze' button.
67
+ 4. **View Results**: See the identified entities and their details.
68
+ 5. **Export Entities**: Export the entities as JSON or CSV.
69
  """)
70
 
71
+ # Header and intro
72
  st.title("Google Cloud NLP Entity Analyzer")
73
+ st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
74
+ st.write("Entity salience scores are always relative to the analyzed text.")
75
 
76
+ def sample_analyze_entities(text_content):
 
77
  service_account_info = json.loads(st.secrets["google_nlp"])
78
  credentials = service_account.Credentials.from_service_account_info(
79
  service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
 
84
  encoding_type = language_v1.EncodingType.UTF8
85
 
86
  response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
87
+
88
+ # Count the entities with 'mid' and either '/g/' or '/m/' in their metadata
89
+ entity_count = count_entities(response.entities)
 
90
 
91
+ if entity_count == 0:
92
+ st.markdown(f"# We found {len(response.entities)} entities - but found no Google Entities")
93
+ st.write("---")
94
+ elif entity_count == 1:
95
+ st.markdown(f"# We found {len(response.entities)} entities - and found 1 Google Entity")
96
+ st.write("---")
97
  else:
98
+ st.markdown(f"# We found {len(response.entities)} entities - and found {entity_count} Google Entities")
99
+ st.write("---")
100
 
 
101
 
102
+ for i, entity in enumerate(response.entities):
103
+ st.write(f"Entity {i+1} of {len(response.entities)}")
104
+ st.write(f"Name: {entity.name}")
105
+ st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
106
+ st.write(f"Salience Score: {entity.salience}")
107
 
108
  if entity.metadata:
109
+ st.write("Metadata:")
110
+ st.write(entity.metadata)
111
+
112
  if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
113
+ entity_id = entity.metadata['mid']
114
+ query_knowledge_graph(entity_id)
 
115
 
116
  if entity.mentions:
117
+ mention_count = len(entity.mentions)
118
+ plural = "s" if mention_count > 1 else ""
119
+ st.write(f"Mentions: {mention_count} mention{plural}")
120
+ st.write("Raw Array:")
121
+ st.write(entity.mentions)
122
 
123
  st.write("---")
124
 
125
+ # Add the export functionality
126
+ export_entities(response.entities)
127
 
128
+ # User input for text analysis
129
user_input = st.text_area("Enter text to analyze")

if st.button("Analyze"):
    # Whitespace-only text has nothing to analyze, so treat it like empty
    # input instead of sending it to the API.
    if user_input.strip():
        sample_analyze_entities(user_input)
    else:
        # Give feedback rather than silently ignoring the click.
        st.warning("Please enter some text before clicking Analyze.")