blazingbunny committed
Commit 880bd77 · verified · 1 Parent(s): 2df8094

Update app.py

Files changed (1)
  1. app.py +63 -79
app.py CHANGED
@@ -2,12 +2,9 @@ import json
 import streamlit as st
 from google.oauth2 import service_account
 from google.cloud import language_v1
-import urllib.parse
-import urllib.request
 import pandas as pd
 
-
-# Function to query Google's Knowledge Graph API
+# Function to generate Google Search link for MID
 def query_knowledge_graph(entity_id):
     try:
         google_search_link = f"https://www.google.com/search?kgmid={entity_id}"
@@ -15,68 +12,59 @@ def query_knowledge_graph(entity_id):
     except Exception as e:
         st.write(f"An error occurred: {e}")
 
-# Function to count entities with 'mid' that contains '/g/' or '/m/' in their metadata
-def count_entities(entities):
-    count = 0
-    for entity in entities:
-        metadata = entity.metadata
-        if 'mid' in metadata and ('/g/' in metadata['mid'] or '/m/' in metadata['mid']):
-            count += 1
-    return count
-
-# Function to serialize entity metadata
+# Function to serialize metadata
 def serialize_entity_metadata(metadata):
     return {k: str(v) for k, v in metadata.items()}
 
-# Function to export entities as a JSON or CSV file, only exporting entities with 'mid' in their metadata
+# Count Google Entities (those with /g/ or /m/ mids)
+def count_google_entities(entities):
+    return sum(
+        1 for entity in entities
+        if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid'])
+    )
+
+# Export all entities, regardless of mid
 def export_entities(entities):
     entity_list = []
     for entity in entities:
-        # Check if entity has 'mid' in its metadata and if it contains '/g/' or '/m/' in 'mid'
-        if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
-            entity_info = {
-                "Name": entity.name,
-                "Type": language_v1.Entity.Type(entity.type_).name,
-                "Salience Score": entity.salience,
-                "Metadata": serialize_entity_metadata(entity.metadata),
-                "Mentions": [mention.text.content for mention in entity.mentions]
-            }
-            entity_list.append(entity_info)
-
-    # If there are no entities to export, notify the user
+        metadata = serialize_entity_metadata(entity.metadata) if entity.metadata else {}
+        mid = metadata.get('mid', '')
+        entity_info = {
+            "Name": entity.name,
+            "Type": language_v1.Entity.Type(entity.type_).name,
+            "Salience Score": entity.salience,
+            "MID": mid,
+            "Metadata": metadata,
+            "Mentions": [mention.text.content for mention in entity.mentions]
+        }
+        entity_list.append(entity_info)
+
     if not entity_list:
-        st.write("No entities with a valid 'mid' found to export.")
+        st.write("No entities found to export.")
         return
-
-    # Convert to DataFrame for easier export as CSV
+
     df = pd.DataFrame(entity_list)
-
-    # Export as CSV
-    csv = df.to_csv(index=False)
-    st.download_button(label="Export Entities as CSV", data=csv, file_name="entities.csv", mime="text/csv")
-
-    # Export as JSON
+    st.download_button(label="Export Entities as CSV", data=df.to_csv(index=False), file_name="entities.csv", mime="text/csv")
+
     json_data = json.dumps(entity_list, indent=2)
     st.download_button(label="Export Entities as JSON", data=json_data, file_name="entities.json", mime="application/json")
 
-# Sidebar content
+# Sidebar
 st.sidebar.title("About This Tool")
-st.sidebar.markdown("This tool leverages Google's NLP technology for entity analysis.")
-st.sidebar.markdown("### Step-by-Step Guide")
+st.sidebar.markdown("This tool uses Google Cloud Natural Language API to identify entities.")
+st.sidebar.markdown("### How to Use")
 st.sidebar.markdown("""
-1. **Open the Tool**: Navigate to the URL where the tool is hosted.
-2. **User Input**: Enter the text you want to analyze.
-3. **Analyze**: Click the 'Analyze' button.
-4. **View Results**: See the identified entities and their details.
-5. **Export Entities**: Export the entities as JSON or CSV.
+1. **Enter text** in the box below.
+2. **Click Analyze** to detect entities.
+3. **Export** results to CSV or JSON.
 """)
 
-# Header and intro
+# Header
 st.title("Google Cloud NLP Entity Analyzer")
-st.write("This tool analyzes text to identify entities such as people, locations, organizations, and events.")
-st.write("Entity salience scores are always relative to the analyzed text.")
+st.write("Analyze text and extract all entities, including those without Google metadata (MID).")
 
-def sample_analyze_entities(text_content):
+# NLP Analysis Logic
+def analyze_entities(text_content):
     service_account_info = json.loads(st.secrets["google_nlp"])
     credentials = service_account.Credentials.from_service_account_info(
         service_account_info, scopes=["https://www.googleapis.com/auth/cloud-platform"]
@@ -87,50 +75,46 @@ def sample_analyze_entities(text_content):
     encoding_type = language_v1.EncodingType.UTF8
 
     response = client.analyze_entities(request={"document": document, "encoding_type": encoding_type})
-
-    # Count the entities with 'mid' and either '/g/' or '/m/' in their metadata
-    entity_count = count_entities(response.entities)
+    entities = response.entities
 
-    if entity_count == 0:
-        st.markdown(f"# We found {len(response.entities)} entities - but found no Google Entities")
-        st.write("---")
-    elif entity_count == 1:
-        st.markdown(f"# We found {len(response.entities)} entities - and found 1 Google Entity")
-        st.write("---")
+    total_entities = len(entities)
+    google_entities = count_google_entities(entities)
+
+    if google_entities == 0:
+        st.markdown(f"### Found {total_entities} entities - no Google-linked (MID) entities found.")
     else:
-        st.markdown(f"# We found {len(response.entities)} entities - and found {entity_count} Google Entities")
-        st.write("---")
+        st.markdown(f"### Found {total_entities} entities - {google_entities} Google-linked entities with MID.")
 
+    st.write("---")
 
-    for i, entity in enumerate(response.entities):
-        st.write(f"Entity {i+1} of {len(response.entities)}")
-        st.write(f"Name: {entity.name}")
-        st.write(f"Type: {language_v1.Entity.Type(entity.type_).name}")
-        st.write(f"Salience Score: {entity.salience}")
+    for i, entity in enumerate(entities):
+        st.write(f"**Entity {i+1} of {total_entities}**")
+        st.write(f"**Name:** {entity.name}")
+        st.write(f"**Type:** {language_v1.Entity.Type(entity.type_).name}")
+        st.write(f"**Salience Score:** {entity.salience:.4f}")
 
         if entity.metadata:
-            st.write("Metadata:")
-            st.write(entity.metadata)
-
+            st.write("**Metadata:**")
+            st.json(entity.metadata)
+
             if 'mid' in entity.metadata and ('/g/' in entity.metadata['mid'] or '/m/' in entity.metadata['mid']):
-                entity_id = entity.metadata['mid']
-                query_knowledge_graph(entity_id)
+                query_knowledge_graph(entity.metadata['mid'])
+        else:
+            st.write("_No metadata available_")
 
         if entity.mentions:
-            mention_count = len(entity.mentions)
-            plural = "s" if mention_count > 1 else ""
-            st.write(f"Mentions: {mention_count} mention{plural}")
-            st.write("Raw Array:")
-            st.write(entity.mentions)
+            st.write(f"**Mentions ({len(entity.mentions)}):**")
+            st.write([mention.text.content for mention in entity.mentions])
 
         st.write("---")
 
-    # Add the export functionality
-    export_entities(response.entities)
+    export_entities(entities)
 
-# User input for text analysis
+# Text Input
 user_input = st.text_area("Enter text to analyze")
 
 if st.button("Analyze"):
-    if user_input:
-        sample_analyze_entities(user_input)
+    if user_input.strip():
+        analyze_entities(user_input)
+    else:
+        st.warning("Please enter some text before clicking Analyze.")
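
For reference, a minimal sketch of the filter behind the new `count_google_entities` helper, using hypothetical stand-in entities (the real objects come from the `analyze_entities` response): only entities whose metadata `mid` contains `/g/` or `/m/` count as Google-linked, while `export_entities` now keeps every entity.

```python
# Sketch only: SimpleNamespace stands in for language_v1 Entity objects,
# and the mids below are made-up examples of the /m/ and /g/ formats.
from types import SimpleNamespace

entities = [
    SimpleNamespace(metadata={"mid": "/m/0abc12"}),   # counted: Freebase-style mid
    SimpleNamespace(metadata={"mid": "/g/11xyz34"}),  # counted: Knowledge Graph mid
    SimpleNamespace(metadata={}),                     # not counted, but still exported
]

# Same condition as count_google_entities in app.py above
google_linked = sum(
    1 for e in entities
    if 'mid' in e.metadata and ('/g/' in e.metadata['mid'] or '/m/' in e.metadata['mid'])
)
print(google_linked)  # -> 2
```

For an entity that passes the filter, `query_knowledge_graph` builds the corresponding search link from the same mid, e.g. https://www.google.com/search?kgmid=/m/0abc12 for the hypothetical mid above.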