# Medresearch/components/knowledge_graph.py
# mgbam's picture
# Add untracked files and synchronize with remote
# 9c7387c
from py2neo import Graph, Node, Relationship
import spacy
def extract_knowledge_graph(text, nlp):
    """Extract named entities from ``text`` and store them as nodes in Neo4j.

    Every entity reported by ``nlp`` is written as an ``Entity`` node with two
    properties: ``name`` (the entity text) and ``label`` (the entity type).
    Nodes are merged on ``name`` instead of being blindly created, so an
    entity that is mentioned several times -- or seen again on a later call --
    maps to a single node rather than piling up duplicates. When the same
    text is tagged with different types, the last type seen is the one stored.

    Relationship extraction is not implemented yet; see the TODO below.

    Args:
        text: Raw text to analyse.
        nlp: Callable pipeline such that ``nlp(text)`` returns an object with
            an ``ents`` iterable whose items expose ``text`` and ``label_``
            (presumably a loaded spaCy model -- confirm against callers).

    Returns:
        None. Progress messages are printed to stdout.
    """
    # NOTE(review): the connection URI and credentials are hard-coded here;
    # move them to configuration before using this outside local development.
    graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))  # Adjust credentials
    doc = nlp(text)

    # Collapse repeated mentions up front so each distinct name costs a single
    # round trip. Later mentions overwrite earlier ones, which is the same
    # state a sequence of per-mention merges would leave in the database.
    labels_by_name = {ent.text: ent.label_ for ent in doc.ents}

    for name, label in labels_by_name.items():
        node = Node("Entity", name=name, label=label)
        # merge() matches an existing node on the "Entity" label and "name"
        # property, creating a new node only when no match is found.
        graph.merge(node, "Entity", "name")

    # TODO: relationship extraction still needs work.
    # This needs more work to make the information work.
    # Example only. More data cleaning needed before real implementation:
    #
    # for token in doc:
    #     # Example: look for verbs connecting entities
    #     if token.dep_ == "ROOT" and token.pos_ == "VERB":
    #         for child in token.children:
    #             if child.dep_ == "nsubj" and child.ent_type_:  # Subject is an entity
    #                 for obj in token.children:
    #                     if obj.dep_ == "dobj" and obj.ent_type_:  # Object is an entity
    #                         subject_node = Node("Entity", name=child.text, label=child.ent_type_)
    #                         object_node = Node("Entity", name=obj.text, label=obj.ent_type_)
    #                         relation = Relationship(subject_node, token.text, object_node)
    #                         graph.create(relation)

    print("Successfully loaded data to the knowledge base.")
    # Example Node
    print("Create a node called entity.")