File size: 2,456 Bytes
f7ab812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
from langchain.chains.openai_functions import (
    create_openai_fn_chain,
    create_structured_output_chain,
)
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from typing import List, Optional
from KG_classes import KnowledgeGraph

# Load the OpenAI API key from a local text file. The path is relative, so it
# is resolved against the process's current working directory.
with open("graphRAG/openai.txt", "r", encoding="utf-8") as f:
    # Strip surrounding whitespace: text files usually end with a newline,
    # and a key with a trailing "\n" is not a valid credential.
    api_key = f.read().strip()

# Module-level chat model used by get_extraction_chain below.
llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=api_key)


def _allowed_clause(heading: str, labels: Optional[List[str]]) -> str:
    """Return *heading* followed by the comma-joined *labels*, or "" if none.

    Curly braces inside the labels are doubled so that the prompt template,
    which uses ``{name}`` placeholders (see ``{input}`` in the human message),
    keeps them as literal text instead of treating them as variables.
    """
    if not labels:
        return ""
    joined = ", ".join(labels)
    return heading + joined.replace("{", "{{").replace("}", "}}")


def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None, allowed_rels: Optional[List[str]] = None
):
    """Build the chain that extracts a ``KnowledgeGraph`` from free text.

    The system prompt is assembled once, when this function is called: the
    optional allow-lists are baked into the instruction text, leaving
    ``{input}`` as the only placeholder to be filled in later.

    Args:
        allowed_nodes: Node labels to list in the prompt. When ``None`` or
            empty, no node-label line is added.
        allowed_rels: Relationship types to list in the prompt. When ``None``
            or empty, no relationship-type line is added.

    Returns:
        The object produced by ``create_structured_output_chain`` for the
        ``KnowledgeGraph`` schema, the module-level ``llm`` and the prompt
        built here.
    """
    node_clause = _allowed_clause("- **Allowed Node Labels:** ", allowed_nodes)
    rel_clause = _allowed_clause("- **Allowed Relationship Types:** ", allowed_rels)

    # NOTE: the body of this literal is runtime text sent to the model; its
    # indentation is part of the prompt and is kept exactly as written.
    system_text = f"""# Knowledge Graph Extraction Instructions
    ## 1. Purpose
    You are a state-of-the-art system for extracting structured data to construct a **knowledge graph**. The graph consists of:
    - **Nodes**: Entities or concepts.
    - **Relationships**: Connections between nodes representing their interactions or associations.

    ## 2. Guidelines for Nodes
    - Use **general labels** for node types (e.g., "person", "organization").
    - Use **human-readable identifiers** for node IDs (no integers or generic IDs).
    - Include attributes as key-value pairs with `camelCase` keys (e.g., `birthDate: "1990-01-01"`).
    - Do **not create separate nodes** for numerical data or dates; these should always be node properties.

    {node_clause}
    ## 3. Guidelines for Relationships
    - Clearly define relationships between nodes, using concise and meaningful labels (e.g., "worksAt", "bornIn").
    - Only use relationships allowed in the context.
    {rel_clause}
    - Avoid overly detailed or complex relationship labels.

    ## 4. Coreference Resolution
    - Use the most complete identifier for entities across the graph. For example, use "John Doe" instead of "John" or "he".

    ## 5. Strict Compliance
    Adhere to these rules exactly to ensure the generated graph is clear, coherent, and consistent."""

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_text),
            (
                "human",
                "Extract information from the following text using these rules: {input}",
            ),
            ("human", "Ensure the output is in the correct format."),
        ]
    )
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)