|
import os |
|
from langchain.chains.openai_functions import ( |
|
create_openai_fn_chain, |
|
create_structured_output_chain, |
|
) |
|
from langchain_openai import ChatOpenAI |
|
from langchain.prompts import ChatPromptTemplate |
|
from typing import List, Optional |
|
from KG_classes import KnowledgeGraph |
|
|
|
# Load the OpenAI API key from a local text file. The path is relative, so it
# is resolved against the current working directory of the process.
with open("graphRAG/openai.txt", "r", encoding="utf-8") as f:
    # Strip surrounding whitespace: text files usually end with a newline,
    # which would otherwise be passed along as part of the key.
    api_key = f.read().strip()

# Module-level chat model used by get_extraction_chain().
llm = ChatOpenAI(model="gpt-4o", temperature=0, api_key=api_key)
|
|
|
|
|
def _allowed_line(prefix: str, labels: Optional[List[str]]) -> str:
    """Return ``prefix`` followed by the comma-joined labels, or "" if none.

    Curly braces in the labels are doubled because the resulting text is
    embedded in a ChatPromptTemplate message, where single braces denote
    template variables.
    """
    if not labels:
        return ""
    escaped = (label.replace("{", "{{").replace("}", "}}") for label in labels)
    return prefix + ", ".join(escaped)


def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None, allowed_rels: Optional[List[str]] = None
):
    """Build the structured-output chain that extracts a KnowledgeGraph.

    The chain's prompt consists of a system message with the extraction
    rules and two human messages; the first human message carries the text
    to process through the ``input`` template variable.

    Args:
        allowed_nodes: Optional node labels to list in the prompt. When
            ``None`` or empty, no "Allowed Node Labels" line is emitted.
        allowed_rels: Optional relationship types to list in the prompt.
            When ``None`` or empty, no "Allowed Relationship Types" line is
            emitted.

    Returns:
        The object returned by ``create_structured_output_chain`` for the
        ``KnowledgeGraph`` schema, the module-level ``llm`` and the prompt
        built here.
    """
    # Precompute the optional lines so the template below stays readable and
    # caller-supplied labels cannot inject template variables.
    node_line = _allowed_line("- **Allowed Node Labels:** ", allowed_nodes)
    rel_line = _allowed_line("- **Allowed Relationship Types:** ", allowed_rels)

    system_prompt = f"""# Knowledge Graph Extraction Instructions
## 1. Purpose
You are a state-of-the-art system for extracting structured data to construct a **knowledge graph**. The graph consists of:
- **Nodes**: Entities or concepts.
- **Relationships**: Connections between nodes representing their interactions or associations.

## 2. Guidelines for Nodes
- Use **general labels** for node types (e.g., "person", "organization").
- Use **human-readable identifiers** for node IDs (no integers or generic IDs).
- Include attributes as key-value pairs with `camelCase` keys (e.g., `birthDate: "1990-01-01"`).
- Do **not create separate nodes** for numerical data or dates; these should always be node properties.

{node_line}
## 3. Guidelines for Relationships
- Clearly define relationships between nodes, using concise and meaningful labels (e.g., "worksAt", "bornIn").
- Only use relationships allowed in the context.
{rel_line}
- Avoid overly detailed or complex relationship labels.

## 4. Coreference Resolution
- Use the most complete identifier for entities across the graph. For example, use "John Doe" instead of "John" or "he".

## 5. Strict Compliance
Adhere to these rules exactly to ensure the generated graph is clear, coherent, and consistent."""

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            (
                "human",
                "Extract information from the following text using these rules: {input}",
            ),
            ("human", "Ensure the output is in the correct format."),
        ]
    )
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)
|
|