Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -14,13 +14,11 @@ from langchain_experimental.graph_transformers import LLMGraphTransformer
|
|
14 |
from langchain.chains.graph_qa.cypher import GraphCypherQAChain
|
15 |
from neo4j import GraphDatabase
|
16 |
|
17 |
-
#
|
18 |
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex, Settings
|
19 |
from llama_index.core.graph_stores import SimpleGraphStore
|
20 |
from llama_index.core import StorageContext
|
21 |
-
from llama_index.
|
22 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
23 |
-
from llama_index.embeddings.langchain import LangchainEmbedding
|
24 |
|
25 |
def main():
|
26 |
st.set_page_config(
|
@@ -117,7 +115,7 @@ def main():
|
|
117 |
st.write("PDF file uploaded and saved to temporary file.")
|
118 |
|
119 |
# Process document using Llama-Index
|
120 |
-
index = process_document(tmp_file_path, graph)
|
121 |
|
122 |
# Store the index in session state
|
123 |
st.session_state['index'] = index
|
@@ -188,41 +186,31 @@ def main():
|
|
188 |
res = st.session_state['qa'].invoke({"query": question})
|
189 |
st.write("\n**Answer:**\n" + res['result'])
|
190 |
|
191 |
-
def process_document(file_path, graph):
|
192 |
-
#
|
|
|
193 |
Settings.chunk_size = 512
|
|
|
|
|
|
|
|
|
194 |
|
195 |
# Create graph store
|
196 |
graph_store = SimpleGraphStore()
|
197 |
storage_context = StorageContext.from_defaults(graph_store=graph_store)
|
198 |
|
199 |
-
# Load document
|
200 |
-
documents = SimpleDirectoryReader(file_path).load_data()
|
201 |
|
202 |
# Create Knowledge Graph Index
|
203 |
index = KnowledgeGraphIndex.from_documents(
|
204 |
documents=documents,
|
205 |
max_triplets_per_chunk=3,
|
206 |
storage_context=storage_context,
|
|
|
207 |
include_embeddings=True
|
208 |
)
|
209 |
|
210 |
-
# Convert to Neo4j
|
211 |
-
g = index.get_networkx_graph()
|
212 |
-
for node in g.nodes():
|
213 |
-
cypher = f"""
|
214 |
-
CREATE (n:{node['type']} {{id: '{node['id']}', text: '{node['text']}'}})
|
215 |
-
"""
|
216 |
-
graph.query(cypher)
|
217 |
-
|
218 |
-
for edge in g.edges():
|
219 |
-
cypher = f"""
|
220 |
-
MATCH (a), (b)
|
221 |
-
WHERE a.id = '{edge[0]}' AND b.id = '{edge[1]}'
|
222 |
-
CREATE (a)-[r:{edge['relationship']}]->(b)
|
223 |
-
"""
|
224 |
-
graph.query(cypher)
|
225 |
-
|
226 |
return index
|
227 |
|
228 |
if __name__ == "__main__":
|
|
|
14 |
from langchain.chains.graph_qa.cypher import GraphCypherQAChain
|
15 |
from neo4j import GraphDatabase
|
16 |
|
17 |
+
# Llama-Index imports
|
18 |
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex, Settings
|
19 |
from llama_index.core.graph_stores import SimpleGraphStore
|
20 |
from llama_index.core import StorageContext
|
21 |
+
from llama_index.embeddings import OpenAIEmbedding
|
|
|
|
|
22 |
|
23 |
def main():
|
24 |
st.set_page_config(
|
|
|
115 |
st.write("PDF file uploaded and saved to temporary file.")
|
116 |
|
117 |
# Process document using Llama-Index
|
118 |
+
index = process_document(tmp_file_path, graph, st.session_state['OPENAI_API_KEY'])
|
119 |
|
120 |
# Store the index in session state
|
121 |
st.session_state['index'] = index
|
|
|
186 |
res = st.session_state['qa'].invoke({"query": question})
|
187 |
st.write("\n**Answer:**\n" + res['result'])
|
188 |
|
189 |
+
def process_document(file_path, graph, openai_api_key):
|
190 |
+
# Configure OpenAI
|
191 |
+
os.environ["OPENAI_API_KEY"] = openai_api_key
|
192 |
Settings.chunk_size = 512
|
193 |
+
Settings.llm = ChatOpenAI(temperature=0, model="gpt-4")
|
194 |
+
|
195 |
+
# Setup embeddings
|
196 |
+
embed_model = OpenAIEmbedding()
|
197 |
|
198 |
# Create graph store
|
199 |
graph_store = SimpleGraphStore()
|
200 |
storage_context = StorageContext.from_defaults(graph_store=graph_store)
|
201 |
|
202 |
+
# Load and process document
|
203 |
+
documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
|
204 |
|
205 |
# Create Knowledge Graph Index
|
206 |
index = KnowledgeGraphIndex.from_documents(
|
207 |
documents=documents,
|
208 |
max_triplets_per_chunk=3,
|
209 |
storage_context=storage_context,
|
210 |
+
embed_model=embed_model,
|
211 |
include_embeddings=True
|
212 |
)
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
return index
|
215 |
|
216 |
if __name__ == "__main__":
|