eaglelandsonce committed on
Commit
97aae78
·
verified ·
1 Parent(s): c672b82

Update pages/21_GraphRag.py

Browse files
Files changed (1) hide show
  1. pages/21_GraphRag.py +60 -46
pages/21_GraphRag.py CHANGED
@@ -1,46 +1,60 @@
1
- import streamlit as st
2
- import graphrag
3
- import inspect
4
-
5
st.title("GraphRAG Module Explorer")


def _show_callable(func):
    """Render the signature and docstring of *func* as literal text.

    ``st.write`` interprets strings as Markdown, which turns ``*args`` /
    ``**kwargs`` and dunder names into emphasis and reflows docstrings.
    ``st.code`` and ``st.text`` display the text verbatim instead.
    """
    try:
        signature = str(inspect.signature(func))
    except (TypeError, ValueError):
        # inspect.signature raises these when no signature can be determined.
        signature = "(signature unavailable)"
    st.code(f"Signature: {signature}", language="python")
    st.text(f"Docstring: {inspect.getdoc(func)}")


# Display all attributes and functions in the graphrag module
st.header("GraphRAG Module Contents")
graphrag_contents = dir(graphrag)

for item in graphrag_contents:
    attr = getattr(graphrag, item)
    # Backticks keep names such as __doc__ from being rendered as bold text.
    st.subheader(f"`{item}`")
    st.write(f"Type: {type(attr)}")

    if inspect.isclass(attr):
        st.write("Class Methods:")
        for name, method in inspect.getmembers(attr, predicate=inspect.isfunction):
            st.write(f"- `{name}`")
            _show_callable(method)

    elif inspect.isfunction(attr):
        st.write("Function:")
        _show_callable(attr)

    elif isinstance(attr, (int, float, str, bool)):
        st.write(f"Value: {attr}")

    st.write("---")

# Display the module's docstring if available
if graphrag.__doc__:
    st.header("GraphRAG Module Documentation")
    # st.text preserves the docstring layout instead of parsing it as Markdown.
    st.text(inspect.getdoc(graphrag))

st.header("Next Steps")
st.write("""
Based on the information above, we need to determine:
1. How to create a graph representation of text using graphrag.
2. How to process this graph representation for analysis.
3. Whether graphrag provides any built-in analysis tools or if we need to integrate it with other libraries.

Please review the module contents and let me know which components seem most relevant for our text analysis task.
""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModel
3
+ from sentence_transformers import SentenceTransformer
4
+ import networkx as nx
5
+ import matplotlib.pyplot as plt
6
+
7
# One checkpoint name drives both loads so the encoder and its tokenizer
# always come from the same pre-trained model.
model_name = "bert-base-uncased"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
11
+
12
def get_embeddings(texts):
    """Return one embedding per input text as a NumPy array.

    The texts are tokenized as a padded batch (truncated to 512 tokens), run
    through the module-level ``model`` without gradient tracking, and the
    hidden state at sequence position 0 of each text is returned.
    """
    encoded = tokenizer(
        texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        result = model(**encoded)
    # Position 0 along the sequence axis is taken for every item in the batch.
    first_token_states = result.last_hidden_state[:, 0, :]
    return first_token_states.numpy()
18
+
19
# Sample data (replace with your own data import)
documents = [
    "The quick brown fox jumps over the lazy dog.",
    "A journey of a thousand miles begins with a single step.",
    "To be or not to be, that is the question.",
    "All that glitters is not gold.",
]

# Get embeddings for documents
embeddings = get_embeddings(documents)

# Create graph: one node per document, indexed by its position in `documents`.
G = nx.Graph()

# Minimum cosine similarity for two documents to be linked; adjust as needed.
threshold = 0.5

# Convert the embedding rows to tensors once instead of rebuilding them for
# every (i, j) pair inside the nested loop.
doc_vectors = torch.as_tensor(embeddings)

for i, text in enumerate(documents):
    G.add_node(i, text=text)
    # Only j > i is visited, so each unordered pair is compared exactly once.
    for j in range(i + 1, len(documents)):
        similarity = torch.cosine_similarity(doc_vectors[i], doc_vectors[j], dim=0).item()
        if similarity > threshold:
            G.add_edge(i, j, weight=similarity)
41
+
42
# Visualize the graph
# A fixed seed keeps the spring layout identical from run to run.
pos = nx.spring_layout(G, seed=42)
fig, ax = plt.subplots()
nx.draw(
    G,
    pos,
    ax=ax,
    with_labels=True,
    node_color='lightblue',
    node_size=500,
    font_size=8,
    font_weight='bold',
)
# Two decimals keep the edge labels readable; raw float weights clutter the plot.
edge_labels = {
    edge: f"{weight:.2f}"
    for edge, weight in nx.get_edge_attributes(G, 'weight').items()
}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, ax=ax)
ax.set_title("Document Similarity Graph")
# NOTE(review): the path pages/21_GraphRag.py suggests a Streamlit page; if so,
# plt.show() may not display anything there - confirm how this script is run.
plt.show()
49
+
50
# Example query: embed it with the same encoder used for the documents.
query = "What is the meaning of life?"
query_embedding = get_embeddings([query])[0]

# Score every document embedding against the query by cosine similarity.
query_vector = torch.tensor(query_embedding)
similarities = []
for doc_embedding in embeddings:
    score = torch.cosine_similarity(query_vector, torch.tensor(doc_embedding), dim=0)
    similarities.append(score)

# max() keeps the first of any tied scores, so the lowest index wins a tie.
most_similar_idx = max(enumerate(similarities), key=lambda pair: pair[1])[0]

print(f"Most similar document to the query: {documents[most_similar_idx]}")

# You can extend this to implement more complex graph-based retrieval algorithms