eaglelandsonce committed on
Commit
2ead64f
1 Parent(s): 5794470

Update pages/21_GraphRag.py

Browse files
Files changed (1) hide show
  1. pages/21_GraphRag.py +71 -59
pages/21_GraphRag.py CHANGED
@@ -1,70 +1,82 @@
1
  import streamlit as st
2
- import sys
3
- import subprocess
4
- import importlib
 
 
 
 
 
5
 
6
- st.title("GraphRAG Module Explorer")
 
 
 
 
7
 
8
- # Function to install a package
9
- def install_package(package):
10
- subprocess.check_call([sys.executable, "-m", "pip", "install", package])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- # Check and install required packages
13
- required_packages = ['graphrag', 'sentence_transformers']
14
- for package in required_packages:
15
- try:
16
- importlib.import_module(package)
17
- except ImportError:
18
- st.write(f"Installing {package}...")
19
- install_package(package)
20
- st.write(f"{package} installed successfully.")
 
 
 
 
21
 
22
- # Now try to import graphrag
23
- try:
24
- import graphrag
25
- import inspect
26
 
27
- # Display all attributes and functions in the graphrag module
28
- st.header("GraphRAG Module Contents")
29
- graphrag_contents = dir(graphrag)
30
 
31
- for item in graphrag_contents:
32
- attr = getattr(graphrag, item)
33
- st.subheader(f"{item}")
34
- st.write(f"Type: {type(attr)}")
 
35
 
36
- if inspect.isclass(attr):
37
- st.write("Class Methods:")
38
- for name, method in inspect.getmembers(attr, predicate=inspect.isfunction):
39
- st.write(f"- {name}")
40
- st.write(f" Signature: {inspect.signature(method)}")
41
- st.write(f" Docstring: {method.__doc__}")
42
 
43
- elif inspect.isfunction(attr):
44
- st.write("Function:")
45
- st.write(f"Signature: {inspect.signature(attr)}")
46
- st.write(f"Docstring: {attr.__doc__}")
47
 
48
- elif isinstance(attr, (int, float, str, bool)):
49
- st.write(f"Value: {attr}")
 
 
 
 
50
 
51
- st.write("---")
52
-
53
- # Display the module's docstring if available
54
- if graphrag.__doc__:
55
- st.header("GraphRAG Module Documentation")
56
- st.write(graphrag.__doc__)
57
-
58
- st.header("Next Steps")
59
- st.write("""
60
- Based on the information above, we need to determine:
61
- 1. How to create a graph representation of text using graphrag.
62
- 2. How to process this graph representation for analysis.
63
- 3. Whether graphrag provides any built-in analysis tools or if we need to integrate it with other libraries.
64
-
65
- Please review the module contents and let me know which components seem most relevant for our text analysis task.
66
- """)
67
-
68
- except Exception as e:
69
- st.error(f"An error occurred while exploring the graphrag module: {str(e)}")
70
- st.write("Please check the installation of graphrag and its dependencies, and try running the app again.")
 
1
  import streamlit as st
2
+ import graphrag
3
+ import networkx as nx
4
+ import matplotlib.pyplot as plt
5
+ from sentence_transformers import SentenceTransformer
6
+ import torch
7
+ import nltk
8
+ from nltk.tokenize import sent_tokenize, word_tokenize
9
# sent_tokenize needs the Punkt sentence-tokenizer data. Older NLTK releases
# load it from the 'punkt' resource, while newer releases look for
# 'punkt_tab' instead, so both are requested here.
# NOTE(review): nltk.download is expected to report (not raise) when a
# resource id is unknown to the installed version — confirm on the target env.
for _punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(_punkt_resource, quiet=True)
10
 
11
@st.cache_resource
def load_models():
    """Build the sentence-embedding model used by this page.

    The function is decorated with ``st.cache_resource`` so Streamlit can
    reuse the constructed model across script reruns instead of loading it
    again on every interaction.

    Returns:
        The ``SentenceTransformer`` instance for ``'all-MiniLM-L6-v2'``.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')
16
 
17
def text_to_graph(text, sentence_model, similarity_threshold=0.5):
    """Turn a text into a sentence-similarity graph.

    Each sentence found by ``sent_tokenize`` becomes a node (keyed by its
    position in the text) carrying the sentence string in ``text`` and its
    embedding in ``embedding``. Two nodes are joined by an edge when the
    cosine similarity of their embeddings is strictly greater than
    ``similarity_threshold``; the similarity is stored as the edge ``weight``.

    Args:
        text: The raw text to split into sentences.
        sentence_model: Object exposing ``encode(sentence)`` that returns the
            embedding for one sentence (a ``SentenceTransformer`` in this
            page).
        similarity_threshold: Minimum (exclusive) cosine similarity for two
            sentences to be connected. Defaults to ``0.5``, the value that
            was previously hard-coded.

    Returns:
        A ``(graph, sentences)`` tuple: the ``networkx.Graph`` and the list
        of sentences in node-index order. Text without any sentence yields
        an empty graph and an empty list.
    """
    sentences = sent_tokenize(text)

    G = nx.Graph()
    vectors = []
    for index, sentence in enumerate(sentences):
        embedding = sentence_model.encode(sentence)
        G.add_node(index, text=sentence, embedding=embedding)
        # Convert each embedding to a tensor exactly once; the pairwise loop
        # below would otherwise rebuild both tensors for every sentence pair.
        vectors.append(torch.as_tensor(embedding))

    # Compare every unordered pair (i < j) once; the graph is undirected.
    for i, left in enumerate(vectors):
        for j in range(i + 1, len(vectors)):
            score = torch.cosine_similarity(left, vectors[j], dim=0).item()
            if score > similarity_threshold:
                G.add_edge(i, j, weight=score)

    return G, sentences
41
 
42
def analyze_text(text, sentence_model, top_k=3):
    """Build the sentence graph for ``text`` and compute summary statistics.

    Args:
        text: The raw text to analyze.
        sentence_model: Embedding model forwarded to ``text_to_graph``.
        top_k: How many of the highest-PageRank sentences to report.
            Defaults to ``3``, the value that was previously hard-coded.

    Returns:
        A tuple ``(G, sentences, num_nodes, num_edges, avg_degree,
        important_sentences)`` where ``important_sentences`` is a list of
        node indices ordered from highest to lowest PageRank score. For text
        that produces no sentences, the counts are ``0``, ``avg_degree`` is
        ``0.0`` and ``important_sentences`` is empty.
    """
    G, sentences = text_to_graph(text, sentence_model)

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    if num_nodes == 0:
        # No sentences: avoid dividing by zero and skip ranking entirely.
        avg_degree = 0.0
        pagerank = {}
    else:
        avg_degree = sum(degree for _, degree in G.degree()) / num_nodes
        pagerank = nx.pagerank(G)

    # Node indices sorted by descending PageRank score, truncated to top_k.
    important_sentences = sorted(pagerank, key=pagerank.get, reverse=True)[:top_k]

    return G, sentences, num_nodes, num_edges, avg_degree, important_sentences
55
 
56
# --- Streamlit page body: runs top to bottom on every rerun ---
st.title("GraphRAG-based Text Analysis")

sentence_model = load_models()

text_input = st.text_area("Enter text for analysis:", height=200)

if st.button("Analyze Text"):
    # Whitespace-only input contains no sentences, so treat it like empty
    # input instead of sending it through the analysis.
    if text_input.strip():
        G, sentences, num_nodes, num_edges, avg_degree, important_sentences = analyze_text(
            text_input, sentence_model
        )

        st.write(f"Number of sentences: {num_nodes}")
        st.write(f"Number of connections: {num_edges}")
        st.write(f"Average connections per sentence: {avg_degree:.2f}")

        st.subheader("Most important sentences:")
        for i in important_sentences:
            st.write(f"- {sentences[i]}")

        # Visualize graph on an explicit Figure/Axes rather than pyplot's
        # global current figure, and hand that Figure to Streamlit.
        fig, ax = plt.subplots(figsize=(10, 6))
        pos = nx.spring_layout(G)
        nx.draw(
            G,
            pos,
            ax=ax,
            with_labels=False,
            node_size=30,
            node_color='lightblue',
            edge_color='gray',
        )
        ax.set_title("Text as Graph")
        st.pyplot(fig)
        # Close the figure so repeated analyses do not accumulate open
        # matplotlib figures in the long-running server process.
        plt.close(fig)

    else:
        st.write("Please enter some text to analyze.")