eaglelandsonce committed on
Commit
fbd0bc7
·
verified ·
1 Parent(s): f72b662

Delete pages/21_GraphRag.py

Browse files
Files changed (1) hide show
  1. pages/21_GraphRag.py +0 -82
pages/21_GraphRag.py DELETED
@@ -1,82 +0,0 @@
1
- import streamlit as st
2
- import graphrag
3
- import networkx as nx
4
- import matplotlib.pyplot as plt
5
- from sentence_transformers import SentenceTransformer
6
- import torch
7
- import nltk
8
- from nltk.tokenize import sent_tokenize, word_tokenize
9
- nltk.download('punkt', quiet=True)
10
-
11
@st.cache_resource
def load_models():
    """Return the sentence-embedding model used by this page.

    The function is decorated with ``st.cache_resource``, so Streamlit
    reuses the constructed model instead of rebuilding it on every call.
    """
    # 'all-MiniLM-L6-v2' is the SentenceTransformer checkpoint to load.
    return SentenceTransformer('all-MiniLM-L6-v2')
16
-
17
def text_to_graph(text, sentence_model, threshold=0.5):
    """Build a sentence-similarity graph from *text*.

    The text is split into sentences with ``sent_tokenize``. Each sentence
    becomes a node (keyed by its position) carrying ``text`` and
    ``embedding`` attributes, and every pair of sentences whose embedding
    cosine similarity is strictly greater than *threshold* is joined by an
    edge whose ``weight`` is that similarity.

    Args:
        text: The raw text to analyse.
        sentence_model: Object exposing ``encode(sentence)``; the result is
            treated as a 1-D embedding vector (presumably a
            SentenceTransformer -- confirm against the caller).
        threshold: Minimum (exclusive) cosine similarity required to
            connect two sentences. Defaults to 0.5, the previously
            hard-coded value.

    Returns:
        A ``(graph, sentences)`` tuple: the ``networkx.Graph`` and the list
        of sentences in node order.
    """
    sentences = sent_tokenize(text)

    G = nx.Graph()

    # Convert each embedding to a tensor once, instead of re-converting it
    # for every pair it takes part in.
    vectors = []
    for index, sentence in enumerate(sentences):
        embedding = sentence_model.encode(sentence)
        G.add_node(index, text=sentence, embedding=embedding)
        vectors.append(torch.as_tensor(embedding))

    # Compare every unordered pair (i < j) exactly once.
    for i, left in enumerate(vectors):
        for j in range(i + 1, len(vectors)):
            similarity = torch.cosine_similarity(left, vectors[j], dim=0).item()
            if similarity > threshold:
                G.add_edge(i, j, weight=similarity)

    return G, sentences
41
-
42
def analyze_text(text, sentence_model):
    """Compute basic graph statistics and the top-ranked sentences of *text*.

    Args:
        text: The raw text to analyse.
        sentence_model: Embedding model forwarded to ``text_to_graph``.

    Returns:
        A tuple ``(G, sentences, num_nodes, num_edges, avg_degree,
        important_sentences)`` where ``important_sentences`` holds the node
        indices of up to three sentences with the highest PageRank score,
        best first. When the text yields no sentences, ``avg_degree`` is
        ``0.0`` and ``important_sentences`` is empty.
    """
    G, sentences = text_to_graph(text, sentence_model)

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    if num_nodes == 0:
        # No sentences (e.g. whitespace-only text): avoid dividing by zero
        # and skip ranking, since there is nothing to rank.
        return G, sentences, num_nodes, num_edges, 0.0, []

    avg_degree = sum(degree for _, degree in G.degree()) / num_nodes

    # Rank sentences by PageRank and keep the three highest-scoring nodes.
    pagerank = nx.pagerank(G)
    important_sentences = sorted(pagerank, key=pagerank.get, reverse=True)[:3]

    return G, sentences, num_nodes, num_edges, avg_degree, important_sentences
55
-
56
# Streamlit page body: collect text, analyse it on demand, and show the
# statistics, the top-ranked sentences and a drawing of the sentence graph.
st.title("GraphRAG-based Text Analysis")

sentence_model = load_models()

text_input = st.text_area("Enter text for analysis:", height=200)

if st.button("Analyze Text"):
    # Strip first so whitespace-only input is treated as empty rather than
    # being sent to the analysis with no sentences in it.
    if text_input.strip():
        G, sentences, num_nodes, num_edges, avg_degree, important_sentences = analyze_text(text_input, sentence_model)

        st.write(f"Number of sentences: {num_nodes}")
        st.write(f"Number of connections: {num_edges}")
        st.write(f"Average connections per sentence: {avg_degree:.2f}")

        st.subheader("Most important sentences:")
        for i in important_sentences:
            st.write(f"- {sentences[i]}")

        # Visualize graph on an explicit figure instead of pyplot's implicit
        # global one, and close it afterwards so figures do not accumulate
        # across reruns.
        fig, ax = plt.subplots(figsize=(10, 6))
        pos = nx.spring_layout(G)
        nx.draw(G, pos, ax=ax, with_labels=False, node_size=30, node_color='lightblue', edge_color='gray')
        ax.set_title("Text as Graph")
        st.pyplot(fig)
        plt.close(fig)

    else:
        st.write("Please enter some text to analyze.")