eaglelandsonce committed on
Commit
c672b82
·
verified ·
1 Parent(s): c753736

Update pages/21_GraphRag.py

Browse files
Files changed (1) hide show
  1. pages/21_GraphRag.py +37 -90
pages/21_GraphRag.py CHANGED
@@ -1,99 +1,46 @@
1
  import streamlit as st
2
- from transformers import AutoTokenizer, AutoModel
3
- import torch
4
- import networkx as nx
5
- import matplotlib.pyplot as plt
6
- from collections import Counter
7
  import graphrag
8
  import inspect
9
 
10
- st.title("GraphRAG Module Exploration and Text Analysis")
11
 
12
- # Diagnostic section
13
  st.header("GraphRAG Module Contents")
14
  graphrag_contents = dir(graphrag)
15
- st.write("Available attributes and methods in graphrag module:")
16
- for item in graphrag_contents:
17
- st.write(f"- {item}")
18
- attr = getattr(graphrag, item)
19
- if inspect.isclass(attr) or inspect.isfunction(attr):
20
- st.write(f" Signature: {inspect.signature(attr)}")
21
- st.write(f" Docstring: {attr.__doc__}")
22
 
23
- # Attempt to find a suitable model class
24
- model_class = None
25
  for item in graphrag_contents:
26
- if 'model' in item.lower():
27
- model_class = getattr(graphrag, item)
28
- st.write(f"Found potential model class: {item}")
29
- break
30
-
31
- if model_class is None:
32
- st.error("Could not find a suitable model class in graphrag module.")
33
- st.stop()
34
-
35
- @st.cache_resource
36
- def load_model():
37
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
38
- bert_model = AutoModel.from_pretrained("bert-base-uncased")
39
-
40
- # Initialize graphrag model
41
- # Note: This is a placeholder. Adjust based on the actual model class found
42
- graph_rag_model = model_class(
43
- bert_model,
44
- num_labels=2, # For binary sentiment classification
45
- # Add or remove parameters based on the actual model's requirements
46
- )
47
-
48
- return tokenizer, graph_rag_model
49
-
50
- def text_to_graph(text):
51
- words = text.split()
52
- G = nx.Graph()
53
- for i, word in enumerate(words):
54
- G.add_node(i, word=word)
55
- if i > 0:
56
- G.add_edge(i-1, i)
57
-
58
- edge_index = [[e[0] for e in G.edges()] + [e[1] for e in G.edges()],
59
- [e[1] for e in G.edges()] + [e[0] for e in G.edges()]]
60
-
61
- return {
62
- "edge_index": edge_index,
63
- "num_nodes": len(G.nodes()),
64
- "node_feat": [[ord(word[0])] for word in words], # Use ASCII value of first letter as feature
65
- "edge_attr": [[1] for _ in range(len(G.edges()) * 2)], # All edges have the same attribute
66
- }
67
-
68
- def analyze_text(text, tokenizer, model):
69
- # Tokenize the text
70
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
71
-
72
- # Create graph representation
73
- graph = text_to_graph(text)
74
-
75
- # Combine tokenized input with graph representation
76
- # Note: This is a placeholder. Adjust based on the actual model's input requirements
77
- combined_input = {
78
- "input_ids": inputs["input_ids"],
79
- "attention_mask": inputs["attention_mask"],
80
- "edge_index": torch.tensor(graph["edge_index"], dtype=torch.long),
81
- "node_feat": torch.tensor(graph["node_feat"], dtype=torch.float),
82
- "edge_attr": torch.tensor(graph["edge_attr"], dtype=torch.float),
83
- "num_nodes": graph["num_nodes"]
84
- }
85
-
86
- # Perform inference
87
- with torch.no_grad():
88
- outputs = model(**combined_input)
89
-
90
- # Process outputs
91
- # Note: Adjust this based on the actual model's output format
92
- logits = outputs.logits if hasattr(outputs, 'logits') else outputs
93
- probabilities = torch.softmax(logits, dim=1)
94
- sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative"
95
- confidence = probabilities[0][1].item() if sentiment == "Positive" else probabilities[0][0].item()
96
-
97
- return sentiment, confidence, graph
98
-
99
- # Rest of the Streamlit app (text input, analysis button, etc.) remains the same...
 
1
  import streamlit as st
 
 
 
 
 
2
  import graphrag
3
  import inspect
4
 
5
+ st.title("GraphRAG Module Explorer")
6
 
7
+ # Display all attributes and functions in the graphrag module
8
  st.header("GraphRAG Module Contents")
9
  graphrag_contents = dir(graphrag)
 
 
 
 
 
 
 
10
 
 
 
11
  for item in graphrag_contents:
12
+ attr = getattr(graphrag, item)
13
+ st.subheader(f"{item}")
14
+ st.write(f"Type: {type(attr)}")
15
+
16
+ if inspect.isclass(attr):
17
+ st.write("Class Methods:")
18
+ for name, method in inspect.getmembers(attr, predicate=inspect.isfunction):
19
+ st.write(f"- {name}")
20
+ st.write(f" Signature: {inspect.signature(method)}")
21
+ st.write(f" Docstring: {method.__doc__}")
22
+
23
+ elif inspect.isfunction(attr):
24
+ st.write("Function:")
25
+ st.write(f"Signature: {inspect.signature(attr)}")
26
+ st.write(f"Docstring: {attr.__doc__}")
27
+
28
+ elif isinstance(attr, (int, float, str, bool)):
29
+ st.write(f"Value: {attr}")
30
+
31
+ st.write("---")
32
+
33
+ # Display the module's docstring if available
34
+ if graphrag.__doc__:
35
+ st.header("GraphRAG Module Documentation")
36
+ st.write(graphrag.__doc__)
37
+
38
+ st.header("Next Steps")
39
+ st.write("""
40
+ Based on the information above, we need to determine:
41
+ 1. How to create a graph representation of text using graphrag.
42
+ 2. How to process this graph representation for analysis.
43
+ 3. Whether graphrag provides any built-in analysis tools or if we need to integrate it with other libraries.
44
+
45
+ Please review the module contents and let me know which components seem most relevant for our text analysis task.
46
+ """)