Spaces:
Sleeping
Sleeping
Update pages/21_GraphRag.py
Browse files- pages/21_GraphRag.py +34 -44
pages/21_GraphRag.py
CHANGED
@@ -4,21 +4,45 @@ import torch
|
|
4 |
import networkx as nx
|
5 |
import matplotlib.pyplot as plt
|
6 |
from collections import Counter
|
7 |
-
import graphrag
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
@st.cache_resource
|
10 |
def load_model():
|
11 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
12 |
bert_model = AutoModel.from_pretrained("bert-base-uncased")
|
13 |
|
14 |
-
# Initialize
|
15 |
-
# Note:
|
16 |
-
graph_rag_model =
|
17 |
bert_model,
|
18 |
num_labels=2, # For binary sentiment classification
|
19 |
-
|
20 |
-
hidden_size=768,
|
21 |
-
intermediate_size=3072,
|
22 |
)
|
23 |
|
24 |
return tokenizer, graph_rag_model
|
@@ -49,7 +73,7 @@ def analyze_text(text, tokenizer, model):
|
|
49 |
graph = text_to_graph(text)
|
50 |
|
51 |
# Combine tokenized input with graph representation
|
52 |
-
# Note:
|
53 |
combined_input = {
|
54 |
"input_ids": inputs["input_ids"],
|
55 |
"attention_mask": inputs["attention_mask"],
|
@@ -64,7 +88,7 @@ def analyze_text(text, tokenizer, model):
|
|
64 |
outputs = model(**combined_input)
|
65 |
|
66 |
# Process outputs
|
67 |
-
# Note: Adjust this based on
|
68 |
logits = outputs.logits if hasattr(outputs, 'logits') else outputs
|
69 |
probabilities = torch.softmax(logits, dim=1)
|
70 |
sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative"
|
@@ -72,38 +96,4 @@ def analyze_text(text, tokenizer, model):
|
|
72 |
|
73 |
return sentiment, confidence, graph
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
tokenizer, model = load_model()
|
78 |
-
|
79 |
-
text_input = st.text_area("Enter text for analysis:", height=200)
|
80 |
-
|
81 |
-
if st.button("Analyze Text"):
|
82 |
-
if text_input:
|
83 |
-
sentiment, confidence, graph = analyze_text(text_input, tokenizer, model)
|
84 |
-
st.write(f"Sentiment: {sentiment}")
|
85 |
-
st.write(f"Confidence: {confidence:.2f}")
|
86 |
-
|
87 |
-
# Additional analysis
|
88 |
-
word_count = len(text_input.split())
|
89 |
-
st.write(f"Word count: {word_count}")
|
90 |
-
|
91 |
-
# Most common words
|
92 |
-
words = [word.lower() for word in text_input.split() if word.isalnum()]
|
93 |
-
word_freq = Counter(words).most_common(5)
|
94 |
-
|
95 |
-
st.write("Top 5 most common words:")
|
96 |
-
for word, freq in word_freq:
|
97 |
-
st.write(f"- {word}: {freq}")
|
98 |
-
|
99 |
-
# Visualize graph
|
100 |
-
G = nx.Graph()
|
101 |
-
G.add_edges_from(zip(graph["edge_index"][0], graph["edge_index"][1]))
|
102 |
-
|
103 |
-
plt.figure(figsize=(10, 6))
|
104 |
-
nx.draw(G, with_labels=False, node_size=30, node_color='lightblue', edge_color='gray')
|
105 |
-
plt.title("Text as Graph")
|
106 |
-
st.pyplot(plt)
|
107 |
-
|
108 |
-
else:
|
109 |
-
st.write("Please enter some text to analyze.")
|
|
|
4 |
import networkx as nx
|
5 |
import matplotlib.pyplot as plt
|
6 |
from collections import Counter
|
7 |
+
import graphrag
|
8 |
+
import inspect
|
9 |
+
|
10 |
+
st.title("GraphRAG Module Exploration and Text Analysis")
|
11 |
+
|
12 |
+
# Diagnostic section
|
13 |
+
st.header("GraphRAG Module Contents")
|
14 |
+
graphrag_contents = dir(graphrag)
|
15 |
+
st.write("Available attributes and methods in graphrag module:")
|
16 |
+
for item in graphrag_contents:
|
17 |
+
st.write(f"- {item}")
|
18 |
+
attr = getattr(graphrag, item)
|
19 |
+
if inspect.isclass(attr) or inspect.isfunction(attr):
|
20 |
+
st.write(f" Signature: {inspect.signature(attr)}")
|
21 |
+
st.write(f" Docstring: {attr.__doc__}")
|
22 |
+
|
23 |
+
# Attempt to find a suitable model class
|
24 |
+
model_class = None
|
25 |
+
for item in graphrag_contents:
|
26 |
+
if 'model' in item.lower():
|
27 |
+
model_class = getattr(graphrag, item)
|
28 |
+
st.write(f"Found potential model class: {item}")
|
29 |
+
break
|
30 |
+
|
31 |
+
if model_class is None:
|
32 |
+
st.error("Could not find a suitable model class in graphrag module.")
|
33 |
+
st.stop()
|
34 |
|
35 |
@st.cache_resource
|
36 |
def load_model():
|
37 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
38 |
bert_model = AutoModel.from_pretrained("bert-base-uncased")
|
39 |
|
40 |
+
# Initialize graphrag model
|
41 |
+
# Note: This is a placeholder. Adjust based on the actual model class found
|
42 |
+
graph_rag_model = model_class(
|
43 |
bert_model,
|
44 |
num_labels=2, # For binary sentiment classification
|
45 |
+
# Add or remove parameters based on the actual model's requirements
|
|
|
|
|
46 |
)
|
47 |
|
48 |
return tokenizer, graph_rag_model
|
|
|
73 |
graph = text_to_graph(text)
|
74 |
|
75 |
# Combine tokenized input with graph representation
|
76 |
+
# Note: This is a placeholder. Adjust based on the actual model's input requirements
|
77 |
combined_input = {
|
78 |
"input_ids": inputs["input_ids"],
|
79 |
"attention_mask": inputs["attention_mask"],
|
|
|
88 |
outputs = model(**combined_input)
|
89 |
|
90 |
# Process outputs
|
91 |
+
# Note: Adjust this based on the actual model's output format
|
92 |
logits = outputs.logits if hasattr(outputs, 'logits') else outputs
|
93 |
probabilities = torch.softmax(logits, dim=1)
|
94 |
sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative"
|
|
|
96 |
|
97 |
return sentiment, confidence, graph
|
98 |
|
99 |
+
# Rest of the Streamlit app (text input, analysis button, etc.) remains the same...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|