# Source: TensorFlowClass / pages/21_GraphRag.py
# eaglelandsonce - "Update pages/21_GraphRag.py" (commit c753736, verified, 3.59 kB)
import streamlit as st
from transformers import AutoTokenizer, AutoModel
import torch
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import graphrag
import inspect
st.title("GraphRAG Module Exploration and Text Analysis")

# Diagnostic section: list every name exposed by the imported graphrag module,
# with signature and docstring for the classes and functions among them.
st.header("GraphRAG Module Contents")
graphrag_contents = dir(graphrag)
st.write("Available attributes and methods in graphrag module:")
for item in graphrag_contents:
    st.write(f"- {item}")
    attr = getattr(graphrag, item)
    if inspect.isclass(attr) or inspect.isfunction(attr):
        # inspect.signature raises ValueError when no signature can be
        # provided and TypeError for unsupported objects; report that
        # instead of aborting the whole page part-way through the listing.
        try:
            signature = inspect.signature(attr)
        except (ValueError, TypeError):
            signature = "(signature unavailable)"
        st.write(f" Signature: {signature}")
        st.write(f" Docstring: {attr.__doc__}")

# Attempt to find a suitable model class: the first attribute whose name
# contains "model" AND that can actually be called. Non-callable matches
# (e.g. a submodule named "model") are skipped because load_model() invokes
# model_class(...) and would otherwise fail with "object is not callable".
model_class = None
for item in graphrag_contents:
    if 'model' not in item.lower():
        continue
    candidate = getattr(graphrag, item)
    if not callable(candidate):
        continue
    model_class = candidate
    st.write(f"Found potential model class: {item}")
    break

# Without a model class nothing below can work, so halt the script here.
if model_class is None:
    st.error("Could not find a suitable model class in graphrag module.")
    st.stop()
@st.cache_resource
def load_model():
    """Create the BERT tokenizer and the graphrag model wrapped around BERT.

    Both pretrained objects come from the "bert-base-uncased" checkpoint.
    The graphrag model is built from the module-level ``model_class`` that
    the discovery code above selected.

    Returns:
        A ``(tokenizer, graph_rag_model)`` tuple.
    """
    checkpoint = "bert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    encoder = AutoModel.from_pretrained(checkpoint)

    # NOTE: placeholder construction. The positional encoder argument and the
    # keyword arguments below are a guess; adjust them (add or remove
    # parameters) to match the constructor of the model class actually found.
    model_kwargs = {
        "num_labels": 2,  # binary sentiment classification
    }
    graph_rag_model = model_class(encoder, **model_kwargs)

    return tokenizer, graph_rag_model
def text_to_graph(text):
    """Turn whitespace-separated text into a path-graph description.

    Each word becomes one node (numbered by position) and every pair of
    neighbouring words is linked. Every link is listed in both directions:
    first all forward edges ``(i, i + 1)``, then all reverse edges.

    Args:
        text: Input string; it is split on whitespace.

    Returns:
        A dict with:
        - ``"edge_index"``: two parallel lists ``[sources, targets]``.
        - ``"num_nodes"``: number of words.
        - ``"node_feat"``: one single-element list per word holding the
          code point (``ord``) of the word's first character.
        - ``"edge_attr"``: ``[1]`` for every directed edge.
        Empty or whitespace-only text yields zero nodes and empty lists.
    """
    words = text.split()
    num_nodes = len(words)

    # A chain of n words has the n - 1 edges (0, 1), (1, 2), ...; compute the
    # endpoints directly instead of building a graph object just to read the
    # same pairs back several times.
    sources = list(range(num_nodes - 1))
    targets = list(range(1, num_nodes))

    return {
        # Forward edges followed by their reversed counterparts.
        "edge_index": [sources + targets, targets + sources],
        "num_nodes": num_nodes,
        # str.split() never yields empty strings, so word[0] is always valid.
        "node_feat": [[ord(word[0])] for word in words],
        # All directed edges share the same constant attribute.
        "edge_attr": [[1] for _ in range(2 * len(sources))],
    }
def analyze_text(text, tokenizer, model):
    """Classify ``text`` as positive or negative using token and graph inputs.

    Args:
        text: The string to analyse.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", truncation=True,
            max_length=512)``; its result must provide ``"input_ids"`` and
            ``"attention_mask"``.
        model: Callable that accepts the combined keyword inputs built below.
            Its result is used as logits directly, or via a ``.logits``
            attribute when it has one.

    Returns:
        ``(sentiment, confidence, graph)`` where ``sentiment`` is
        ``"Positive"`` or ``"Negative"``, ``confidence`` is the softmax
        probability of the chosen label as a Python float, and ``graph`` is
        the dict returned by ``text_to_graph``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    graph = text_to_graph(text)

    # NOTE: placeholder input layout; adapt the keys to whatever the actual
    # model's forward signature expects.
    edge_index = torch.tensor(graph["edge_index"], dtype=torch.long)
    node_feat = torch.tensor(graph["node_feat"], dtype=torch.float)
    edge_attr = torch.tensor(graph["edge_attr"], dtype=torch.float)
    model_inputs = {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
        "edge_index": edge_index,
        "node_feat": node_feat,
        "edge_attr": edge_attr,
        "num_nodes": graph["num_nodes"],
    }

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**model_inputs)

    # NOTE: adjust to the real output format; falls back to the raw output
    # when there is no ``logits`` attribute.
    logits = getattr(outputs, "logits", outputs)
    probabilities = torch.softmax(logits, dim=1)

    # Index 0 is treated as the negative class and index 1 as the positive
    # class; a tie resolves to "Negative".
    negative_prob = probabilities[0][0]
    positive_prob = probabilities[0][1]
    if positive_prob > negative_prob:
        return "Positive", positive_prob.item(), graph
    return "Negative", negative_prob.item(), graph
# Rest of the Streamlit app (text input, analysis button, etc.) remains the same...