import streamlit as st from transformers import AutoTokenizer, AutoModel import torch import networkx as nx import matplotlib.pyplot as plt from collections import Counter import graphrag import inspect st.title("GraphRAG Module Exploration and Text Analysis") # Diagnostic section st.header("GraphRAG Module Contents") graphrag_contents = dir(graphrag) st.write("Available attributes and methods in graphrag module:") for item in graphrag_contents: st.write(f"- {item}") attr = getattr(graphrag, item) if inspect.isclass(attr) or inspect.isfunction(attr): st.write(f" Signature: {inspect.signature(attr)}") st.write(f" Docstring: {attr.__doc__}") # Attempt to find a suitable model class model_class = None for item in graphrag_contents: if 'model' in item.lower(): model_class = getattr(graphrag, item) st.write(f"Found potential model class: {item}") break if model_class is None: st.error("Could not find a suitable model class in graphrag module.") st.stop() @st.cache_resource def load_model(): tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") bert_model = AutoModel.from_pretrained("bert-base-uncased") # Initialize graphrag model # Note: This is a placeholder. Adjust based on the actual model class found graph_rag_model = model_class( bert_model, num_labels=2, # For binary sentiment classification # Add or remove parameters based on the actual model's requirements ) return tokenizer, graph_rag_model def text_to_graph(text): words = text.split() G = nx.Graph() for i, word in enumerate(words): G.add_node(i, word=word) if i > 0: G.add_edge(i-1, i) edge_index = [[e[0] for e in G.edges()] + [e[1] for e in G.edges()], [e[1] for e in G.edges()] + [e[0] for e in G.edges()]] return { "edge_index": edge_index, "num_nodes": len(G.nodes()), "node_feat": [[ord(word[0])] for word in words], # Use ASCII value of first letter as feature "edge_attr": [[1] for _ in range(len(G.edges()) * 2)], # All edges have the same attribute } def analyze_text(text, tokenizer, model): # Tokenize the text inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512) # Create graph representation graph = text_to_graph(text) # Combine tokenized input with graph representation # Note: This is a placeholder. Adjust based on the actual model's input requirements combined_input = { "input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"], "edge_index": torch.tensor(graph["edge_index"], dtype=torch.long), "node_feat": torch.tensor(graph["node_feat"], dtype=torch.float), "edge_attr": torch.tensor(graph["edge_attr"], dtype=torch.float), "num_nodes": graph["num_nodes"] } # Perform inference with torch.no_grad(): outputs = model(**combined_input) # Process outputs # Note: Adjust this based on the actual model's output format logits = outputs.logits if hasattr(outputs, 'logits') else outputs probabilities = torch.softmax(logits, dim=1) sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative" confidence = probabilities[0][1].item() if sentiment == "Positive" else probabilities[0][0].item() return sentiment, confidence, graph # Rest of the Streamlit app (text input, analysis button, etc.) remains the same...