Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Files Community

TensorFlowClass / pages /21_GraphRag.py

eaglelandsonce

Update pages/21_GraphRag.py

c753736 verified about 1 year ago

raw

history blame

3.59 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModel
	import torch
	import networkx as nx
	import matplotlib.pyplot as plt
	from collections import Counter
	import graphrag
	import inspect

	st.title("GraphRAG Module Exploration and Text Analysis")

	# Diagnostic section: list every name the imported graphrag module exposes,
	# with signature and docstring for classes and functions, so a usable model
	# class can be picked out by eye.
	st.header("GraphRAG Module Contents")
	graphrag_contents = dir(graphrag)
	st.write("Available attributes and methods in graphrag module:")
	for item in graphrag_contents:
	    st.write(f"- {item}")
	    attr = getattr(graphrag, item)
	    if inspect.isclass(attr) or inspect.isfunction(attr):
	        # inspect.signature() raises ValueError (no signature available) or
	        # TypeError (unsupported object) for some callables; a diagnostic
	        # listing must not abort the whole page because of one of them.
	        try:
	            signature = inspect.signature(attr)
	        except (TypeError, ValueError):
	            signature = "(signature unavailable)"
	        st.write(f" Signature: {signature}")
	        st.write(f" Docstring: {attr.__doc__}")

	# Attempt to find a suitable model class: the first graphrag attribute whose
	# name contains "model" (case-insensitive) AND that can actually be called.
	# Submodules or constants that merely have "model" in their name are skipped,
	# because load_model() later invokes model_class(...) and would otherwise
	# fail with "object is not callable" instead of showing the error below.
	model_class = None
	for item in graphrag_contents:
	    if 'model' not in item.lower():
	        continue
	    candidate = getattr(graphrag, item)
	    if not callable(candidate):
	        continue
	    model_class = candidate
	    st.write(f"Found potential model class: {item}")
	    break

	# Nothing usable was found: report it and halt this script run.
	if model_class is None:
	    st.error("Could not find a suitable model class in graphrag module.")
	    st.stop()

	@st.cache_resource
	def load_model():
	    """Load the BERT tokenizer and build the graphrag model around BERT.

	    Both the tokenizer and the encoder come from the "bert-base-uncased"
	    checkpoint. The encoder is handed to the module-level ``model_class``
	    together with ``num_labels=2`` (binary sentiment classification).

	    NOTE: the constructor call is a placeholder; its arguments must be
	    adjusted to whatever ``model_class`` actually turns out to accept.

	    Returns:
	        A ``(tokenizer, graph_rag_model)`` tuple.
	    """
	    checkpoint = "bert-base-uncased"
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    encoder = AutoModel.from_pretrained(checkpoint)

	    # Keyword arguments for the placeholder constructor call; add or remove
	    # entries here once the real model's requirements are known.
	    constructor_kwargs = {"num_labels": 2}
	    classifier = model_class(encoder, **constructor_kwargs)

	    return tokenizer, classifier

	def text_to_graph(text):
	    """Turn whitespace-separated text into a chain graph, returned as plain lists.

	    Each word becomes one node (numbered by position) and every pair of
	    consecutive words is joined by an undirected edge.

	    Returns:
	        A dict with
	        - "edge_index": two parallel lists (sources, targets) in which every
	          undirected edge appears once per direction,
	        - "num_nodes": the number of words,
	        - "node_feat": one single-element list per word holding the code
	          point of the word's first character,
	        - "edge_attr": one ``[1]`` per directed edge.
	    """
	    tokens = text.split()
	    chain = nx.Graph()
	    chain.add_nodes_from(
	        (position, {"word": token}) for position, token in enumerate(tokens)
	    )
	    chain.add_edges_from(zip(range(len(tokens) - 1), range(1, len(tokens))))

	    # Materialise the edge list once and derive both directions from it.
	    links = list(chain.edges())
	    heads = [link[0] for link in links]
	    tails = [link[1] for link in links]

	    first_char_codes = [[ord(token[0])] for token in tokens]
	    unit_attrs = [[1] for _ in range(2 * len(links))]  # same attribute on every edge

	    return {
	        "edge_index": [heads + tails, tails + heads],
	        "num_nodes": len(chain.nodes()),
	        "node_feat": first_char_codes,
	        "edge_attr": unit_attrs,
	    }

	def analyze_text(text, tokenizer, model):
	    """Run the graph-augmented sentiment model on *text*.

	    Args:
	        text: The raw input string.
	        tokenizer: Callable invoked as
	            ``tokenizer(text, return_tensors="pt", truncation=True, max_length=512)``;
	            its result must provide "input_ids" and "attention_mask".
	        model: Callable that accepts the combined inputs as keyword
	            arguments and returns either logits or an object with a
	            ``.logits`` attribute.

	    Returns:
	        ``(sentiment, confidence, graph)`` where ``sentiment`` is "Positive"
	        or "Negative", ``confidence`` is the softmax probability of that
	        label as a float, and ``graph`` is the dict from ``text_to_graph``.

	    NOTE(review): the tokenizer truncates to 512 tokens, but the graph is
	    built from the full text, so the two views can cover different spans for
	    long inputs. Confirm this is acceptable for the real model.
	    """
	    # Tokenize the text
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

	    # Create graph representation
	    graph = text_to_graph(text)

	    # Combine tokenized input with graph representation.
	    # Note: This is a placeholder. Adjust based on the actual model's input requirements.
	    # reshape(-1, 1) keeps node_feat / edge_attr as column tensors even when
	    # the list is empty (empty text, or a single word with no edges); without
	    # it torch.tensor([]) would produce shape (0,) instead of (0, 1).
	    combined_input = {
	        "input_ids": inputs["input_ids"],
	        "attention_mask": inputs["attention_mask"],
	        "edge_index": torch.tensor(graph["edge_index"], dtype=torch.long),
	        "node_feat": torch.tensor(graph["node_feat"], dtype=torch.float).reshape(-1, 1),
	        "edge_attr": torch.tensor(graph["edge_attr"], dtype=torch.float).reshape(-1, 1),
	        "num_nodes": graph["num_nodes"],
	    }

	    # Perform inference without tracking gradients
	    with torch.no_grad():
	        outputs = model(**combined_input)

	    # Process outputs
	    # Note: Adjust this based on the actual model's output format
	    logits = outputs.logits if hasattr(outputs, 'logits') else outputs
	    probabilities = torch.softmax(logits, dim=1)

	    # Index 0 is treated as the negative class and index 1 as the positive one.
	    negative_prob = probabilities[0][0]
	    positive_prob = probabilities[0][1]
	    if positive_prob > negative_prob:
	        sentiment = "Positive"
	        confidence = positive_prob.item()
	    else:
	        sentiment = "Negative"
	        confidence = negative_prob.item()

	    return sentiment, confidence, graph

	# Rest of the Streamlit app (text input, analysis button, etc.) remains the same...