Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Files Community

eaglelandsonce committed on Jul 9, 2024

Commit

510db06

verified ·

1 Parent(s): 4bf193c

Update pages/21_GraphRag.py

Browse files

Files changed (1) hide show

pages/21_GraphRag.py +38 -20

pages/21_GraphRag.py CHANGED Viewed

@@ -1,21 +1,27 @@
 import streamlit as st
-from transformers import GraphormerForGraphClassification, GraphormerFeatureExtractor
-from datasets import Dataset
-from transformers.models.graphormer.collating_graphormer import preprocess_item, GraphormerDataCollator
 import torch
 import networkx as nx
 import matplotlib.pyplot as plt
 from collections import Counter
 @st.cache_resource
 def load_model():
-    model = GraphormerForGraphClassification.from_pretrained(
-        "clefourrier/pcqm4mv2_graphormer_base",
-        num_classes=2,  # Binary classification (positive/negative sentiment)
-        ignore_mismatched_sizes=True,
     )
-    feature_extractor = GraphormerFeatureExtractor.from_pretrained("clefourrier/pcqm4mv2_graphormer_base")
-    return model, feature_extractor
 def text_to_graph(text):
     words = text.split()
@@ -33,36 +39,48 @@ def text_to_graph(text):
         "num_nodes": len(G.nodes()),
         "node_feat": [[ord(word[0])] for word in words],  # Use ASCII value of first letter as feature
         "edge_attr": [[1] for _ in range(len(G.edges()) * 2)],  # All edges have the same attribute
-        "y": [1]  # Placeholder label, will be ignored during inference
     }
-def analyze_text(text, model, feature_extractor):
     graph = text_to_graph(text)
-    dataset = Dataset.from_dict({"train": [graph]})
-    dataset_processed = dataset.map(preprocess_item, batched=False)
-    inputs = GraphormerDataCollator()(dataset_processed["train"])
-    inputs = {k: v.to(model.device) for k, v in inputs.items()}
     with torch.no_grad():
-        outputs = model(**inputs)
-    logits = outputs.logits
     probabilities = torch.softmax(logits, dim=1)
     sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative"
     confidence = probabilities[0][1].item() if sentiment == "Positive" else probabilities[0][0].item()
     return sentiment, confidence, graph
-st.title("Graph-based Text Analysis")
-model, feature_extractor = load_model()
 text_input = st.text_area("Enter text for analysis:", height=200)
 if st.button("Analyze Text"):
     if text_input:
-        sentiment, confidence, graph = analyze_text(text_input, model, feature_extractor)
         st.write(f"Sentiment: {sentiment}")
         st.write(f"Confidence: {confidence:.2f}")

 import streamlit as st
+from transformers import AutoTokenizer, AutoModel
 import torch
 import networkx as nx
 import matplotlib.pyplot as plt
 from collections import Counter
+import graphrag  # Import the graphrag library
 @st.cache_resource
 def load_model():
+    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+    bert_model = AutoModel.from_pretrained("bert-base-uncased")
+    # Initialize GraphRAG model
+    # Note: You may need to adjust these parameters based on GraphRAG's actual interface
+    graph_rag_model = graphrag.GraphRAG(
+        bert_model,
+        num_labels=2,  # For binary sentiment classification
+        num_hidden_layers=2,
+        hidden_size=768,
+        intermediate_size=3072,
     )
+    return tokenizer, graph_rag_model
 def text_to_graph(text):
     words = text.split()
         "num_nodes": len(G.nodes()),
         "node_feat": [[ord(word[0])] for word in words],  # Use ASCII value of first letter as feature
         "edge_attr": [[1] for _ in range(len(G.edges()) * 2)],  # All edges have the same attribute
     }
+def analyze_text(text, tokenizer, model):
+    # Tokenize the text
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    # Create graph representation
     graph = text_to_graph(text)
+    # Combine tokenized input with graph representation
+    # Note: You may need to adjust this based on GraphRAG's actual input requirements
+    combined_input = {
+        "input_ids": inputs["input_ids"],
+        "attention_mask": inputs["attention_mask"],
+        "edge_index": torch.tensor(graph["edge_index"], dtype=torch.long),
+        "node_feat": torch.tensor(graph["node_feat"], dtype=torch.float),
+        "edge_attr": torch.tensor(graph["edge_attr"], dtype=torch.float),
+        "num_nodes": graph["num_nodes"]
+    }
+    # Perform inference
     with torch.no_grad():
+        outputs = model(**combined_input)
+    # Process outputs
+    # Note: Adjust this based on GraphRAG's actual output format
+    logits = outputs.logits if hasattr(outputs, 'logits') else outputs
     probabilities = torch.softmax(logits, dim=1)
     sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative"
     confidence = probabilities[0][1].item() if sentiment == "Positive" else probabilities[0][0].item()
     return sentiment, confidence, graph
+st.title("GraphRAG-based Text Analysis")
+tokenizer, model = load_model()
 text_input = st.text_area("Enter text for analysis:", height=200)
 if st.button("Analyze Text"):
     if text_input:
+        sentiment, confidence, graph = analyze_text(text_input, tokenizer, model)
         st.write(f"Sentiment: {sentiment}")
         st.write(f"Confidence: {confidence:.2f}")