Spaces:
Sleeping
Sleeping
Update pages/21_GraphRag.py
Browse files- pages/21_GraphRag.py +59 -26
pages/21_GraphRag.py
CHANGED
@@ -1,42 +1,68 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import
|
|
|
|
|
3 |
import torch
|
|
|
|
|
4 |
from collections import Counter
|
5 |
-
import nltk
|
6 |
-
from nltk.corpus import stopwords
|
7 |
|
8 |
@st.cache_resource
|
9 |
def load_model():
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
14 |
|
15 |
-
def
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
with torch.no_grad():
|
18 |
outputs = model(**inputs)
|
19 |
|
20 |
-
|
|
|
21 |
sentiment = "Positive" if probabilities[0][1] > probabilities[0][0] else "Negative"
|
22 |
confidence = probabilities[0][1].item() if sentiment == "Positive" else probabilities[0][0].item()
|
23 |
|
24 |
-
return sentiment, confidence
|
25 |
|
26 |
-
st.title("Text
|
27 |
|
28 |
-
|
29 |
|
30 |
-
|
31 |
-
Lincoln was born into poverty in a log cabin in Kentucky and was raised on the frontier, mainly in Indiana. He was self-educated and became a lawyer, Whig Party leader, Illinois state legislator, and U.S. representative from Illinois. In 1849, he returned to his successful law practice in Springfield, Illinois. In 1854, angered by the Kansas–Nebraska Act, which opened the territories to slavery, he re-entered politics. He soon became a leader of the new Republican Party. He reached a national audience in the 1858 Senate campaign debates against Stephen A. Douglas. Lincoln ran for president in 1860, sweeping the North to gain victory. Pro-slavery elements in the South viewed his election as a threat to slavery, and Southern states began seceding from the nation. They formed the Confederate States of America, which began seizing federal military bases in the South. A little over one month after Lincoln assumed the presidency, Confederate forces attacked Fort Sumter, a U.S. fort in South Carolina. Following the bombardment, Lincoln mobilized forces to suppress the rebellion and restore the union.
|
32 |
-
Lincoln, a moderate Republican, had to navigate a contentious array of factions with friends and opponents from both the Democratic and Republican parties. His allies, the War Democrats and the Radical Republicans, demanded harsh treatment of the Southern Confederates. He managed the factions by exploiting their mutual enmity, carefully distributing political patronage, and by appealing to the American people. Anti-war Democrats (called "Copperheads") despised Lincoln, and some irreconcilable pro-Confederate elements went so far as to plot his assassination. His Gettysburg Address came to be seen as one of the greatest and most influential statements of American national purpose. Lincoln closely supervised the strategy and tactics in the war effort, including the selection of generals, and implemented a naval blockade of the South's trade. He suspended habeas corpus in Maryland and elsewhere, and he averted war with Britain by defusing the Trent Affair. In 1863, he issued the Emancipation Proclamation, which declared the slaves in the states "in rebellion" to be free. It also directed the Army and Navy to "recognize and maintain the freedom of said persons" and to receive them "into the armed service of the United States." Lincoln pressured border states to outlaw slavery, and he promoted the Thirteenth Amendment to the U.S. Constitution, which abolished slavery, except as punishment for a crime.
|
33 |
-
Lincoln managed his own successful re-election campaign. He sought to heal the war-torn nation through reconciliation. On April 14, 1865, just five days after the Confederate surrender at Appomattox, he was attending a play at Ford's Theatre in Washington, D.C., with his wife, Mary, when he was fatally shot by Confederate sympathizer John Wilkes Booth. Lincoln is remembered as a martyr and a national hero for his wartime leadership and for his efforts to preserve the Union and abolish slavery. Lincoln is often ranked in both popular and scholarly polls as the greatest president in American history."""
|
34 |
-
|
35 |
-
text_input = st.text_area("Enter text for analysis:", value=lincoln_text, height=300)
|
36 |
|
37 |
if st.button("Analyze Text"):
|
38 |
if text_input:
|
39 |
-
sentiment, confidence =
|
40 |
st.write(f"Sentiment: {sentiment}")
|
41 |
st.write(f"Confidence: {confidence:.2f}")
|
42 |
|
@@ -44,15 +70,22 @@ if st.button("Analyze Text"):
|
|
44 |
word_count = len(text_input.split())
|
45 |
st.write(f"Word count: {word_count}")
|
46 |
|
47 |
-
# Most common words
|
48 |
-
|
49 |
-
stop_words = set(stopwords.words('english'))
|
50 |
-
|
51 |
-
words = [word.lower() for word in text_input.split() if word.isalnum() and word.lower() not in stop_words]
|
52 |
word_freq = Counter(words).most_common(5)
|
53 |
|
54 |
-
st.write("Top 5 most common words
|
55 |
for word, freq in word_freq:
|
56 |
st.write(f"- {word}: {freq}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
else:
|
58 |
st.write("Please enter some text to analyze.")
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import GraphormerForGraphClassification, GraphormerTokenizer
|
3 |
+
from datasets import Dataset
|
4 |
+
from transformers.models.graphormer.collating_graphormer import preprocess_item, GraphormerDataCollator
|
5 |
import torch
|
6 |
+
import networkx as nx
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
from collections import Counter
|
|
|
|
|
9 |
|
10 |
@st.cache_resource
def load_model():
    """Load the Graphormer graph classifier and its tokenizer.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of reloading them on every script rerun.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from the same checkpoint.
    """
    # NOTE(review): confirm the installed transformers release really exports
    # ``GraphormerTokenizer``; Graphormer is fed graph tensors, not token ids.
    # NOTE(review): requesting num_classes=2 with ignore_mismatched_sizes
    # presumably yields a newly initialised head, so predictions would not be
    # meaningful without fine-tuning -- verify.
    checkpoint = "clefourrier/pcqm4mv2_graphormer_base"
    classifier = GraphormerForGraphClassification.from_pretrained(
        checkpoint,
        num_classes=2,  # Binary classification (positive/negative sentiment)
        ignore_mismatched_sizes=True,
    )
    graph_tokenizer = GraphormerTokenizer.from_pretrained(checkpoint)
    return classifier, graph_tokenizer
|
19 |
|
20 |
+
def text_to_graph(text):
    """Turn whitespace-separated text into a word-chain graph description.

    Every word becomes a node numbered by its position, and each pair of
    consecutive words is linked by an undirected edge, stored as two directed
    edges.

    Args:
        text: Input string; it is split on whitespace.

    Returns:
        A dict with the keys:
            "edge_index": ``[sources, targets]`` as two parallel lists. All
                forward edges (i -> i+1) come first, followed by the same
                edges reversed.
            "num_nodes": number of words.
            "node_feat": one ``[code_point]`` list per word, taken from the
                word's first character.
            "edge_attr": one ``[1]`` per directed edge.
            "y": ``[1]``, a placeholder label.
    """
    words = text.split()
    num_nodes = len(words)

    # A chain is fully determined by its length, so the edge lists are derived
    # from indices directly rather than by building a graph object and reading
    # its edges back (which also depended on that object's iteration order).
    heads = list(range(num_nodes - 1))
    tails = list(range(1, num_nodes))
    edge_index = [heads + tails, tails + heads]

    # NOTE(review): ord() returns a Unicode code point, not only ASCII; large
    # values may exceed what the downstream node embedding accepts -- confirm.
    node_feat = [[ord(word[0])] for word in words]

    return {
        "edge_index": edge_index,
        "num_nodes": num_nodes,
        "node_feat": node_feat,
        "edge_attr": [[1] for _ in range(len(edge_index[0]))],  # All edges share one attribute
        "y": [1],  # Placeholder label
    }
|
38 |
+
|
39 |
+
def analyze_text(text, model, tokenizer):
    """Classify ``text`` as Positive/Negative from its word-chain graph.

    Args:
        text: Raw input text; converted to a graph with ``text_to_graph``.
        model: Graph classification model. It is called as ``model(**inputs)``
            and must expose ``.device`` and return an object with ``.logits``.
        tokenizer: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        ``(sentiment, confidence, graph)`` where ``sentiment`` is "Positive"
        or "Negative", ``confidence`` is the softmax probability of the chosen
        class as a float, and ``graph`` is the dict from ``text_to_graph``.
    """
    graph = text_to_graph(text)

    # One dataset row per graph, with the graph's keys as columns. Wrapping the
    # dict in a single "train" column would hide edge_index/node_feat/... from
    # preprocess_item, which reads them from the row it is given.
    dataset = Dataset.from_list([graph])
    dataset_processed = dataset.map(preprocess_item, batched=False)

    # The collator takes a list of per-graph feature dicts (the processed rows).
    inputs = GraphormerDataCollator()(list(dataset_processed))
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Single graph in the batch, so row 0 holds the two class probabilities.
    probabilities = torch.softmax(outputs.logits, dim=1)
    negative = probabilities[0][0].item()
    positive = probabilities[0][1].item()
    sentiment = "Positive" if positive > negative else "Negative"
    confidence = positive if sentiment == "Positive" else negative

    return sentiment, confidence, graph
|
56 |
|
57 |
+
# Page flow: load the cached model, take free text, and on demand show the
# predicted sentiment, simple word statistics and a drawing of the word graph.
st.title("Graph-based Text Analysis")

model, tokenizer = load_model()

text_input = st.text_area("Enter text for analysis:", height=200)

if st.button("Analyze Text"):
    # strip() so whitespace-only input is treated like empty input; it has no
    # words and therefore no graph to analyze.
    if text_input.strip():
        sentiment, confidence, graph = analyze_text(text_input, model, tokenizer)
        st.write(f"Sentiment: {sentiment}")
        st.write(f"Confidence: {confidence:.2f}")

        # Word count
        word_count = len(text_input.split())
        st.write(f"Word count: {word_count}")

        # Most common words (tokens containing punctuation are skipped by isalnum)
        words = [word.lower() for word in text_input.split() if word.isalnum()]
        word_freq = Counter(words).most_common(5)

        st.write("Top 5 most common words:")
        for word, freq in word_freq:
            st.write(f"- {word}: {freq}")

        # Visualize graph
        G = nx.Graph()
        # Add nodes explicitly so a single-word text (no edges) is still drawn.
        G.add_nodes_from(range(graph["num_nodes"]))
        G.add_edges_from(zip(graph["edge_index"][0], graph["edge_index"][1]))

        # Draw on an explicit figure instead of pyplot's global state, and
        # close it afterwards so figures do not pile up across reruns.
        fig, ax = plt.subplots(figsize=(10, 6))
        nx.draw(G, ax=ax, with_labels=False, node_size=30, node_color='lightblue', edge_color='gray')
        ax.set_title("Text as Graph")
        st.pyplot(fig)
        plt.close(fig)

    else:
        st.write("Please enter some text to analyze.")
|