eaglelandsonce committed on
Commit
cb06d03
·
verified ·
1 Parent(s): 87c68a6

Update pages/21_GraphRag.py

Browse files
Files changed (1) hide show
  1. pages/21_GraphRag.py +59 -26
pages/21_GraphRag.py CHANGED
@@ -1,42 +1,68 @@
1
  import streamlit as st
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 
3
  import torch
 
 
4
  from collections import Counter
5
- import nltk
6
- from nltk.corpus import stopwords
7
 
8
  @st.cache_resource
9
  def load_model():
10
- model_name = "distilbert-base-uncased-finetuned-sst-2-english"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
13
- return tokenizer, model
 
 
 
14
 
15
def analyze_sentiment(text, tokenizer, model):
    """Label ``text`` as positive or negative from a model's class scores.

    Args:
        text: The string to classify.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", truncation=True,
            max_length=512)``; its result is unpacked into ``model`` as
            keyword arguments.
        model: Callable whose result exposes ``logits`` with the class
            scores along dimension 1 (index 0 = negative, 1 = positive).

    Returns:
        ``(sentiment, confidence)`` where ``sentiment`` is ``"Positive"``
        or ``"Negative"`` and ``confidence`` is the softmax probability of
        the chosen class as a Python float. A tie resolves to
        ``"Negative"``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Only the first row is scored.
    row = torch.softmax(logits, dim=1)[0]
    negative_p, positive_p = row[0], row[1]

    if positive_p > negative_p:
        return "Positive", positive_p.item()
    return "Negative", negative_p.item()
25
 
26
- st.title("Text Sentiment Analysis")
27
 
28
- tokenizer, model = load_model()
29
 
30
- lincoln_text = """Abraham Lincoln (⫽ˈlɪŋkən⫽ LING-kən; February 12, 1809 – April 15, 1865) was an American lawyer, politician, and statesman who served as the 16th president of the United States from 1861 until his assassination in 1865. Lincoln led the United States through the American Civil War, defending the nation as a constitutional union, defeating the insurgent Confederacy, playing a major role in the abolition of slavery, expanding the power of the federal government, and modernizing the U.S. economy.
31
- Lincoln was born into poverty in a log cabin in Kentucky and was raised on the frontier, mainly in Indiana. He was self-educated and became a lawyer, Whig Party leader, Illinois state legislator, and U.S. representative from Illinois. In 1849, he returned to his successful law practice in Springfield, Illinois. In 1854, angered by the Kansas–Nebraska Act, which opened the territories to slavery, he re-entered politics. He soon became a leader of the new Republican Party. He reached a national audience in the 1858 Senate campaign debates against Stephen A. Douglas. Lincoln ran for president in 1860, sweeping the North to gain victory. Pro-slavery elements in the South viewed his election as a threat to slavery, and Southern states began seceding from the nation. They formed the Confederate States of America, which began seizing federal military bases in the South. A little over one month after Lincoln assumed the presidency, Confederate forces attacked Fort Sumter, a U.S. fort in South Carolina. Following the bombardment, Lincoln mobilized forces to suppress the rebellion and restore the union.
32
- Lincoln, a moderate Republican, had to navigate a contentious array of factions with friends and opponents from both the Democratic and Republican parties. His allies, the War Democrats and the Radical Republicans, demanded harsh treatment of the Southern Confederates. He managed the factions by exploiting their mutual enmity, carefully distributing political patronage, and by appealing to the American people. Anti-war Democrats (called "Copperheads") despised Lincoln, and some irreconcilable pro-Confederate elements went so far as to plot his assassination. His Gettysburg Address came to be seen as one of the greatest and most influential statements of American national purpose. Lincoln closely supervised the strategy and tactics in the war effort, including the selection of generals, and implemented a naval blockade of the South's trade. He suspended habeas corpus in Maryland and elsewhere, and he averted war with Britain by defusing the Trent Affair. In 1863, he issued the Emancipation Proclamation, which declared the slaves in the states "in rebellion" to be free. It also directed the Army and Navy to "recognize and maintain the freedom of said persons" and to receive them "into the armed service of the United States." Lincoln pressured border states to outlaw slavery, and he promoted the Thirteenth Amendment to the U.S. Constitution, which abolished slavery, except as punishment for a crime.
33
- Lincoln managed his own successful re-election campaign. He sought to heal the war-torn nation through reconciliation. On April 14, 1865, just five days after the Confederate surrender at Appomattox, he was attending a play at Ford's Theatre in Washington, D.C., with his wife, Mary, when he was fatally shot by Confederate sympathizer John Wilkes Booth. Lincoln is remembered as a martyr and a national hero for his wartime leadership and for his efforts to preserve the Union and abolish slavery. Lincoln is often ranked in both popular and scholarly polls as the greatest president in American history."""
34
-
35
- text_input = st.text_area("Enter text for analysis:", value=lincoln_text, height=300)
36
 
37
  if st.button("Analyze Text"):
38
  if text_input:
39
- sentiment, confidence = analyze_sentiment(text_input, tokenizer, model)
40
  st.write(f"Sentiment: {sentiment}")
41
  st.write(f"Confidence: {confidence:.2f}")
42
 
@@ -44,15 +70,22 @@ if st.button("Analyze Text"):
44
  word_count = len(text_input.split())
45
  st.write(f"Word count: {word_count}")
46
 
47
- # Most common words (excluding stop words)
48
- nltk.download('stopwords', quiet=True)
49
- stop_words = set(stopwords.words('english'))
50
-
51
- words = [word.lower() for word in text_input.split() if word.isalnum() and word.lower() not in stop_words]
52
  word_freq = Counter(words).most_common(5)
53
 
54
- st.write("Top 5 most common words (excluding stop words):")
55
  for word, freq in word_freq:
56
  st.write(f"- {word}: {freq}")
 
 
 
 
 
 
 
 
 
 
57
  else:
58
  st.write("Please enter some text to analyze.")
 
1
  import streamlit as st
2
+ from transformers import GraphormerForGraphClassification, GraphormerTokenizer
3
+ from datasets import Dataset
4
+ from transformers.models.graphormer.collating_graphormer import preprocess_item, GraphormerDataCollator
5
  import torch
6
+ import networkx as nx
7
+ import matplotlib.pyplot as plt
8
  from collections import Counter
 
 
9
 
10
  @st.cache_resource
11
  def load_model():
12
+ model = GraphormerForGraphClassification.from_pretrained(
13
+ "clefourrier/pcqm4mv2_graphormer_base",
14
+ num_classes=2, # Binary classification (positive/negative sentiment)
15
+ ignore_mismatched_sizes=True,
16
+ )
17
+ tokenizer = GraphormerTokenizer.from_pretrained("clefourrier/pcqm4mv2_graphormer_base")
18
+ return model, tokenizer
19
 
20
def text_to_graph(text):
    """Turn ``text`` into a chain graph with one node per word.

    Words are the whitespace-separated tokens of ``text``. Node ``i`` is the
    ``i``-th word and each pair of consecutive words is linked. Every link
    is listed twice, once per direction: all forward edges first, then all
    backward edges.

    Args:
        text: The input string. Empty or whitespace-only text yields a
            graph with zero nodes and no edges.

    Returns:
        A dict with these keys:
            ``edge_index``: ``[sources, targets]``, two equal-length lists
                of node indices.
            ``num_nodes``: number of words.
            ``node_feat``: one single-element list per word holding the
                code point of the word's first character.
            ``edge_attr``: one ``[1]`` per directed edge.
            ``y``: placeholder label ``[1]``.
    """
    words = text.split()
    num_nodes = len(words)

    # The graph is a simple path, so its edges are (0,1), (1,2), ... and
    # can be generated directly without building a graph object.
    forward_sources = list(range(num_nodes - 1))
    forward_targets = list(range(1, num_nodes))

    edge_index = [
        forward_sources + forward_targets,
        forward_targets + forward_sources,
    ]

    return {
        "edge_index": edge_index,
        "num_nodes": num_nodes,
        # ord() returns a Unicode code point, which is not limited to ASCII.
        # NOTE(review): large code points may exceed the node-feature
        # vocabulary of the downstream model; confirm or clamp there.
        "node_feat": [[ord(word[0])] for word in words],
        # All edges have the same attribute.
        "edge_attr": [[1] for _ in range(2 * len(forward_sources))],
        # Placeholder label; not meaningful for inference.
        "y": [1],
    }
38
+
39
def analyze_text(text, model, tokenizer):
    """Classify ``text`` by running a graph model on its word-chain graph.

    Args:
        text: Input string; must contain at least one whitespace-separated
            word.
        model: Graph classification model. It is called with the collated
            batch as keyword arguments and must return an object whose
            ``logits`` has the class scores along dimension 1
            (index 0 = negative, 1 = positive). Its ``device`` attribute
            decides where the input tensors are placed.
        tokenizer: Unused; kept so existing callers that pass it keep
            working.

    Returns:
        ``(sentiment, confidence, graph)``. ``sentiment`` is ``"Positive"``
        or ``"Negative"``, ``confidence`` is the softmax probability of the
        chosen class, and ``graph`` is the raw dict produced by
        ``text_to_graph``.

    Raises:
        ValueError: If ``text`` contains no words.
    """
    graph = text_to_graph(text)
    if graph["num_nodes"] == 0:
        raise ValueError("text must contain at least one word")

    # One dataset row per graph, with every graph field as its own column.
    # preprocess_item reads edge_index / node_feat / ... from the row, so
    # nesting the whole graph under a single "train" column hides them.
    dataset = Dataset.from_dict({key: [value] for key, value in graph.items()})
    dataset_processed = dataset.map(preprocess_item, batched=False)

    # The collator expects a list of per-graph feature dicts, not a column.
    inputs = GraphormerDataCollator()(list(dataset_processed))
    # Drop the placeholder label so the model does not compute a loss on it.
    inputs.pop("labels", None)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.softmax(outputs.logits, dim=1)
    negative_p = probabilities[0][0]
    positive_p = probabilities[0][1]

    # A tie resolves to "Negative".
    if positive_p > negative_p:
        sentiment, confidence = "Positive", positive_p.item()
    else:
        sentiment, confidence = "Negative", negative_p.item()

    return sentiment, confidence, graph
56
 
57
# ---------------------------------------------------------------------------
# Streamlit page: read text, classify it with the graph model, and show
# simple word statistics plus a drawing of the word-chain graph.
# ---------------------------------------------------------------------------
st.title("Graph-based Text Analysis")

model, tokenizer = load_model()

text_input = st.text_area("Enter text for analysis:", height=200)

if st.button("Analyze Text"):
    # strip() so whitespace-only input is treated as empty and never
    # reaches the model as a zero-node graph.
    if text_input.strip():
        sentiment, confidence, graph = analyze_text(text_input, model, tokenizer)
        st.write(f"Sentiment: {sentiment}")
        st.write(f"Confidence: {confidence:.2f}")

        word_count = len(text_input.split())
        st.write(f"Word count: {word_count}")

        # Most common words. Tokens containing any non-alphanumeric
        # character (e.g. trailing punctuation) are skipped by isalnum().
        words = [word.lower() for word in text_input.split() if word.isalnum()]
        word_freq = Counter(words).most_common(5)

        st.write("Top 5 most common words:")
        for word, freq in word_freq:
            st.write(f"- {word}: {freq}")

        # Visualize graph. Nodes are added explicitly so a single-word
        # text, which has no edges, still draws its one node.
        G = nx.Graph()
        G.add_nodes_from(range(graph["num_nodes"]))
        G.add_edges_from(zip(graph["edge_index"][0], graph["edge_index"][1]))

        # Draw on an explicit figure and pass that figure to Streamlit,
        # not the pyplot module and its global state.
        fig, ax = plt.subplots(figsize=(10, 6))
        nx.draw(
            G,
            ax=ax,
            with_labels=False,
            node_size=30,
            node_color='lightblue',
            edge_color='gray',
        )
        ax.set_title("Text as Graph")
        st.pyplot(fig)
        # Release the figure so repeated runs do not accumulate open figures.
        plt.close(fig)

    else:
        st.write("Please enter some text to analyze.")