Spaces:
Sleeping
Sleeping
Update pages/21_GraphRag.py
Browse files- pages/21_GraphRag.py +40 -79
pages/21_GraphRag.py
CHANGED
@@ -1,97 +1,58 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
-
from transformers import AutoTokenizer, AutoModel
|
4 |
import torch
|
5 |
-
import
|
6 |
-
import
|
7 |
-
|
8 |
-
# Diagnostic Section
|
9 |
-
st.title("Graphrag Module Investigation")
|
10 |
-
|
11 |
-
st.write("Contents of graphrag module:")
|
12 |
-
module_contents = dir(graphrag)
|
13 |
-
st.write(module_contents)
|
14 |
-
|
15 |
-
st.write("Detailed information about graphrag module contents:")
|
16 |
-
for item in module_contents:
|
17 |
-
attr = getattr(graphrag, item)
|
18 |
-
st.write(f"Name: {item}")
|
19 |
-
st.write(f"Type: {type(attr)}")
|
20 |
-
|
21 |
-
if inspect.isclass(attr):
|
22 |
-
st.write("Class Methods and Attributes:")
|
23 |
-
for name, value in inspect.getmembers(attr):
|
24 |
-
if not name.startswith('_'): # Exclude private methods/attributes
|
25 |
-
st.write(f" - {name}: {type(value)}")
|
26 |
-
|
27 |
-
if callable(attr):
|
28 |
-
st.write("Signature:")
|
29 |
-
st.write(inspect.signature(attr))
|
30 |
-
st.write("Docstring:")
|
31 |
-
st.write(inspect.getdoc(attr))
|
32 |
-
|
33 |
-
st.write("---")
|
34 |
-
|
35 |
-
# Main Application Section
|
36 |
-
st.title("Graphrag Text Analysis")
|
37 |
|
38 |
@st.cache_resource
|
39 |
def load_model():
|
40 |
-
|
41 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
42 |
-
|
43 |
-
|
44 |
-
# Initialize Graphrag model
|
45 |
-
# Note: This part may need to be adjusted based on the actual structure of graphrag
|
46 |
-
model = None
|
47 |
-
for item in module_contents:
|
48 |
-
if 'model' in item.lower() or 'rag' in item.lower():
|
49 |
-
model_class = getattr(graphrag, item)
|
50 |
-
if inspect.isclass(model_class):
|
51 |
-
try:
|
52 |
-
# Attempt to initialize the model
|
53 |
-
# You may need to adjust the parameters based on the actual class signature
|
54 |
-
model = model_class(bert_model)
|
55 |
-
st.success(f"Successfully initialized {item}")
|
56 |
-
break
|
57 |
-
except Exception as e:
|
58 |
-
st.write(f"Tried initializing {item}, but got error: {str(e)}")
|
59 |
-
|
60 |
-
if model is None:
|
61 |
-
st.error("Could not initialize any Graphrag model. Please check the module structure.")
|
62 |
-
|
63 |
return tokenizer, model
|
64 |
|
65 |
-
def
|
66 |
-
if model is None:
|
67 |
-
return "Model not initialized"
|
68 |
-
|
69 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
|
70 |
with torch.no_grad():
|
71 |
outputs = model(**inputs)
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
if
|
76 |
-
logits = outputs.logits
|
77 |
-
elif isinstance(outputs, torch.Tensor):
|
78 |
-
logits = outputs
|
79 |
-
else:
|
80 |
-
return f"Unexpected output format: {type(outputs)}"
|
81 |
|
82 |
-
|
83 |
-
|
|
|
84 |
|
85 |
tokenizer, model = load_model()
|
86 |
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
89 |
if st.button("Analyze Text"):
|
90 |
if text_input:
|
91 |
-
|
92 |
-
st.write(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
else:
|
94 |
-
st.write("Please enter some text to analyze.")
|
95 |
-
|
96 |
-
# Note about sample data
|
97 |
-
st.markdown("Note: To use a CSV file, you would typically upload it and process each row. For simplicity, we're using direct text input in this example.")
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
3 |
import torch
|
4 |
+
from collections import Counter
|
5 |
+
import nltk
|
6 |
+
from nltk.corpus import stopwords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
@st.cache_resource
def load_model():
    """Load the tokenizer and sequence-classification model for the app.

    Both objects are created from the same
    ``distilbert-base-uncased-finetuned-sst-2-english`` checkpoint. The
    function is wrapped in ``st.cache_resource``, so the loaded pair is
    handed to that cache rather than rebuilt by this function's callers.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )
|
14 |
|
15 |
+
def analyze_sentiment(text, tokenizer, model):
    """Classify ``text`` as positive or negative and report the confidence.

    Args:
        text: The text to classify.
        tokenizer: Callable invoked as
            ``tokenizer(text, return_tensors="pt", truncation=True,
            max_length=512)``; its result is unpacked into ``model``.
        model: Callable invoked with the tokenizer output as keyword
            arguments; its result must expose a ``logits`` tensor.

    Returns:
        A ``(sentiment, confidence)`` tuple. ``sentiment`` is ``"Positive"``
        when the probability at index 1 is strictly greater than the one at
        index 0, otherwise ``"Negative"``. ``confidence`` is the probability
        of the reported class as a Python float.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        result = model(**encoded)

    # Only the first row of the batch is scored.
    scores = torch.softmax(result.logits, dim=1)[0]
    negative_score, positive_score = scores[0], scores[1]

    # A strict comparison means an exact tie is reported as "Negative".
    if positive_score > negative_score:
        return "Positive", positive_score.item()
    return "Negative", negative_score.item()
|
25 |
+
|
26 |
+
# Page heading.
st.title("Text Sentiment Analysis")

# Obtain the tokenizer/model pair; load_model is wrapped in st.cache_resource.
tokenizer, model = load_model()

# Sample biography of Abraham Lincoln, used as the default contents of the
# text area below.
lincoln_text = """Abraham Lincoln (⫽ˈlɪŋkən⫽ LING-kən; February 12, 1809 – April 15, 1865) was an American lawyer, politician, and statesman who served as the 16th president of the United States from 1861 until his assassination in 1865. Lincoln led the United States through the American Civil War, defending the nation as a constitutional union, defeating the insurgent Confederacy, playing a major role in the abolition of slavery, expanding the power of the federal government, and modernizing the U.S. economy.
Lincoln was born into poverty in a log cabin in Kentucky and was raised on the frontier, mainly in Indiana. He was self-educated and became a lawyer, Whig Party leader, Illinois state legislator, and U.S. representative from Illinois. In 1849, he returned to his successful law practice in Springfield, Illinois. In 1854, angered by the Kansas–Nebraska Act, which opened the territories to slavery, he re-entered politics. He soon became a leader of the new Republican Party. He reached a national audience in the 1858 Senate campaign debates against Stephen A. Douglas. Lincoln ran for president in 1860, sweeping the North to gain victory. Pro-slavery elements in the South viewed his election as a threat to slavery, and Southern states began seceding from the nation. They formed the Confederate States of America, which began seizing federal military bases in the South. A little over one month after Lincoln assumed the presidency, Confederate forces attacked Fort Sumter, a U.S. fort in South Carolina. Following the bombardment, Lincoln mobilized forces to suppress the rebellion and restore the union.
Lincoln, a moderate Republican, had to navigate a contentious array of factions with friends and opponents from both the Democratic and Republican parties. His allies, the War Democrats and the Radical Republicans, demanded harsh treatment of the Southern Confederates. He managed the factions by exploiting their mutual enmity, carefully distributing political patronage, and by appealing to the American people. Anti-war Democrats (called "Copperheads") despised Lincoln, and some irreconcilable pro-Confederate elements went so far as to plot his assassination. His Gettysburg Address came to be seen as one of the greatest and most influential statements of American national purpose. Lincoln closely supervised the strategy and tactics in the war effort, including the selection of generals, and implemented a naval blockade of the South's trade. He suspended habeas corpus in Maryland and elsewhere, and he averted war with Britain by defusing the Trent Affair. In 1863, he issued the Emancipation Proclamation, which declared the slaves in the states "in rebellion" to be free. It also directed the Army and Navy to "recognize and maintain the freedom of said persons" and to receive them "into the armed service of the United States." Lincoln pressured border states to outlaw slavery, and he promoted the Thirteenth Amendment to the U.S. Constitution, which abolished slavery, except as punishment for a crime.
Lincoln managed his own successful re-election campaign. He sought to heal the war-torn nation through reconciliation. On April 14, 1865, just five days after the Confederate surrender at Appomattox, he was attending a play at Ford's Theatre in Washington, D.C., with his wife, Mary, when he was fatally shot by Confederate sympathizer John Wilkes Booth. Lincoln is remembered as a martyr and a national hero for his wartime leadership and for his efforts to preserve the Union and abolish slavery. Lincoln is often ranked in both popular and scholarly polls as the greatest president in American history."""

# Editable input box, pre-filled with the sample text; the value is read by
# the "Analyze Text" handler further down.
text_input = st.text_area("Enter text for analysis:", value=lincoln_text, height=300)
|
36 |
+
|
37 |
# Handler for the "Analyze Text" button: runs sentiment analysis on the text
# area contents and reports simple word statistics.
import string  # stdlib; imported here because only this handler needs it

if st.button("Analyze Text"):
    # Whitespace-only input counts as "no text" so the model is never run on
    # blank content.
    if text_input and text_input.strip():
        sentiment, confidence = analyze_sentiment(text_input, tokenizer, model)
        st.write(f"Sentiment: {sentiment}")
        st.write(f"Confidence: {confidence:.2f}")

        # Additional analysis
        tokens = text_input.split()
        st.write(f"Word count: {len(tokens)}")

        # Most common words (excluding stop words).
        # Fetch the stop-word corpus only when it is missing locally instead
        # of invoking the downloader on every button click.
        try:
            stop_words = set(stopwords.words('english'))
        except LookupError:
            nltk.download('stopwords', quiet=True)
            stop_words = set(stopwords.words('english'))

        # Strip punctuation attached to a token ("Lincoln," -> "lincoln")
        # before the alphanumeric check; otherwise every word next to a comma
        # or full stop is silently dropped from the counts.
        normalized = (token.strip(string.punctuation).lower() for token in tokens)
        words = [word for word in normalized if word.isalnum() and word not in stop_words]
        word_freq = Counter(words).most_common(5)

        st.write("Top 5 most common words (excluding stop words):")
        for word, freq in word_freq:
            st.write(f"- {word}: {freq}")
    else:
        st.write("Please enter some text to analyze.")
|
|
|
|
|
|