Spaces:
Build error
Build error
# Install dependencies first: pip install streamlit pandas numpy scikit-learn nltk
import re
import string

import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Download NLTK resources
nltk.download("stopwords")

# English stop words, collected into a set
stopword = set(stopwords.words("english"))

# Fetch the labelled tweet dataset
_DATASET_URL = "https://raw.githubusercontent.com/amankharwal/Website-data/master/twitter.csv"
data = pd.read_csv(_DATASET_URL)

# Translate the numeric "class" column into readable label names
_CLASS_LABELS = {
    0: "Hate Speech",
    1: "Offensive Language",
    2: "No Hate and Offensive",
}
data["labels"] = data["class"].map(_CLASS_LABELS)

# Keep only the tweet text and its label
data = data[["tweet", "labels"]]
# Text-cleaning helpers
stemmer = SnowballStemmer("english")

# Patterns deleted from the text, applied in this order (URLs must go before
# punctuation, otherwise "://" and "." are stripped and the URL no longer
# matches). Raw strings keep regex escapes such as \[ \S \w \d from being
# interpreted as (invalid) string escapes.
_STRIP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[.*?\]",                               # [bracketed] spans
        r"https?://\S+|www\.\S+",                 # URLs
        r"<.*?>+",                                # <tag>-like spans
        "[%s]" % re.escape(string.punctuation),   # ASCII punctuation characters
        r"\n",                                    # newlines
        r"\w*\d\w*",                              # any token containing a digit
    )
)


def clean(text):
    """Normalize a piece of text for bag-of-words vectorization.

    The input is converted with ``str()`` and lower-cased, then every match
    of ``_STRIP_PATTERNS`` is deleted. The remainder is split on single
    spaces, words found in the module-level ``stopword`` set are dropped,
    and each surviving word is stemmed with the module-level ``stemmer``.

    Note that newlines are deleted rather than replaced by a space, so words
    on adjacent lines are joined together.

    Args:
        text: Value to clean; non-string values are stringified first.

    Returns:
        The stemmed, stop-word-free words joined by single spaces.
    """
    text = str(text).lower()
    for pattern in _STRIP_PATTERNS:
        text = pattern.sub("", text)
    # Filtering and stemming in one pass gives the same result as the
    # join / re-split round trip, because no token contains a space.
    return " ".join(
        stemmer.stem(word) for word in text.split(" ") if word not in stopword
    )
# Replace every tweet with its cleaned form
data["tweet"] = data["tweet"].apply(clean)

# Cleaned tweets are the inputs, label names are the targets
x, y = (np.array(data[column]) for column in ("tweet", "labels"))

# Learn the bag-of-words vocabulary and encode all tweets
cv = CountVectorizer()
X = cv.fit_transform(x)

# Hold out 33% of the rows; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Fit a decision tree on the training portion
clf = DecisionTreeClassifier().fit(X_train, y_train)
# Streamlit app
st.title("Sentiment Analysis App")

# User input
sample = st.text_area("Enter a sentence for sentiment analysis:")

# Predict and display result
if st.button("Predict"):
    if not sample or not sample.strip():
        # With no text the vectorizer produces an all-zero row; classifying
        # it would show a label that reflects nothing the user typed.
        st.warning("Please enter some text before predicting.")
    else:
        # Apply the same cleaning used on the training tweets, then encode
        # the text with the already-fitted vectorizer.
        sample_cleaned = clean(sample)
        data_sample = cv.transform([sample_cleaned]).toarray()
        prediction = clf.predict(data_sample)[0]
        st.success(f"Sentiment: {prediction}")

# Display dataset (first rows of the two-column tweet/labels frame)
st.subheader("Dataset")
st.write(data.head())
# Run the app with: streamlit run sentiment_analysis_app.py