Spaces:

syedabdullah32
/

midprojectnlp

Build error

App Files Files Community

syedabdullah32 committed on Dec 5, 2023

Commit

13f9afa

1 Parent(s): 47f00bf

Create sentiment_analysis_app.py

Browse files

Files changed (1) hide show

sentiment_analysis_app.py +77 -0

sentiment_analysis_app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+pip install streamlit pandas numpy scikit-learn nltk
+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+import re
+from nltk.corpus import stopwords
+from nltk.stem import SnowballStemmer
+# Download NLTK resources
+import nltk
+nltk.download('stopwords')
+# Load stopwords
+stopword = set(stopwords.words('english'))
+# Load dataset
+data = pd.read_csv("https://raw.githubusercontent.com/amankharwal/Website-data/master/twitter.csv")
+# Map labels
+data["labels"] = data["class"].map({0: "Hate Speech",
+                                    1: "Offensive Language",
+                                    2: "No Hate and Offensive"})
+# Select relevant columns
+data = data[["tweet", "labels"]]
+# Clean text function
+stemmer = SnowballStemmer("english")
+def clean(text):
+    text = str(text).lower()
+    text = re.sub('\[.*?\]', '', text)
+    text = re.sub('https?://\S+|www\.\S+', '', text)
+    text = re.sub('<.*?>+', '', text)
+    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
+    text = re.sub('\n', '', text)
+    text = re.sub('\w*\d\w*', '', text)
+    text = [word for word in text.split(' ') if word not in stopword]
+    text = " ".join(text)
+    text = [stemmer.stem(word) for word in text.split(' ')]
+    text = " ".join(text)
+    return text
+# Apply text cleaning
+data["tweet"] = data["tweet"].apply(clean)
+# Prepare data for model
+x = np.array(data["tweet"])
+y = np.array(data["labels"])
+cv = CountVectorizer()
+X = cv.fit_transform(x)  # Fit the Data
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
+# Train the model
+clf = DecisionTreeClassifier()
+clf.fit(X_train, y_train)
+# Streamlit app
+st.title("Sentiment Analysis App")
+# User input
+sample = st.text_area("Enter a sentence for sentiment analysis:")
+# Predict and display result
+if st.button("Predict"):
+    sample_cleaned = clean(sample)
+    data_sample = cv.transform([sample_cleaned]).toarray()
+    prediction = clf.predict(data_sample)[0]
+    st.success(f"Sentiment: {prediction}")
+# Display dataset
+st.subheader("Dataset")
+st.write(data.head())
+streamlit run sentiment_analysis_app.py