# Scraped file-viewer header (not part of the program), kept for reference:
# Spaces: Sleeping
# File size: 4,785 Bytes
# 571bf3f
# (line-number gutter 1-132 omitted)
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import pickle
import matplotlib.pyplot as plt
st.title(":blue[IMDB Dataset of 50k reviews]")


@st.cache_data
def load_data():
    """Read the reviews CSV from the working directory into a DataFrame.

    Decorated with ``st.cache_data`` so Streamlit can reuse the parsed
    frame across script reruns.
    """
    reviews = pd.read_csv('IMDB Dataset.csv')
    return reviews
# Seed the per-session slots on the first run only; on later reruns whatever
# is already stored under each key is left untouched.
for _key, _make_default in (
    ("models", dict),
    ("vectorizer", lambda: None),
    ("accuracy", dict),
    ("report", dict),
):
    if _key not in st.session_state:
        st.session_state[_key] = _make_default()
# Dataset
st.header("Dataset")
df = load_data()
with st.expander("Show Data"):
    st.write(df)

# Encode the string labels as integers: positive -> 1, negative -> 0.
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})
X = df['review']
y = df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=41
)

# Train only when this session has no models yet. The TF-IDF fit lives inside
# this guard on purpose: fitting it at top level would redo the work on every
# rerun even though the fitted vectorizer is already in session state.
if not st.session_state.models:
    vectorizer = TfidfVectorizer()
    X_train_tfidf = vectorizer.fit_transform(X_train)
    # The test matrix is the same for every model, so build it once.
    X_test_tfidf = vectorizer.transform(X_test)

    # models
    models = {
        # "SVM": SVC(kernel='linear'),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Naive Bayes": MultinomialNB(),
    }

    accuracy = {}
    report = {}
    for name, model in models.items():
        model.fit(X_train_tfidf, y_train)
        y_pred = model.predict(X_test_tfidf)
        accuracy[name] = accuracy_score(y_test, y_pred)
        report[name] = classification_report(y_test, y_pred)

    # Publish the results only after every model has been trained and scored.
    # `models` is written last because it is the guard checked above: if the
    # run is interrupted part-way, the next run starts training from scratch
    # instead of skipping the models that were never fitted.
    st.session_state.vectorizer = vectorizer
    st.session_state.accuracy = accuracy
    st.session_state.report = report
    st.session_state.models = models
# Bar chart comparing the stored accuracy of each trained model.
if st.session_state.accuracy:
    model_names = list(st.session_state.accuracy.keys())
    scores = list(st.session_state.accuracy.values())
    # Draw on an explicit Figure and hand that object to Streamlit instead of
    # the pyplot module, then close it so reruns do not pile up open figures.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(model_names, scores, color=['blue', 'orange', 'green'])
    ax.set_ylabel('Accuracy')
    ax.set_title('Model Accuracy Comparison')
    st.pyplot(fig)
    plt.close(fig)

# One classification report per model.
for name in st.session_state.report:
    st.write(f"### Classification Report for {name}:")
    report_value = st.session_state.report[name]
    if isinstance(report_value, str):
        # A plain-text report is a pre-aligned table; st.text keeps the
        # monospace layout, whereas st.dataframe expects tabular data.
        st.text(report_value)
    else:
        st.dataframe(report_value)
st.header("Manual Tryouts", divider='orange')
# Input text from the user
user_input = st.text_area("Enter your Review", "")
if st.button("Predict"):
    # Treat whitespace-only input the same as an empty box.
    review_text = user_input.strip()
    if review_text:
        # Vectorize the review once with the vectorizer stored in session
        # state and reuse the result for all models.
        user_input_tfidf = st.session_state.vectorizer.transform([review_text])
        # Predict using all models; label 1 is shown as "Positive".
        predictions = {
            name: "Positive" if model.predict(user_input_tfidf)[0] == 1 else "Negative"
            for name, model in st.session_state.models.items()
        }
        # Display predictions for each model
        st.write("Predicted Sentiment:")
        for name, label in predictions.items():
            st.write(f"{name}: **{label}**")
    else:
        st.write("Please enter a review.")
# # Logistic Regression
# st.header('Logistic Regression',divider='orange')
# model = LogisticRegression()
# model.fit(X_train_tfidf, y_train)
# y_pred = model.predict(X_test_tfidf)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print(classification_report(y_test, y_pred))
# filename = 'linear_regression_model.pkl'
# with open(filename, 'wb') as model_file:
# pickle.dump(model, model_file)
# st.write("Accuracy:", accuracy_score(y_test, y_pred))
# st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True)
# # Naive Bayes
# st.header("Naive Bayes",divider='orange')
# model_nb = MultinomialNB()
# model_nb.fit(X_train_tfidf, y_train)
# # Evaluate the model
# y_pred = model_nb.predict(X_test_tfidf)
# st.write("Accuracy:", accuracy_score(y_test, y_pred))
# st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True)
# # SVM
# st.header("Support Vector Machine")
# st.caption("Kernel type is linear.")
# model = SVC(kernel='linear') # You can also try 'rbf', 'poly', etc.
# model.fit(X_train_tfidf, y_train)
# y_pred = model.predict(X_test_tfidf)
# st.write("Accuracy:", accuracy_score(y_test, y_pred))
# st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True)
|