diff --git "a/MLOps NLP.ipynb" "b/MLOps NLP.ipynb" new file mode 100644--- /dev/null +++ "b/MLOps NLP.ipynb" @@ -0,0 +1,879 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Importing Libraries***" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import preprocess_kgptalkie as ps" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | review | \n", + "sentiment | \n", + "
---|---|---|
0 | \n", + "One of the other reviewers has mentioned that ... | \n", + "positive | \n", + "
1 | \n", + "A wonderful little production. <br /><br />The... | \n", + "positive | \n", + "
2 | \n", + "I thought this was a wonderful way to spend ti... | \n", + "positive | \n", + "
3 | \n", + "Basically there's a family where a little boy ... | \n", + "negative | \n", + "
4 | \n", + "Petter Mattei's \"Love in the Time of Money\" is... | \n", + "positive | \n", + "
... | \n", + "... | \n", + "... | \n", + "
49995 | \n", + "I thought this movie did a down right good job... | \n", + "positive | \n", + "
49996 | \n", + "Bad plot, bad dialogue, bad acting, idiotic di... | \n", + "negative | \n", + "
49997 | \n", + "I am a Catholic taught in parochial elementary... | \n", + "negative | \n", + "
49998 | \n", + "I'm going to have to disagree with the previou... | \n", + "negative | \n", + "
49999 | \n", + "No one expects the Star Trek movies to be high... | \n", + "negative | \n", + "
50000 rows × 2 columns
\n", + "\n", + " | review | \n", + "sentiment | \n", + "
---|---|---|
0 | \n", + "One of the other reviewers has mentioned that ... | \n", + "positive | \n", + "
1 | \n", + "A wonderful little production. <br /><br />The... | \n", + "positive | \n", + "
2 | \n", + "I thought this was a wonderful way to spend ti... | \n", + "positive | \n", + "
3 | \n", + "Basically there's a family where a little boy ... | \n", + "negative | \n", + "
4 | \n", + "Petter Mattei's \"Love in the Time of Money\" is... | \n", + "positive | \n", + "
\n", + " | review | \n", + "sentiment | \n", + "word_counts | \n", + "char_counts | \n", + "avg_wordlength | \n", + "stopwords_counts | \n", + "review_without_stopwords | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "one of the other reviewers has mentioned that ... | \n", + "positive | \n", + "307 | \n", + "1455 | \n", + "4.739414 | \n", + "135 | \n", + "reviewers mentioned watching 1 oz episode you'... | \n", + "
1 | \n", + "a wonderful little production. the filming tec... | \n", + "positive | \n", + "162 | \n", + "837 | \n", + "5.166667 | \n", + "71 | \n", + "wonderful little production. filming technique... | \n", + "
2 | \n", + "i thought this was a wonderful way to spend ti... | \n", + "positive | \n", + "166 | \n", + "761 | \n", + "4.584337 | \n", + "81 | \n", + "thought wonderful way spend time hot summer we... | \n", + "
3 | \n", + "basically there's a family where a little boy ... | \n", + "negative | \n", + "138 | \n", + "611 | \n", + "4.427536 | \n", + "63 | \n", + "basically there's family little boy (jake) thi... | \n", + "
4 | \n", + "petter mattei's \"love in the time of money\" is... | \n", + "positive | \n", + "230 | \n", + "1088 | \n", + "4.730435 | \n", + "107 | \n", + "petter mattei's \"love time money\" visually stu... | \n", + "
Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])
TfidfVectorizer()
LinearSVC()