Spaces:

Steph974
/

Marrakech_sentiment_analysis

Sleeping

App Files Files Community

Steph974 committed on Mar 9, 2024

Commit

4aa6561

verified ·

1 Parent(s): da28fbe

Upload 5 files

Browse files

Files changed (5) hide show

config.json +31 -0
gradio.ipynb +306 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "roberta-large",
+  "architectures": [
+    "RobertaForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 1,
+  "type_vocab_size": 1,
+  "vocab_size": 50265,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  }
+}

gradio.ipynb ADDED Viewed

	@@ -0,0 +1,306 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<center>\n",
+    "\n",
+    "## [S. Mussard](https://sites.google.com/view/cv-stphane-mussard/accueil \"Homepage\")\n",
+    "\n",
+    "# UM6P\n",
+    "\n",
+    "# Natural Language Processing: LOGIT\n",
+    "\n",
+    "\n",
+    "<center> <a href=\"https://www.fgses-um6p.ma/\"><img src=\"UM6P.png\" style=\"float: left; max-width: 500px; width: 20\" alt=\"UM6P\" /></a></center>\n",
+    "\n",
+    "\n",
+    "\n",
+    "<div align=\"center\"> \n",
+    "<a href=\"https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html\"><img src=\"http://scikit-learn.org/stable/_static/scikit-learn-logo-small.png\" style=\"max-width: 180px; display: inline\" alt=\"Scikit-Learn\"/></a>\n",
+    "</div>\n",
+    "<div align=\"center\"> <a href=\"https://www.python.org/\"><img src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/f/f8/Python_logo_and_wordmark.svg/390px-Python_logo_and_wordmark.svg.png\" style=\"max-width: 150px; display: inline\" alt=\"Python\"/></a> \n",
+    "</div>\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<div align=\"center\">\n",
+    "\n",
+    "## Sentiment Analysis\n",
+    "\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\smussa01\\AppData\\Roaming\\Python\\Python37\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Importation  \n",
+    "\n",
+    "%matplotlib inline \n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn import metrics\n",
+    "import torch\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "from transformers import AutoModel, AutoTokenizer\n",
+    "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
+    "\n",
+    "import gradio as gr\n",
+    "from gradio.components import Label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at S:\\Mes Documents\\Cours\\Cours-NLP\\PFE kenza\\poids were not used when initializing RobertaModel: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']\n",
+      "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of RobertaModel were not initialized from the model checkpoint at S:\\Mes Documents\\Cours\\Cours-NLP\\PFE kenza\\poids and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Local checkpoint directory. A forward slash is used because a backslash followed by\n",
+    "# 'p' is an invalid string escape and is not a path separator outside Windows.\n",
+    "path = \"./poids\"\n",
+    "# AutoModel loads the bare encoder only: the classifier.* weights stored in the\n",
+    "# checkpoint are not used. The checkpoint resolves to the built-in RobertaModel\n",
+    "# class, so trust_remote_code is not needed.\n",
+    "model = AutoModel.from_pretrained(path)\n",
+    "\n",
+    "\n",
+    "class CamembertClass(torch.nn.Module):\n",
+    "    \"\"\"Transformer encoder followed by a two-layer classification head.\n",
+    "\n",
+    "    Args:\n",
+    "        num_labels: Number of output logits. Defaults to 3, the value that was\n",
+    "            previously hard-coded; pass 2 for a NEGATIVE/POSITIVE classifier.\n",
+    "        hidden_size: Width of the encoder output fed to the head (default 1024).\n",
+    "        encoder: Encoder module to wrap. When None, the module-level `model`\n",
+    "            is used, as before.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, num_labels=3, hidden_size=1024, encoder=None):\n",
+    "        super(CamembertClass, self).__init__()\n",
+    "        # Fall back to the global encoder so CamembertClass() keeps working unchanged.\n",
+    "        self.l1 = model if encoder is None else encoder\n",
+    "        self.dropout = torch.nn.Dropout(0.1)\n",
+    "        self.pre_classifier = torch.nn.Linear(hidden_size, hidden_size)\n",
+    "        self.classifier = torch.nn.Linear(hidden_size, num_labels)\n",
+    "\n",
+    "    def forward(self, input_ids, attention_mask, token_type_ids=None):\n",
+    "        \"\"\"Return the classifier logits for a batch of tokenized inputs.\n",
+    "\n",
+    "        `token_type_ids` is optional and is forwarded to the encoder as given.\n",
+    "        \"\"\"\n",
+    "        encoder_output = self.l1(\n",
+    "            input_ids=input_ids,\n",
+    "            attention_mask=attention_mask,\n",
+    "            token_type_ids=token_type_ids,\n",
+    "        )\n",
+    "        hidden_state = encoder_output[0]\n",
+    "        # Use the hidden state at sequence position 0 as the sentence representation.\n",
+    "        pooled = hidden_state[:, 0]\n",
+    "        pooled = self.pre_classifier(pooled)\n",
+    "        pooled = torch.relu(pooled)\n",
+    "        pooled = self.dropout(pooled)\n",
+    "        return self.classifier(pooled)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Paths are relative to the notebook so that the files stored next to it\n",
+    "# (pytorch_model.bin, config.json, vocab.json, merges.txt) are found on any machine.\n",
+    "path = \"pytorch_model.bin\"\n",
+    "# The file holds a whole pickled module (it is called directly later on), not a\n",
+    "# state dict. WARNING: torch.load unpickles arbitrary objects, so only load a\n",
+    "# checkpoint that comes from a trusted source.\n",
+    "# NOTE: this rebinds `model`, which previously referred to the bare encoder.\n",
+    "model = torch.load(path, map_location=\"cpu\")\n",
+    "path_tokenizer = \".\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(path_tokenizer)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pip install pydantic==1.10.7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
+      "Running on public URL: https://c6de28517ce6caf32f.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://c6de28517ce6caf32f.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.eval()  # Switch the model to evaluation mode\n",
+    "\n",
+    "\n",
+    "def predict(text):\n",
+    "    \"\"\"Inference function for Gradio: score one text as negative or positive.\n",
+    "\n",
+    "    Args:\n",
+    "        text: Raw input string; it is truncated to at most 512 tokens.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping 'Negative Sentiment' and 'Positive Sentiment' to float\n",
+    "        probabilities taken from a softmax over the model logits.\n",
+    "    \"\"\"\n",
+    "    inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=512)\n",
+    "\n",
+    "    # Some tokenizers do not produce segment ids; None is forwarded in that case.\n",
+    "    token_type_ids = inputs.get('token_type_ids', None)\n",
+    "\n",
+    "    with torch.no_grad():\n",
+    "        outputs = model(\n",
+    "            input_ids=inputs['input_ids'],\n",
+    "            attention_mask=inputs['attention_mask'],\n",
+    "            token_type_ids=token_type_ids,\n",
+    "        )\n",
+    "\n",
+    "    # The model may return the logits tensor itself, an output object exposing\n",
+    "    # `.logits`, or a tuple whose first element is the logits.\n",
+    "    if isinstance(outputs, torch.Tensor):\n",
+    "        logits = outputs\n",
+    "    elif hasattr(outputs, 'logits'):\n",
+    "        logits = outputs.logits\n",
+    "    else:\n",
+    "        logits = outputs[0]\n",
+    "\n",
+    "    # Convert logits to probabilities for the single input in the batch.\n",
+    "    probabilities = torch.softmax(logits, dim=1).detach().cpu().numpy()[0]\n",
+    "    classes = ['Negative Sentiment', 'Positive Sentiment']\n",
+    "    return {label: float(probabilities[i]) for i, label in enumerate(classes)}\n",
+    "\n",
+    "# Build the Gradio interface: one text box in, a label with the top two classes out.\n",
+    "iface = gr.Interface(\n",
+    "    fn=predict,\n",
+    "    inputs=gr.components.Textbox(placeholder=\"Enter your text here...\"),\n",
+    "    outputs=gr.components.Label(num_top_classes=2),\n",
+    ")\n",
+    "# share=True requests a temporary public URL in addition to the local one.\n",
+    "iface.launch(share=True)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "###  <span style=\"color:blue\">Quick check of the `predict` function</span>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'Negative Sentiment': 0.8629835844039917,\n",
+       " 'Positive Sentiment': 0.1370164006948471}"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Quick manual check: call predict() directly on one sample sentence.\n",
+    "predict(\"Marrakech is a poop\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7868\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7868/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def image_clf(inp):\n",
+    "    \"\"\"Dummy classifier: ignores `inp` and always returns the same two scores.\"\"\"\n",
+    "    fixed_scores = dict(cat=0.3, dog=0.7)\n",
+    "    return fixed_scores\n",
+    "# Wire the dummy classifier into an image-to-label interface and launch it.\n",
+    "demo = gr.Interface(\n",
+    "    fn=image_clf,\n",
+    "    inputs=\"image\",\n",
+    "    outputs=\"label\",\n",
+    ")\n",
+    "demo.launch()"
+   ]
+  }
+ ],
+ "metadata": {
+  "hide_input": false,
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.8"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {
+    "height": "244px",
+    "width": "252px"
+   },
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": "block",
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b89cef2de03b23b80a2163335e82b692af1e92a8ff30d318dfd17e017f1fa63
+size 1425885920

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff