Spaces:

Steph974
/

Marrakech_sentiment_analysis

Sleeping

App Files Files Community

Steph974 committed on Mar 9, 2024

Commit

35abf20

verified ·

1 Parent(s): bca559d

Upload gradio - Copie.ipynb

Browse files

Files changed (1) hide show

gradio - Copie.ipynb +226 -0

gradio - Copie.ipynb ADDED Viewed

	@@ -0,0 +1,226 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<center>\n",
+    "\n",
+    "## [S. Mussard](https://sites.google.com/view/cv-stphane-mussard/accueil \"Homepage\")\n",
+    "\n",
+    "# UM6P\n",
+    "\n",
+    "# Natural Language Processing: LOGIT\n",
+    "\n",
+    "\n",
+    "<center> <a href=\"https://www.fgses-um6p.ma/\"><img src=\"UM6P.png\" style=\"float: left; max-width: 500px; width: 20\" /></a> </center>\n",
+    "\n",
+    "\n",
+    "\n",
+    "<div align=\"center\"> \n",
+    "<a href=\"https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html\"><img src=\"http://scikit-learn.org/stable/_static/scikit-learn-logo-small.png\" style=\"max-width: 180px; display: inline\" alt=\"Scikit-Learn\"/></a>\n",
+    "</div>\n",
+    "<div align=\"center\"> <a href=\"https://www.python.org/\"><img src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/f/f8/Python_logo_and_wordmark.svg/390px-Python_logo_and_wordmark.svg.png\" style=\"max-width: 150px; display: inline\" alt=\"Python\"/></a> \n",
+    "</div>\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<div align=\"center\">\n",
+    "\n",
+    "## Sentiment Analysis\n",
+    "\n",
+    "</div>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Importation  \n",
+    "\n",
+    "%matplotlib inline \n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn import metrics\n",
+    "import torch\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "from transformers import AutoModel, AutoTokenizer\n",
+    "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
+    "\n",
+    "import gradio as gr\n",
+    "from gradio.components import Label"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the model checkpoint at ./poids were not used when initializing RobertaModel: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']\n",
+      "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of RobertaModel were not initialized from the model checkpoint at ./poids and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load the pretrained encoder from the local checkpoint directory.\n",
+    "# trust_remote_code is deliberately left at its default (False): the recorded load log\n",
+    "# shows the built-in RobertaModel class being initialized, so no Python code shipped\n",
+    "# with the checkpoint needs to be executed.\n",
+    "path = \"./weights\"\n",
+    "model = AutoModel.from_pretrained(path)\n",
+    "class CamembertClass(torch.nn.Module):\n",
+    "    \"\"\"Sequence classifier: the pretrained encoder followed by a small dense head.\n",
+    "\n",
+    "    The head reads the encoder output at index 0 of the second axis and maps it\n",
+    "    through Linear(1024 -> 1024), ReLU, Dropout(0.1) and Linear(1024 -> 3).\n",
+    "    `forward` returns the raw logits; no softmax is applied here.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        # Encoder: the module-level `model` as bound when the instance is created.\n",
+    "        self.l1 = model\n",
+    "        self.dropout = torch.nn.Dropout(0.1)\n",
+    "        self.pre_classifier = torch.nn.Linear(1024, 1024)\n",
+    "        self.classifier = torch.nn.Linear(1024, 3)\n",
+    "\n",
+    "    def forward(self, input_ids, attention_mask, token_type_ids):\n",
+    "        \"\"\"Run the encoder and the classification head.\n",
+    "\n",
+    "        Args:\n",
+    "            input_ids: token ids, forwarded to the encoder.\n",
+    "            attention_mask: attention mask, forwarded to the encoder.\n",
+    "            token_type_ids: segment ids, forwarded to the encoder.\n",
+    "\n",
+    "        Returns:\n",
+    "            The output of the final linear layer (3 raw logits per sequence,\n",
+    "            assuming the encoder output is batch-first - TODO confirm).\n",
+    "        \"\"\"\n",
+    "        encoder_out = self.l1(\n",
+    "            input_ids=input_ids,\n",
+    "            attention_mask=attention_mask,\n",
+    "            token_type_ids=token_type_ids,\n",
+    "        )\n",
+    "        # First element of the encoder output, sliced at position 0 of the second axis.\n",
+    "        first_token = encoder_out[0][:, 0]\n",
+    "        features = torch.relu(self.pre_classifier(first_token))\n",
+    "        return self.classifier(self.dropout(features))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#model_gradio = CamembertClass()\n",
+    "path = \"./pytorch_model.bin\"\n",
+    "model = torch.load(path, map_location=\"cpu\")\n",
+    "path_tokenizer = \"./\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(path_tokenizer)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pip install pydantic==1.10.7"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7860\n",
+      "Running on public URL: https://93ecddda8853b625c0.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://93ecddda8853b625c0.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.eval()  # Mettez votre modèle en mode évaluation\n",
+    "\n",
+    "# Fonction d'inférence pour Gradio\n",
+    "def predict(text):\n",
+    "    \"\"\"Classify `text` and return a {label: probability} mapping for the Gradio output.\n",
+    "\n",
+    "    Args:\n",
+    "        text: the string entered in the Gradio textbox.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping a class label to its softmax probability (a Python float),\n",
+    "        with one entry per logit produced by the model.\n",
+    "    \"\"\"\n",
+    "    # Labels by logit index.\n",
+    "    # NOTE(review): only two names are listed, while the classification head defined in this\n",
+    "    # notebook has 3 outputs - confirm the label order and add the missing name.\n",
+    "    classes = ['Negative Sentiment', 'Positive Sentiment']\n",
+    "\n",
+    "    inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=512)\n",
+    "\n",
+    "    with torch.no_grad():\n",
+    "        # The model returns the logits tensor directly.\n",
+    "        logits = model(\n",
+    "            input_ids=inputs['input_ids'],\n",
+    "            attention_mask=inputs['attention_mask'],\n",
+    "            token_type_ids=inputs.get('token_type_ids', None),  # some tokenizers emit no segment ids\n",
+    "        )\n",
+    "\n",
+    "    # Softmax over the class axis; [0] selects the row for the single input text.\n",
+    "    probabilities = torch.softmax(logits, dim=1).detach().cpu().numpy()[0]\n",
+    "\n",
+    "    # Report every class the model scores. Iterating over `classes` instead would silently\n",
+    "    # drop any extra logit and return probabilities that no longer sum to 1.\n",
+    "    return {\n",
+    "        (classes[i] if i < len(classes) else f\"Class {i} (unlabeled)\"): float(p)\n",
+    "        for i, p in enumerate(probabilities)\n",
+    "    }\n",
+    "\n",
+    "# Build the Gradio interface: a free-text box as input, a label widget as output.\n",
+    "iface = gr.Interface(\n",
+    "    fn=predict,\n",
+    "    inputs=gr.components.Textbox(placeholder=\"Enter your text here...\"),\n",
+    "    outputs=Label(num_top_classes=2),\n",
+    ")\n",
+    "# share=True also requests a temporary public gradio.live URL besides the local server.\n",
+    "iface.launch(share=True)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "hide_input": false,
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.8"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {
+    "height": "244px",
+    "width": "252px"
+   },
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": "block",
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}