Spaces:

menikev
/

TestApp

Sleeping

App Files Files Community

menikev commited on Apr 15, 2024

Commit

ec4a718

verified ·

1 Parent(s): d2ed505

Upload full_inference_pipeline.ipynb

Browse files

Files changed (1) hide show

notebook/full_inference_pipeline.ipynb +989 -0

notebook/full_inference_pipeline.ipynb ADDED Viewed

	@@ -0,0 +1,989 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# Pinned to the versions recorded in this cell's saved output so a fresh runtime rebuilds the same environment.\n",
+        "# %pip (rather than ! pip) installs into the environment of the running kernel.\n",
+        "%pip install faknow==0.0.3 sentence-transformers==2.6.1 chromadb==0.4.24\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "83T0FpMEgAK7",
+        "outputId": "4efafed8-69d4-4575-b473-825e6931b4c5"
+      },
+      "execution_count": 27,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: faknow in /usr/local/lib/python3.10/dist-packages (0.0.3)\n",
+            "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (2.6.1)\n",
+            "Requirement already satisfied: chromadb in /usr/local/lib/python3.10/dist-packages (0.4.24)\n",
+            "Requirement already satisfied: transformers>=4.26.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.38.2)\n",
+            "Requirement already satisfied: numpy>=1.23.4 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.25.2)\n",
+            "Requirement already satisfied: pandas>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.5.3)\n",
+            "Requirement already satisfied: scikit-learn>=1.1.3 in /usr/local/lib/python3.10/dist-packages (from faknow) (1.2.2)\n",
+            "Requirement already satisfied: tensorboard>=2.10.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (2.15.2)\n",
+            "Requirement already satisfied: tqdm>=4.64.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.66.2)\n",
+            "Requirement already satisfied: jieba>=0.42.1 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.42.1)\n",
+            "Requirement already satisfied: gensim>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (4.3.2)\n",
+            "Requirement already satisfied: pillow>=9.3.0 in /usr/local/lib/python3.10/dist-packages (from faknow) (9.4.0)\n",
+            "Requirement already satisfied: nltk>=3.7 in /usr/local/lib/python3.10/dist-packages (from faknow) (3.8.1)\n",
+            "Requirement already satisfied: sphinx-markdown-tables>=0.0.17 in /usr/local/lib/python3.10/dist-packages (from faknow) (0.0.17)\n",
+            "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.2.1+cu121)\n",
+            "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.11.4)\n",
+            "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.20.3)\n",
+            "Requirement already satisfied: build>=1.0.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.2.1)\n",
+            "Requirement already satisfied: requests>=2.28 in /usr/local/lib/python3.10/dist-packages (from chromadb) (2.31.0)\n",
+            "Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.10/dist-packages (from chromadb) (2.6.4)\n",
+            "Requirement already satisfied: chroma-hnswlib==0.7.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.7.3)\n",
+            "Requirement already satisfied: fastapi>=0.95.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.110.0)\n",
+            "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.29.0)\n",
+            "Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.5.0)\n",
+            "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.10.0)\n",
+            "Requirement already satisfied: pulsar-client>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.4.0)\n",
+            "Requirement already satisfied: onnxruntime>=1.14.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.17.1)\n",
+            "Requirement already satisfied: opentelemetry-api>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.24.0)\n",
+            "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.24.0)\n",
+            "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.45b0)\n",
+            "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.24.0)\n",
+            "Requirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.15.2)\n",
+            "Requirement already satisfied: pypika>=0.48.9 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.48.9)\n",
+            "Requirement already satisfied: overrides>=7.3.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (7.7.0)\n",
+            "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.4.0)\n",
+            "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.62.1)\n",
+            "Requirement already satisfied: bcrypt>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.1.2)\n",
+            "Requirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.9.4)\n",
+            "Requirement already satisfied: kubernetes>=28.1.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (29.0.0)\n",
+            "Requirement already satisfied: tenacity>=8.2.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (8.2.3)\n",
+            "Requirement already satisfied: PyYAML>=6.0.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.0.1)\n",
+            "Requirement already satisfied: mmh3>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.1.0)\n",
+            "Requirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.10.0)\n",
+            "Requirement already satisfied: packaging>=19.1 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (24.0)\n",
+            "Requirement already satisfied: pyproject_hooks in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (1.0.0)\n",
+            "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (2.0.1)\n",
+            "Requirement already satisfied: starlette<0.37.0,>=0.36.3 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (0.36.3)\n",
+            "Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim>=4.2.0->faknow) (6.4.0)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (3.13.3)\n",
+            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2023.6.0)\n",
+            "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2024.2.2)\n",
+            "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n",
+            "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.8.2)\n",
+            "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.27.0)\n",
+            "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.7.0)\n",
+            "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.4.1)\n",
+            "Requirement already satisfied: oauthlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n",
+            "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.0.7)\n",
+            "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (8.1.7)\n",
+            "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (1.3.2)\n",
+            "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk>=3.7->faknow) (2023.12.25)\n",
+            "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n",
+            "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n",
+            "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (3.20.3)\n",
+            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n",
+            "Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.2.0->chromadb) (1.2.14)\n",
+            "Requirement already satisfied: importlib-metadata<=7.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.2.0->chromadb) (7.0.0)\n",
+            "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.0)\n",
+            "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.24.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.24.0)\n",
+            "Requirement already satisfied: opentelemetry-proto==1.24.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.24.0)\n",
+            "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
+            "Requirement already satisfied: opentelemetry-instrumentation==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
+            "Requirement already satisfied: opentelemetry-semantic-conventions==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
+            "Requirement already satisfied: opentelemetry-util-http==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
+            "Requirement already satisfied: setuptools>=16.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (67.7.2)\n",
+            "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n",
+            "Requirement already satisfied: asgiref~=3.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-asgi==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (3.8.1)\n",
+            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.5.2->faknow) (2023.4)\n",
+            "Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (1.6)\n",
+            "Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n",
+            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb) (0.6.0)\n",
+            "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb) (2.16.3)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.28->chromadb) (3.3.2)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.28->chromadb) (3.6)\n",
+            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.1.3->faknow) (3.4.0)\n",
+            "Requirement already satisfied: markdown>=3.4 in /usr/local/lib/python3.10/dist-packages (from sphinx-markdown-tables>=0.0.17->faknow) (3.6)\n",
+            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.4.0)\n",
+            "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (1.2.0)\n",
+            "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (0.7.2)\n",
+            "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.10.0->faknow) (3.0.1)\n",
+            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.2.1)\n",
+            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.3)\n",
+            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (8.9.2.26)\n",
+            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.3.1)\n",
+            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (11.0.2.54)\n",
+            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (10.3.2.106)\n",
+            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (11.4.5.107)\n",
+            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.0.106)\n",
+            "Requirement already satisfied: nvidia-nccl-cu12==2.19.3 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.19.3)\n",
+            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n",
+            "Requirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.2.0)\n",
+            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers) (12.4.99)\n",
+            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.26.1->faknow) (0.4.2)\n",
+            "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n",
+            "Requirement already satisfied: httptools>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.1)\n",
+            "Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.1)\n",
+            "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n",
+            "Requirement already satisfied: watchfiles>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.21.0)\n",
+            "Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (12.0)\n",
+            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n",
+            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.0)\n",
+            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n",
+            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.18.1)\n",
+            "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from starlette<0.37.0,>=0.36.3->fastapi>=0.95.2->chromadb) (3.7.1)\n",
+            "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.10.0->faknow) (2.1.5)\n",
+            "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n",
+            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n",
+            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.37.0,>=0.36.3->fastapi>=0.95.2->chromadb) (1.3.1)\n",
+            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.37.0,>=0.36.3->fastapi>=0.95.2->chromadb) (1.2.0)\n",
+            "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.6.0)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "kG2sAMShgAOV"
+      },
+      "execution_count": 27,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "import os\n",
+        "import chromadb\n",
+        "from chromadb.utils import embedding_functions\n",
+        "import math\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def create_domain_identification_database(vdb_path: str, collection_name: str, df: pd.DataFrame, batch_size: int = 166) -> None:\n",
+        "    \"\"\"Create a ChromaDB collection holding the domain identification examples.\n",
+        "\n",
+        "    Each value of the ``query`` column is added as a document of the new collection,\n",
+        "    which is created with the LaBSE sentence-transformer embedding function. The row's\n",
+        "    ``domain`` and ``label`` values are stored as metadata, and the record id is\n",
+        "    ``\"domain_id <index>\"`` where ``<index>`` is the row's DataFrame index.\n",
+        "\n",
+        "    Args:\n",
+        "        vdb_path (str): Path of the persistent ChromaDB instance.\n",
+        "        collection_name (str): Name of the collection to create.\n",
+        "        df (pd.DataFrame): Data with ``query``, ``domain`` and ``label`` columns.\n",
+        "        batch_size (int): Maximum number of records passed to a single ``add`` call, default=166.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If ``batch_size`` is smaller than 1.\n",
+        "    \"\"\"\n",
+        "    if batch_size < 1:\n",
+        "        raise ValueError(f\"batch_size must be a positive integer, got {batch_size}\")\n",
+        "\n",
+        "    # client bound to the on-disk ChromaDB location\n",
+        "    chroma_client = chromadb.PersistentClient(path=vdb_path)\n",
+        "\n",
+        "    # embedding function backed by the LaBSE sentence-transformer checkpoint\n",
+        "    embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=\"sentence-transformers/LaBSE\")\n",
+        "\n",
+        "    # creating the collection\n",
+        "    domain_identification_collection = chroma_client.create_collection(\n",
+        "        name=collection_name,\n",
+        "        embedding_function=embedding_function,\n",
+        "    )\n",
+        "\n",
+        "    # one pass over the frame builds the three parallel lists expected by ``add``\n",
+        "    documents, metadatas, ids = [], [], []\n",
+        "    for row in df.itertuples():\n",
+        "        documents.append(row.query)\n",
+        "        metadatas.append({\"domain\": row.domain, \"label\": row.label})\n",
+        "        ids.append(\"domain_id \" + str(row.Index))\n",
+        "\n",
+        "    # add the records in consecutive slices of at most ``batch_size`` items;\n",
+        "    # an empty frame results in no ``add`` call at all\n",
+        "    for start in range(0, len(documents), batch_size):\n",
+        "        end = start + batch_size\n",
+        "        domain_identification_collection.add(\n",
+        "            documents=documents[start:end],\n",
+        "            metadatas=metadatas[start:end],\n",
+        "            ids=ids[start:end],\n",
+        "        )\n",
+        "    return None\n",
+        "\n",
+        "\n",
+        "\n",
+        "def delete_collection_from_vector_db(vdb_path: str, collection_name: str) -> None:\n",
+        "    \"\"\"Remove one collection from the ChromaDB instance persisted at ``vdb_path``.\n",
+        "\n",
+        "    Args:\n",
+        "        vdb_path (str): Path of the persistent ChromaDB instance.\n",
+        "        collection_name (str): Name of the collection to be deleted.\n",
+        "    \"\"\"\n",
+        "    # open the persistent instance and drop the collection in one step\n",
+        "    chromadb.PersistentClient(path=vdb_path).delete_collection(collection_name)\n",
+        "\n",
+        "\n",
+        "def list_collections_from_vector_db(vdb_path: str) -> None:\n",
+        "    \"\"\"Print the collections stored in the persistent ChromaDB instance.\n",
+        "\n",
+        "    Args:\n",
+        "        vdb_path (str): Path of the persistent ChromaDB instance.\n",
+        "    \"\"\"\n",
+        "    available_collections = chromadb.PersistentClient(path=vdb_path).list_collections()\n",
+        "    print(available_collections)\n",
+        "\n",
+        "\n",
+        "def get_collection_from_vector_db(\n",
+        "    vdb_path: str, collection_name: str\n",
+        ") -> chromadb.Collection:\n",
+        "    \"\"\"Open an existing collection of the persistent ChromaDB instance.\n",
+        "\n",
+        "    The collection is requested together with the LaBSE sentence-transformer\n",
+        "    embedding function.\n",
+        "\n",
+        "    Args:\n",
+        "        vdb_path (str): Path of the persistent ChromaDB instance.\n",
+        "        collection_name (str): Name of the collection which needs to be retrieved.\n",
+        "\n",
+        "    Returns:\n",
+        "        chromadb.Collection: the collection object returned by the client.\n",
+        "    \"\"\"\n",
+        "    client = chromadb.PersistentClient(path=vdb_path)\n",
+        "    labse_embedder = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=\"sentence-transformers/LaBSE\")\n",
+        "    return client.get_collection(name=collection_name, embedding_function=labse_embedder)\n",
+        "\n",
+        "\n",
+        "def retrieval( input_text : str,\n",
+        "              num_results : int,\n",
+        "              collection: chromadb.Collection ):\n",
+        "    \"\"\"Query the collection with a text and return the annotations of its first hit.\n",
+        "\n",
+        "    Args:\n",
+        "        input_text : the received text, which can be news, posts, or tweets\n",
+        "        num_results : number of examples requested from the collection\n",
+        "        collection : the collection the examples are fetched from\n",
+        "\n",
+        "    Returns:\n",
+        "        A ``(domain, label, distance)`` tuple taken from the first result of the\n",
+        "        query; any further results requested through ``num_results`` are not returned.\n",
+        "    \"\"\"\n",
+        "    query_result = collection.query(query_texts=[input_text], n_results=num_results)\n",
+        "\n",
+        "    # results are nested per query text; a single text is sent, hence the leading [0],\n",
+        "    # and the second [0] selects the first returned example\n",
+        "    first_hit_metadata = query_result[\"metadatas\"][0][0]\n",
+        "    domain = first_hit_metadata[\"domain\"]\n",
+        "    label = first_hit_metadata[\"label\"]\n",
+        "    distance = query_result[\"distances\"][0][0]\n",
+        "\n",
+        "    return domain, label, distance"
+      ],
+      "metadata": {
+        "id": "-_UqusZqgAQP"
+      },
+      "execution_count": 28,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import pipeline\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "# checkpoint used for every zero-shot classification performed below\n",
+        "_ZEROSHOT_MODEL_NAME = \"MoritzLaurer/deberta-v3-large-zeroshot-v1.1-all-33\"\n",
+        "# pipeline instance shared by all calls; created lazily on first use\n",
+        "_zeroshot_classifier = None\n",
+        "\n",
+        "\n",
+        "def _get_zeroshot_classifier():\n",
+        "  \"\"\"Return the shared zero-shot pipeline, building it the first time it is needed.\"\"\"\n",
+        "  global _zeroshot_classifier\n",
+        "  if _zeroshot_classifier is None:\n",
+        "    _zeroshot_classifier = pipeline(\"zero-shot-classification\", model=_ZEROSHOT_MODEL_NAME)\n",
+        "  return _zeroshot_classifier\n",
+        "\n",
+        "\n",
+        "def english_information_extraction(text: str):\n",
+        "  \"\"\"Classify the domain, sentiment and hatefulness of an English text with zero-shot classification.\n",
+        "\n",
+        "  The domain is predicted first; the predicted domain is then inserted into the\n",
+        "  hypothesis templates used for the sentiment and the hateful / not hateful decisions.\n",
+        "  The classifier is built once and reused, instead of being reloaded on every call.\n",
+        "\n",
+        "  Args:\n",
+        "      text (str): the text to analyse\n",
+        "\n",
+        "  Returns:\n",
+        "      dict: ``domain_label`` / ``domain_score``, ``sentiment_label`` / ``sentiment_score`` and\n",
+        "      ``discrimination_label`` / ``discrimination_score``, each taken from the first entry of the\n",
+        "      corresponding classifier output (presumably the best-scoring class, verify against the\n",
+        "      transformers pipeline documentation).\n",
+        "  \"\"\"\n",
+        "  zeroshot_classifier = _get_zeroshot_classifier()\n",
+        "\n",
+        "  # step 1: which group the text is about\n",
+        "  hypothesis_template_domain = \"This text is about {}\"\n",
+        "  domain_classes = [\"women\" , \"muslims\" , \"tamil\" , \"sinhala\" , \"other\"]\n",
+        "  domains_output = zeroshot_classifier(text, domain_classes, hypothesis_template=hypothesis_template_domain, multi_label=False)\n",
+        "  domain_label, domain_score = domains_output[\"labels\"][0], domains_output[\"scores\"][0]\n",
+        "\n",
+        "  # prefix shared by the two follow-up hypotheses; the trailing space separates it from the suffix\n",
+        "  sentiment_discrimination_prompt = f\"the content of this text about {domain_label} \"\n",
+        "\n",
+        "  # step 2: sentiment of the text towards the predicted domain\n",
+        "  hypothesis_template_sentiment = sentiment_discrimination_prompt + \"is {} sentiment\"\n",
+        "  sentiment_classes = [\"positive\" ,\"neutral\", \"negative\"]\n",
+        "  sentiment_output = zeroshot_classifier(text, sentiment_classes, hypothesis_template=hypothesis_template_sentiment, multi_label=False)\n",
+        "\n",
+        "  # step 3: hateful / not hateful decision for the predicted domain\n",
+        "  hypothesis_template_discrimination = sentiment_discrimination_prompt + \"is {}\"\n",
+        "  discrimination_classes = [\"hateful\" , \"not hateful\"]\n",
+        "  discrimination_output = zeroshot_classifier(text, discrimination_classes, hypothesis_template=hypothesis_template_discrimination, multi_label=False)\n",
+        "\n",
+        "  return {\"domain_label\" : domain_label,\n",
+        "          \"domain_score\" : domain_score,\n",
+        "          \"sentiment_label\" : sentiment_output[\"labels\"][0],\n",
+        "          \"sentiment_score\" : sentiment_output[\"scores\"][0],\n",
+        "          \"discrimination_label\" : discrimination_output[\"labels\"][0],\n",
+        "          \"discrimination_score\": discrimination_output[\"scores\"][0]}\n",
+        "\n",
+        "\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "G9EL047MfDDY"
+      },
+      "execution_count": 29,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "jmzyvmLQgASa"
+      },
+      "execution_count": 29,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#the model\n",
+        "from typing import List, Optional, Tuple\n",
+        "\n",
+        "import torch\n",
+        "from torch import Tensor\n",
+        "from torch import nn\n",
+        "from transformers import RobertaModel\n",
+        "\n",
+        "from faknow.model.layers.layer import TextCNNLayer\n",
+        "from faknow.model.model import AbstractModel\n",
+        "from faknow.data.process.text_process import TokenizerFromPreTrained\n",
+        "import pandas as pd\n",
+        "import gdown\n",
+        "import os\n",
+        "\n",
+        "class _MLP(nn.Module):\n",
+        "    \"\"\"\n",
+        "    Stack of Linear -> BatchNorm1d -> ReLU -> Dropout blocks, optionally followed by a single-unit Linear layer\n",
+        "    \"\"\"\n",
+        "    def __init__(self,\n",
+        "                 input_dim: int,\n",
+        "                 embed_dims: List[int],\n",
+        "                 dropout_rate: float,\n",
+        "                 output_layer=True):\n",
+        "        \"\"\"\n",
+        "\n",
+        "        Args:\n",
+        "            input_dim (int): size of the input features\n",
+        "            embed_dims (List[int]): output size of each hidden block, in order\n",
+        "            dropout_rate (float): rate of the Dropout layer closing every hidden block\n",
+        "            output_layer (bool): whether to append a final Linear layer with one output, default=True\n",
+        "        \"\"\"\n",
+        "        super().__init__()\n",
+        "        modules = []\n",
+        "        in_features = input_dim\n",
+        "        for out_features in embed_dims:\n",
+        "            modules.extend([\n",
+        "                nn.Linear(in_features, out_features),\n",
+        "                nn.BatchNorm1d(out_features),\n",
+        "                nn.ReLU(),\n",
+        "                nn.Dropout(p=dropout_rate),\n",
+        "            ])\n",
+        "            # the next block consumes what this one produced\n",
+        "            in_features = out_features\n",
+        "        if output_layer:\n",
+        "            modules.append(nn.Linear(in_features, 1))\n",
+        "        self.mlp = nn.Sequential(*modules)\n",
+        "\n",
+        "    def forward(self, x: Tensor) -> Tensor:\n",
+        "        \"\"\"\n",
+        "\n",
+        "        Args:\n",
+        "            x (Tensor): shared feature from domain and text, shape=(batch_size, embed_dim)\n",
+        "\n",
+        "        Returns:\n",
+        "            Tensor: result of passing ``x`` through the layer stack\n",
+        "        \"\"\"\n",
+        "        return self.mlp(x)\n",
+        "\n",
+        "\n",
+        "class _MaskAttentionLayer(torch.nn.Module):\n",
+        "    \"\"\"\n",
+        "    Attention pooling with an optional mask: one score per position, softmax, weighted sum\n",
+        "    \"\"\"\n",
+        "    def __init__(self, input_size: int):\n",
+        "        \"\"\"\n",
+        "\n",
+        "        Args:\n",
+        "            input_size (int): size of the last dimension of the inputs\n",
+        "        \"\"\"\n",
+        "        super().__init__()\n",
+        "        self.attention_layer = torch.nn.Linear(input_size, 1)\n",
+        "\n",
+        "    def forward(self,\n",
+        "                inputs: Tensor,\n",
+        "                mask: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:\n",
+        "        \"\"\"\n",
+        "\n",
+        "        Args:\n",
+        "            inputs (Tensor): features to pool, presumably shape=(batch_size, seq_len, input_size) - TODO confirm with callers\n",
+        "            mask (Tensor): optional mask; positions where it equals 0 receive zero weight\n",
+        "\n",
+        "        Returns:\n",
+        "            Tuple[Tensor, Tensor]: the weighted sum of ``inputs`` and the attention weights\n",
+        "            (with a singleton dimension inserted at position 1)\n",
+        "        \"\"\"\n",
+        "        seq_len = inputs.size(1)\n",
+        "        scores = self.attention_layer(inputs).view(-1, seq_len)\n",
+        "        if mask is not None:\n",
+        "            # -inf scores turn into exactly zero weight after the softmax\n",
+        "            scores = scores.masked_fill(mask == 0, float(\"-inf\"))\n",
+        "        attention = torch.softmax(scores, dim=-1).unsqueeze(1)\n",
+        "        pooled = (attention @ inputs).squeeze(1)\n",
+        "        return pooled, attention\n",
+        "\n",
+        "\n",
+        "class MDFEND(AbstractModel):\n",
+        "    r\"\"\"\n",
+        "    MDFEND: Multi-domain Fake News Detection, CIKM 2021\n",
+        "    paper: https://dl.acm.org/doi/10.1145/3459637.3482139\n",
+        "    code: https://github.com/kennqiang/MDFEND-Weibo21\n",
+        "    \"\"\"\n",
+        "    def __init__(self,\n",
+        "                 pre_trained_bert_name: str,\n",
+        "                 domain_num: int,\n",
+        "                 mlp_dims: Optional[List[int]] = None,\n",
+        "                 dropout_rate=0.2,\n",
+        "                 expert_num=5):\n",
+        "        \"\"\"\n",
+        "\n",
+        "        Args:\n",
+        "            pre_trained_bert_name (str): the name or local path of the pre-trained encoder,\n",
+        "                loaded with RobertaModel.from_pretrained\n",
+        "            domain_num (int): total number of all domains\n",
+        "            mlp_dims (List[int]): hidden widths of the classifier MLP; the last entry is also\n",
+        "                used as the hidden width of the gate. If None, [384] is used, default=None\n",
+        "            dropout_rate (float): rate of Dropout layer, default=0.2\n",
+        "            expert_num (int): number of experts also called TextCNNLayer, default=5\n",
+        "        \"\"\"\n",
+        "        super(MDFEND, self).__init__()\n",
+        "        self.domain_num = domain_num\n",
+        "        self.expert_num = expert_num\n",
+        "        # The encoder is frozen; its parameters never receive gradients.\n",
+        "        self.bert = RobertaModel.from_pretrained(\n",
+        "            pre_trained_bert_name).requires_grad_(False)\n",
+        "        self.embedding_size = self.bert.config.hidden_size\n",
+        "        self.loss_func = nn.BCELoss()\n",
+        "        if mlp_dims is None:\n",
+        "            mlp_dims = [384]\n",
+        "\n",
+        "        filter_num = 64\n",
+        "        filter_sizes = [1, 2, 3, 5, 10]\n",
+        "        experts = [\n",
+        "            TextCNNLayer(self.embedding_size, filter_num, filter_sizes)\n",
+        "            for _ in range(self.expert_num)\n",
+        "        ]\n",
+        "        self.experts = nn.ModuleList(experts)\n",
+        "\n",
+        "        # The gate input is the domain embedding concatenated with the pooled text feature.\n",
+        "        self.gate = nn.Sequential(\n",
+        "            nn.Linear(self.embedding_size * 2, mlp_dims[-1]), nn.ReLU(),\n",
+        "            nn.Linear(mlp_dims[-1], self.expert_num), nn.Softmax(dim=1))\n",
+        "\n",
+        "        self.attention = _MaskAttentionLayer(self.embedding_size)\n",
+        "\n",
+        "        self.domain_embedder = nn.Embedding(num_embeddings=self.domain_num,\n",
+        "                                            embedding_dim=self.embedding_size)\n",
+        "        # Derived instead of hard-coded: filter_num features per filter size\n",
+        "        # (64 * 5 = 320, the value that used to be written here literally).\n",
+        "        expert_output_dim = filter_num * len(filter_sizes)\n",
+        "        self.classifier = _MLP(expert_output_dim, mlp_dims, dropout_rate)\n",
+        "\n",
+        "    def forward(self, token_id: Tensor, mask: Tensor,\n",
+        "                domain: Tensor) -> Tensor:\n",
+        "        \"\"\"\n",
+        "\n",
+        "        Args:\n",
+        "            token_id (Tensor): token ids from bert tokenizer, shape=(batch_size, max_len)\n",
+        "            mask (Tensor): mask from bert tokenizer, shape=(batch_size, max_len)\n",
+        "            domain (Tensor): domain id, shape=(batch_size,)\n",
+        "\n",
+        "        Returns:\n",
+        "            FloatTensor: the prediction of being fake, shape=(batch_size,)\n",
+        "        \"\"\"\n",
+        "        text_embedding = self.bert(token_id,\n",
+        "                                   attention_mask=mask).last_hidden_state\n",
+        "        attention_feature, _ = self.attention(text_embedding, mask)\n",
+        "\n",
+        "        domain_embedding = self.domain_embedder(domain.view(-1, 1)).squeeze(1)\n",
+        "\n",
+        "        # One softmax weight per expert, computed from domain + pooled text.\n",
+        "        gate_input = torch.cat([domain_embedding, attention_feature], dim=-1)\n",
+        "        gate_output = self.gate(gate_input)\n",
+        "\n",
+        "        # Mixture of experts: gate-weighted sum of every expert's feature.\n",
+        "        shared_feature = 0\n",
+        "        for i in range(self.expert_num):\n",
+        "            expert_feature = self.experts[i](text_embedding)\n",
+        "            shared_feature += (expert_feature * gate_output[:, i].unsqueeze(1))\n",
+        "\n",
+        "        label_pred = self.classifier(shared_feature)\n",
+        "\n",
+        "        return torch.sigmoid(label_pred.squeeze(1))\n",
+        "\n",
+        "    def calculate_loss(self, data) -> Tensor:\n",
+        "        \"\"\"\n",
+        "        calculate loss via BCELoss\n",
+        "\n",
+        "        Args:\n",
+        "            data (dict): batch data dict with data['text']['token_id'],\n",
+        "                data['text']['mask'], data['domain'] and data['label']\n",
+        "\n",
+        "        Returns:\n",
+        "            loss (Tensor): loss value\n",
+        "        \"\"\"\n",
+        "\n",
+        "        token_ids = data['text']['token_id']\n",
+        "        masks = data['text']['mask']\n",
+        "        domains = data['domain']\n",
+        "        labels = data['label']\n",
+        "        output = self.forward(token_ids, masks, domains)\n",
+        "        return self.loss_func(output, labels.float())\n",
+        "\n",
+        "    def predict(self, data_without_label) -> Tensor:\n",
+        "        \"\"\"\n",
+        "        predict the class (fake or not) of every item in the batch\n",
+        "\n",
+        "        Args:\n",
+        "            data_without_label (Dict[str, Any]): batch data dict with\n",
+        "                ['text']['token_id'], ['text']['mask'] and ['domain']\n",
+        "\n",
+        "        Returns:\n",
+        "            Tensor: one-hot encoding of the rounded prediction, shape=(batch_size, 2)\n",
+        "        \"\"\"\n",
+        "\n",
+        "        token_ids = data_without_label['text']['token_id']\n",
+        "        masks = data_without_label['text']['mask']\n",
+        "        domains = data_without_label['domain']\n",
+        "\n",
+        "        # shape=(n,), data = 1 or 0\n",
+        "        round_pred = torch.round(self.forward(token_ids, masks,\n",
+        "                                              domains)).long()\n",
+        "        # after one hot: shape=(n,2), data = [0,1] or [1,0]\n",
+        "        one_hot_pred = torch.nn.functional.one_hot(round_pred, num_classes=2)\n",
+        "        return one_hot_pred\n",
+        "\n",
+        "\n",
+        "def download_from_gdrive(file_id, output_path):\n",
+        "    \"\"\"\n",
+        "    Download a Google Drive file with gdown unless it is already on disk.\n",
+        "\n",
+        "    Args:\n",
+        "        file_id (str): Google Drive file id handed to gdown.download\n",
+        "        output_path (str): local path where the file is, or will be, stored\n",
+        "\n",
+        "    Returns:\n",
+        "        str: the local path of the file\n",
+        "\n",
+        "    Raises:\n",
+        "        FileNotFoundError: if the download ends without creating the file\n",
+        "    \"\"\"\n",
+        "    output = os.fspath(output_path)\n",
+        "\n",
+        "    # Reuse an earlier download instead of fetching the file again.\n",
+        "    if os.path.exists(output):\n",
+        "        return output\n",
+        "\n",
+        "    # Make sure the destination folder exists before gdown writes into it.\n",
+        "    parent_dir = os.path.dirname(output)\n",
+        "    if parent_dir:\n",
+        "        os.makedirs(parent_dir, exist_ok=True)\n",
+        "\n",
+        "    gdown.download(id=file_id, output=output, quiet=False)\n",
+        "\n",
+        "    # Fail here with a clear message rather than later inside torch.load.\n",
+        "    if not os.path.exists(output):\n",
+        "        raise FileNotFoundError(\n",
+        "            f\"gdown did not create {output!r} for file id {file_id!r}\")\n",
+        "    return output\n",
+        "\n",
+        "\n",
+        "\n",
+        "def loading_model_and_tokenizer():\n",
+        "    \"\"\"\n",
+        "    Build the tokenizer and the MDFEND model and load the fine-tuned checkpoint.\n",
+        "\n",
+        "    The checkpoint is downloaded from Google Drive on first use and reused afterwards.\n",
+        "\n",
+        "    Returns:\n",
+        "        tuple: (tokenizer, model); the model weights are mapped to CPU, gradients\n",
+        "            are disabled and the model is in eval mode, ready for inference\n",
+        "    \"\"\"\n",
+        "    max_len, bert = 160, 'FacebookAI/xlm-roberta-base'\n",
+        "    #https://drive.google.com/file/d/1--6GB3Ff81sILwtuvVTuAW3shGW_5VWC/view\n",
+        "\n",
+        "    file_id = \"1--6GB3Ff81sILwtuvVTuAW3shGW_5VWC\"\n",
+        "\n",
+        "    model_path = '/content/drive/MyDrive/models/last-epoch-model-2024-03-17-01_00_32_1.pth'\n",
+        "\n",
+        "    MODEL_SAVE_PATH = download_from_gdrive(file_id, model_path)\n",
+        "    domain_num = 4\n",
+        "\n",
+        "    tokenizer = TokenizerFromPreTrained(max_len, bert)\n",
+        "\n",
+        "    # Presumably the hyper-parameters the checkpoint was trained with; load_state_dict\n",
+        "    # rejects the file if the layer shapes do not match.\n",
+        "    model = MDFEND(bert, domain_num , expert_num=12 , mlp_dims = [3010, 2024 ,1012 ,606 , 400])\n",
+        "\n",
+        "    # NOTE(review): torch.load unpickles the file, only load checkpoints from a trusted source.\n",
+        "    model.load_state_dict(torch.load(f=MODEL_SAVE_PATH , map_location=torch.device('cpu')))\n",
+        "\n",
+        "    model.requires_grad_(False)\n",
+        "\n",
+        "    # requires_grad_(False) does not switch layer behaviour. Without eval() the\n",
+        "    # classifier's BatchNorm1d normalises with the statistics of the current batch\n",
+        "    # (which fails on a single example) and Dropout keeps dropping units at random,\n",
+        "    # so scores would depend on the other rows in the batch and change between runs.\n",
+        "    model.eval()\n",
+        "\n",
+        "    return tokenizer , model"
+      ],
+      "metadata": {
+        "id": "A4zYbG-AmxQd"
+      },
+      "execution_count": 51,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "import torch\n",
+        "def preparing_data(text:str , domain: int):\n",
+        "    \"\"\"\n",
+        "    Build the two-row frame fed to the model: a fixed placeholder row first,\n",
+        "    then the row holding the user's text.\n",
+        "\n",
+        "    Args:\n",
+        "        text (str): input text from the user\n",
+        "        domain (int): output domain from the domain identification pipeline\n",
+        "\n",
+        "    Returns:\n",
+        "        DataFrame: columns 'text' and 'domain'; row 0 is the placeholder, row 1 the user input\n",
+        "    \"\"\"\n",
+        "    # The placeholder row is there so the model never receives a batch of one example.\n",
+        "    placeholder_text, placeholder_domain = 'hello world', 0\n",
+        "\n",
+        "    return pd.DataFrame({\n",
+        "        'text': [placeholder_text, text],\n",
+        "        'domain': [placeholder_domain, domain],\n",
+        "    })\n",
+        "\n",
+        "\n",
+        "def loading_data(tokenizer , df: pd.DataFrame ):\n",
+        "    \"\"\"\n",
+        "    Tokenize every text of the frame and stack the results into model inputs.\n",
+        "\n",
+        "    Args:\n",
+        "        tokenizer (Callable): called with one text, must return a dict with the\n",
+        "            tensors 'token_id' and 'mask' (one row each)\n",
+        "        df (DataFrame): frame with the columns 'text' and 'domain'\n",
+        "\n",
+        "    Returns:\n",
+        "        tuple: (input_ids, input_masks, input_domains), one entry per row of df\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: if df has no rows\n",
+        "    \"\"\"\n",
+        "    if len(df) == 0:\n",
+        "        raise ValueError(\"loading_data() needs at least one row in df\")\n",
+        "\n",
+        "    ids = []\n",
+        "    masks = []\n",
+        "    # Iterate over the values, not over index labels: df['text'][i] raises a\n",
+        "    # KeyError on a frame whose index is not 0..n-1 (e.g. after filtering).\n",
+        "    for text in df[\"text\"].tolist():\n",
+        "        token = tokenizer(text)\n",
+        "        ids.append(token[\"token_id\"])\n",
+        "        masks.append(token[\"mask\"])\n",
+        "\n",
+        "    # Stack once after the loop instead of re-concatenating on every iteration.\n",
+        "    input_ids = torch.cat(ids , dim=0)\n",
+        "    input_masks = torch.cat(masks ,dim = 0)\n",
+        "    input_domains = torch.tensor(df[\"domain\"].tolist())\n",
+        "\n",
+        "    return input_ids , input_masks , input_domains"
+      ],
+      "metadata": {
+        "id": "63oO220bidnk"
+      },
+      "execution_count": 31,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
+        "\n",
+        "def language_identification(texts):\n",
+        "  \"\"\"\n",
+        "  Detect the language of a text with the papluca XLM-RoBERTa language-detection checkpoint.\n",
+        "\n",
+        "  The tokenizer and the model are loaded from the checkpoint on every call.\n",
+        "\n",
+        "  Args:\n",
+        "      texts (str): the text to classify; it is wrapped into a one-element batch\n",
+        "\n",
+        "  Returns:\n",
+        "      dict: maps the top-scoring label from model.config.id2label to its softmax probability\n",
+        "  \"\"\"\n",
+        "  model_ckpt = \"papluca/xlm-roberta-base-language-detection\"\n",
+        "  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)\n",
+        "  model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)\n",
+        "\n",
+        "  batch = [texts]\n",
+        "  inputs = tokenizer(batch, padding=True, truncation=True, return_tensors=\"pt\")\n",
+        "\n",
+        "  with torch.no_grad():\n",
+        "      logits = model(**inputs).logits\n",
+        "\n",
+        "  probabilities = torch.softmax(logits, dim=-1)\n",
+        "\n",
+        "  # Keep only the best label of every batch item, together with its probability.\n",
+        "  top_probs, top_ids = probabilities.max(dim=1)\n",
+        "  id2lang = model.config.id2label\n",
+        "  lang_dict = {}\n",
+        "  for lang_id, prob in zip(top_ids, top_probs):\n",
+        "      lang_dict[id2lang[lang_id.item()]] = prob.item()\n",
+        "  return lang_dict"
+      ],
+      "metadata": {
+        "id": "mBrwFI_wPxtU"
+      },
+      "execution_count": 32,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "yuFVY6cZidqI",
+        "outputId": "766ef226-ad9a-444c-eff8-d02923ff1b7d"
+      },
+      "execution_count": 33,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "\n",
+        "def run_pipeline(input_text:str):\n",
+        "    \"\"\"\n",
+        "    Detect the language of the text and run the matching analysis pipeline.\n",
+        "\n",
+        "    English text is handed to english_information_extraction(). Any other language\n",
+        "    gets its domain from the vector database and a discrimination score from MDFEND.\n",
+        "\n",
+        "    Args:\n",
+        "        input_text (str): raw text typed by the user\n",
+        "\n",
+        "    Returns:\n",
+        "        dict: for English, whatever english_information_extraction() returns; otherwise\n",
+        "            the keys 'domain_label', 'domain_score', 'discrimination_label'\n",
+        "            and 'discrimination_score'\n",
+        "    \"\"\"\n",
+        "    language_dict = language_identification(input_text)\n",
+        "    language_code = next(iter(language_dict))\n",
+        "\n",
+        "    if language_code == \"en\":\n",
+        "        return english_information_extraction(input_text)\n",
+        "\n",
+        "    num_results = 1\n",
+        "    path = \"/content/drive/MyDrive/general_domains/vector_database\"\n",
+        "    collection_name = \"general_domains\"\n",
+        "    # Retrieval distances above this value are reported as no known domain.\n",
+        "    max_domain_distance = 1.45\n",
+        "\n",
+        "    collection = get_collection_from_vector_db(path , collection_name)\n",
+        "\n",
+        "    domain , label_domain , distance = retrieval(input_text , num_results , collection)\n",
+        "\n",
+        "    if distance > max_domain_distance:\n",
+        "        domain = \"undetermined\"\n",
+        "\n",
+        "    tokenizer , model = loading_model_and_tokenizer()\n",
+        "    # Inference has to run in eval mode: in training mode the classifier's BatchNorm1d\n",
+        "    # uses the statistics of this tiny batch and Dropout drops units at random, which\n",
+        "    # makes the score depend on the placeholder row and differ between runs.\n",
+        "    model.eval()\n",
+        "\n",
+        "    df = preparing_data(input_text , label_domain)\n",
+        "\n",
+        "    input_ids , input_masks , input_domains = loading_data(tokenizer , df)\n",
+        "\n",
+        "    with torch.no_grad():\n",
+        "        pred = model(input_ids, input_masks , input_domains)\n",
+        "\n",
+        "    # preparing_data() puts a placeholder row first; the user's text is the last row.\n",
+        "    score = pred[-1].item()\n",
+        "    label = \"discriminative\" if score >= 0.5 else \"not discriminative\"\n",
+        "\n",
+        "    return {\n",
+        "        \"domain_label\": domain,\n",
+        "        \"domain_score\": distance,\n",
+        "        \"discrimination_label\": label,\n",
+        "        \"discrimination_score\": score,\n",
+        "    }\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "HlBJF4NQgAVy"
+      },
+      "execution_count": 34,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "input_text_1 = input(\"input text:\")\n",
+        "\n",
+        "output_1 = run_pipeline( input_text_1)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1BVBXyRDnDg4",
+        "outputId": "de9a4f3e-4ad4-4d03-8d51-b3df05daa685"
+      },
+      "execution_count": 35,
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "input text:muslims loves their prophet muhammed\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "output_1"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TnnB40tEnIHI",
+        "outputId": "752bc3bd-93ac-46be-9d1a-308c6fc267ed"
+      },
+      "execution_count": 36,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'domain_label': 'muslims',\n",
+              " 'domain_score': 0.9989225268363953,\n",
+              " 'sentiment_label': 'positive',\n",
+              " 'sentiment_score': 0.9239600300788879,\n",
+              " 'discrimination_label': 'not hateful',\n",
+              " 'discrimination_score': 0.9917498826980591}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 36
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "input_text_2 = input(\"input text:\")\n",
+        "\n",
+        "output_2 = run_pipeline( input_text_2)"
+      ],
+      "metadata": {
+        "id": "LBAvmrE1QxM3",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "45056e2c-701c-40c0-9a04-36710cc1bdbd"
+      },
+      "execution_count": 54,
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "input text:මුස්ලිම්වරු ඔවුන්ගේ අනාගතවක්තෘ මුහම්මද්ට ආදරෙයි\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Downloading...\n",
+            "From (original): https://drive.google.com/uc?id=1--6GB3Ff81sILwtuvVTuAW3shGW_5VWC\n",
+            "From (redirected): https://drive.google.com/uc?id=1--6GB3Ff81sILwtuvVTuAW3shGW_5VWC&confirm=t&uuid=4bc00ac8-29e3-458b-a64d-c0f583a18df7\n",
+            "To: /content/drive/MyDrive/models/last-epoch-model-2024-03-17-01_00_32_1.pth\n",
+            "100%|██████████| 1.20G/1.20G [00:17<00:00, 69.1MB/s]\n",
+            "You are using a model of type xlm-roberta to instantiate a model of type roberta. This is not supported for all configurations of models and can yield errors.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "output_2"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ienC5lZvYjcu",
+        "outputId": "25eb47ee-f219-4ce0-915b-5fd3acb54414"
+      },
+      "execution_count": 55,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'domain_label': 'muslims',\n",
+              " 'domain_score': 0.9477148933517974,\n",
+              " 'discrimination_label': 'not discriminative',\n",
+              " 'discrimination_score': 0.016480498015880585}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 55
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "input_text_3 = input(\"input text:\")\n",
+        "\n",
+        "output_3 = run_pipeline( input_text_3)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "kCe3FS5lYyQ7",
+        "outputId": "5ec7d2fd-3aa9-4e35-b4bf-2d1db4777aba"
+      },
+      "execution_count": 56,
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "input text:முஸ்லீம்கள் தங்கள் தீர்க்கதரிசியை நேசிக்கிறார்கள்\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "You are using a model of type xlm-roberta to instantiate a model of type roberta. This is not supported for all configurations of models and can yield errors.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "output_3"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4gCBAROLaDNK",
+        "outputId": "dd50be33-030c-4ea4-d2ca-5cd513eb3f0b"
+      },
+      "execution_count": 57,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'domain_label': 'muslims',\n",
+              " 'domain_score': 0.9295339941122466,\n",
+              " 'discrimination_label': 'not discriminative',\n",
+              " 'discrimination_score': 0.011930261738598347}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 57
+        }
+      ]
+    }
+  ]
+}