{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import re\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "from presidio_anonymizer.entities import (RecognizerResult,\n",
    "    OperatorResult,\n",
    "    OperatorConfig)\n",
    "from privacy.service.service import PrivacyService"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"John Doe's Social Security number is 123-45-6789 and his email is johndoe@example.com.\"\n",
    "\n",
    "# Define regular expressions for different types of PII\n",
    "ssn_pattern = r\"\\d{3}-\\d{2}-\\d{4}\"\n",
    "email_pattern = r\"\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Find matches for Social Security numbers\n",
    "ssn_matches = re.findall(ssn_pattern, text)\n",
    "\n",
    "# Find matches for email addresses\n",
    "email_matches = re.findall(email_pattern, text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply differential privacy to the detected PII counts\n",
    "epsilon = 0.1 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_noise(value):\n",
    "    scale = 1 / epsilon\n",
    "    laplace_noise = np.random.laplace(loc=0, scale=scale)\n",
    "    print(value)\n",
    "    print(laplace_noise)\n",
    "    return value + laplace_noise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "123-45-6789\n",
      "-6.338424074647873\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "can only concatenate str (not \"float\") to str",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 6\u001b[0m line \u001b[0;36m5\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m ssn_matches \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39mfindall(ssn_pattern, text)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39m# Add differential privacy to the Social Security numbers\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m noisy_ssn_matches \u001b[39m=\u001b[39m [add_noise(ssn) \u001b[39mfor\u001b[39;00m ssn \u001b[39min\u001b[39;00m ssn_matches]\n",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 6\u001b[0m line \u001b[0;36m5\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m ssn_matches \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39mfindall(ssn_pattern, text)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39m# Add differential privacy to the Social Security numbers\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m noisy_ssn_matches \u001b[39m=\u001b[39m [add_noise(ssn) \u001b[39mfor\u001b[39;00m ssn \u001b[39min\u001b[39;00m ssn_matches]\n",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 6\u001b[0m line \u001b[0;36m6\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mprint\u001b[39m(value)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mprint\u001b[39m(laplace_noise)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W5sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m \u001b[39mreturn\u001b[39;00m value \u001b[39m+\u001b[39;49m laplace_noise\n",
      "\u001b[1;31mTypeError\u001b[0m: can only concatenate str (not \"float\") to str"
     ]
    }
   ],
   "source": [
    "\n",
    "# Find matches for Social Security numbers\n",
    "ssn_matches = re.findall(ssn_pattern, text)\n",
    "\n",
    "# Add differential privacy to the Social Security numbers\n",
    "noisy_ssn_matches = [add_noise(ssn) for ssn in ssn_matches]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "johndoe@example.com\n",
      "21.357718997606124\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "can only concatenate str (not \"float\") to str",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 7\u001b[0m line \u001b[0;36m5\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m email_matches \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39mfindall(email_pattern, text)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39m# Add differential privacy to the email addresses\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m noisy_email_matches \u001b[39m=\u001b[39m [add_noise(email) \u001b[39mfor\u001b[39;00m email \u001b[39min\u001b[39;00m email_matches]\n",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 7\u001b[0m line \u001b[0;36m5\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m email_matches \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39mfindall(email_pattern, text)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39m# Add differential privacy to the email addresses\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m noisy_email_matches \u001b[39m=\u001b[39m [add_noise(email) \u001b[39mfor\u001b[39;00m email \u001b[39min\u001b[39;00m email_matches]\n",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 7\u001b[0m line \u001b[0;36m6\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mprint\u001b[39m(value)\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mprint\u001b[39m(laplace_noise)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#W6sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m \u001b[39mreturn\u001b[39;00m value \u001b[39m+\u001b[39;49m laplace_noise\n",
      "\u001b[1;31mTypeError\u001b[0m: can only concatenate str (not \"float\") to str"
     ]
    }
   ],
   "source": [
    "# Find matches for email addresses\n",
    "email_matches = re.findall(email_pattern, text)\n",
    "\n",
    "# Add differential privacy to the email addresses\n",
    "noisy_email_matches = [add_noise(email) for email in email_matches]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Noisy SSN count: 1.5109753118487679\n",
      "Anonymized SSN count: text: 1.5109753118487679\n",
      "items:\n",
      "[\n",
      "    \n",
      "]\n",
      "\n",
      "Noisy email count: 1.5109753118487679\n",
      "Anonymized email count: text: 1.5109753118487679\n",
      "items:\n",
      "[\n",
      "    \n",
      "]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from presidio_anonymizer import AnonymizerEngine\n",
    "\n",
    "# Initialize the anonymizer engine\n",
    "anonymizer = AnonymizerEngine()\n",
    "\n",
    "# Define the text containing potential PII\n",
    "text = \"John Doe's Social Security number is 123-45-6789 and his email is johndoe@example.com.\"\n",
    "\n",
    "# Apply differential privacy to the PII detection process\n",
    "epsilon = 0.1  # Privacy parameter for differential privacy\n",
    "sensitivity = 1  # Sensitivity of the PII detection result\n",
    "delta = 1e-6  # Privacy parameter for differential privacy\n",
    "\n",
    "# Calculate the noise to be added\n",
    "scale = sensitivity / epsilon\n",
    "laplace_noise = np.random.laplace(loc=0, scale=scale)\n",
    "\n",
    "# Detect PII in the text\n",
    "# Example rule-based matching for SSN and email\n",
    "ssn_pattern = r\"\\d{3}-\\d{2}-\\d{4}\"\n",
    "email_pattern = r\"\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b\"\n",
    "\n",
    "# Apply noise to the PII detection results\n",
    "noisy_ssn_count = len(re.findall(ssn_pattern, text)) + laplace_noise\n",
    "noisy_email_count = len(re.findall(email_pattern, text)) + laplace_noise\n",
    "\n",
    "# Anonymize the PII detection results using Presidio\n",
    "anonymized_ssn_count = anonymizer.anonymize(\n",
    "    str(noisy_ssn_count),\n",
    "    analyzer_results=[],\n",
    "    operators={\"anonymizer_config\": {\"type\": \"replace\", \"value\": \"<COUNT>\"}},\n",
    ")\n",
    "\n",
    "anonymized_email_count = anonymizer.anonymize(\n",
    "    str(noisy_email_count),\n",
    "    analyzer_results=[],\n",
    "    operators={\"anonymizer_config\": {\"type\": \"replace\", \"value\": \"<COUNT>\"}},\n",
    ")\n",
    "\n",
    "# Print the anonymized PII detection results\n",
    "print(\"Noisy SSN count:\", noisy_ssn_count)\n",
    "print(\"Anonymized SSN count:\", anonymized_ssn_count)\n",
    "\n",
    "print(\"Noisy email count:\", noisy_email_count)\n",
    "print(\"Anonymized email count:\", anonymized_email_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text: Px(Z~fm#[n\u001eXndl.kZ^@j{lplsX(`gx\n",
      "items:\n",
      "[\n",
      "    \n",
      "]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from presidio_anonymizer import AnonymizerEngine\n",
    "\n",
    "# Initialize the anonymizer engine\n",
    "anonymizer = AnonymizerEngine()\n",
    "\n",
    "# Define the text containing PII\n",
    "text = \"My email is john.doe@example.com\"\n",
    "\n",
    "# Apply differential privacy to the PII value\n",
    "epsilon = 0.1  # Privacy parameter for differential privacy\n",
    "\n",
    "# Generate Laplace noise for each character in the email\n",
    "laplace_noise = np.random.laplace(loc=0, scale=1/epsilon, size=len(text))\n",
    "\n",
    "# Add the noise to each character in the email\n",
    "noisy_email = ''.join(chr(ord(c) + int(round(n))) for c, n in zip(text, laplace_noise))\n",
    "\n",
    "# Anonymize the noisy email using Presidio\n",
    "anonymized_text = anonymizer.anonymize(\n",
    "    noisy_email,\n",
    "    analyzer_results=[],\n",
    "    operators=\n",
    "        {\"Email\": {\"type\": \"replace\", \"value\": \"<EMAIL>\"}}\n",
    "    ,\n",
    ")\n",
    "\n",
    "# Print the anonymized text\n",
    "print(anonymized_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "========= []\n",
      "========= []\n",
      "John Doe'S Social Security Number Is 123-45-6789 And His Email Is Johndoe@Example.Com.\n",
      "[type: EMAIL_ADDRESS, start: 66, end: 85, score: 1.0, type: PERSON, start: 0, end: 10, score: 0.85, type: URL, start: 74, end: 85, score: 0.5]\n",
      "type: EMAIL_ADDRESS, start: 66, end: 85, score: 1.0\n",
      "type: PERSON, start: 0, end: 10, score: 0.85\n",
      "type: URL, start: 74, end: 85, score: 0.5\n",
      "text: -6.311321244615104 Social Security number is 123-45-6789 and his email is -11.671955800130334.\n",
      "items:\n",
      "[\n",
      "    {'start': 74, 'end': 93, 'entity_type': 'EMAIL_ADDRESS', 'text': '-11.671955800130334', 'operator': 'replace'},\n",
      "    {'start': 0, 'end': 18, 'entity_type': 'PERSON', 'text': '-6.311321244615104', 'operator': 'replace'}\n",
      "]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from presidio_analyzer import AnalyzerEngine, RecognizerRegistry\n",
    "from presidio_anonymizer import AnonymizerEngine\n",
    "\n",
    "# Initialize the anonymizer engine\n",
    "anonymizer = AnonymizerEngine()\n",
    "\n",
    "# Define the text containing PII\n",
    "text = \"John Doe's Social Security number is 123-45-6789 and his email is johndoe@example.com.\"\n",
    "\n",
    "# Apply differential privacy to the PII value\n",
    "epsilon = 0.1  # Privacy parameter for differential privacy\n",
    "sensitivity = 2  # Sensitivity of the PII value\n",
    "delta = 1e-6  # Privacy parameter for differential privacy\n",
    "\n",
    "# Calculate the noise to be added\n",
    "def run():\n",
    "    scale = sensitivity / epsilon\n",
    "    laplace_noise = np.random.laplace(loc=0, scale=scale)\n",
    "\n",
    "# Add the noise to the PII value\n",
    "    noisy_value =laplace_noise\n",
    "    return noisy_value\n",
    "# results = PrivacyService.__analyze(text=text)\n",
    "# Anonymize the noisy value using Presidio\n",
    "\n",
    "registry = RecognizerRegistry()\n",
    "analyzer = AnalyzerEngine(registry=registry)\n",
    "registry.load_predefined_recognizers()\n",
    "\n",
    "results = analyzer.analyze(text=text, language=\"en\")\n",
    "        \n",
    "print(results)\n",
    "op={}\n",
    "for i in results:\n",
    "    print(i)\n",
    "    op[i.entity_type]=OperatorConfig(\"replace\", {\"new_value\": str(run())})\n",
    "anonymized_text = anonymizer.anonymize(\n",
    "    text,\n",
    "    analyzer_results=results,\n",
    "    operators=op\n",
    "              \n",
    "\n",
    "    ,\n",
    ")\n",
    "\n",
    "# Print the anonymized text\n",
    "print(anonymized_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            Name  Age  s                      Email\n",
      "0       John Doe   25  1       john.doe@example.com\n",
      "1     Jane Smith   30  2     jane.smith@example.com\n",
      "2  Alice Johnson   50  3  alice.johnson@example.com\n",
      "['20-30', '30-40', '40-50']\n",
      "[0, 30, 40, 50]\n",
      "            Name  Age  s   Age1\n",
      "0       John Doe   25  1  20-30\n",
      "1     Jane Smith   30  2  20-30\n",
      "2  Alice Johnson   50  3  40-50\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "# Sample dataset with PII\n",
    "data = pd.DataFrame({\n",
    "    'Name': ['John Doe', 'Jane Smith', 'Alice Johnson'],\n",
    "    'Age': [25, 30, 50],\n",
    "    \"s\":[1,2,3],\n",
    "    'Email': ['john.doe@example.com', 'jane.smith@example.com', 'alice.johnson@example.com']\n",
    "})\n",
    "print(data)\n",
    "\n",
    "# Generalization\n",
    "# Generalize age into age ranges\n",
    "data['Age1'] = pd.cut(data['Age'], bins=[0, 30, 40,50], labels=['20-30', '30-40','40-50'])\n",
    "\n",
    "# Suppression\n",
    "# Suppress or remove email column\n",
    "data = data.drop('Email', axis=1)\n",
    "\n",
    "# Perturbation\n",
    "# Perturb age values by adding Laplace noise\n",
    "epsilon = 1.0  # Privacy parameter for differential privacy\n",
    "sensitivity = 1  # Sensitivity of the age values\n",
    "scale = sensitivity / epsilon\n",
    "laplace_noise = np.random.laplace(loc=0, scale=scale, size=len(data))\n",
    "# data['Age','s'] += laplace_noise\n",
    "\n",
    "# print(data['Age'])\n",
    "\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "========= []\n",
      "0         John Doe\n",
      "1       Jane Smith\n",
      "2    Alice Johnson\n",
      "Name: Name, dtype: object\n",
      "========= []\n",
      "0         John Doe\n",
      "1       Jane Smith\n",
      "2    Alice Johnson\n",
      "Name: Name, Dtype: Object\n",
      "[type: PERSON, start: 10, end: 18, score: 0.85, type: PERSON, start: 27, end: 37, score: 0.85, type: PERSON, start: 43, end: 56, score: 0.85]\n"
     ]
    }
   ],
   "source": [
    "from presidio_analyzer import AnalyzerEngine, RecognizerRegistry\n",
    "\n",
    "registry = RecognizerRegistry()\n",
    "analyzer = AnalyzerEngine(registry=registry)\n",
    "registry.load_predefined_recognizers()\n",
    "\n",
    "print(str(data[\"Name\"]))\n",
    "results = analyzer.analyze(text=str(data[\"Name\"]), language=\"en\")\n",
    "print(results)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0         John Doe\n",
      "1       Jane Smith\n",
      "2    Alice Johnson\n",
      "0    0         John Doe\\n1       Jane Smith\\n2    A...\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "s=data[\"Name\"].to_string()\n",
    "print(s)\n",
    "p=pd.Series(s)\n",
    "print(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "========= []\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "[E1041] Expected a string, Doc, or bytes as input, but got: <class 'list'>",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 18\u001b[0m line \u001b[0;36m1\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m anonymizer \u001b[39m=\u001b[39m AnonymizerEngine()\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m dataset \u001b[39m=\u001b[39m [\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m     {\u001b[39m\"\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mJohn Doe\u001b[39m\u001b[39m'\u001b[39m\u001b[39ms email is john.doe@example.com and his phone number is 555-123-4567.\u001b[39m\u001b[39m\"\u001b[39m},\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m     {\u001b[39m\"\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mAlice Smith\u001b[39m\u001b[39m'\u001b[39m\u001b[39ms social security number is 123-45-6789.\u001b[39m\u001b[39m\"\u001b[39m},\n\u001b[0;32m      <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m ]\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m analyzed_dataset \u001b[39m=\u001b[39m analyzer\u001b[39m.\u001b[39;49manalyze(dataset,language\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39men\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m     <a 
href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m masked_dataset \u001b[39m=\u001b[39m anonymizer\u001b[39m.\u001b[39manonymize(analyzed_dataset, dataset)\n\u001b[0;32m     <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X23sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m masked_dataset:\n",
      "File \u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages\\presidio_analyzer\\analyzer_engine.py:189\u001b[0m, in \u001b[0;36mAnalyzerEngine.analyze\u001b[1;34m(self, text, language, entities, correlation_id, score_threshold, return_decision_process, ad_hoc_recognizers, context, allow_list, nlp_artifacts)\u001b[0m\n\u001b[0;32m    186\u001b[0m \u001b[39m# run the nlp pipeline over the given text, store the results in\u001b[39;00m\n\u001b[0;32m    187\u001b[0m \u001b[39m# a NlpArtifacts instance\u001b[39;00m\n\u001b[0;32m    188\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m nlp_artifacts:\n\u001b[1;32m--> 189\u001b[0m     nlp_artifacts \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnlp_engine\u001b[39m.\u001b[39;49mprocess_text(text, language)\n\u001b[0;32m    191\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlog_decision_process:\n\u001b[0;32m    192\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapp_tracer\u001b[39m.\u001b[39mtrace(\n\u001b[0;32m    193\u001b[0m         correlation_id, \u001b[39m\"\u001b[39m\u001b[39mnlp artifacts:\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m+\u001b[39m nlp_artifacts\u001b[39m.\u001b[39mto_json()\n\u001b[0;32m    194\u001b[0m     )\n",
      "File \u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages\\presidio_analyzer\\nlp_engine\\client_nlp_engine.py:57\u001b[0m, in \u001b[0;36mClientNlpEngine.process_text\u001b[1;34m(self, text, language)\u001b[0m\n\u001b[0;32m     54\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mprocess_text\u001b[39m(\u001b[39mself\u001b[39m, text: \u001b[39mstr\u001b[39m, language: \u001b[39mstr\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m NlpArtifacts:\n\u001b[0;32m     55\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"Execute the SpaCy NLP pipeline on the given text and language.\"\"\"\u001b[39;00m\n\u001b[1;32m---> 57\u001b[0m     doc \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnlp[language](text)\n\u001b[0;32m     58\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_doc_to_nlp_artifact(doc, language)\n",
      "File \u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages\\spacy\\language.py:1007\u001b[0m, in \u001b[0;36mLanguage.__call__\u001b[1;34m(self, text, disable, component_cfg)\u001b[0m\n\u001b[0;32m    986\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\n\u001b[0;32m    987\u001b[0m     \u001b[39mself\u001b[39m,\n\u001b[0;32m    988\u001b[0m     text: Union[\u001b[39mstr\u001b[39m, Doc],\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    991\u001b[0m     component_cfg: Optional[Dict[\u001b[39mstr\u001b[39m, Dict[\u001b[39mstr\u001b[39m, Any]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m    992\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Doc:\n\u001b[0;32m    993\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"Apply the pipeline to some text. The text can span multiple sentences,\u001b[39;00m\n\u001b[0;32m    994\u001b[0m \u001b[39m    and can contain arbitrary whitespace. Alignment into the original string\u001b[39;00m\n\u001b[0;32m    995\u001b[0m \u001b[39m    is preserved.\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   1005\u001b[0m \u001b[39m    DOCS: https://spacy.io/api/language#call\u001b[39;00m\n\u001b[0;32m   1006\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[1;32m-> 1007\u001b[0m     doc \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_ensure_doc(text)\n\u001b[0;32m   1008\u001b[0m     \u001b[39mif\u001b[39;00m component_cfg \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m   1009\u001b[0m         component_cfg \u001b[39m=\u001b[39m {}\n",
      "File \u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages\\spacy\\language.py:1101\u001b[0m, in \u001b[0;36mLanguage._ensure_doc\u001b[1;34m(self, doc_like)\u001b[0m\n\u001b[0;32m   1099\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(doc_like, \u001b[39mbytes\u001b[39m):\n\u001b[0;32m   1100\u001b[0m     \u001b[39mreturn\u001b[39;00m Doc(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvocab)\u001b[39m.\u001b[39mfrom_bytes(doc_like)\n\u001b[1;32m-> 1101\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(Errors\u001b[39m.\u001b[39mE1041\u001b[39m.\u001b[39mformat(\u001b[39mtype\u001b[39m\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m(doc_like)))\n",
      "\u001b[1;31mValueError\u001b[0m: [E1041] Expected a string, Doc, or bytes as input, but got: <class 'list'>"
     ]
    }
   ],
   "source": [
    "from presidio_analyzer import AnalyzerEngine\n",
    "from presidio_anonymizer import AnonymizerEngine\n",
    "\n",
    "analyzer = AnalyzerEngine()\n",
    "anonymizer = AnonymizerEngine()\n",
    "dataset = [\n",
    "    {\"text\": \"John Doe's email is john.doe@example.com and his phone number is 555-123-4567.\"},\n",
    "    {\"text\": \"Alice Smith's social security number is 123-45-6789.\"},\n",
    "]\n",
    "analyzed_dataset = analyzer.analyze(dataset,language='en')\n",
    "masked_dataset = anonymizer.anonymize(analyzed_dataset, dataset)\n",
    "for item in masked_dataset:\n",
    "    print(item[\"text\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "off\n",
    "hashyfy\n",
    "diffrential_pryivacy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x\n",
      "x\n"
     ]
    }
   ],
   "source": [
    "class A:\n",
    "    def x():\n",
    "        print(\"x\")\n",
    "        return \"x\"\n",
    "\n",
    "    def y():\n",
    "        return \"y\"\n",
    "\n",
    "# def fun(s):\n",
    "    \n",
    "#     print(s())\n",
    "    \n",
    "# fun(\"x\")\n",
    "s=getattr(A,\"x\")\n",
    "# s=globals()[\"x\"]\n",
    "print(s())\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://infyartifactory.ad.infosys.com/artifactory/api/pypi/pypi-remote/simple, https://infyartifactory.ad.infosys.com/artifactory/api/pypi/pypi-remote/simple"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip is available: 23.0.1 -> 23.3.1\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Collecting diffprivlib\n",
      "  Downloading https://infyartifactory.ad.infosys.com/artifactory/api/pypi/pypi-remote/packages/packages/a9/10/200015b77240c50f6f438e2b9e54a7179fdbf56f6ca9f40a11d90fd2c8f9/diffprivlib-0.6.3-py3-none-any.whl (176 kB)\n",
      "     ---------------------------------------- 0.0/176.0 kB ? eta -:--:--\n",
      "     -------------------------------------  174.1/176.0 kB 5.1 MB/s eta 0:00:01\n",
      "     -------------------------------------  174.1/176.0 kB 5.1 MB/s eta 0:00:01\n",
      "     -------------------------------------- 176.0/176.0 kB 1.8 MB/s eta 0:00:00\n",
      "Requirement already satisfied: scikit-learn>=0.24.2 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from diffprivlib) (1.3.2)\n",
      "Requirement already satisfied: joblib>=0.16.0 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from diffprivlib) (1.3.2)\n",
      "Requirement already satisfied: numpy>=1.21.6 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from diffprivlib) (1.26.2)\n",
      "Requirement already satisfied: scipy>=1.7.3 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from diffprivlib) (1.11.4)\n",
      "Requirement already satisfied: setuptools>=49.0.0 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from diffprivlib) (65.5.0)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\work\\git\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\myenv\\lib\\site-packages (from scikit-learn>=0.24.2->diffprivlib) (3.2.0)\n",
      "Installing collected packages: diffprivlib\n",
      "Successfully installed diffprivlib-0.6.3\n"
     ]
    }
   ],
   "source": [
    "!pip install diffprivlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from diffprivlib.mechanisms import binary\n",
    "import pandas as pd\n",
    "df=pd.read_csv(r\"C:\\WORK\\GIT\\responsible-ai-admin\\responsible-ai-admin\\src\\rai_admin\\temp\\emplist.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Employee_ID Gender  Age  Education_Level Relationship_Status     Hometown  \\\n",
      "0   EID_22713      F   32                5              Single  Springfield   \n",
      "1    EID_9658      M   65                2              Single      Lebanon   \n",
      "2   EID_22203      M   52                3             Married  Springfield   \n",
      "3    EID_7652      M   50                5              Single   Washington   \n",
      "4    EID_6516      F   44                3             Married     Franklin   \n",
      "5   EID_20283      F   22                4             Married     Franklin   \n",
      "6   EID_21014      M   42                3             Married   Washington   \n",
      "7    EID_7693      F   41                2             Married  Springfield   \n",
      "8   EID_13232      M   31                1              Single  Springfield   \n",
      "\n",
      "         Unit Decision_skill_possess  Time_of_service  Time_since_promotion  \\\n",
      "0         R&D             Conceptual                7                     4   \n",
      "1          IT              Directive               41                     2   \n",
      "2       Sales              Directive               21                     3   \n",
      "3   Marketing             Analytical               11                     4   \n",
      "4         R&D             Conceptual               12                     4   \n",
      "5          IT             Behavioral                3                     1   \n",
      "6  Purchasing             Analytical                6                     4   \n",
      "7       Sales             Conceptual                4                     4   \n",
      "8          IT             Analytical                7                     3   \n",
      "\n",
      "   growth_rate  Travel_Rate  Post_Level  Pay_Scale Compensation_and_Benefits  \\\n",
      "0           30            1           5          4                     type2   \n",
      "1           72            1           1          1                     type2   \n",
      "2           25            0           1          8                     type3   \n",
      "3           28            1           1          2                     type0   \n",
      "4           47            1           3          2                     type2   \n",
      "5           53            0           3          6                     type2   \n",
      "6           35            1           3          4                     type2   \n",
      "7           35            1           4          8                     type2   \n",
      "8           73            2           3          8                     type2   \n",
      "\n",
      "   Work_Life_balance  \n",
      "0                  1  \n",
      "1                  1  \n",
      "2                  1  \n",
      "3                  4  \n",
      "4                  4  \n",
      "5                  1  \n",
      "6                  1  \n",
      "7                  1  \n",
      "8                  3  \n"
     ]
    }
   ],
   "source": [
    "# Rich display of a bounded preview instead of printing the whole frame as text.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'M'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Single-value demo: a Binary mechanism over the labels \"F\" and \"M\" with\n",
    "# epsilon=0.1, applied to one input.\n",
    "gender_mechanism = binary.Binary(\n",
    "    epsilon=0.1,\n",
    "    value0=\"F\",\n",
    "    value1=\"M\",\n",
    "    random_state=None,\n",
    ")\n",
    "gender_mechanism.randomise(\"F\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Gender', 'Relationship_Status']\n"
     ]
    }
   ],
   "source": [
    "# Columns holding exactly two distinct values.\n",
    "binaryList = [name for name in df.columns if len(df[name].unique()) == 2]\n",
    "print(binaryList)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary.randomise accepts one string at a time (passing the whole Series\n",
    "# raises TypeError: \"Value to be randomised must be a string\"), so apply it\n",
    "# element-wise.\n",
    "mechanism = binary.Binary(epsilon=1.0, value0=\"F\", value1=\"M\")\n",
    "df[\"Gender\"] = df[\"Gender\"].map(mechanism.randomise)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['F', 'M'], dtype=object)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the distinct Gender labels present in the frame.\n",
    "df[\"Gender\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "def binaryCheck(df, col, epsilon=1.0):\n",
    "    \"\"\"Randomise a two-valued column of ``df`` in place.\n",
    "\n",
    "    Builds a diffprivlib ``Binary`` mechanism over the column's two distinct\n",
    "    values and replaces every entry with its randomised counterpart.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame to modify; ``df[col]`` is overwritten.\n",
    "    col : str\n",
    "        Name of a column holding exactly two distinct values. The installed\n",
    "        ``Binary`` mechanism rejects non-string values with ``TypeError``.\n",
    "    epsilon : float, default 1.0\n",
    "        Privacy parameter passed to ``Binary``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the column does not contain exactly two distinct values.\n",
    "    \"\"\"\n",
    "    labels = list(df[col].unique())\n",
    "    if len(labels) != 2:\n",
    "        raise ValueError(\n",
    "            f\"Column {col!r} must have exactly 2 distinct values, found {len(labels)}\"\n",
    "        )\n",
    "    mechanism = binary.Binary(epsilon=epsilon, value0=labels[0], value1=labels[1])\n",
    "    # Map element-wise instead of looping over positions with df.loc, so the\n",
    "    # frame does not need a default 0..n-1 index.\n",
    "    df[col] = df[col].map(mechanism.randomise)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Randomise the Gender column of df in place (binaryCheck returns nothing).\n",
    "binaryCheck(df,\"Gender\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Employee_ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education_Level</th>\n",
       "      <th>Relationship_Status</th>\n",
       "      <th>Hometown</th>\n",
       "      <th>Unit</th>\n",
       "      <th>Decision_skill_possess</th>\n",
       "      <th>Time_of_service</th>\n",
       "      <th>Time_since_promotion</th>\n",
       "      <th>growth_rate</th>\n",
       "      <th>Travel_Rate</th>\n",
       "      <th>Post_Level</th>\n",
       "      <th>Pay_Scale</th>\n",
       "      <th>Compensation_and_Benefits</th>\n",
       "      <th>Work_Life_balance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>EID_22713</td>\n",
       "      <td>F</td>\n",
       "      <td>32</td>\n",
       "      <td>5</td>\n",
       "      <td>Single</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>R&amp;D</td>\n",
       "      <td>Conceptual</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>30</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EID_9658</td>\n",
       "      <td>M</td>\n",
       "      <td>65</td>\n",
       "      <td>2</td>\n",
       "      <td>Single</td>\n",
       "      <td>Lebanon</td>\n",
       "      <td>IT</td>\n",
       "      <td>Directive</td>\n",
       "      <td>41</td>\n",
       "      <td>2</td>\n",
       "      <td>72</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>EID_22203</td>\n",
       "      <td>M</td>\n",
       "      <td>52</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>Sales</td>\n",
       "      <td>Directive</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>type3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>EID_7652</td>\n",
       "      <td>M</td>\n",
       "      <td>50</td>\n",
       "      <td>5</td>\n",
       "      <td>Single</td>\n",
       "      <td>Washington</td>\n",
       "      <td>Marketing</td>\n",
       "      <td>Analytical</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>type0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EID_6516</td>\n",
       "      <td>F</td>\n",
       "      <td>44</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>Franklin</td>\n",
       "      <td>R&amp;D</td>\n",
       "      <td>Conceptual</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>47</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>type2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>EID_20283</td>\n",
       "      <td>F</td>\n",
       "      <td>22</td>\n",
       "      <td>4</td>\n",
       "      <td>Married</td>\n",
       "      <td>Franklin</td>\n",
       "      <td>IT</td>\n",
       "      <td>Behavioral</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>53</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>EID_21014</td>\n",
       "      <td>M</td>\n",
       "      <td>42</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>Washington</td>\n",
       "      <td>Purchasing</td>\n",
       "      <td>Analytical</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>35</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>EID_7693</td>\n",
       "      <td>F</td>\n",
       "      <td>41</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>Sales</td>\n",
       "      <td>Conceptual</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>35</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>EID_13232</td>\n",
       "      <td>M</td>\n",
       "      <td>31</td>\n",
       "      <td>1</td>\n",
       "      <td>Single</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>IT</td>\n",
       "      <td>Analytical</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>type2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Employee_ID Gender  Age  Education_Level Relationship_Status     Hometown  \\\n",
       "0   EID_22713      F   32                5              Single  Springfield   \n",
       "1    EID_9658      M   65                2              Single      Lebanon   \n",
       "2   EID_22203      M   52                3             Married  Springfield   \n",
       "3    EID_7652      M   50                5              Single   Washington   \n",
       "4    EID_6516      F   44                3             Married     Franklin   \n",
       "5   EID_20283      F   22                4             Married     Franklin   \n",
       "6   EID_21014      M   42                3             Married   Washington   \n",
       "7    EID_7693      F   41                2             Married  Springfield   \n",
       "8   EID_13232      M   31                1              Single  Springfield   \n",
       "\n",
       "         Unit Decision_skill_possess  Time_of_service  Time_since_promotion  \\\n",
       "0         R&D             Conceptual                7                     4   \n",
       "1          IT              Directive               41                     2   \n",
       "2       Sales              Directive               21                     3   \n",
       "3   Marketing             Analytical               11                     4   \n",
       "4         R&D             Conceptual               12                     4   \n",
       "5          IT             Behavioral                3                     1   \n",
       "6  Purchasing             Analytical                6                     4   \n",
       "7       Sales             Conceptual                4                     4   \n",
       "8          IT             Analytical                7                     3   \n",
       "\n",
       "   growth_rate  Travel_Rate  Post_Level  Pay_Scale Compensation_and_Benefits  \\\n",
       "0           30            1           5          4                     type2   \n",
       "1           72            1           1          1                     type2   \n",
       "2           25            0           1          8                     type3   \n",
       "3           28            1           1          2                     type0   \n",
       "4           47            1           3          2                     type2   \n",
       "5           53            0           3          6                     type2   \n",
       "6           35            1           3          4                     type2   \n",
       "7           35            1           4          8                     type2   \n",
       "8           73            2           3          8                     type2   \n",
       "\n",
       "   Work_Life_balance  \n",
       "0                  1  \n",
       "1                  1  \n",
       "2                  1  \n",
       "3                  4  \n",
       "4                  4  \n",
       "5                  1  \n",
       "6                  1  \n",
       "7                  1  \n",
       "8                  3  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): the stored output carries execution count 2 and its Gender\n",
    "# column matches the values printed earlier, so it was presumably produced in\n",
    "# a fresh kernel right after the CSV load, before any randomisation; re-run\n",
    "# in order to inspect the randomised column.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smallest and largest value observed in the Age column.\n",
    "minv = df[\"Age\"].min()\n",
    "maxv = df[\"Age\"].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "70 20\n"
     ]
    }
   ],
   "source": [
    "import math\n",
    "\n",
    "# Snap the observed bounds outward to multiples of `base` so that every value\n",
    "# lies inside [minrange, maxrange].\n",
    "base = 10\n",
    "maxrange = math.ceil(maxv / base) * base\n",
    "# Floor, not round: rounding to nearest can land above minv (e.g. 26 -> 30)\n",
    "# and leave the smallest values outside the first bin.\n",
    "minrange = math.floor(minv / base) * base\n",
    "\n",
    "print(maxrange, minrange)\n",
    "diff = maxrange - minrange"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "50\n",
      "4\n",
      "['20.0-30.0', '30.0-40.0', '40.0-50.0', '50.0-60.0', '60.0-70']\n",
      "[20.0, 30.0, 40.0, 50.0, 60.0, 70]\n"
     ]
    }
   ],
   "source": [
    "# Split [minrange, maxrange] into equal-width bins for pd.cut.\n",
    "range_magnitude = abs(maxrange - minrange)\n",
    "# Roughly one bin per 10 units, never fewer than one bin; int() keeps the\n",
    "# count usable by range() even if the bounds are floats.\n",
    "num_ranges = max(int(range_magnitude // 10), 1)\n",
    "interval = range_magnitude / num_ranges\n",
    "\n",
    "# Bin edges in ascending order. The last edge is pinned to maxrange and kept a\n",
    "# float like the others, so every label is formatted the same way\n",
    "# (\"60.0-70.0\" rather than \"60.0-70\").\n",
    "binlist = [minrange + i * interval for i in range(num_ranges)]\n",
    "binlist.append(float(maxrange))\n",
    "# One \"start-end\" label per pair of adjacent edges.\n",
    "lablelist = [f\"{start}-{end}\" for start, end in zip(binlist, binlist[1:])]\n",
    "\n",
    "print(lablelist)\n",
    "print(binlist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['20.0-30.0', '30.0-40.0', '40.0-50.0', '50.0-60.0', '60.0-70.0']\n",
      "[20.0, 30.0, 40.0, 50.0, 60.0, 70.0]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Employee_ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education_Level</th>\n",
       "      <th>Relationship_Status</th>\n",
       "      <th>Hometown</th>\n",
       "      <th>Unit</th>\n",
       "      <th>Decision_skill_possess</th>\n",
       "      <th>Time_of_service</th>\n",
       "      <th>Time_since_promotion</th>\n",
       "      <th>growth_rate</th>\n",
       "      <th>Travel_Rate</th>\n",
       "      <th>Post_Level</th>\n",
       "      <th>Pay_Scale</th>\n",
       "      <th>Compensation_and_Benefits</th>\n",
       "      <th>Work_Life_balance</th>\n",
       "      <th>Age1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>EID_22713</td>\n",
       "      <td>F</td>\n",
       "      <td>32</td>\n",
       "      <td>5</td>\n",
       "      <td>Single</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>R&amp;D</td>\n",
       "      <td>Conceptual</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>30</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "      <td>30.0-40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EID_9658</td>\n",
       "      <td>M</td>\n",
       "      <td>65</td>\n",
       "      <td>2</td>\n",
       "      <td>Single</td>\n",
       "      <td>Lebanon</td>\n",
       "      <td>IT</td>\n",
       "      <td>Directive</td>\n",
       "      <td>41</td>\n",
       "      <td>2</td>\n",
       "      <td>72</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "      <td>60.0-70.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>EID_22203</td>\n",
       "      <td>M</td>\n",
       "      <td>52</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>Sales</td>\n",
       "      <td>Directive</td>\n",
       "      <td>21</td>\n",
       "      <td>3</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>type3</td>\n",
       "      <td>1</td>\n",
       "      <td>50.0-60.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>EID_7652</td>\n",
       "      <td>M</td>\n",
       "      <td>50</td>\n",
       "      <td>5</td>\n",
       "      <td>Single</td>\n",
       "      <td>Washington</td>\n",
       "      <td>Marketing</td>\n",
       "      <td>Analytical</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>type0</td>\n",
       "      <td>4</td>\n",
       "      <td>40.0-50.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>EID_6516</td>\n",
       "      <td>F</td>\n",
       "      <td>44</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>Franklin</td>\n",
       "      <td>R&amp;D</td>\n",
       "      <td>Conceptual</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>47</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>type2</td>\n",
       "      <td>4</td>\n",
       "      <td>40.0-50.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>EID_20283</td>\n",
       "      <td>F</td>\n",
       "      <td>22</td>\n",
       "      <td>4</td>\n",
       "      <td>Married</td>\n",
       "      <td>Franklin</td>\n",
       "      <td>IT</td>\n",
       "      <td>Behavioral</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>53</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "      <td>20.0-30.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>EID_21014</td>\n",
       "      <td>M</td>\n",
       "      <td>42</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>Washington</td>\n",
       "      <td>Purchasing</td>\n",
       "      <td>Analytical</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>35</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "      <td>40.0-50.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>EID_7693</td>\n",
       "      <td>F</td>\n",
       "      <td>41</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>Sales</td>\n",
       "      <td>Conceptual</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>35</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>type2</td>\n",
       "      <td>1</td>\n",
       "      <td>40.0-50.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>EID_13232</td>\n",
       "      <td>M</td>\n",
       "      <td>31</td>\n",
       "      <td>1</td>\n",
       "      <td>Single</td>\n",
       "      <td>Springfield</td>\n",
       "      <td>IT</td>\n",
       "      <td>Analytical</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>type2</td>\n",
       "      <td>3</td>\n",
       "      <td>30.0-40.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Employee_ID Gender  Age  Education_Level Relationship_Status     Hometown  \\\n",
       "0   EID_22713      F   32                5              Single  Springfield   \n",
       "1    EID_9658      M   65                2              Single      Lebanon   \n",
       "2   EID_22203      M   52                3             Married  Springfield   \n",
       "3    EID_7652      M   50                5              Single   Washington   \n",
       "4    EID_6516      F   44                3             Married     Franklin   \n",
       "5   EID_20283      F   22                4             Married     Franklin   \n",
       "6   EID_21014      M   42                3             Married   Washington   \n",
       "7    EID_7693      F   41                2             Married  Springfield   \n",
       "8   EID_13232      M   31                1              Single  Springfield   \n",
       "\n",
       "         Unit Decision_skill_possess  Time_of_service  Time_since_promotion  \\\n",
       "0         R&D             Conceptual                7                     4   \n",
       "1          IT              Directive               41                     2   \n",
       "2       Sales              Directive               21                     3   \n",
       "3   Marketing             Analytical               11                     4   \n",
       "4         R&D             Conceptual               12                     4   \n",
       "5          IT             Behavioral                3                     1   \n",
       "6  Purchasing             Analytical                6                     4   \n",
       "7       Sales             Conceptual                4                     4   \n",
       "8          IT             Analytical                7                     3   \n",
       "\n",
       "   growth_rate  Travel_Rate  Post_Level  Pay_Scale Compensation_and_Benefits  \\\n",
       "0           30            1           5          4                     type2   \n",
       "1           72            1           1          1                     type2   \n",
       "2           25            0           1          8                     type3   \n",
       "3           28            1           1          2                     type0   \n",
       "4           47            1           3          2                     type2   \n",
       "5           53            0           3          6                     type2   \n",
       "6           35            1           3          4                     type2   \n",
       "7           35            1           4          8                     type2   \n",
       "8           73            2           3          8                     type2   \n",
       "\n",
       "   Work_Life_balance       Age1  \n",
       "0                  1  30.0-40.0  \n",
       "1                  1  60.0-70.0  \n",
       "2                  1  50.0-60.0  \n",
       "3                  4  40.0-50.0  \n",
       "4                  4  40.0-50.0  \n",
       "5                  1  20.0-30.0  \n",
       "6                  1  40.0-50.0  \n",
       "7                  1  40.0-50.0  \n",
       "8                  3  30.0-40.0  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Age1'] = pd.cut(df['Age'], bins=binlist, labels=lablelist)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'numpy.int64' object is not callable",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mc:\\WORK\\GIT\\cpy1\\responsible-ai-privacy\\responsible-ai-privacy\\src\\test.ipynb Cell 35\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/WORK/GIT/cpy1/responsible-ai-privacy/responsible-ai-privacy/src/test.ipynb#X50sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mmax\u001b[39;49m(\u001b[39m1\u001b[39;49m,\u001b[39m2\u001b[39;49m)\n",
      "\u001b[1;31mTypeError\u001b[0m: 'numpy.int64' object is not callable"
     ]
    }
   ],
   "source": [
    "max(1,2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "myenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}