ibnummuhammad committed on
Commit
423984a
1 Parent(s): 90e79c8

Add 'Logistic Regression Model Fitting'

Browse files
Files changed (1) hide show
  1. forecasting_logistic_regression.ipynb +637 -7
forecasting_logistic_regression.ipynb CHANGED
@@ -1550,11 +1550,30 @@
1550
  "metadata": {},
1551
  "outputs": [],
1552
  "source": [
1553
- "cols=['euribor3m', 'job_blue-collar', 'job_housemaid', 'marital_unknown', 'education_illiterate', 'default_no', 'default_unknown', \n",
1554
- " 'contact_cellular', 'contact_telephone', 'month_apr', 'month_aug', 'month_dec', 'month_jul', 'month_jun', 'month_mar', \n",
1555
- " 'month_may', 'month_nov', 'month_oct', \"poutcome_failure\", \"poutcome_success\"] \n",
1556
- "X=os_data_X[cols]\n",
1557
- "y=os_data_y['y']"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1558
  ]
1559
  },
1560
  {
@@ -1609,11 +1628,622 @@
1609
  ],
1610
  "source": [
1611
  "import statsmodels.api as sm\n",
1612
- "logit_model=sm.Logit(y,X.astype(float))\n",
1613
- "result=logit_model.fit()\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
  "print(result.summary2())"
1615
  ]
1616
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1617
  {
1618
  "cell_type": "code",
1619
  "execution_count": null,
 
1550
  "metadata": {},
1551
  "outputs": [],
1552
  "source": [
1553
+ "cols = [\n",
1554
+ " \"euribor3m\",\n",
1555
+ " \"job_blue-collar\",\n",
1556
+ " \"job_housemaid\",\n",
1557
+ " \"marital_unknown\",\n",
1558
+ " \"education_illiterate\",\n",
1559
+ " \"default_no\",\n",
1560
+ " \"default_unknown\",\n",
1561
+ " \"contact_cellular\",\n",
1562
+ " \"contact_telephone\",\n",
1563
+ " \"month_apr\",\n",
1564
+ " \"month_aug\",\n",
1565
+ " \"month_dec\",\n",
1566
+ " \"month_jul\",\n",
1567
+ " \"month_jun\",\n",
1568
+ " \"month_mar\",\n",
1569
+ " \"month_may\",\n",
1570
+ " \"month_nov\",\n",
1571
+ " \"month_oct\",\n",
1572
+ " \"poutcome_failure\",\n",
1573
+ " \"poutcome_success\",\n",
1574
+ "]\n",
1575
+ "X = os_data_X[cols]\n",
1576
+ "y = os_data_y[\"y\"]"
1577
  ]
1578
  },
1579
  {
 
1628
  ],
1629
  "source": [
1630
  "import statsmodels.api as sm\n",
1631
+ "\n",
1632
+ "logit_model = sm.Logit(y, X.astype(float))\n",
1633
+ "result = logit_model.fit()\n",
1634
+ "print(result.summary2())"
1635
+ ]
1636
+ },
1637
+ {
1638
+ "cell_type": "code",
1639
+ "execution_count": 29,
1640
+ "metadata": {},
1641
+ "outputs": [
1642
+ {
1643
+ "name": "stdout",
1644
+ "output_type": "stream",
1645
+ "text": [
1646
+ "Optimization terminated successfully.\n",
1647
+ " Current function value: 0.452566\n",
1648
+ " Iterations 7\n",
1649
+ " Results: Logit\n",
1650
+ "=====================================================================\n",
1651
+ "Model: Logit Method: MLE \n",
1652
+ "Dependent Variable: y Pseudo R-squared: 0.347 \n",
1653
+ "Date: 2024-04-06 21:12 AIC: 46314.9963\n",
1654
+ "No. Observations: 51134 BIC: 46456.4716\n",
1655
+ "Df Model: 15 Log-Likelihood: -23141. \n",
1656
+ "Df Residuals: 51118 LL-Null: -35443. \n",
1657
+ "Converged: 1.0000 LLR p-value: 0.0000 \n",
1658
+ "No. Iterations: 7.0000 Scale: 1.0000 \n",
1659
+ "---------------------------------------------------------------------\n",
1660
+ " Coef. Std.Err. z P>|z| [0.025 0.975]\n",
1661
+ "---------------------------------------------------------------------\n",
1662
+ "euribor3m -0.7705 0.0079 -97.4685 0.0000 -0.7860 -0.7550\n",
1663
+ "job_blue-collar 0.3321 0.0270 12.3210 0.0000 0.2793 0.3850\n",
1664
+ "job_housemaid 0.3507 0.0711 4.9350 0.0000 0.2114 0.4900\n",
1665
+ "marital_unknown 0.6770 0.2173 3.1160 0.0018 0.2512 1.1029\n",
1666
+ "education_illiterate 1.7304 0.3651 4.7396 0.0000 1.0148 2.4459\n",
1667
+ "month_apr 1.4746 0.0351 42.0361 0.0000 1.4059 1.5434\n",
1668
+ "month_aug 2.3491 0.0408 57.6216 0.0000 2.2692 2.4290\n",
1669
+ "month_dec 1.9147 0.1285 14.9013 0.0000 1.6629 2.1665\n",
1670
+ "month_jul 2.7454 0.0414 66.3519 0.0000 2.6643 2.8265\n",
1671
+ "month_jun 2.3108 0.0392 58.9984 0.0000 2.2340 2.3875\n",
1672
+ "month_mar 2.5770 0.0775 33.2690 0.0000 2.4252 2.7288\n",
1673
+ "month_may 1.1476 0.0282 40.7396 0.0000 1.0924 1.2028\n",
1674
+ "month_nov 2.3547 0.0422 55.7868 0.0000 2.2720 2.4374\n",
1675
+ "month_oct 2.9015 0.0745 38.9684 0.0000 2.7555 3.0474\n",
1676
+ "poutcome_failure -0.4338 0.0323 -13.4275 0.0000 -0.4971 -0.3705\n",
1677
+ "poutcome_success 1.1880 0.0615 19.3210 0.0000 1.0675 1.3085\n",
1678
+ "=====================================================================\n",
1679
+ "\n"
1680
+ ]
1681
+ }
1682
+ ],
1683
+ "source": [
1684
+ "cols = [\n",
1685
+ " \"euribor3m\",\n",
1686
+ " \"job_blue-collar\",\n",
1687
+ " \"job_housemaid\",\n",
1688
+ " \"marital_unknown\",\n",
1689
+ " \"education_illiterate\",\n",
1690
+ " \"month_apr\",\n",
1691
+ " \"month_aug\",\n",
1692
+ " \"month_dec\",\n",
1693
+ " \"month_jul\",\n",
1694
+ " \"month_jun\",\n",
1695
+ " \"month_mar\",\n",
1696
+ " \"month_may\",\n",
1697
+ " \"month_nov\",\n",
1698
+ " \"month_oct\",\n",
1699
+ " \"poutcome_failure\",\n",
1700
+ " \"poutcome_success\",\n",
1701
+ "]\n",
1702
+ "X = os_data_X[cols]\n",
1703
+ "y = os_data_y[\"y\"]\n",
1704
+ "\n",
1705
+ "logit_model = sm.Logit(y, X.astype(float))\n",
1706
+ "result = logit_model.fit()\n",
1707
  "print(result.summary2())"
1708
  ]
1709
  },
1710
+ {
1711
+ "cell_type": "code",
1712
+ "execution_count": 32,
1713
+ "metadata": {},
1714
+ "outputs": [
1715
+ {
1716
+ "data": {
1717
+ "text/html": [
1718
+ "<style>#sk-container-id-2 {\n",
1719
+ " /* Definition of color scheme common for light and dark mode */\n",
1720
+ " --sklearn-color-text: black;\n",
1721
+ " --sklearn-color-line: gray;\n",
1722
+ " /* Definition of color scheme for unfitted estimators */\n",
1723
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
1724
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
1725
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
1726
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
1727
+ " /* Definition of color scheme for fitted estimators */\n",
1728
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
1729
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
1730
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
1731
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
1732
+ "\n",
1733
+ " /* Specific color for light theme */\n",
1734
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
1735
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
1736
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
1737
+ " --sklearn-color-icon: #696969;\n",
1738
+ "\n",
1739
+ " @media (prefers-color-scheme: dark) {\n",
1740
+ " /* Redefinition of color scheme for dark theme */\n",
1741
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
1742
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
1743
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
1744
+ " --sklearn-color-icon: #878787;\n",
1745
+ " }\n",
1746
+ "}\n",
1747
+ "\n",
1748
+ "#sk-container-id-2 {\n",
1749
+ " color: var(--sklearn-color-text);\n",
1750
+ "}\n",
1751
+ "\n",
1752
+ "#sk-container-id-2 pre {\n",
1753
+ " padding: 0;\n",
1754
+ "}\n",
1755
+ "\n",
1756
+ "#sk-container-id-2 input.sk-hidden--visually {\n",
1757
+ " border: 0;\n",
1758
+ " clip: rect(1px 1px 1px 1px);\n",
1759
+ " clip: rect(1px, 1px, 1px, 1px);\n",
1760
+ " height: 1px;\n",
1761
+ " margin: -1px;\n",
1762
+ " overflow: hidden;\n",
1763
+ " padding: 0;\n",
1764
+ " position: absolute;\n",
1765
+ " width: 1px;\n",
1766
+ "}\n",
1767
+ "\n",
1768
+ "#sk-container-id-2 div.sk-dashed-wrapped {\n",
1769
+ " border: 1px dashed var(--sklearn-color-line);\n",
1770
+ " margin: 0 0.4em 0.5em 0.4em;\n",
1771
+ " box-sizing: border-box;\n",
1772
+ " padding-bottom: 0.4em;\n",
1773
+ " background-color: var(--sklearn-color-background);\n",
1774
+ "}\n",
1775
+ "\n",
1776
+ "#sk-container-id-2 div.sk-container {\n",
1777
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
1778
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
1779
+ " so we also need the `!important` here to be able to override the\n",
1780
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
1781
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
1782
+ " display: inline-block !important;\n",
1783
+ " position: relative;\n",
1784
+ "}\n",
1785
+ "\n",
1786
+ "#sk-container-id-2 div.sk-text-repr-fallback {\n",
1787
+ " display: none;\n",
1788
+ "}\n",
1789
+ "\n",
1790
+ "div.sk-parallel-item,\n",
1791
+ "div.sk-serial,\n",
1792
+ "div.sk-item {\n",
1793
+ " /* draw centered vertical line to link estimators */\n",
1794
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
1795
+ " background-size: 2px 100%;\n",
1796
+ " background-repeat: no-repeat;\n",
1797
+ " background-position: center center;\n",
1798
+ "}\n",
1799
+ "\n",
1800
+ "/* Parallel-specific style estimator block */\n",
1801
+ "\n",
1802
+ "#sk-container-id-2 div.sk-parallel-item::after {\n",
1803
+ " content: \"\";\n",
1804
+ " width: 100%;\n",
1805
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
1806
+ " flex-grow: 1;\n",
1807
+ "}\n",
1808
+ "\n",
1809
+ "#sk-container-id-2 div.sk-parallel {\n",
1810
+ " display: flex;\n",
1811
+ " align-items: stretch;\n",
1812
+ " justify-content: center;\n",
1813
+ " background-color: var(--sklearn-color-background);\n",
1814
+ " position: relative;\n",
1815
+ "}\n",
1816
+ "\n",
1817
+ "#sk-container-id-2 div.sk-parallel-item {\n",
1818
+ " display: flex;\n",
1819
+ " flex-direction: column;\n",
1820
+ "}\n",
1821
+ "\n",
1822
+ "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
1823
+ " align-self: flex-end;\n",
1824
+ " width: 50%;\n",
1825
+ "}\n",
1826
+ "\n",
1827
+ "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
1828
+ " align-self: flex-start;\n",
1829
+ " width: 50%;\n",
1830
+ "}\n",
1831
+ "\n",
1832
+ "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
1833
+ " width: 0;\n",
1834
+ "}\n",
1835
+ "\n",
1836
+ "/* Serial-specific style estimator block */\n",
1837
+ "\n",
1838
+ "#sk-container-id-2 div.sk-serial {\n",
1839
+ " display: flex;\n",
1840
+ " flex-direction: column;\n",
1841
+ " align-items: center;\n",
1842
+ " background-color: var(--sklearn-color-background);\n",
1843
+ " padding-right: 1em;\n",
1844
+ " padding-left: 1em;\n",
1845
+ "}\n",
1846
+ "\n",
1847
+ "\n",
1848
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
1849
+ "clickable and can be expanded/collapsed.\n",
1850
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
1851
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
1852
+ "*/\n",
1853
+ "\n",
1854
+ "/* Pipeline and ColumnTransformer style (default) */\n",
1855
+ "\n",
1856
+ "#sk-container-id-2 div.sk-toggleable {\n",
1857
+ " /* Default theme specific background. It is overwritten whether we have a\n",
1858
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
1859
+ " background-color: var(--sklearn-color-background);\n",
1860
+ "}\n",
1861
+ "\n",
1862
+ "/* Toggleable label */\n",
1863
+ "#sk-container-id-2 label.sk-toggleable__label {\n",
1864
+ " cursor: pointer;\n",
1865
+ " display: block;\n",
1866
+ " width: 100%;\n",
1867
+ " margin-bottom: 0;\n",
1868
+ " padding: 0.5em;\n",
1869
+ " box-sizing: border-box;\n",
1870
+ " text-align: center;\n",
1871
+ "}\n",
1872
+ "\n",
1873
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
1874
+ " /* Arrow on the left of the label */\n",
1875
+ " content: \"▸\";\n",
1876
+ " float: left;\n",
1877
+ " margin-right: 0.25em;\n",
1878
+ " color: var(--sklearn-color-icon);\n",
1879
+ "}\n",
1880
+ "\n",
1881
+ "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
1882
+ " color: var(--sklearn-color-text);\n",
1883
+ "}\n",
1884
+ "\n",
1885
+ "/* Toggleable content - dropdown */\n",
1886
+ "\n",
1887
+ "#sk-container-id-2 div.sk-toggleable__content {\n",
1888
+ " max-height: 0;\n",
1889
+ " max-width: 0;\n",
1890
+ " overflow: hidden;\n",
1891
+ " text-align: left;\n",
1892
+ " /* unfitted */\n",
1893
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
1894
+ "}\n",
1895
+ "\n",
1896
+ "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
1897
+ " /* fitted */\n",
1898
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
1899
+ "}\n",
1900
+ "\n",
1901
+ "#sk-container-id-2 div.sk-toggleable__content pre {\n",
1902
+ " margin: 0.2em;\n",
1903
+ " border-radius: 0.25em;\n",
1904
+ " color: var(--sklearn-color-text);\n",
1905
+ " /* unfitted */\n",
1906
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
1907
+ "}\n",
1908
+ "\n",
1909
+ "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
1910
+ " /* unfitted */\n",
1911
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
1912
+ "}\n",
1913
+ "\n",
1914
+ "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
1915
+ " /* Expand drop-down */\n",
1916
+ " max-height: 200px;\n",
1917
+ " max-width: 100%;\n",
1918
+ " overflow: auto;\n",
1919
+ "}\n",
1920
+ "\n",
1921
+ "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
1922
+ " content: \"▾\";\n",
1923
+ "}\n",
1924
+ "\n",
1925
+ "/* Pipeline/ColumnTransformer-specific style */\n",
1926
+ "\n",
1927
+ "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1928
+ " color: var(--sklearn-color-text);\n",
1929
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1930
+ "}\n",
1931
+ "\n",
1932
+ "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1933
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1934
+ "}\n",
1935
+ "\n",
1936
+ "/* Estimator-specific style */\n",
1937
+ "\n",
1938
+ "/* Colorize estimator box */\n",
1939
+ "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1940
+ " /* unfitted */\n",
1941
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1942
+ "}\n",
1943
+ "\n",
1944
+ "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
1945
+ " /* fitted */\n",
1946
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1947
+ "}\n",
1948
+ "\n",
1949
+ "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
1950
+ "#sk-container-id-2 div.sk-label label {\n",
1951
+ " /* The background is the default theme color */\n",
1952
+ " color: var(--sklearn-color-text-on-default-background);\n",
1953
+ "}\n",
1954
+ "\n",
1955
+ "/* On hover, darken the color of the background */\n",
1956
+ "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
1957
+ " color: var(--sklearn-color-text);\n",
1958
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
1959
+ "}\n",
1960
+ "\n",
1961
+ "/* Label box, darken color on hover, fitted */\n",
1962
+ "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
1963
+ " color: var(--sklearn-color-text);\n",
1964
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
1965
+ "}\n",
1966
+ "\n",
1967
+ "/* Estimator label */\n",
1968
+ "\n",
1969
+ "#sk-container-id-2 div.sk-label label {\n",
1970
+ " font-family: monospace;\n",
1971
+ " font-weight: bold;\n",
1972
+ " display: inline-block;\n",
1973
+ " line-height: 1.2em;\n",
1974
+ "}\n",
1975
+ "\n",
1976
+ "#sk-container-id-2 div.sk-label-container {\n",
1977
+ " text-align: center;\n",
1978
+ "}\n",
1979
+ "\n",
1980
+ "/* Estimator-specific */\n",
1981
+ "#sk-container-id-2 div.sk-estimator {\n",
1982
+ " font-family: monospace;\n",
1983
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
1984
+ " border-radius: 0.25em;\n",
1985
+ " box-sizing: border-box;\n",
1986
+ " margin-bottom: 0.5em;\n",
1987
+ " /* unfitted */\n",
1988
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
1989
+ "}\n",
1990
+ "\n",
1991
+ "#sk-container-id-2 div.sk-estimator.fitted {\n",
1992
+ " /* fitted */\n",
1993
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
1994
+ "}\n",
1995
+ "\n",
1996
+ "/* on hover */\n",
1997
+ "#sk-container-id-2 div.sk-estimator:hover {\n",
1998
+ " /* unfitted */\n",
1999
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
2000
+ "}\n",
2001
+ "\n",
2002
+ "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
2003
+ " /* fitted */\n",
2004
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
2005
+ "}\n",
2006
+ "\n",
2007
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
2008
+ "\n",
2009
+ "/* Common style for \"i\" and \"?\" */\n",
2010
+ "\n",
2011
+ ".sk-estimator-doc-link,\n",
2012
+ "a:link.sk-estimator-doc-link,\n",
2013
+ "a:visited.sk-estimator-doc-link {\n",
2014
+ " float: right;\n",
2015
+ " font-size: smaller;\n",
2016
+ " line-height: 1em;\n",
2017
+ " font-family: monospace;\n",
2018
+ " background-color: var(--sklearn-color-background);\n",
2019
+ " border-radius: 1em;\n",
2020
+ " height: 1em;\n",
2021
+ " width: 1em;\n",
2022
+ " text-decoration: none !important;\n",
2023
+ " margin-left: 1ex;\n",
2024
+ " /* unfitted */\n",
2025
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2026
+ " color: var(--sklearn-color-unfitted-level-1);\n",
2027
+ "}\n",
2028
+ "\n",
2029
+ ".sk-estimator-doc-link.fitted,\n",
2030
+ "a:link.sk-estimator-doc-link.fitted,\n",
2031
+ "a:visited.sk-estimator-doc-link.fitted {\n",
2032
+ " /* fitted */\n",
2033
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
2034
+ " color: var(--sklearn-color-fitted-level-1);\n",
2035
+ "}\n",
2036
+ "\n",
2037
+ "/* On hover */\n",
2038
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
2039
+ ".sk-estimator-doc-link:hover,\n",
2040
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
2041
+ ".sk-estimator-doc-link:hover {\n",
2042
+ " /* unfitted */\n",
2043
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
2044
+ " color: var(--sklearn-color-background);\n",
2045
+ " text-decoration: none;\n",
2046
+ "}\n",
2047
+ "\n",
2048
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
2049
+ ".sk-estimator-doc-link.fitted:hover,\n",
2050
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
2051
+ ".sk-estimator-doc-link.fitted:hover {\n",
2052
+ " /* fitted */\n",
2053
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
2054
+ " color: var(--sklearn-color-background);\n",
2055
+ " text-decoration: none;\n",
2056
+ "}\n",
2057
+ "\n",
2058
+ "/* Span, style for the box shown on hovering the info icon */\n",
2059
+ ".sk-estimator-doc-link span {\n",
2060
+ " display: none;\n",
2061
+ " z-index: 9999;\n",
2062
+ " position: relative;\n",
2063
+ " font-weight: normal;\n",
2064
+ " right: .2ex;\n",
2065
+ " padding: .5ex;\n",
2066
+ " margin: .5ex;\n",
2067
+ " width: min-content;\n",
2068
+ " min-width: 20ex;\n",
2069
+ " max-width: 50ex;\n",
2070
+ " color: var(--sklearn-color-text);\n",
2071
+ " box-shadow: 2pt 2pt 4pt #999;\n",
2072
+ " /* unfitted */\n",
2073
+ " background: var(--sklearn-color-unfitted-level-0);\n",
2074
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
2075
+ "}\n",
2076
+ "\n",
2077
+ ".sk-estimator-doc-link.fitted span {\n",
2078
+ " /* fitted */\n",
2079
+ " background: var(--sklearn-color-fitted-level-0);\n",
2080
+ " border: var(--sklearn-color-fitted-level-3);\n",
2081
+ "}\n",
2082
+ "\n",
2083
+ ".sk-estimator-doc-link:hover span {\n",
2084
+ " display: block;\n",
2085
+ "}\n",
2086
+ "\n",
2087
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
2088
+ "\n",
2089
+ "#sk-container-id-2 a.estimator_doc_link {\n",
2090
+ " float: right;\n",
2091
+ " font-size: 1rem;\n",
2092
+ " line-height: 1em;\n",
2093
+ " font-family: monospace;\n",
2094
+ " background-color: var(--sklearn-color-background);\n",
2095
+ " border-radius: 1rem;\n",
2096
+ " height: 1rem;\n",
2097
+ " width: 1rem;\n",
2098
+ " text-decoration: none;\n",
2099
+ " /* unfitted */\n",
2100
+ " color: var(--sklearn-color-unfitted-level-1);\n",
2101
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2102
+ "}\n",
2103
+ "\n",
2104
+ "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
2105
+ " /* fitted */\n",
2106
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
2107
+ " color: var(--sklearn-color-fitted-level-1);\n",
2108
+ "}\n",
2109
+ "\n",
2110
+ "/* On hover */\n",
2111
+ "#sk-container-id-2 a.estimator_doc_link:hover {\n",
2112
+ " /* unfitted */\n",
2113
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
2114
+ " color: var(--sklearn-color-background);\n",
2115
+ " text-decoration: none;\n",
2116
+ "}\n",
2117
+ "\n",
2118
+ "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
2119
+ " /* fitted */\n",
2120
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
2121
+ "}\n",
2122
+ "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression()</pre></div> </div></div></div></div>"
2123
+ ],
2124
+ "text/plain": [
2125
+ "LogisticRegression()"
2126
+ ]
2127
+ },
2128
+ "execution_count": 32,
2129
+ "metadata": {},
2130
+ "output_type": "execute_result"
2131
+ }
2132
+ ],
2133
+ "source": [
2134
+ "from sklearn.linear_model import LogisticRegression\n",
2135
+ "from sklearn import metrics\n",
2136
+ "\n",
2137
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n",
2138
+ "logreg = LogisticRegression()\n",
2139
+ "logreg.fit(X_train, y_train)"
2140
+ ]
2141
+ },
2142
+ {
2143
+ "cell_type": "code",
2144
+ "execution_count": 34,
2145
+ "metadata": {},
2146
+ "outputs": [
2147
+ {
2148
+ "name": "stdout",
2149
+ "output_type": "stream",
2150
+ "text": [
2151
+ "Accuracy of logistic regression classifier on test set: 0.82\n"
2152
+ ]
2153
+ }
2154
+ ],
2155
+ "source": [
2156
+ "y_pred = logreg.predict(X_test)\n",
2157
+ "print(\n",
2158
+ " \"Accuracy of logistic regression classifier on test set: {:.2f}\".format(\n",
2159
+ " logreg.score(X_test, y_test)\n",
2160
+ " )\n",
2161
+ ")"
2162
+ ]
2163
+ },
2164
+ {
2165
+ "cell_type": "code",
2166
+ "execution_count": 35,
2167
+ "metadata": {},
2168
+ "outputs": [
2169
+ {
2170
+ "name": "stdout",
2171
+ "output_type": "stream",
2172
+ "text": [
2173
+ "[[6878 788]\n",
2174
+ " [2009 5666]]\n"
2175
+ ]
2176
+ }
2177
+ ],
2178
+ "source": [
2179
+ "from sklearn.metrics import confusion_matrix\n",
2180
+ "\n",
2181
+ "cm = confusion_matrix(y_test, y_pred)  # separate name: do not rebind the imported confusion_matrix function\n",
2182
+ "print(cm)"
2183
+ ]
2184
+ },
2185
+ {
2186
+ "cell_type": "code",
2187
+ "execution_count": 36,
2188
+ "metadata": {},
2189
+ "outputs": [
2190
+ {
2191
+ "name": "stdout",
2192
+ "output_type": "stream",
2193
+ "text": [
2194
+ " precision recall f1-score support\n",
2195
+ "\n",
2196
+ " 0 0.77 0.90 0.83 7666\n",
2197
+ " 1 0.88 0.74 0.80 7675\n",
2198
+ "\n",
2199
+ " accuracy 0.82 15341\n",
2200
+ " macro avg 0.83 0.82 0.82 15341\n",
2201
+ "weighted avg 0.83 0.82 0.82 15341\n",
2202
+ "\n"
2203
+ ]
2204
+ }
2205
+ ],
2206
+ "source": [
2207
+ "from sklearn.metrics import classification_report\n",
2208
+ "\n",
2209
+ "print(classification_report(y_test, y_pred))"
2210
+ ]
2211
+ },
2212
+ {
2213
+ "cell_type": "code",
2214
+ "execution_count": 37,
2215
+ "metadata": {},
2216
+ "outputs": [
2217
+ {
2218
+ "data": {
2219
+ "image/png": "",
2220
+ "text/plain": [
2221
+ "<Figure size 640x480 with 1 Axes>"
2222
+ ]
2223
+ },
2224
+ "metadata": {},
2225
+ "output_type": "display_data"
2226
+ }
2227
+ ],
2228
+ "source": [
2229
+ "from sklearn.metrics import roc_auc_score\n",
2230
+ "from sklearn.metrics import roc_curve\n",
2231
+ "\n",
2232
+ "logit_roc_auc = roc_auc_score(y_test, logreg.predict_proba(X_test)[:, 1])  # score with probabilities so the area matches the plotted curve\n",
2233
+ "fpr, tpr, thresholds = roc_curve(y_test, logreg.predict_proba(X_test)[:, 1])\n",
2234
+ "plt.figure()\n",
2235
+ "plt.plot(fpr, tpr, label=\"Logistic Regression (area = %0.2f)\" % logit_roc_auc)\n",
2236
+ "plt.plot([0, 1], [0, 1], \"r--\")\n",
2237
+ "plt.xlim([0.0, 1.0])\n",
2238
+ "plt.ylim([0.0, 1.05])\n",
2239
+ "plt.xlabel(\"False Positive Rate\")\n",
2240
+ "plt.ylabel(\"True Positive Rate\")\n",
2241
+ "plt.title(\"Receiver operating characteristic\")\n",
2242
+ "plt.legend(loc=\"lower right\")\n",
2243
+ "plt.savefig(\"Log_ROC\")\n",
2244
+ "plt.show()"
2245
+ ]
2246
+ },
2247
  {
2248
  "cell_type": "code",
2249
  "execution_count": null,