Spaces:
Running
Running
File size: 98,740 Bytes
a60f1c0 a1ed77d a60f1c0 6384d58 a1ed77d 7fa25f7 efb47b3 4d9b5c3 a74a30d a566db2 84e165d 27f90be 16ed767 b37a702 a60f1c0 b37a702 84e165d 7513911 a60f1c0 b37a702 4d8869d b37a702 4d8869d b37a702 4d8869d b37a702 08bd0cf 4d8869d b37a702 08bd0cf b37a702 a60f1c0 57cb1ac a60f1c0 87de8af a60f1c0 8ce2887 7513911 473c7a8 eb8c873 473c7a8 efb47b3 87de8af e509f96 473c7a8 e509f96 bc15b27 efb47b3 a60f1c0 b37a702 a60f1c0 f7bff1c b37a702 2af44a9 b37a702 f7bff1c b37a702 a2d2271 373b768 a2d2271 b37a702 a2d2271 16ed767 a2d2271 b37a702 a2d2271 16ed767 efb47b3 473c7a8 8ce2887 473c7a8 a60f1c0 473c7a8 eb8c873 473c7a8 eb8c873 473c7a8 eb8c873 a60f1c0 473c7a8 efb47b3 eb8c873 473c7a8 eb8c873 473c7a8 efb47b3 473c7a8 b37a702 473c7a8 66c3b07 a7058dd 66c3b07 473c7a8 66c3b07 473c7a8 66c3b07 a7058dd 473c7a8 a7058dd 66c3b07 a7058dd 66c3b07 473c7a8 8ce2887 66c3b07 8ce2887 473c7a8 a7058dd 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 eb8c873 473c7a8 76771b5 5cfd2b7 76771b5 5cfd2b7 473c7a8 5cfd2b7 76771b5 5cfd2b7 76771b5 473c7a8 76771b5 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 5cfd2b7 473c7a8 5cfd2b7 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 76771b5 a566db2 473c7a8 66c3b07 b37a702 66c3b07 473c7a8 66c3b07 473c7a8 66c3b07 473c7a8 eb8c873 473c7a8 eb8c873 a60f1c0 a2d2271 a60f1c0 eb8c873 473c7a8 66c3b07 eb8c873 473c7a8 a60f1c0 473c7a8 eb8c873 a60f1c0 eb8c873 473c7a8 66c3b07 b37a702 eb8c873 473c7a8 a60f1c0 eb8c873 b37a702 66c3b07 b37a702 66c3b07 b37a702 a2d2271 b37a702 eb8c873 473c7a8 eb8c873 66c3b07 eb8c873 efb47b3 87de8af f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 a1ed77d b37a702 a1ed77d b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 a1ed77d b37a702 a1ed77d b37a702 a1ed77d b37a702 f7bff1c a1ed77d f7bff1c a1ed77d f7bff1c a1ed77d f7bff1c b37a702 f7bff1c a1ed77d f7bff1c 08bd0cf f7bff1c a1ed77d 08bd0cf a1ed77d 08bd0cf 2af44a9 a1ed77d 08bd0cf 2af44a9 a1ed77d 08bd0cf a1ed77d b37a702 f7bff1c b37a702 f7bff1c b37a702 4d8869d b37a702 4d8869d b37a702 87de8af 473c7a8 
1ca7717 473c7a8 1ca7717 a60f1c0 1ca7717 473c7a8 1ca7717 473c7a8 1ca7717 a60f1c0 1ca7717 473c7a8 1ca7717 473c7a8 1ca7717 a60f1c0 1ca7717 473c7a8 a2d2271 87de8af b37a702 87de8af 473c7a8 e6b6548 473c7a8 e6b6548 a60f1c0 e6b6548 ce399c7 e6b6548 a60f1c0 66c3b07 57cb1ac 473c7a8 a60f1c0 e6b6548 87de8af a60f1c0 e6b6548 a60f1c0 e6b6548 87de8af e6b6548 a60f1c0 e6b6548 a60f1c0 e6b6548 87de8af e6b6548 b37a702 8ce2887 b37a702 8ce2887 e6b6548 8ce2887 e6b6548 b37a702 8ce2887 b37a702 8ce2887 e6b6548 8ce2887 e6b6548 473c7a8 8ce2887 e6b6548 8ce2887 e6b6548 a60f1c0 f7bff1c 66c3b07 a60f1c0 eb8c873 f7bff1c eb8c873 f7bff1c eb8c873 f7bff1c eb8c873 7fa25f7 f7bff1c 7fa25f7 f7bff1c 7fa25f7 f7bff1c 7fa25f7 f7bff1c 7fa25f7 f7bff1c 7fa25f7 f7bff1c a2d2271 87de8af b37a702 87de8af 473c7a8 b37a702 e81fdea 15b9748 b37a702 57cb1ac b37a702 473c7a8 b37a702 66c3b07 b37a702 66c3b07 b37a702 66c3b07 b37a702 473c7a8 b37a702 473c7a8 b37a702 473c7a8 b37a702 473c7a8 b37a702 473c7a8 b37a702 473c7a8 b37a702 473c7a8 b37a702 473c7a8 57cb1ac b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 08bd0cf b37a702 08bd0cf f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 611f47d 473c7a8 8ce2887 473c7a8 8ce2887 473c7a8 87de8af b37a702 87de8af 473c7a8 8ce2887 e81fdea 8ce2887 e81fdea a1ed77d b37a702 08bd0cf b37a702 8ce2887 08bd0cf 2af44a9 b37a702 a60f1c0 b37a702 a60f1c0 08bd0cf 2af44a9 8ce2887 08bd0cf 2af44a9 b37a702 08bd0cf 2af44a9 b37a702 08bd0cf f7bff1c b37a702 f7bff1c b37a702 08bd0cf b37a702 8d1d592 b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 f7bff1c b37a702 2af44a9 08bd0cf 2af44a9 f7bff1c 08bd0cf 2af44a9 8ef7489 2af44a9 4d8869d b37a702 2af44a9 4d8869d b37a702 2af44a9 4d8869d 08bd0cf 4d8869d 2af44a9 4d8869d 2af44a9 8ce2887 b37a702 8ce2887 b3a513b 8ce2887 2af44a9 8ce2887 b37a702 8ce2887 2af44a9 8ce2887 b37a702 8ce2887 2af44a9 8ce2887 b37a702 8ce2887 b37a702 8ce2887 2af44a9 b37a702 8ce2887 b37a702 e81fdea b37a702 8ce2887 b37a702 08bd0cf b37a702 
8ce2887 b37a702 2af44a9 08bd0cf 2af44a9 8ce2887 08bd0cf f7bff1c 2af44a9 f7bff1c e81fdea f7bff1c e81fdea f7bff1c e81fdea f7bff1c 2af44a9 08bd0cf 2af44a9 f7bff1c 08bd0cf 2af44a9 f7bff1c 08bd0cf 2af44a9 f7bff1c 08bd0cf f7bff1c 2af44a9 f7bff1c 8ce2887 e81fdea 8ce2887 e81fdea 08bd0cf e81fdea 08bd0cf e81fdea 08bd0cf e81fdea 8ce2887 f7bff1c 08bd0cf f7bff1c 2af44a9 f7bff1c 08bd0cf f7bff1c 08bd0cf f7bff1c 2af44a9 f7bff1c 08bd0cf f7bff1c 8ce2887 a2d2271 473c7a8 a7058dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 
1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 
1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 |
import os
import argparse
import logging
import pickle
import threading
import time
import warnings
from datetime import datetime, timedelta
from collections import defaultdict
import csv
# Suppress warnings for cleaner output
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning, module='umap')
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.interpolate import interp1d
import statsmodels.api as sm
import requests
import tempfile
import shutil
import xarray as xr
# NEW: Advanced ML imports
try:
import umap.umap_ as umap
UMAP_AVAILABLE = True
except ImportError:
UMAP_AVAILABLE = False
print("UMAP not available - clustering features limited")
# Optional CNN imports with robust error handling
CNN_AVAILABLE = False
try:
# Set environment variables before importing TensorFlow
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
import tensorflow as tf
from tensorflow.keras import layers, models
# Test if TensorFlow actually works
tf.config.set_visible_devices([], 'GPU') # Disable GPU to avoid conflicts
CNN_AVAILABLE = True
print("TensorFlow successfully loaded - CNN features enabled")
except Exception as e:
CNN_AVAILABLE = False
print(f"TensorFlow not available - CNN features disabled: {str(e)[:100]}...")
try:
import cdsapi
CDSAPI_AVAILABLE = True
except ImportError:
CDSAPI_AVAILABLE = False
import tropycal.tracks as tracks
# -----------------------------
# Configuration and Setup
# -----------------------------
# Root logger: INFO level, "<timestamp> - <level> - <message>" records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Remove argument parser to simplify startup
# When the SPACE_ID environment variable is present the data lives under
# /tmp/typhoon_data; otherwise the platform temp directory is used.
DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir()
# Ensure directory exists and is writable
try:
    os.makedirs(DATA_PATH, exist_ok=True)
    # Test write permissions by creating and deleting a probe file
    test_file = os.path.join(DATA_PATH, 'test_write.txt')
    with open(test_file, 'w') as f:
        f.write('test')
    os.remove(test_file)
    logging.info(f"Data directory is writable: {DATA_PATH}")
except Exception as e:
    # Any failure above (mkdir, open, write, remove) switches DATA_PATH to a
    # freshly created private temp directory.
    logging.warning(f"Data directory not writable, using temp dir: {e}")
    DATA_PATH = tempfile.mkdtemp()
    logging.info(f"Using temporary directory: {DATA_PATH}")
# Update file paths (all derived from the final DATA_PATH chosen above)
ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')
# IBTrACS settings
# Basin code -> CSV file name appended to IBTRACS_BASE_URL when downloading.
BASIN_FILES = {
    'EP': 'ibtracs.EP.list.v04r01.csv',
    'NA': 'ibtracs.NA.list.v04r01.csv',
    'WP': 'ibtracs.WP.list.v04r01.csv'
}
IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/'
LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv')
CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl')
# NOTE(review): download_ibtracs_file uses its own hard-coded 7-day freshness
# window rather than this constant - confirm which one is intended.
CACHE_EXPIRY_DAYS = 1
# -----------------------------
# ENHANCED: Color Maps and Standards with TD Support
# -----------------------------
# Enhanced color mapping with TD support (for Plotly)
# Storm category -> 'rgb(r, g, b)' string (the form used for Plotly traces).
enhanced_color_map = {
    'Unknown': 'rgb(200, 200, 200)',
    'Tropical Depression': 'rgb(128, 128, 128)', # Gray for TD
    'Tropical Storm': 'rgb(0, 0, 255)',
    'C1 Typhoon': 'rgb(0, 255, 255)',
    'C2 Typhoon': 'rgb(0, 255, 0)',
    'C3 Strong Typhoon': 'rgb(255, 255, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
    'C5 Super Typhoon': 'rgb(255, 0, 0)'
}
# Matplotlib-compatible color mapping (hex colors)
# Same categories and the same colours as enhanced_color_map, written as hex.
matplotlib_color_map = {
    'Unknown': '#C8C8C8',
    'Tropical Depression': '#808080', # Gray for TD
    'Tropical Storm': '#0000FF', # Blue
    'C1 Typhoon': '#00FFFF', # Cyan
    'C2 Typhoon': '#00FF00', # Green
    'C3 Strong Typhoon': '#FFFF00', # Yellow
    'C4 Very Strong Typhoon': '#FFA500', # Orange
    'C5 Super Typhoon': '#FF0000' # Red
}
def rgb_string_to_hex(rgb_string):
    """Convert an ``'rgb(r, g, b)'`` string to a ``'#rrggbb'`` hex colour.

    Args:
        rgb_string: Text containing exactly three integer channel values,
            e.g. ``'rgb(255, 165, 0)'``.

    Returns:
        The lowercase hex colour, or the default gray ``'#808080'`` when the
        input is not a string, does not contain exactly three numbers, or
        has a channel value above 255 (which cannot be written as two hex
        digits and would otherwise produce a malformed colour).
    """
    # Local import kept from the original: ``re`` is not imported at module level.
    import re

    default_gray = '#808080'
    try:
        # Extract numbers from 'rgb(r,g,b)' format
        numbers = re.findall(r'\d+', rgb_string)
    except TypeError:
        # Non-string input such as None.
        return default_gray
    if len(numbers) != 3:
        return default_gray
    r, g, b = map(int, numbers)
    if max(r, g, b) > 255:
        return default_gray
    return f'#{r:02x}{g:02x}{b:02x}'
def get_matplotlib_color(category):
    """Look up the hex colour for a storm category.

    Categories missing from ``matplotlib_color_map`` resolve to gray
    (``'#808080'``).
    """
    fallback_hex = '#808080'
    return matplotlib_color_map.get(category, fallback_hex)
# Cluster colors for route visualization
# Palette of 15 distinct hex colours, one per route cluster.
# The original list repeated '#85C1E9' at positions 10 and 14, which would
# make two clusters indistinguishable; the second occurrence is replaced.
CLUSTER_COLORS = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
    '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
    '#F8C471', '#82E0AA', '#F1948A', '#73C6B6', '#D2B4DE'
]
# Original color map for backward compatibility
# Category -> 'rgb(r, g, b)' string. Same entries as enhanced_color_map
# except that there is no 'Unknown' key.
color_map = {
    'C5 Super Typhoon': 'rgb(255, 0, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
    'C3 Strong Typhoon': 'rgb(255, 255, 0)',
    'C2 Typhoon': 'rgb(0, 255, 0)',
    'C1 Typhoon': 'rgb(0, 255, 255)',
    'Tropical Storm': 'rgb(0, 0, 255)',
    'Tropical Depression': 'rgb(128, 128, 128)'
}
# Category -> minimum wind speed plus display colour, listed strongest first.
# NOTE(review): thresholds look like knots (Saffir-Simpson style) - confirm
# the unit against the code that consumes 'wind_speed'.
atlantic_standard = {
    'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'},
    'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'},
    'C3 Strong Typhoon': {'wind_speed': 96, 'color': 'Yellow', 'hex': '#FFFF00'},
    'C2 Typhoon': {'wind_speed': 83, 'color': 'Green', 'hex': '#00FF00'},
    'C1 Typhoon': {'wind_speed': 64, 'color': 'Cyan', 'hex': '#00FFFF'},
    'Tropical Storm': {'wind_speed': 34, 'color': 'Blue', 'hex': '#0000FF'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}
# Category -> minimum wind speed plus display colour, listed strongest first.
# NOTE(review): thresholds look like m/s; the commonly published lower bound
# for a medium typhoon is 32.7 m/s - confirm whether 33.7 is intentional.
taiwan_standard = {
    'Strong Typhoon': {'wind_speed': 51.0, 'color': 'Red', 'hex': '#FF0000'},
    'Medium Typhoon': {'wind_speed': 33.7, 'color': 'Orange', 'hex': '#FFA500'},
    'Mild Typhoon': {'wind_speed': 17.2, 'color': 'Yellow', 'hex': '#FFFF00'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}
# -----------------------------
# Utility Functions for HF Spaces
# -----------------------------
def _remove_if_exists(path):
    """Best-effort removal of a leftover temp file; failures are only logged."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as cleanup_error:
        logging.debug(f"Could not remove temporary file {path}: {cleanup_error}")


def safe_file_write(file_path, data_frame, backup_dir=None):
    """Write a DataFrame to CSV via a temp file, with an optional backup location.

    The frame is first written to ``<file_path>.tmp`` and then moved over
    ``file_path``, so a failed write never leaves a truncated target file.

    Args:
        file_path: Destination CSV path (``str`` or path-like).
        data_frame: Object exposing ``to_csv`` and ``len`` (a pandas DataFrame).
        backup_dir: Optional directory that receives the file (same base
            name) when writing to ``file_path`` raises ``PermissionError``.

    Returns:
        True when the data was saved to ``file_path`` or to the backup
        location, False otherwise. Exceptions are logged, not raised.
    """
    file_path = os.fspath(file_path)
    temp_path = file_path + '.tmp'
    try:
        # dirname is '' for a bare file name and os.makedirs('') raises,
        # so only create the directory when there is one.
        target_dir = os.path.dirname(file_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # Try to write to a temporary file first
        data_frame.to_csv(temp_path, index=False)
        # os.replace overwrites an existing target on every platform,
        # whereas os.rename fails on Windows when the target exists.
        os.replace(temp_path, file_path)
        logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
        return True
    except PermissionError as e:
        logging.warning(f"Permission denied writing to {file_path}: {e}")
        _remove_if_exists(temp_path)
        if backup_dir:
            try:
                backup_path = os.path.join(backup_dir, os.path.basename(file_path))
                data_frame.to_csv(backup_path, index=False)
                logging.info(f"Saved to backup location: {backup_path}")
                return True
            except Exception as backup_e:
                logging.error(f"Failed to save to backup location: {backup_e}")
        return False
    except Exception as e:
        logging.error(f"Error saving file {file_path}: {e}")
        # Clean up temp file if it exists
        _remove_if_exists(temp_path)
        return False
def get_fallback_data_dir():
    """Return a directory that has been verified as writable.

    Candidate parents are tried in order: the platform temp directory,
    ``/tmp``, the user's home directory and the current working directory.
    For each, a ``typhoon_fallback`` sub-directory is created and probed by
    writing and deleting a small file.

    Returns:
        The first ``typhoon_fallback`` directory that passes the probe, or
        the current working directory (unverified) when none does.
    """
    fallback_dirs = [
        tempfile.gettempdir(),
        '/tmp',
        os.path.expanduser('~'),
        os.getcwd()
    ]
    for directory in fallback_dirs:
        try:
            test_dir = os.path.join(directory, 'typhoon_fallback')
            os.makedirs(test_dir, exist_ok=True)
            test_file = os.path.join(test_dir, 'test.txt')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            return test_dir
        except OSError:
            # Only filesystem failures mean "try the next candidate";
            # a bare except would also swallow KeyboardInterrupt/SystemExit.
            continue
    # If all else fails, use current directory
    return os.getcwd()
# -----------------------------
# ONI and Typhoon Data Functions
# -----------------------------
def download_oni_file(url, filename):
    """Fetch ``url`` and store the response body at ``filename``.

    Up to three attempts are made; after a failed attempt that is not the
    last one the function sleeps ``2 ** attempt`` seconds (1 s, then 2 s).

    Returns:
        True as soon as one attempt succeeds, False when all attempts fail.
    """
    max_retries = 3
    attempt = 0
    while attempt < max_retries:
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(response.content)
        except Exception as e:
            logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
            out_of_attempts = attempt >= max_retries - 1
            if out_of_attempts:
                logging.error(f"Failed to download ONI after {max_retries} attempts")
            else:
                time.sleep(2 ** attempt) # Exponential backoff
        else:
            return True
        attempt += 1
    return False
def convert_oni_ascii_to_csv(input_file, output_file):
    """Pivot the ONI ASCII table into a one-row-per-year CSV.

    Each data line is split on whitespace; the first field is the
    three-month season label, the second the year and the last the anomaly.
    A season is stored in the column of its first month (``JFM`` -> Jan,
    ..., ``NDJ`` -> Nov), and ``DJF`` is stored as Dec of the previous year.
    Lines with fewer than four fields or an unknown season are ignored.

    Returns:
        The result of ``safe_file_write`` on success, False when any error
        occurs (the error is logged).
    """
    season_labels = ['JFM', 'FMA', 'MAM', 'AMJ', 'MJJ', 'JJA',
                     'JAS', 'ASO', 'SON', 'OND', 'NDJ', 'DJF']
    # JFM -> 1 ... NDJ -> 11, DJF -> 12
    month_of_season = {label: number for number, label in enumerate(season_labels, start=1)}
    yearly = defaultdict(lambda: [''] * 12)
    try:
        with open(input_file, 'r') as handle:
            rows = handle.readlines()
        for raw in rows[1:]:  # first line is the header
            fields = raw.split()
            if len(fields) < 4:
                continue
            season, year, anomaly = fields[0], fields[1], fields[-1]
            if season not in month_of_season:
                continue
            if season == 'DJF':
                year = str(int(year) - 1)
            yearly[year][month_of_season[season] - 1] = anomaly
        # Write to CSV with safe write
        table = pd.DataFrame(yearly).T.reset_index()
        table.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
        table = table.sort_values('Year').reset_index(drop=True)
        return safe_file_write(output_file, table, get_fallback_data_dir())
    except Exception as e:
        logging.error(f"Error converting ONI file: {e}")
        return False
def update_oni_data():
    """Download the latest ONI table and rebuild ``ONI_DATA_PATH`` from it.

    A successful download always replaces the stored ASCII file and is
    converted to CSV. Previously the fresh download was thrown away whenever
    both files already existed, so the data could never be refreshed.

    Synthetic fallback data is written when the download fails, when an
    unexpected error occurs, or when conversion fails and no CSV exists yet.
    Nothing is returned and no exception is propagated.
    """
    url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
    temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
    input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
    output_file = ONI_DATA_PATH
    try:
        if download_oni_file(url, temp_file):
            # os.replace overwrites an existing input_file on every platform
            # (os.rename raises on Windows when the target exists).
            os.replace(temp_file, input_file)
            converted = convert_oni_ascii_to_csv(input_file, output_file)
            # Keep a previously converted CSV if this conversion failed;
            # only synthesise data when there is nothing to read at all.
            if not converted and not os.path.exists(output_file):
                logging.warning("Creating fallback ONI data")
                create_fallback_oni_data(output_file)
        else:
            # Create fallback ONI data if download fails
            logging.warning("Creating fallback ONI data")
            create_fallback_oni_data(output_file)
    except Exception as e:
        logging.error(f"Error updating ONI data: {e}")
        create_fallback_oni_data(output_file)
def create_fallback_oni_data(output_file):
    """Write a synthetic ONI table (years 2000-2025) to ``output_file``.

    Every monthly value is ``np.random.normal(0, 1) * 0.5`` formatted with
    two decimals; the table is saved through ``safe_file_write`` with the
    fallback data directory as backup location.
    """
    month_names = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
    rows = []
    for year in range(2000, 2026): # Extended to include 2025
        # One random draw per month, in calendar order
        monthly_values = [f"{np.random.normal(0, 1) * 0.5:.2f}" for _ in month_names]
        rows.append([year, *monthly_values])
    table = pd.DataFrame(rows, columns=['Year', *month_names])
    safe_file_write(output_file, table, get_fallback_data_dir())
# -----------------------------
# FIXED: IBTrACS Data Loading
# -----------------------------
def download_ibtracs_file(basin, force_download=False):
    """Return the local path of a basin's IBTrACS CSV, downloading if needed.

    Args:
        basin: Key of ``BASIN_FILES``; an unknown key raises ``KeyError``.
        force_download: When True the local copy is ignored.

    Returns:
        The local file path, or None when the download fails. A local copy
        younger than seven days is reused without contacting the server.
    """
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)
    url = IBTRACS_BASE_URL + filename
    max_age_seconds = 7 * 24 * 3600 # 7 days
    # Check if file exists and is recent (less than 7 days old)
    if not force_download and os.path.exists(local_path):
        age_seconds = time.time() - os.path.getmtime(local_path)
        if age_seconds < max_age_seconds:
            logging.info(f"Using cached {basin} basin file")
            return local_path
    try:
        logging.info(f"Downloading {basin} basin file from {url}")
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        # Ensure directory exists
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as out:
            out.write(response.content)
    except Exception as e:
        logging.error(f"Failed to download {basin} basin file: {e}")
        return None
    logging.info(f"Successfully downloaded {basin} basin file")
    return local_path
def examine_ibtracs_structure(file_path):
    """Log the first lines of an IBTrACS CSV and return its column names.

    Only the first five lines are read for logging; the previous version
    loaded the entire file into memory with ``readlines()`` for this.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        The list of column names taken from the first row, or None when the
        file cannot be read or parsed (the error is logged).
    """
    from itertools import islice

    try:
        with open(file_path, 'r') as f:
            head = list(islice(f, 5))
        # Show first 5 lines
        logging.info("First 5 lines of IBTrACS file:")
        for i, line in enumerate(head):
            logging.info(f"Line {i}: {line.strip()}")
        # The first line contains the actual column headers
        # No need to skip rows for IBTrACS v04r01
        df = pd.read_csv(file_path, nrows=5)
        logging.info(f"Columns from first row: {list(df.columns)}")
        return list(df.columns)
    except Exception as e:
        logging.error(f"Error examining IBTrACS structure: {e}")
        return None
def load_ibtracs_csv_directly(basin='WP'):
    """Load and clean one basin's IBTrACS CSV.

    The file is downloaded first when it is not present under ``DATA_PATH``.
    Cleaning steps: parse ``ISO_TIME``, coerce the numeric columns, keep only
    rows whose ``LAT``/``LON`` are present and within [-90, 90] /
    [-180, 180], and add ``BASIN``, ``NAME`` and ``SEASON`` columns when the
    file lacks them.

    Args:
        basin: Key of ``BASIN_FILES``; an unknown key raises ``KeyError``.

    Returns:
        The cleaned DataFrame, or None when the file cannot be obtained,
        lacks ``LAT``/``LON``, or any error occurs while reading it.
    """
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)
    # Download if not exists
    if not os.path.exists(local_path) and not download_ibtracs_file(basin):
        return None
    try:
        # First, examine the structure
        actual_columns = examine_ibtracs_structure(local_path)
        if not actual_columns:
            logging.error("Could not examine IBTrACS file structure")
            return None
        # Read IBTrACS CSV - DON'T skip any rows for v04r01
        # The first row contains proper column headers
        logging.info(f"Reading IBTrACS CSV file: {local_path}")
        df = pd.read_csv(local_path, low_memory=False)
        logging.info(f"Original columns: {list(df.columns)}")
        logging.info(f"Data shape before cleaning: {df.shape}")
        # LAT and LON are used unconditionally below, so they are the real
        # hard requirement. The earlier "any two of SID/ISO_TIME/LAT/LON"
        # check let such files through and failed later with a KeyError.
        if 'LAT' not in df.columns or 'LON' not in df.columns:
            logging.error(f"Missing critical columns. Available: {list(df.columns)}")
            return None
        # Clean and standardize the data with format specification
        if 'ISO_TIME' in df.columns:
            df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
        # Clean numeric columns; unparseable cells become NaN
        numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
        for col in numeric_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        # Keep rows with coordinates in reasonable ranges; between() is
        # False for NaN, so missing coordinates are dropped as well.
        in_range = df['LAT'].between(-90, 90) & df['LON'].between(-180, 180)
        # copy() so the column assignments below act on an independent frame
        # instead of a slice (avoids SettingWithCopyWarning).
        df = df[in_range].copy()
        # Add basin info if missing
        if 'BASIN' not in df.columns:
            df['BASIN'] = basin
        # Add default columns if missing
        if 'NAME' not in df.columns:
            df['NAME'] = 'UNNAMED'
        if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
            df['SEASON'] = df['ISO_TIME'].dt.year
        logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
        return df
    except Exception as e:
        logging.error(f"Error reading IBTrACS CSV file: {e}")
        return None
def load_ibtracs_data_fixed():
    """Load the WP, EP and NA basins (in that order) into a dict.

    Returns:
        A dict keyed by basin code. The value is the basin's DataFrame, or
        None when loading returned nothing, returned an empty frame, or
        raised an exception.
    """
    basins_by_priority = ('WP', 'EP', 'NA')  # WP first: primary basin for this app
    loaded = {}
    for basin in basins_by_priority:
        frame = None
        try:
            logging.info(f"Loading {basin} basin data...")
            candidate = load_ibtracs_csv_directly(basin)
            has_rows = candidate is not None and not candidate.empty
            if has_rows:
                frame = candidate
                logging.info(f"Successfully loaded {basin} basin with {len(candidate)} records")
            else:
                logging.warning(f"No data loaded for basin {basin}")
        except Exception as e:
            logging.error(f"Failed to load basin {basin}: {e}")
            frame = None
        loaded[basin] = frame
    return loaded
def load_data_fixed(oni_path, typhoon_path):
    """Load the ONI table and the typhoon track table.

    ONI: read from ``oni_path``; ``update_oni_data`` is called first when the
    file is missing and again (followed by one more read) when reading fails.
    If both reads fail an empty frame with the Year/Jan..Dec columns is
    returned.

    Typhoon: read from ``typhoon_path`` when that file exists and contains
    LAT and LON; otherwise the WP/EP/NA IBTrACS basins are loaded, combined
    and saved to ``typhoon_path``; if no basin loads, synthetic fallback
    data is used. Missing standard columns are then filled with defaults,
    types are coerced, and rows without LAT/LON are dropped.

    Returns:
        Tuple ``(oni_data, typhoon_data)``.
    """
    # Load ONI data
    # Empty placeholder returned unchanged when every read below fails.
    oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
                             'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
                             'Oct': [], 'Nov': [], 'Dec': []})
    if not os.path.exists(oni_path):
        logging.warning(f"ONI data file not found: {oni_path}")
        update_oni_data()
    try:
        oni_data = pd.read_csv(oni_path)
        logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
    except Exception as e:
        logging.error(f"Error loading ONI data: {e}")
        # One regeneration attempt, then a single retry of the read.
        update_oni_data()
        try:
            oni_data = pd.read_csv(oni_path)
        except Exception as e:
            logging.error(f"Still can't load ONI data: {e}")
    # Load typhoon data - NEW APPROACH
    typhoon_data = None
    # First, try to load from existing processed file
    if os.path.exists(typhoon_path):
        try:
            typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
            # Ensure basic columns exist and are valid
            required_cols = ['LAT', 'LON']
            if all(col in typhoon_data.columns for col in required_cols):
                if 'ISO_TIME' in typhoon_data.columns:
                    typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
                logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
            else:
                logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
                typhoon_data = None
        except Exception as e:
            logging.error(f"Error loading processed typhoon data: {e}")
            typhoon_data = None
    # If no valid processed data, load from IBTrACS
    if typhoon_data is None or typhoon_data.empty:
        logging.info("Loading typhoon data from IBTrACS...")
        ibtracs_data = load_ibtracs_data_fixed()
        # Combine all available basin data, prioritizing WP
        combined_dfs = []
        for basin in ['WP', 'EP', 'NA']:
            if basin in ibtracs_data and ibtracs_data[basin] is not None:
                df = ibtracs_data[basin].copy()
                # Overwrites any BASIN value from the file with the code of
                # the file the rows came from.
                df['BASIN'] = basin
                combined_dfs.append(df)
        if combined_dfs:
            typhoon_data = pd.concat(combined_dfs, ignore_index=True)
            # Ensure SID has proper format
            if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
                # Create SID from basin and other identifiers if missing
                # NOTE(review): the ID is built from the row index, so every
                # row gets its own SID rather than one per storm - confirm
                # this is acceptable for downstream grouping.
                if 'SEASON' in typhoon_data.columns:
                    typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
                                           typhoon_data.index.astype(str).str.zfill(2) +
                                           typhoon_data['SEASON'].astype(str))
                else:
                    typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
                                           typhoon_data.index.astype(str).str.zfill(2) +
                                           '2000')
            # Save the processed data for future use
            safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
            logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records")
        else:
            logging.error("Failed to load any IBTrACS basin data")
            # Create minimal fallback data
            typhoon_data = create_fallback_typhoon_data()
    # Final validation of typhoon data
    if typhoon_data is not None:
        # Ensure required columns exist with fallback values
        required_columns = {
            'SID': 'UNKNOWN',
            'ISO_TIME': pd.Timestamp('2000-01-01'),
            'LAT': 0.0,
            'LON': 0.0,
            'USA_WIND': np.nan,
            'USA_PRES': np.nan,
            'NAME': 'UNNAMED',
            'SEASON': 2000
        }
        for col, default_val in required_columns.items():
            if col not in typhoon_data.columns:
                # The same default is applied to every row of the new column.
                typhoon_data[col] = default_val
                logging.warning(f"Added missing column {col} with default value")
        # Ensure data types
        if 'ISO_TIME' in typhoon_data.columns:
            typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
        typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
        typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
        typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
        typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
        # Remove rows with invalid coordinates
        typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON'])
        logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")
    return oni_data, typhoon_data
def create_fallback_typhoon_data():
    """Build a synthetic typhoon track table for use when no real data loads.

    One hundred distinct days between 2000-01-01 and 2025-12-31 are drawn at
    random; each becomes the first fix of a storm with 20-50 six-hourly
    points that drift north-east from a random start inside 10-30 N,
    130-160 E.

    Returns:
        pandas.DataFrame with columns SID, ISO_TIME, NAME, SEASON, LAT, LON,
        USA_WIND, USA_PRES and BASIN (always 'WP').
    """
    calendar = pd.date_range(start='2000-01-01', end='2025-12-31', freq='D')
    picked = np.random.choice(len(calendar), size=100, replace=False)
    genesis_times = calendar[picked]

    records = []
    for storm_number, genesis_time in enumerate(genesis_times, start=1):
        # Random starting point of the track.
        start_lat = np.random.uniform(10, 30)
        start_lon = np.random.uniform(130, 160)
        # randint's upper bound is exclusive, so this yields 20..50 points.
        n_points = np.random.randint(20, 51)
        storm_id = f"WP{storm_number:02d}{genesis_time.year}"

        for step in range(n_points):
            # The order of the random draws below is kept fixed so that a
            # seeded run reproduces the same table.
            lat = start_lat + step * 0.2 + np.random.normal(0, 0.1)
            lon = start_lon + step * 0.3 + np.random.normal(0, 0.1)
            wind = max(25, 70 + np.random.normal(0, 20))
            pres = max(950, 1000 - wind + np.random.normal(0, 5))
            records.append({
                'SID': storm_id,
                'ISO_TIME': genesis_time + pd.Timedelta(hours=step * 6),
                'NAME': f'FALLBACK_{storm_number}',
                'SEASON': genesis_time.year,
                'LAT': lat,
                'LON': lon,
                'USA_WIND': wind,
                'USA_PRES': pres,
                'BASIN': 'WP'
            })

    fallback = pd.DataFrame(records)
    logging.info(f"Created fallback typhoon data with {len(fallback)} records")
    return fallback
def process_oni_data(oni_data):
    """Reshape a wide ONI table (one row per year) into one row per month.

    Args:
        oni_data: DataFrame with a 'Year' column plus month columns named
            'Jan' ... 'Dec'.

    Returns:
        DataFrame with columns Year, Month (two-digit string), ONI (numeric,
        unparseable values become NaN) and Date (first day of the month).
    """
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    month_numbers = {name: f"{number:02d}" for number, name in enumerate(month_names, start=1)}

    tidy = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
    tidy['Month'] = tidy['Month'].map(month_numbers)
    date_text = tidy['Year'].astype(str) + '-' + tidy['Month'] + '-01'
    tidy['Date'] = pd.to_datetime(date_text)
    tidy['ONI'] = pd.to_numeric(tidy['ONI'], errors='coerce')
    return tidy
def process_typhoon_data(typhoon_data):
    """Collapse per-fix track data into one summary row per storm.

    The input frame is modified in place: ISO_TIME (when present) is parsed
    to datetimes and USA_WIND, USA_PRES and LON are coerced to numbers.

    Args:
        typhoon_data: DataFrame with at least SID, USA_WIND, USA_PRES,
            SEASON, NAME, LAT and LON columns; ISO_TIME is optional.

    Returns:
        DataFrame with one row per SID holding the maximum wind, minimum
        pressure, the first value of the remaining columns, plus Month
        (two-digit string), Year and Category.
    """
    has_time = 'ISO_TIME' in typhoon_data.columns
    if has_time:
        typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
    typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
    typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
    typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
    logging.info(f"Unique basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}")

    # Only aggregate ISO_TIME when the column exists; requesting a missing
    # column makes agg() raise KeyError, which previously made the
    # "no ISO_TIME" fallback below unreachable. Key order fixes column order.
    agg_spec = {'USA_WIND': 'max', 'USA_PRES': 'min'}
    if has_time:
        agg_spec['ISO_TIME'] = 'first'
    agg_spec.update({'SEASON': 'first', 'NAME': 'first', 'LAT': 'first', 'LON': 'first'})
    typhoon_max = typhoon_data.groupby('SID').agg(agg_spec).reset_index()

    if has_time:
        typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
        typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
    else:
        # Fallback if no ISO_TIME: file every storm under January of its season.
        typhoon_max['Month'] = '01'
        typhoon_max['Year'] = typhoon_max['SEASON']
    typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
    return typhoon_max
def merge_data(oni_long, typhoon_max):
    """Attach ONI values to storm summaries.

    Rows are matched on the shared 'Year' and 'Month' columns with pandas'
    default inner join, so storms without a matching ONI month are dropped.
    Storm columns come first in the result.
    """
    join_keys = ['Year', 'Month']
    return typhoon_max.merge(oni_long, on=join_keys)
# -----------------------------
# ENHANCED: Categorization Functions
# -----------------------------
def categorize_typhoon_enhanced(wind_speed):
    """Map a wind speed in knots to an intensity category label.

    Missing values return 'Unknown'. Each category is defined by an
    exclusive upper bound; anything at or above 137 kt is a C5 Super Typhoon.
    """
    if pd.isna(wind_speed):
        return 'Unknown'

    # Values below 10 are treated as m/s and converted to knots.
    knots = wind_speed * 1.94384 if wind_speed < 10 else wind_speed

    # (exclusive upper bound in knots, label), weakest first.
    upper_bounds = (
        (34, 'Tropical Depression'),
        (64, 'Tropical Storm'),
        (83, 'C1 Typhoon'),
        (96, 'C2 Typhoon'),
        (113, 'C3 Strong Typhoon'),
        (137, 'C4 Very Strong Typhoon'),
    )
    for limit, label in upper_bounds:
        if knots < limit:
            return label
    return 'C5 Super Typhoon'
# Original function for backward compatibility
def categorize_typhoon(wind_speed):
    """Backward-compatible alias that delegates to categorize_typhoon_enhanced."""
    category = categorize_typhoon_enhanced(wind_speed)
    return category
def classify_enso_phases(oni_value):
    """Label an ONI value as 'El Nino', 'La Nina' or 'Neutral'.

    A Series argument is reduced to its first element. Missing values and
    values strictly between -0.5 and 0.5 are 'Neutral'; >= 0.5 is 'El Nino'
    and <= -0.5 is 'La Nina'.
    """
    value = oni_value.iloc[0] if isinstance(oni_value, pd.Series) else oni_value
    if pd.isna(value) or -0.5 < value < 0.5:
        return 'Neutral'
    return 'El Nino' if value >= 0.5 else 'La Nina'
# -----------------------------
# NEW: ADVANCED ML FEATURES WITH ROUTE VISUALIZATION
# -----------------------------
def extract_storm_features(typhoon_data):
    """Extract one row of clustering features per storm.

    Args:
        typhoon_data: DataFrame of track fixes with SID, ISO_TIME, LAT, LON,
            USA_WIND and USA_PRES columns.

    Returns:
        DataFrame keyed by SID with intensity and position statistics
        (e.g. USA_WIND_max, LAT_std), lat_range, lon_range, track_length,
        genesis_lat / genesis_lon / genesis_intensity, avg_curvature (mean
        absolute heading change between consecutive segments, in radians,
        each change within [0, pi]) and total_distance (summed segment
        length in degrees). Tracks with fewer than three fixes get 0 for
        both track-shape features.
    """
    # Group by storm ID to get storm-level features
    storm_features = typhoon_data.groupby('SID').agg({
        'USA_WIND': ['max', 'mean', 'std'],
        'USA_PRES': ['min', 'mean', 'std'],
        'LAT': ['mean', 'std', 'max', 'min'],
        'LON': ['mean', 'std', 'max', 'min'],
        'ISO_TIME': ['count']  # Track length
    }).reset_index()
    # Flatten the two-level column names, e.g. ('LAT', 'max') -> 'LAT_max'
    storm_features.columns = ['SID'] + ['_'.join(col).strip() for col in storm_features.columns[1:]]

    # Add additional computed features
    storm_features['lat_range'] = storm_features['LAT_max'] - storm_features['LAT_min']
    storm_features['lon_range'] = storm_features['LON_max'] - storm_features['LON_min']
    storm_features['track_length'] = storm_features['ISO_TIME_count']

    # Add genesis location features
    genesis_data = typhoon_data.groupby('SID').first()[['LAT', 'LON', 'USA_WIND']]
    genesis_data.columns = ['genesis_lat', 'genesis_lon', 'genesis_intensity']
    storm_features = storm_features.merge(genesis_data.reset_index(), on='SID', how='left')

    # Add track shape features. Sorting once and grouping avoids rescanning
    # the whole table for every storm.
    track_stats = []
    time_ordered = typhoon_data.sort_values('ISO_TIME', kind='stable')
    for sid, storm_track in time_ordered.groupby('SID', sort=False):
        lats = storm_track['LAT'].to_numpy(dtype=float)
        lons = storm_track['LON'].to_numpy(dtype=float)
        if len(lats) > 2:
            dlat = np.diff(lats)
            dlon = np.diff(lons)
            headings = np.arctan2(dlat, dlon)
            # Wrap each heading change into [-pi, pi) before taking the
            # magnitude; otherwise a small turn across the +/-pi seam of
            # arctan2 (a westward track wobbling north/south) would be
            # counted as almost a full revolution.
            turns = np.abs((np.diff(headings) + np.pi) % (2 * np.pi) - np.pi)
            avg_curvature = float(np.mean(turns))
            total_distance = float(np.sum(np.hypot(dlat, dlon)))
        else:
            avg_curvature = 0.0
            total_distance = 0.0
        track_stats.append({
            'SID': sid,
            'avg_curvature': avg_curvature,
            'total_distance': total_distance
        })
    track_stats_df = pd.DataFrame(track_stats, columns=['SID', 'avg_curvature', 'total_distance'])
    storm_features = storm_features.merge(track_stats_df, on='SID', how='left')
    return storm_features
def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
    """Project standardized storm features to a low-dimensional embedding.

    Args:
        storm_features: DataFrame of per-storm features; every float64/int64
            column except 'SID' is used, with NaN replaced by 0.
        method: 'umap' (used only when UMAP_AVAILABLE is true) or 'tsne'.
            Any other value, or 'umap' without UMAP installed, uses PCA.
        n_components: Number of output dimensions.

    Returns:
        Tuple (embedding, feature_cols, scaler): the embedded coordinates,
        the names of the columns that were used, and the fitted
        StandardScaler.
    """
    # Select numeric features for clustering
    feature_cols = [col for col in storm_features.columns
                    if col != 'SID' and storm_features[col].dtype in ['float64', 'int64']]
    X = storm_features[feature_cols].fillna(0)

    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    chosen = method.lower()
    if chosen == 'umap' and UMAP_AVAILABLE:
        reducer = umap.UMAP(
            n_components=n_components,
            n_neighbors=15,
            min_dist=0.1,
            metric='euclidean',
            random_state=42,
            n_jobs=1  # Explicitly set to avoid warning
        )
    elif chosen == 'tsne':
        # len // 4 is 0 for fewer than four storms; keep perplexity >= 1.
        perplexity = max(1, min(30, len(X_scaled) // 4))
        # The iteration count is left at the library default (1000, the
        # value previously passed) because the keyword was renamed from
        # n_iter to max_iter in newer scikit-learn releases.
        reducer = TSNE(
            n_components=n_components,
            perplexity=perplexity,
            learning_rate=200,
            random_state=42
        )
    else:
        # Fallback to PCA if UMAP not available
        reducer = PCA(n_components=n_components, random_state=42)

    # Fit and transform
    embedding = reducer.fit_transform(X_scaled)
    return embedding, feature_cols, scaler
def cluster_storms(embedding, method='dbscan', eps=0.5, min_samples=3, n_clusters=5):
    """Assign a cluster label to every row of an embedding.

    Args:
        embedding: 2-D array-like of embedded storm coordinates.
        method: 'dbscan' or 'kmeans' (case-insensitive).
        eps: DBSCAN neighbourhood radius.
        min_samples: DBSCAN minimum neighbourhood size.
        n_clusters: Number of KMeans clusters (default 5, the previously
            hard-coded value). Reduced to the number of rows when the
            embedding has fewer rows than this.

    Returns:
        Array of integer labels as returned by the clusterer's fit_predict.

    Raises:
        ValueError: If method is neither 'dbscan' nor 'kmeans'.
    """
    chosen = method.lower()
    if chosen == 'dbscan':
        clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    elif chosen == 'kmeans':
        # KMeans rejects n_clusters larger than the number of samples.
        effective_clusters = max(1, min(n_clusters, len(embedding)))
        clusterer = KMeans(n_clusters=effective_clusters, random_state=42)
    else:
        raise ValueError("Method must be 'dbscan' or 'kmeans'")
    return clusterer.fit_predict(embedding)
def create_advanced_clustering_visualization(storm_features, typhoon_data, method='umap', show_routes=True):
    """Cluster storms and build a figure plus a text summary of the clusters.

    Args:
        storm_features: Per-storm feature table (must contain 'SID' and the
            statistic columns used in the hover text and summary).
        typhoon_data: Track fixes with SID, ISO_TIME, LAT, LON, NAME, SEASON.
        method: Dimensionality-reduction method passed to
            perform_dimensionality_reduction.
        show_routes: When true, draw the embedding scatter next to a map of
            up to 10 tracks per cluster; otherwise draw only the scatter.

    Returns:
        Tuple (figure, stats_text, storm_features_viz). On any failure the
        figure holds an error annotation, the text describes the error and
        the third element is None.
    """
    try:
        # Validate inputs
        if storm_features is None or storm_features.empty:
            raise ValueError("No storm features available for clustering")
        if typhoon_data is None or typhoon_data.empty:
            raise ValueError("No typhoon data available for route visualization")

        # Perform dimensionality reduction, then cluster the embedding
        embedding, feature_cols, _scaler = perform_dimensionality_reduction(storm_features, method)
        cluster_labels = cluster_storms(embedding, 'dbscan')

        # Add clustering results to storm features
        storm_features_viz = storm_features.copy()
        storm_features_viz['cluster'] = cluster_labels
        storm_features_viz['dim1'] = embedding[:, 0]
        storm_features_viz['dim2'] = embedding[:, 1]

        # Merge with typhoon data for additional info
        storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
        storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')

        if show_routes:
            # Create subplot with both scatter plot and route map
            fig = make_subplots(
                rows=1, cols=2,
                subplot_titles=(
                    f'Storm Clustering using {method.upper()}',
                    'Clustered Storm Routes'
                ),
                specs=[[{"type": "scatter"}, {"type": "geo"}]],
                column_widths=[0.5, 0.5]
            )

            # Add clustering scatter plot; label -1 is DBSCAN noise
            unique_clusters = sorted(storm_features_viz['cluster'].unique())
            for i, cluster in enumerate(unique_clusters):
                cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
                color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] if cluster != -1 else '#CCCCCC'
                cluster_name = f'Cluster {cluster}' if cluster != -1 else 'Noise'
                fig.add_trace(
                    go.Scatter(
                        x=cluster_data['dim1'],
                        y=cluster_data['dim2'],
                        mode='markers',
                        marker=dict(color=color, size=8),
                        name=cluster_name,
                        hovertemplate=(
                            '<b>%{customdata[0]}</b><br>'
                            'Season: %{customdata[1]}<br>'
                            'Max Wind: %{customdata[2]:.0f} kt<br>'
                            'Min Pressure: %{customdata[3]:.0f} hPa<br>'
                            'Track Length: %{customdata[4]:.0f} points<br>'
                            '<extra></extra>'
                        ),
                        customdata=np.column_stack((
                            cluster_data['NAME'].fillna('UNNAMED'),
                            cluster_data['SEASON'].fillna(2000),
                            cluster_data['USA_WIND_max'].fillna(0),
                            cluster_data['USA_PRES_min'].fillna(1000),
                            cluster_data['track_length'].fillna(0)
                        ))
                    ),
                    row=1, col=1
                )

            # Add route map; the same index i keeps colours aligned with the scatter
            for i, cluster in enumerate(unique_clusters):
                if cluster == -1:  # Skip noise for route visualization
                    continue
                cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
                color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
                for j, sid in enumerate(cluster_storm_ids[:10]):  # Limit to 10 storms per cluster for performance
                    try:
                        storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
                        if len(storm_track) > 1:
                            storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
                            fig.add_trace(
                                go.Scattergeo(
                                    lon=storm_track['LON'],
                                    lat=storm_track['LAT'],
                                    mode='lines+markers',
                                    line=dict(color=color, width=2),
                                    marker=dict(color=color, size=4),
                                    # Only the first track of a cluster gets a legend entry
                                    name=f'C{cluster}: {storm_name}' if j == 0 else None,
                                    showlegend=(j == 0),
                                    hovertemplate=(
                                        f'<b>{storm_name}</b><br>'
                                        'Lat: %{lat:.1f}°<br>'
                                        'Lon: %{lon:.1f}°<br>'
                                        f'Cluster: {cluster}<br>'
                                        '<extra></extra>'
                                    )
                                ),
                                row=1, col=2
                            )
                    except Exception as track_error:
                        print(f"Error adding track for storm {sid}: {track_error}")
                        continue

            # Update layout
            fig.update_layout(
                title_text="Advanced Storm Clustering Analysis with Route Visualization",
                showlegend=True
            )
            # Update geo layout
            fig.update_geos(
                projection_type="natural earth",
                showland=True,
                landcolor="LightGray",
                showocean=True,
                oceancolor="LightBlue",
                showcoastlines=True,
                coastlinecolor="Gray",
                center=dict(lat=20, lon=140),
                row=1, col=2
            )
            # Update scatter plot axes
            fig.update_xaxes(title_text=f"{method.upper()} Dimension 1", row=1, col=1)
            fig.update_yaxes(title_text=f"{method.upper()} Dimension 2", row=1, col=1)
        else:
            # Simple scatter plot only
            fig = px.scatter(
                storm_features_viz,
                x='dim1',
                y='dim2',
                color='cluster',
                hover_data=['NAME', 'SEASON', 'USA_WIND_max', 'USA_PRES_min'],
                title=f'Storm Clustering using {method.upper()}',
                labels={
                    'dim1': f'{method.upper()} Dimension 1',
                    'dim2': f'{method.upper()} Dimension 2',
                    'cluster': 'Cluster'
                }
            )

        # Generate detailed cluster statistics; a failure here must not
        # discard the figure, so it is reported in the text instead.
        try:
            stats_text = _format_cluster_statistics(storm_features_viz, feature_cols, method)
        except Exception as stats_error:
            stats_text = f"Error generating cluster statistics: {str(stats_error)}"
        return fig, stats_text, storm_features_viz
    except Exception as e:
        error_fig = go.Figure()
        error_fig.add_annotation(
            text=f"Error in clustering analysis: {str(e)}",
            xref="paper", yref="paper",
            x=0.5, y=0.5, xanchor='center', yanchor='middle',
            showarrow=False, font_size=16
        )
        return error_fig, f"Error in clustering: {str(e)}", None


def _format_cluster_statistics(storm_features_viz, feature_cols, method):
    """Render the per-cluster summary text for the clustering view."""
    cluster_stats = storm_features_viz.groupby('cluster').agg({
        'USA_WIND_max': ['mean', 'std', 'min', 'max'],
        'USA_PRES_min': ['mean', 'std', 'min', 'max'],
        'track_length': ['mean', 'std'],
        'genesis_lat': 'mean',
        'genesis_lon': 'mean',
        'total_distance': 'mean',
        'avg_curvature': 'mean',
        'SID': 'count'
    }).round(2)
    # Flatten column names for readability. Every column gets its statistic
    # as a suffix, including those requested with a single function
    # (e.g. 'genesis_lat_mean', 'SID_count').
    cluster_stats.columns = ['_'.join(col).strip() for col in cluster_stats.columns]

    cluster_ids = sorted(storm_features_viz['cluster'].unique())
    stats_text = "ADVANCED CLUSTER ANALYSIS RESULTS\n" + "="*50 + "\n\n"
    for cluster in cluster_ids:
        if cluster == -1:
            stats_text += f"NOISE POINTS: {cluster_stats.loc[-1, 'SID_count']} storms\n\n"
            continue
        cluster_row = cluster_stats.loc[cluster]
        storm_count = int(cluster_row['SID_count'])
        stats_text += f"CLUSTER {cluster}: {storm_count} storms\n"
        stats_text += f" Intensity: {cluster_row['USA_WIND_max_mean']:.1f} +/- {cluster_row['USA_WIND_max_std']:.1f} kt\n"
        stats_text += f" Pressure: {cluster_row['USA_PRES_min_mean']:.1f} +/- {cluster_row['USA_PRES_min_std']:.1f} hPa\n"
        stats_text += f" Track Length: {cluster_row['track_length_mean']:.1f} +/- {cluster_row['track_length_std']:.1f} points\n"
        # Use the flattened '_mean' names; the bare 'genesis_lat' /
        # 'genesis_lon' keys do not exist after flattening.
        stats_text += f" Genesis Region: {cluster_row['genesis_lat_mean']:.1f}°N, {cluster_row['genesis_lon_mean']:.1f}°E\n"
        stats_text += f" Avg Distance: {cluster_row['total_distance_mean']:.2f} degrees\n"
        stats_text += f" Avg Curvature: {cluster_row['avg_curvature_mean']:.3f} radians\n\n"

    # Add feature importance summary
    stats_text += "CLUSTERING FEATURES USED:\n"
    stats_text += " - Storm intensity (max/mean/std wind & pressure)\n"
    stats_text += " - Track characteristics (length, curvature, distance)\n"
    stats_text += " - Genesis location (lat/lon)\n"
    stats_text += " - Geographic range (lat/lon span)\n"
    stats_text += f" - Total features: {len(feature_cols)}\n\n"
    stats_text += f"ALGORITHM: {method.upper()} + DBSCAN clustering\n"
    stats_text += f"CLUSTERS FOUND: {len([c for c in cluster_ids if c != -1])}\n"
    return stats_text
# -----------------------------
# NEW: Optional CNN Implementation
# -----------------------------
def create_cnn_model(input_shape=(64, 64, 3)):
    """Build and compile a small CNN that regresses a single value from an image.

    Returns None when CNN_AVAILABLE is false or when model construction
    fails (the error is printed). The compiled model uses the Adam
    optimizer, mean-squared-error loss and reports mean absolute error.
    """
    if not CNN_AVAILABLE:
        return None
    try:
        # Three convolution + pooling stages.
        feature_extractor = [
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
        ]
        # Dense head ending in one linear unit (regression output).
        regression_head = [
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(32, activation='relu'),
            layers.Dense(1, activation='linear'),
        ]
        network = models.Sequential(feature_extractor + regression_head)
        network.compile(
            optimizer='adam',
            loss='mean_squared_error',
            metrics=['mae']
        )
        return network
    except Exception as e:
        print(f"Error creating CNN model: {e}")
        return None
def simulate_cnn_prediction(lat, lon, month, oni_value):
    """Return a heuristic stand-in for a CNN intensity prediction.

    No image is processed: the value is derived from the ONI, the month and
    the latitude, and the reported confidence is a random draw clamped to
    [0.5, 0.95]. When CNN_AVAILABLE is false, or anything raises, the result
    of simulate_physics_based_prediction is returned instead.

    Returns:
        Tuple (intensity_in_knots, description_string).
    """
    try:
        if not CNN_AVAILABLE:
            return simulate_physics_based_prediction(lat, lon, month, oni_value)

        # Simplified environmental factors.
        sst_term = oni_value * 0.5
        if month in [7, 8, 9, 10]:
            season_term = 1.2
        else:
            season_term = 0.8
        if abs(lat) < 30:
            lat_term = max(0.5, (30 - abs(lat)) / 30)
        else:
            lat_term = 0.1

        raw_intensity = 40 + sst_term * 10 + season_term * 20 + lat_term * 30
        # Clamp to a reasonable range.
        intensity = max(0, min(180, raw_intensity))

        noisy_confidence = 0.75 + np.random.normal(0, 0.1)
        confidence = max(0.5, min(0.95, noisy_confidence))
        return intensity, f"CNN Prediction: {intensity:.1f} kt (Confidence: {confidence:.1%})"
    except Exception:
        # Fallback to physics-based prediction
        return simulate_physics_based_prediction(lat, lon, month, oni_value)
def simulate_physics_based_prediction(lat, lon, month, oni_value):
    """Estimate storm intensity from fixed additive rules.

    Starting from 45 kt, fixed offsets are added for the ENSO state, the
    month, the absolute latitude and the longitude; the total is clamped to
    [25, 180].

    Returns:
        Tuple (intensity_in_knots, description_string). If the computation
        raises, (50, "Error in prediction: ...") is returned.
    """
    try:
        # ENSO: ONI above 0.5 lowers the estimate, below -0.5 raises it.
        enso_term = -15 if oni_value > 0.5 else (20 if oni_value < -0.5 else 0)

        # Season: Aug-Oct strongest, Jun/Jul/Nov moderate, otherwise negative.
        if month in (8, 9, 10):
            season_term = 25
        elif month in (6, 7, 11):
            season_term = 15
        else:
            season_term = -10

        # Latitude: 10-25 degrees from the equator gets the only bonus.
        distance_from_equator = abs(lat)
        if distance_from_equator < 10:
            lat_term = -20
        elif distance_from_equator <= 25:
            lat_term = 10
        else:
            lat_term = -5

        # Longitude: bonus inside 120-160, penalty elsewhere.
        lon_term = 10 if 120 <= lon <= 160 else -5

        total = sum((45, enso_term, season_term, lat_term, lon_term))
        predicted_intensity = max(25, min(180, total))
        confidence = 0.65
        return predicted_intensity, f"Physics-based Prediction: {predicted_intensity:.1f} kt (Confidence: {confidence:.1%})"
    except Exception as e:
        return 50, f"Error in prediction: {str(e)}"
# -----------------------------
# Regression Functions (Original)
# -----------------------------
def perform_wind_regression(start_year, start_month, end_year, end_month):
    """Fit a logistic regression of "wind >= 64 kt" on ONI for a date range.

    Rows of the module-level merged_data frame whose ISO_TIME falls between
    the first day of the start month and the last day of the end month
    (inclusive) are used; rows missing USA_WIND or ONI are dropped.

    Returns:
        A one-line summary string with the ONI coefficient, its odds ratio
        and p-value, or "Wind Regression Error: ..." if the fit fails.
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    # Copy so the indicator column is added to an independent frame.
    data = merged_data[in_range].dropna(subset=['USA_WIND', 'ONI']).copy()
    data['severe_typhoon'] = (data['USA_WIND'] >= 64).astype(int)
    try:
        X = sm.add_constant(data['ONI'])
        y = data['severe_typhoon']
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Wind Regression Error: {e}"
def perform_pressure_regression(start_year, start_month, end_year, end_month):
    """Fit a logistic regression of "pressure <= 950 hPa" on ONI for a date range.

    Rows of the module-level merged_data frame whose ISO_TIME falls between
    the first day of the start month and the last day of the end month
    (inclusive) are used; rows missing USA_PRES or ONI are dropped.

    Returns:
        A one-line summary string with the ONI coefficient, its odds ratio
        and p-value, or "Pressure Regression Error: ..." if the fit fails.
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    # Copy so the indicator column is added to an independent frame.
    data = merged_data[in_range].dropna(subset=['USA_PRES', 'ONI']).copy()
    data['intense_typhoon'] = (data['USA_PRES'] <= 950).astype(int)
    try:
        X = sm.add_constant(data['ONI'])
        y = data['intense_typhoon']
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Pressure Regression Error: {e}"
def perform_longitude_regression(start_year, start_month, end_year, end_month):
    """Fit a logistic regression of "longitude <= 140" on ONI for a date range.

    Rows of the module-level merged_data frame whose ISO_TIME falls between
    the first day of the start month and the last day of the end month
    (inclusive) are used; rows missing LON or ONI are dropped.

    Returns:
        A one-line summary string with the ONI coefficient, its odds ratio
        and p-value, or "Longitude Regression Error: ..." if the fit fails.
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    # Copy so the indicator column is added to an independent frame.
    data = merged_data[in_range].dropna(subset=['LON', 'ONI']).copy()
    data['western_typhoon'] = (data['LON'] <= 140).astype(int)
    try:
        X = sm.add_constant(data['ONI'])
        y = data['western_typhoon']
        # The outcome is binary and the summary reports exp(beta) as an odds
        # ratio, which is only meaningful for a logit model, so fit Logit
        # like the wind and pressure regressions rather than OLS.
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Longitude Regression Error: {e}"
# -----------------------------
# Visualization Functions (Enhanced)
# -----------------------------
def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Plot full storm tracks for a date range, coloured by ENSO phase.

    Args:
        start_year, start_month, end_year, end_month: Integer bounds; the
            whole end month is included.
        enso_phase: 'all', or a phase name ('El Nino', 'La Nina', 'Neutral')
            in any letter case.
        typhoon_search: Optional text; storms whose NAME contains it
            (case-insensitive, literal match) are overlaid in yellow.

    Returns:
        Tuple (plotly figure, "Total typhoons displayed: N").
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    filtered_data = merged_data[in_range].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        # str.capitalize() lower-cases the second word ('El nino'), which
        # never equals the 'El Nino' / 'La Nina' labels, so resolve the
        # request to the canonical label case-insensitively instead.
        phase_labels = {'el nino': 'El Nino', 'la nina': 'La Nina', 'neutral': 'Neutral'}
        requested = enso_phase.strip().lower().replace('ñ', 'n')
        wanted_label = phase_labels.get(requested, enso_phase.capitalize())
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == wanted_label]

    unique_storms = filtered_data['SID'].unique()
    count = len(unique_storms)
    fig = go.Figure()
    for sid in unique_storms:
        storm_data = typhoon_data[typhoon_data['SID'] == sid]
        if storm_data.empty:
            continue
        name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
        basin = storm_data['SID'].iloc[0][:2]
        storm_oni = filtered_data[filtered_data['SID'] == sid]['ONI'].iloc[0]
        color = 'red' if storm_oni >= 0.5 else ('blue' if storm_oni <= -0.5 else 'green')
        fig.add_trace(go.Scattergeo(
            lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines',
            name=f"{name} ({basin})",
            line=dict(width=1.5, color=color), hoverinfo="name"
        ))

    if typhoon_search:
        # regex=False: the search box holds plain text, and characters such
        # as '(' would otherwise raise a regex error.
        search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False, regex=False)
        if search_mask.any():
            for sid in typhoon_data[search_mask]['SID'].unique():
                storm_data = typhoon_data[typhoon_data['SID'] == sid]
                fig.add_trace(go.Scattergeo(
                    lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines+markers',
                    name=f"MATCHED: {storm_data['NAME'].iloc[0]}",
                    line=dict(width=3, color='yellow'),
                    marker=dict(size=5), hoverinfo="name"
                ))

    fig.update_layout(
        title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})",
        geo=dict(
            projection_type='natural earth',
            showland=True,
            showcoastlines=True,
            landcolor='rgb(243,243,243)',
            countrycolor='rgb(204,204,204)',
            coastlinecolor='rgb(204,204,204)',
            center=dict(lon=140, lat=20),
            projection_scale=3
        ),
        legend_title="Typhoons by ENSO Phase",
        showlegend=True,
        height=700
    )
    fig.add_annotation(
        x=0.02, y=0.98, xref="paper", yref="paper",
        text="Red: El Niño, Blue: La Nina, Green: Neutral",
        showarrow=False, align="left",
        bgcolor="rgba(255,255,255,0.8)"
    )
    return fig, f"Total typhoons displayed: {count}"
def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Scatter maximum wind speed against ONI for a date range.

    Args:
        start_year, start_month, end_year, end_month: Integer bounds; the
            whole end month is included.
        enso_phase: 'all', or a phase name ('El Nino', 'La Nina', 'Neutral')
            in any letter case.
        typhoon_search: Optional text; storms whose NAME contains it
            (case-insensitive, literal match) are marked with red stars.

    Returns:
        Tuple (plotly figure, wind regression summary string).
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    filtered_data = merged_data[in_range].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        # str.capitalize() lower-cases the second word ('El nino'), which
        # never equals the 'El Nino' / 'La Nina' labels, so resolve the
        # request to the canonical label case-insensitively instead.
        phase_labels = {'el nino': 'El Nino', 'la nina': 'La Nina', 'neutral': 'Neutral'}
        requested = enso_phase.strip().lower().replace('ñ', 'n')
        wanted_label = phase_labels.get(requested, enso_phase.capitalize())
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == wanted_label]

    fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
                     hover_data=['NAME', 'Year', 'Category'],
                     title='Wind Speed vs ONI',
                     labels={'ONI': 'ONI Value', 'USA_WIND': 'Max Wind Speed (knots)'},
                     color_discrete_map=enhanced_color_map)

    if typhoon_search:
        # regex=False: the search box holds plain text, and characters such
        # as '(' would otherwise raise a regex error.
        mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False, regex=False)
        if mask.any():
            matched = filtered_data.loc[mask]
            fig.add_trace(go.Scatter(
                x=matched['ONI'], y=matched['USA_WIND'],
                mode='markers', marker=dict(size=10, color='red', symbol='star'),
                name=f'Matched: {typhoon_search}',
                text=matched['NAME'] + ' (' + matched['Year'].astype(str) + ')'
            ))

    regression = perform_wind_regression(start_year, start_month, end_year, end_month)
    return fig, regression
def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Scatter minimum pressure against ONI for a date range.

    Args:
        start_year, start_month, end_year, end_month: Integer bounds; the
            whole end month is included.
        enso_phase: 'all', or a phase name ('El Nino', 'La Nina', 'Neutral')
            in any letter case.
        typhoon_search: Optional text; storms whose NAME contains it
            (case-insensitive, literal match) are marked with red stars.

    Returns:
        Tuple (plotly figure, pressure regression summary string).
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    filtered_data = merged_data[in_range].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        # str.capitalize() lower-cases the second word ('El nino'), which
        # never equals the 'El Nino' / 'La Nina' labels, so resolve the
        # request to the canonical label case-insensitively instead.
        phase_labels = {'el nino': 'El Nino', 'la nina': 'La Nina', 'neutral': 'Neutral'}
        requested = enso_phase.strip().lower().replace('ñ', 'n')
        wanted_label = phase_labels.get(requested, enso_phase.capitalize())
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == wanted_label]

    fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
                     hover_data=['NAME', 'Year', 'Category'],
                     title='Pressure vs ONI',
                     labels={'ONI': 'ONI Value', 'USA_PRES': 'Min Pressure (hPa)'},
                     color_discrete_map=enhanced_color_map)

    if typhoon_search:
        # regex=False: the search box holds plain text, and characters such
        # as '(' would otherwise raise a regex error.
        mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False, regex=False)
        if mask.any():
            matched = filtered_data.loc[mask]
            fig.add_trace(go.Scatter(
                x=matched['ONI'], y=matched['USA_PRES'],
                mode='markers', marker=dict(size=10, color='red', symbol='star'),
                name=f'Matched: {typhoon_search}',
                text=matched['NAME'] + ' (' + matched['Year'].astype(str) + ')'
            ))

    regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
    return fig, regression
def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Scatter storm longitude against ONI with a least-squares trend line.

    Args:
        start_year, start_month, end_year, end_month: Integer bounds; the
            whole end month is included.
        enso_phase: 'all', or a phase name ('El Nino', 'La Nina', 'Neutral')
            in any letter case.
        typhoon_search: Accepted for signature parity with the other
            analysis functions; not used here.

    Returns:
        Tuple (plotly figure, slope description string, longitude
        regression summary string).
    """
    start_date = datetime(start_year, start_month, 1)
    # Exclusive upper bound at the first day of the following month, so the
    # whole end month is included whatever its length.
    if end_month == 12:
        end_exclusive = datetime(end_year + 1, 1, 1)
    else:
        end_exclusive = datetime(end_year, end_month + 1, 1)
    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
    filtered_data = merged_data[in_range].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        # str.capitalize() lower-cases the second word ('El nino'), which
        # never equals the 'El Nino' / 'La Nina' labels, so resolve the
        # request to the canonical label case-insensitively instead.
        phase_labels = {'el nino': 'El Nino', 'la nina': 'La Nina', 'neutral': 'Neutral'}
        requested = enso_phase.strip().lower().replace('ñ', 'n')
        wanted_label = phase_labels.get(requested, enso_phase.capitalize())
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == wanted_label]

    fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
                     title='Typhoon Generation Longitude vs ONI (All Years)')

    if len(filtered_data) > 1:
        X = np.array(filtered_data['LON']).reshape(-1, 1)
        y = filtered_data['ONI']
        try:
            design = sm.add_constant(X)
            model = sm.OLS(y, design).fit()
            y_pred = model.predict(design)
            fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
            # params may be a label-indexed Series; go through an array so
            # that index 1 is always positional (the slope after the constant).
            slope = np.asarray(model.params)[1]
            slopes_text = f"All Years Slope: {slope:.4f}"
        except Exception as e:
            slopes_text = f"Regression Error: {e}"
    else:
        slopes_text = "Insufficient data for regression"

    regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
    return fig, slopes_text, regression
def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
    """Return (category label, hex colour) for a wind speed in knots.

    With standard='taiwan' the speed is converted to m/s and compared with
    the 17.2 / 33.7 / 51.0 thresholds; any other value of standard uses the
    knot-based 34 / 64 / 83 / 96 / 113 / 137 ladder. Missing or
    below-threshold speeds are reported as a grey Tropical Depression.
    """
    weakest = ('Tropical Depression', '#808080')  # Gray
    if pd.isna(wind_speed):
        return weakest

    if standard == 'taiwan':
        measured = wind_speed * 0.514444  # knots -> m/s
        ladder = (
            (51.0, 'Strong Typhoon', '#FF0000'),   # Red
            (33.7, 'Medium Typhoon', '#FFA500'),   # Orange
            (17.2, 'Mild Typhoon', '#FFFF00'),     # Yellow
        )
    else:
        measured = wind_speed
        ladder = (
            (137, 'C5 Super Typhoon', '#FF0000'),        # Red
            (113, 'C4 Very Strong Typhoon', '#FFA500'),  # Orange
            (96, 'C3 Strong Typhoon', '#FFFF00'),        # Yellow
            (83, 'C2 Typhoon', '#00FF00'),               # Green
            (64, 'C1 Typhoon', '#00FFFF'),               # Cyan
            (34, 'Tropical Storm', '#0000FF'),           # Blue
        )

    # Thresholds are listed strongest first; the first one reached wins.
    for threshold, label, colour in ladder:
        if measured >= threshold:
            return label, colour
    return weakest
# -----------------------------
# ENHANCED: Animation Functions
# -----------------------------
def get_available_years(typhoon_data):
    """List the years present in the data as sorted strings.

    Years come from ISO_TIME when that column exists, otherwise from SEASON.
    The fixed range 2000-2025 is returned when the frame is None or empty,
    has neither column, yields no years, or when anything raises (the error
    is printed).
    """
    default_years = [str(year) for year in range(2000, 2026)]
    try:
        if typhoon_data is None or typhoon_data.empty:
            return default_years

        columns = typhoon_data.columns
        if 'ISO_TIME' in columns:
            years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
        elif 'SEASON' in columns:
            years = typhoon_data['SEASON'].dropna().unique()
        else:
            years = range(2000, 2026)  # Default range including 2025

        # int() strips the ".0" that float-typed year values would carry.
        year_strings = sorted(str(int(year)) for year in years if not pd.isna(year))
        return year_strings if year_strings else default_years
    except Exception as e:
        print(f"Error in get_available_years: {e}")
        return default_years
def update_typhoon_options_enhanced(year, basin):
    """Build the storm dropdown choices for a given year and basin.

    Args:
        year: Year to list, as an int or a string convertible to int.
        basin: "All Basins", or a basin entry whose code is the text before
            " - " (or its first two characters when there is no " - ").

    Returns:
        A gr.update(...) carrying the alphabetically sorted choices, each
        formatted "NAME (SID) - Category (NNkt)", with the first listed
        choice preselected. A single placeholder choice and value=None are
        returned when no storm matches or an error occurs.
    """
    try:
        year = int(year)
        # Filter by year - handle both ISO_TIME and SEASON columns
        if 'ISO_TIME' in typhoon_data.columns:
            year_mask = typhoon_data['ISO_TIME'].dt.year == year
        elif 'SEASON' in typhoon_data.columns:
            year_mask = typhoon_data['SEASON'] == year
        else:
            # No year information available: include all data as fallback
            year_mask = typhoon_data.index >= 0
        year_data = typhoon_data[year_mask].copy()

        # Filter by basin if specified
        if basin != "All Basins":
            basin_code = basin.split(' - ')[0] if ' - ' in basin else basin[:2]
            if 'SID' in year_data.columns:
                year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)]
            elif 'BASIN' in year_data.columns:
                year_data = year_data[year_data['BASIN'] == basin_code]

        if year_data.empty:
            return gr.update(choices=["No storms found"], value=None)

        # Get unique storms - include ALL intensities (including TD)
        storms = year_data.groupby('SID').agg({
            'NAME': 'first',
            'USA_WIND': 'max'
        }).reset_index()
        storms['category'] = storms['USA_WIND'].apply(categorize_typhoon_enhanced)

        # Create options with category information
        options = []
        for name, sid, category, max_wind in zip(
                storms['NAME'], storms['SID'], storms['category'], storms['USA_WIND']):
            label = name if pd.notna(name) and name != '' else 'UNNAMED'
            wind_kt = max_wind if pd.notna(max_wind) else 0
            options.append(f"{label} ({sid}) - {category} ({wind_kt:.0f}kt)")

        if not options:
            return gr.update(choices=["No storms found"], value=None)

        # Sort once and preselect the first *displayed* entry; previously
        # the default was the first unsorted option, so the highlighted
        # storm could sit anywhere in the list.
        options.sort()
        return gr.update(choices=options, value=options[0])
    except Exception as e:
        print(f"Error in update_typhoon_options_enhanced: {e}")
        return gr.update(choices=["Error loading storms"], value=None)
def generate_enhanced_track_video(year, typhoon_selection, standard):
    """Render an MP4 animation of one storm's track.

    Args:
        year: Used in the title only when the data has no SEASON column.
        typhoon_selection: Dropdown text of the form "NAME (SID) - ...";
            the SID is taken from the first pair of parentheses.
        standard: Accepted for interface compatibility; not used here.

    Returns:
        Path of the generated .mp4 file, or None when nothing is selected,
        the storm has no data, or rendering fails (the error and traceback
        are printed).
    """
    placeholders = ("No storms found", "Error loading storms")
    if not typhoon_selection or typhoon_selection in placeholders:
        return None

    fig = None
    try:
        # Extract SID from selection
        sid = typhoon_selection.split('(')[1].split(')')[0]

        # Get storm data
        storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
        if storm_df.empty:
            print(f"No data found for storm {sid}")
            return None

        # Sort by time
        if 'ISO_TIME' in storm_df.columns:
            storm_df = storm_df.sort_values('ISO_TIME')

        # Extract data for animation
        lats = storm_df['LAT'].astype(float).values
        lons = storm_df['LON'].astype(float).values
        if 'USA_WIND' in storm_df.columns:
            winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
        else:
            winds = np.full(len(lats), 30)  # Default TD strength

        # Enhanced metadata
        storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
        season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
        print(f"Generating video for {storm_name} ({sid}) with {len(lats)} track points")

        # Create figure with enhanced map
        fig, ax = plt.subplots(figsize=(14, 8), subplot_kw={'projection': ccrs.PlateCarree()})

        # Enhanced map features
        ax.stock_img()
        ax.add_feature(cfeature.COASTLINE, linewidth=0.8)
        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
        ax.add_feature(cfeature.OCEAN, color='lightblue', alpha=0.5)
        ax.add_feature(cfeature.LAND, color='lightgray', alpha=0.5)

        # Set extent based on track
        padding = 5
        ax.set_extent([
            min(lons) - padding, max(lons) + padding,
            min(lats) - padding, max(lats) + padding
        ])

        # Add gridlines
        gl = ax.gridlines(draw_labels=True, alpha=0.3)
        gl.top_labels = gl.right_labels = False

        # Title with enhanced info
        ax.set_title(f"{season} {storm_name} ({sid}) Track Animation", fontsize=16, fontweight='bold')

        # Animation elements
        line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7, label='Track')
        point, = ax.plot([], [], 'o', markersize=12)

        # Enhanced info display
        info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
                           fontsize=11, verticalalignment='top',
                           bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))

        # Color legend with TD support
        legend_elements = []
        for category in ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon', 'C2 Typhoon',
                         'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon']:
            if category in matplotlib_color_map:
                color = get_matplotlib_color(category)
                legend_elements.append(plt.Line2D([0], [0], marker='o', color='w',
                                                  markerfacecolor=color, markersize=8, label=category))
        ax.legend(handles=legend_elements, loc='upper right', fontsize=9)

        def animate(frame):
            """Draw the track up to *frame* and refresh the marker and info box."""
            try:
                if frame >= len(lats):
                    return line, point, info_box

                # Update track line
                line.set_data(lons[:frame+1], lats[:frame+1])

                # Update current position
                current_wind = winds[frame]
                category = categorize_typhoon_enhanced(current_wind)
                color = get_matplotlib_color(category)

                # Debug print for first few frames
                if frame < 3:
                    print(f"Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}")

                point.set_data([lons[frame]], [lats[frame]])
                point.set_color(color)
                point.set_markersize(8 + current_wind/10)  # Size based on intensity

                # Enhanced info display
                if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
                    current_time = storm_df.iloc[frame]['ISO_TIME']
                    time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
                else:
                    time_str = f"Step {frame+1}"

                info_text = (
                    f"Storm: {storm_name}\n"
                    f"Time: {time_str}\n"
                    f"Position: {lats[frame]:.1f}°N, {lons[frame]:.1f}°E\n"
                    f"Max Wind: {current_wind:.0f} kt\n"
                    f"Category: {category}\n"
                    f"Frame: {frame+1}/{len(lats)}"
                )
                info_box.set_text(info_text)
                return line, point, info_box
            except Exception as e:
                print(f"Error in animate frame {frame}: {e}")
                return line, point, info_box

        # Create animation
        anim = animation.FuncAnimation(
            fig, animate, frames=len(lats),
            interval=300, blit=False, repeat=True
        )

        # Reserve an output path and close the handle straight away: the
        # writer opens the file by name, and an open handle would leak
        # (and blocks writing on platforms that lock open files).
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4',
                                         dir=tempfile.gettempdir()) as temp_file:
            video_path = temp_file.name

        # Enhanced writer settings
        writer = animation.FFMpegWriter(
            fps=4, bitrate=2000, codec='libx264',
            extra_args=['-pix_fmt', 'yuv420p']  # Better compatibility
        )
        print(f"Saving animation to {video_path}")
        anim.save(video_path, writer=writer, dpi=100)
        print(f"Video generated successfully: {video_path}")
        return video_path
    except Exception as e:
        print(f"Error generating video: {e}")
        import traceback
        traceback.print_exc()
        return None
    finally:
        # Always release the figure, including when saving fails.
        if fig is not None:
            plt.close(fig)
# Simplified wrapper for backward compatibility - FIXED
def simplified_track_video(year, basin, typhoon, standard):
    """Backward-compatible wrapper around generate_enhanced_track_video.

    ``basin`` is accepted for signature compatibility but is not used.
    Returns None when ``typhoon`` is falsy; otherwise returns whatever
    generate_enhanced_track_video(year, typhoon, standard) returns.
    """
    return generate_enhanced_track_video(year, typhoon, standard) if typhoon else None
# -----------------------------
# Load & Process Data
# -----------------------------
# Module-level datasets; all start as None and are assigned by initialize_data().
oni_data = typhoon_data = merged_data = None
def _load_fallback_data():
    """Fill the module-level datasets with a minimal synthetic fallback."""
    global oni_data, typhoon_data, merged_data
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Single all-zero ONI row for the year 2000.
    oni_data = pd.DataFrame({'Year': [2000], **{month: [0] for month in months}})
    typhoon_data = create_fallback_typhoon_data()
    oni_long = process_oni_data(oni_data)
    typhoon_max = process_typhoon_data(typhoon_data)
    merged_data = merge_data(oni_long, typhoon_max)


def initialize_data():
    """Initialize all data safely.

    Refreshes the ONI file, loads the ONI and typhoon datasets and merges
    them into the module-level ``oni_data``, ``typhoon_data`` and
    ``merged_data``. If loading returns None for either dataset, or any
    step raises, the globals are populated from a minimal fallback dataset
    instead. An exception raised while building that fallback from the
    exception handler is not caught here.
    """
    global oni_data, typhoon_data, merged_data
    try:
        logging.info("Starting data loading process...")
        update_oni_data()
        oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
        if oni_data is None or typhoon_data is None:
            logging.error("Failed to load required data")
            # Create minimal fallback data
            _load_fallback_data()
            return
        oni_long = process_oni_data(oni_data)
        typhoon_max = process_typhoon_data(typhoon_data)
        merged_data = merge_data(oni_long, typhoon_max)
        logging.info("Data loading complete.")
    except Exception as e:
        # logging.exception keeps the traceback, which a plain error() call drops.
        logging.exception("Error during data initialization: %s", e)
        # Create minimal fallback data
        _load_fallback_data()
# Initialize data at import time: create_interface() reads the module-level
# datasets when `demo` is built further down in this module.
initialize_data()
# -----------------------------
# ENHANCED: Gradio Interface
# -----------------------------
def create_interface():
    """Create the enhanced Gradio interface with robust error handling.

    Reads the module-level ``oni_data``, ``typhoon_data`` and ``merged_data``
    and builds a tabbed ``gr.Blocks`` app (overview, clustering, intensity
    prediction, track/wind/pressure/longitude analysis, track animation and
    dataset statistics).

    Returns:
        The assembled ``gr.Blocks`` app. If any dataset is None, or any
        exception is raised while building the UI, the result of
        create_minimal_fallback_interface() is returned instead.
    """
    try:
        # Ensure data is available
        if oni_data is None or typhoon_data is None or merged_data is None:
            logging.warning("Data not properly loaded, creating minimal interface")
            return create_minimal_fallback_interface()

        # Get safe data statistics
        try:
            total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
            total_records = len(typhoon_data)
            available_years = get_available_years(typhoon_data)
            year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
        except Exception as e:
            logging.error(f"Error getting data statistics: {e}")
            total_storms = 0
            total_records = 0
            year_range_display = "Unknown"
            available_years = [str(year) for year in range(2000, 2026)]

        with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
            gr.Markdown("# Enhanced Typhoon Analysis Platform")
            gr.Markdown("Advanced ML clustering, CNN predictions, and comprehensive tropical cyclone analysis including Tropical Depressions")

            with gr.Tab("Overview"):
                overview_text = f"""
## Welcome to the Enhanced Typhoon Analysis Dashboard
This dashboard provides comprehensive analysis of typhoon data in relation to ENSO phases with advanced machine learning capabilities.
### Enhanced Features:
- Advanced ML Clustering: UMAP/t-SNE storm pattern analysis with route visualization
- Optional CNN Predictions: Deep learning intensity forecasting
- Complete TD Support: Now includes Tropical Depressions (< 34 kt)
- 2025 Data Ready: Real-time compatibility with current year data
- Enhanced Animations: High-quality storm track visualizations
### Data Status:
- ONI Data: {len(oni_data)} years loaded
- Typhoon Data: {total_records} records loaded
- Merged Data: {len(merged_data)} typhoons with ONI values
- Available Years: {year_range_display}
### Technical Capabilities:
- UMAP Clustering: {"Available" if UMAP_AVAILABLE else "Limited to t-SNE/PCA"}
- AI Predictions: {"Deep Learning" if CNN_AVAILABLE else "Physics-based"}
- Enhanced Categorization: Tropical Depression to Super Typhoon
- Platform Compatibility: Optimized for Hugging Face Spaces
"""
                gr.Markdown(overview_text)

            with gr.Tab("Advanced ML Clustering with Routes"):
                gr.Markdown("## Storm Pattern Analysis using UMAP/t-SNE with Route Visualization")
                gr.Markdown("**This tab shows both the dimensional clustering analysis AND the actual storm tracks colored by cluster**")
                with gr.Row():
                    reduction_method = gr.Dropdown(
                        choices=['UMAP', 't-SNE', 'PCA'],
                        value='UMAP' if UMAP_AVAILABLE else 't-SNE',
                        label="Dimensionality Reduction Method"
                    )
                    show_routes = gr.Checkbox(
                        label="Show Storm Routes on Map",
                        value=True,
                        info="Display actual storm tracks colored by cluster"
                    )
                analyze_clusters_btn = gr.Button("Analyze Storm Clusters & Routes", variant="primary")
                with gr.Row():
                    cluster_plot = gr.Plot(label="Storm Clustering with Route Visualization")
                with gr.Row():
                    cluster_stats = gr.Textbox(label="Detailed Cluster Statistics", lines=15, max_lines=20)

                def run_advanced_clustering_analysis(method, show_routes):
                    """Run the clustering pipeline; on failure return (None, error text)."""
                    try:
                        # Extract features for clustering
                        storm_features = extract_storm_features(typhoon_data)
                        fig, stats, _ = create_advanced_clustering_visualization(storm_features, typhoon_data, method.lower(), show_routes)
                        return fig, stats
                    except Exception as e:
                        import traceback
                        error_details = traceback.format_exc()
                        return None, f"Error: {str(e)}\n\nDetails:\n{error_details}"

                analyze_clusters_btn.click(
                    fn=run_advanced_clustering_analysis,
                    inputs=[reduction_method, show_routes],
                    outputs=[cluster_plot, cluster_stats]
                )
                cluster_info_text = """
### Advanced Clustering Features:
- Multi-dimensional Analysis: Uses 15+ storm characteristics including intensity, track shape, genesis location
- Route Visualization: Shows actual storm tracks colored by cluster membership
- DBSCAN Clustering: Automatically finds natural groupings without predefined cluster count
- Comprehensive Stats: Detailed cluster analysis including intensity, pressure, track length, curvature
- Interactive: Hover over points to see storm details, zoom and pan the route map
### How to Interpret:
- Left Plot: Each dot is a storm positioned by similarity (close = similar characteristics)
- Right Plot: Actual geographic storm tracks, colored by which cluster they belong to
- Cluster Colors: Each cluster gets a unique color to identify similar storm patterns
- Noise Points: Gray points represent storms that don't fit clear patterns
"""
                gr.Markdown(cluster_info_text)

            with gr.Tab("Intensity Prediction"):
                gr.Markdown("## AI-Powered Storm Intensity Forecasting")
                if CNN_AVAILABLE:
                    gr.Markdown("Deep Learning models available - TensorFlow loaded successfully")
                    method_description = "Using Convolutional Neural Networks for advanced intensity prediction"
                else:
                    gr.Markdown("Physics-based models available - Using climatological relationships")
                    gr.Markdown("*Install TensorFlow for deep learning features: `pip install tensorflow-cpu`*")
                    method_description = "Using established meteorological relationships and climatology"
                gr.Markdown(f"**Current Method**: {method_description}")
                with gr.Row():
                    cnn_lat = gr.Number(label="Latitude", value=20.0, info="Storm center latitude (-90 to 90)")
                    cnn_lon = gr.Number(label="Longitude", value=140.0, info="Storm center longitude (-180 to 180)")
                    cnn_month = gr.Slider(1, 12, label="Month", value=9, info="Month of year (1=Jan, 12=Dec)")
                    cnn_oni = gr.Number(label="ONI Value", value=0.0, info="Current ENSO index (-3 to 3)")
                predict_btn = gr.Button("Predict Storm Intensity", variant="primary")
                with gr.Row():
                    intensity_output = gr.Number(label="Predicted Max Wind (kt)")
                    confidence_output = gr.Textbox(label="Model Output & Confidence")
                predict_btn.click(
                    fn=simulate_cnn_prediction,
                    inputs=[cnn_lat, cnn_lon, cnn_month, cnn_oni],
                    outputs=[intensity_output, confidence_output]
                )
                prediction_info_text = """
### Prediction Features:
- Environmental Analysis: Considers ENSO, latitude, seasonality
- Real-time Capable: Predictions in milliseconds
- Confidence Scoring: Uncertainty quantification included
- Robust Fallbacks: Works with or without deep learning libraries
### Interpretation Guide:
- 25-33 kt: Tropical Depression (TD)
- 34-63 kt: Tropical Storm (TS)
- 64+ kt: Typhoon categories (C1-C5)
- 100+ kt: Major typhoon (C3+)
"""
                gr.Markdown(prediction_info_text)

            with gr.Tab("Track Visualization"):
                with gr.Row():
                    start_year = gr.Number(label="Start Year", value=2020)
                    start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
                    end_year = gr.Number(label="End Year", value=2025)
                    end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
                    enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
                    typhoon_search = gr.Textbox(label="Typhoon Search")
                analyze_btn = gr.Button("Generate Tracks")
                tracks_plot = gr.Plot()
                typhoon_count = gr.Textbox(label="Number of Typhoons Displayed")
                analyze_btn.click(
                    fn=get_full_tracks,
                    inputs=[start_year, start_month, end_year, end_month, enso_phase, typhoon_search],
                    outputs=[tracks_plot, typhoon_count]
                )

            with gr.Tab("Wind Analysis"):
                with gr.Row():
                    wind_start_year = gr.Number(label="Start Year", value=2020)
                    wind_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
                    wind_end_year = gr.Number(label="End Year", value=2024)
                    wind_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
                    wind_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
                    wind_typhoon_search = gr.Textbox(label="Typhoon Search")
                wind_analyze_btn = gr.Button("Generate Wind Analysis")
                wind_scatter = gr.Plot()
                wind_regression_results = gr.Textbox(label="Wind Regression Results")
                wind_analyze_btn.click(
                    fn=get_wind_analysis,
                    inputs=[wind_start_year, wind_start_month, wind_end_year, wind_end_month, wind_enso_phase, wind_typhoon_search],
                    outputs=[wind_scatter, wind_regression_results]
                )

            with gr.Tab("Pressure Analysis"):
                with gr.Row():
                    pressure_start_year = gr.Number(label="Start Year", value=2020)
                    pressure_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
                    pressure_end_year = gr.Number(label="End Year", value=2024)
                    pressure_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
                    pressure_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
                    pressure_typhoon_search = gr.Textbox(label="Typhoon Search")
                pressure_analyze_btn = gr.Button("Generate Pressure Analysis")
                pressure_scatter = gr.Plot()
                pressure_regression_results = gr.Textbox(label="Pressure Regression Results")
                pressure_analyze_btn.click(
                    fn=get_pressure_analysis,
                    inputs=[pressure_start_year, pressure_start_month, pressure_end_year, pressure_end_month, pressure_enso_phase, pressure_typhoon_search],
                    outputs=[pressure_scatter, pressure_regression_results]
                )

            with gr.Tab("Longitude Analysis"):
                with gr.Row():
                    lon_start_year = gr.Number(label="Start Year", value=2020)
                    lon_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
                    lon_end_year = gr.Number(label="End Year", value=2020)
                    lon_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
                    lon_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
                    lon_typhoon_search = gr.Textbox(label="Typhoon Search (Optional)")
                lon_analyze_btn = gr.Button("Generate Longitude Analysis")
                regression_plot = gr.Plot()
                slopes_text = gr.Textbox(label="Regression Slopes")
                lon_regression_results = gr.Textbox(label="Longitude Regression Results")
                lon_analyze_btn.click(
                    fn=get_longitude_analysis,
                    inputs=[lon_start_year, lon_start_month, lon_end_year, lon_end_month, lon_enso_phase, lon_typhoon_search],
                    outputs=[regression_plot, slopes_text, lon_regression_results]
                )

            with gr.Tab("Enhanced Track Animation"):
                gr.Markdown("## High-Quality Storm Track Visualization (All Categories Including TD)")
                with gr.Row():
                    year_dropdown = gr.Dropdown(
                        label="Year",
                        choices=available_years,
                        value=available_years[-1] if available_years else "2024"
                    )
                    basin_dropdown = gr.Dropdown(
                        label="Basin",
                        choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"],
                        value="All Basins"
                    )
                with gr.Row():
                    typhoon_dropdown = gr.Dropdown(label="Storm Selection (All Categories Including TD)")
                    standard_dropdown = gr.Dropdown(
                        label="Classification Standard",
                        choices=['atlantic', 'taiwan'],
                        value='atlantic'
                    )
                generate_video_btn = gr.Button("Generate Enhanced Animation", variant="primary")
                video_output = gr.Video(label="Storm Track Animation")
                # Update storm options when year or basin changes
                for input_comp in [year_dropdown, basin_dropdown]:
                    input_comp.change(
                        fn=update_typhoon_options_enhanced,
                        inputs=[year_dropdown, basin_dropdown],
                        outputs=[typhoon_dropdown]
                    )
                # Generate video
                generate_video_btn.click(
                    fn=generate_enhanced_track_video,
                    inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
                    outputs=[video_output]
                )
                animation_info_text = """
### Enhanced Animation Features:
- Full TD Support: Now displays Tropical Depressions (< 34 kt) in gray
- 2025 Compatibility: Complete support for current year data
- Enhanced Maps: Better cartographic projections with terrain features
- Smart Scaling: Storm symbols scale dynamically with intensity
- Real-time Info: Live position, time, and meteorological data display
- Professional Styling: Publication-quality animations with proper legends
- Optimized Export: Fast rendering with web-compatible video formats
"""
                gr.Markdown(animation_info_text)

            with gr.Tab("Data Statistics & Insights"):
                gr.Markdown("## Comprehensive Dataset Analysis")
                # Create enhanced data summary
                try:
                    if len(typhoon_data) > 0:
                        # Storm category distribution
                        storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
                        cat_counts = storm_cats.value_counts()
                        # Create distribution chart with enhanced colors
                        fig_dist = px.bar(
                            x=cat_counts.index,
                            y=cat_counts.values,
                            title="Storm Intensity Distribution (Including Tropical Depressions)",
                            labels={'x': 'Category', 'y': 'Number of Storms'},
                            color=cat_counts.index,
                            color_discrete_map=enhanced_color_map
                        )
                        # Seasonal distribution
                        if 'ISO_TIME' in typhoon_data.columns:
                            seasonal_data = typhoon_data.copy()
                            seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
                            monthly_counts = seasonal_data.groupby(['Month', 'SID']).size().groupby('Month').size()
                            fig_seasonal = px.bar(
                                x=monthly_counts.index,
                                y=monthly_counts.values,
                                title="Seasonal Storm Distribution",
                                labels={'x': 'Month', 'y': 'Number of Storms'},
                                color=monthly_counts.values,
                                color_continuous_scale='Viridis'
                            )
                        else:
                            fig_seasonal = None
                        # Basin distribution
                        if 'SID' in typhoon_data.columns:
                            basin_data = typhoon_data['SID'].str[:2].value_counts()
                            fig_basin = px.pie(
                                values=basin_data.values,
                                names=basin_data.index,
                                title="Distribution by Basin"
                            )
                        else:
                            fig_basin = None
                        # Rendering stays inside the non-empty guard so the
                        # figures are always bound when referenced.
                        with gr.Row():
                            gr.Plot(value=fig_dist)
                        if fig_seasonal is not None:
                            with gr.Row():
                                gr.Plot(value=fig_seasonal)
                        if fig_basin is not None:
                            with gr.Row():
                                gr.Plot(value=fig_basin)
                except Exception as e:
                    gr.Markdown(f"Visualization error: {str(e)}")

                # Enhanced statistics - FIXED formatting
                total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
                total_records = len(typhoon_data)
                if 'SEASON' in typhoon_data.columns:
                    try:
                        min_year = int(typhoon_data['SEASON'].min())
                        max_year = int(typhoon_data['SEASON'].max())
                        year_range = f"{min_year}-{max_year}"
                        years_covered = typhoon_data['SEASON'].nunique()
                    except (ValueError, TypeError):
                        year_range = "Unknown"
                        years_covered = 0
                else:
                    year_range = "Unknown"
                    years_covered = 0
                if 'SID' in typhoon_data.columns:
                    try:
                        basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique()))
                        avg_storms_per_year = total_storms / max(years_covered, 1)
                    except Exception:
                        basins_available = "Unknown"
                        avg_storms_per_year = 0
                else:
                    basins_available = "Unknown"
                    avg_storms_per_year = 0

                # TD specific statistics
                try:
                    if 'USA_WIND' in typhoon_data.columns and 'SID' in typhoon_data.columns:
                        # Classify each storm by its peak wind, matching the
                        # distribution chart above. Filtering individual track
                        # records would count one typhoon in every bucket,
                        # because almost every storm has some record < 34 kt.
                        peak_wind = typhoon_data.groupby('SID')['USA_WIND'].max()
                        td_storms = int((peak_wind < 34).sum())
                        ts_storms = int(((peak_wind >= 34) & (peak_wind < 64)).sum())
                        typhoon_storms = int((peak_wind >= 64).sum())
                        td_percentage = (td_storms / max(total_storms, 1)) * 100
                    else:
                        td_storms = ts_storms = typhoon_storms = 0
                        td_percentage = 0
                except Exception as e:
                    print(f"Error calculating TD statistics: {e}")
                    td_storms = ts_storms = typhoon_storms = 0
                    td_percentage = 0

                # Create statistics text safely
                stats_text = f"""
### Enhanced Dataset Summary:
- Total Unique Storms: {total_storms:,}
- Total Track Records: {total_records:,}
- Year Range: {year_range} ({years_covered} years)
- Basins Available: {basins_available}
- Average Storms/Year: {avg_storms_per_year:.1f}
### Storm Category Breakdown:
- Tropical Depressions: {td_storms:,} storms ({td_percentage:.1f}%)
- Tropical Storms: {ts_storms:,} storms
- Typhoons (C1-C5): {typhoon_storms:,} storms
### New Platform Capabilities:
- Complete TD Analysis - First platform to include comprehensive TD tracking
- Advanced ML Clustering - DBSCAN pattern recognition with route visualization
- Real-time Predictions - Physics-based and optional CNN intensity forecasting
- 2025 Data Ready - Full compatibility with current season data
- Enhanced Animations - Professional-quality storm track videos
- Multi-basin Analysis - Comprehensive Pacific and Atlantic coverage
### Research Applications:
- Climate change impact studies
- Seasonal forecasting research
- Storm pattern classification
- ENSO-typhoon relationship analysis
- Intensity prediction model development
"""
                gr.Markdown(stats_text)

        return demo
    except Exception as e:
        logging.error(f"Error creating Gradio interface: {e}")
        import traceback
        traceback.print_exc()
        # Create a minimal fallback interface
        return create_minimal_fallback_interface()
def create_minimal_fallback_interface():
    """Build a two-tab Gradio app ("Status" and "Debug") used when the full UI cannot be built.

    The Debug tab has a button that reports the working directory, the
    configured data paths, optional-dependency flags and whether each
    module-level dataset is loaded.

    Returns:
        The assembled ``gr.Blocks`` app.
    """
    status_markdown = """
## Platform Status
The application is running but encountered issues loading the full interface.
This could be due to:
- Data loading problems
- Missing dependencies
- Configuration issues
### Available Features:
- Basic interface is functional
- Error logs are being generated
- System is ready for debugging
### Next Steps:
1. Check the console logs for detailed error information
2. Verify all required data files are accessible
3. Ensure all dependencies are properly installed
4. Try restarting the application
"""
    with gr.Blocks() as demo:
        gr.Markdown("# Enhanced Typhoon Analysis Platform")
        gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")

        with gr.Tab("Status"):
            gr.Markdown(status_markdown)

        with gr.Tab("Debug"):
            gr.Markdown("## Debug Information")

            def get_debug_info():
                """Return a text snapshot of environment and data-loading state."""
                return f"""
Python Environment:
- Working Directory: {os.getcwd()}
- Data Path: {DATA_PATH}
- UMAP Available: {UMAP_AVAILABLE}
- CNN Available: {CNN_AVAILABLE}
Data Status:
- ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
- Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
- Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
File Checks:
- ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
- Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
"""

            info_button = gr.Button("Get Debug Info")
            info_box = gr.Textbox(label="Debug Information", lines=15)
            info_button.click(fn=get_debug_info, outputs=info_box)
    return demo
# -----------------------------
# Color Test Functions (Optional)
# -----------------------------
def test_color_conversion():
    """Print the category, Plotly color and Matplotlib color for sample wind speeds."""
    print("Testing color conversion...")
    # One sample wind speed (kt) per category, intended as TD, TS, C1, C2, C3, C4, C5
    sample_winds = (25, 40, 70, 85, 100, 120, 150)
    for wind in sample_winds:
        category = categorize_typhoon_enhanced(wind)
        plotly_rgb = enhanced_color_map.get(category, 'rgb(128,128,128)')
        mpl_color = get_matplotlib_color(category)
        print(f"Wind: {wind:3d}kt -> {category:20s} -> Plotly: {plotly_rgb:15s} -> Matplotlib: {mpl_color}")
    print("Color conversion test complete!")
def test_rgb_conversion():
    """Print the result of rgb_string_to_hex for a few sample 'rgb(r, g, b)' strings."""
    samples = (
        'rgb(128, 128, 128)',
        'rgb(255, 0, 0)',
        'rgb(0, 255, 0)',
        'rgb(0, 0, 255)',
    )
    print("Testing RGB to hex conversion...")
    for sample in samples:
        converted = rgb_string_to_hex(sample)
        print(f"{sample:20s} -> {converted}")
    print("RGB conversion test complete!")
# Build the interface at import time so `demo` exists as a module attribute;
# the server is only launched when the file is run as a script.
demo = create_interface()

if __name__ == "__main__":
    demo.launch()