Spaces:
Running
Running
minor fixes
Browse files- src/pages/Calculator.tsx +412 -401
src/pages/Calculator.tsx
CHANGED
|
@@ -614,90 +614,94 @@ const PrefillChunkingCalculator = ({
|
|
| 614 |
)
|
| 615 |
|
| 616 |
return (
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
<div
|
| 620 |
-
<div className='
|
| 621 |
-
<div className='
|
| 622 |
-
|
| 623 |
-
<div>
|
| 624 |
-
<div className='chart-row'>
|
| 625 |
-
<div className='chart-row-title'>FP32</div>
|
| 626 |
-
<PrefillChunkingModelSizeBarChart
|
| 627 |
-
modelSize={calculateMemory(modelParams, 'fp32')}
|
| 628 |
-
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
|
| 629 |
-
modelPrecision='fp32'
|
| 630 |
-
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 631 |
-
activationMemorySize={activationMemorySize}
|
| 632 |
-
/>
|
| 633 |
-
<div className='chart-row-size ml-8'>
|
| 634 |
-
{(calculateMemory(modelParams, 'fp32') + activationMemorySize).toFixed(2)}{' '}
|
| 635 |
-
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
| 636 |
-
</div>
|
| 637 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
|
|
|
| 651 |
</div>
|
| 652 |
-
</div>
|
| 653 |
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
|
|
|
| 666 |
</div>
|
| 667 |
-
</div>
|
| 668 |
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
|
|
|
| 681 |
</div>
|
| 682 |
</div>
|
| 683 |
</div>
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 688 |
</div>
|
| 689 |
-
<PrefillChunkingInferenceRuntimeLineChart
|
| 690 |
-
availableMemory={{
|
| 691 |
-
int4: deviceMemory - calculateMemory(modelParams, 'int4'),
|
| 692 |
-
int8: deviceMemory - calculateMemory(modelParams, 'int8'),
|
| 693 |
-
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
| 694 |
-
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
| 695 |
-
}}
|
| 696 |
-
memoryPerInput={memoryPerInput}
|
| 697 |
-
activationMemorySize={activationMemorySize}
|
| 698 |
-
/>
|
| 699 |
</div>
|
| 700 |
-
|
| 701 |
)
|
| 702 |
}
|
| 703 |
|
|
@@ -770,343 +774,353 @@ const Calculator = () => {
|
|
| 770 |
}
|
| 771 |
|
| 772 |
return (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
>
|
| 783 |
-
Standard Calculator
|
| 784 |
-
</button>
|
| 785 |
-
<button
|
| 786 |
-
className={`${
|
| 787 |
-
isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
| 788 |
-
}`}
|
| 789 |
-
onClick={() => setIsPrefillChunking(true)}
|
| 790 |
-
>
|
| 791 |
-
Calculator with Prefill Chunking
|
| 792 |
-
</button>
|
| 793 |
-
</div>
|
| 794 |
-
|
| 795 |
-
{/* Model Memory Calculator */}
|
| 796 |
-
<div className="w-full max-w-4xl">
|
| 797 |
-
<div className="text-4xl mb-4 text-center">Model Memory Calculator</div>
|
| 798 |
-
<div className="mb-6 text-center">
|
| 799 |
-
Use our Model Memory Calculator to help you estimate the memory footprint of your model for different precisions and the maximum batch size / sequence length combination you can run on your device.
|
| 800 |
-
</div>
|
| 801 |
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
>
|
| 815 |
-
Model Selection
|
| 816 |
-
</button>
|
| 817 |
-
<button
|
| 818 |
-
className={`${
|
| 819 |
-
modelSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
| 820 |
-
}`}
|
| 821 |
-
onClick={() => setModelSelectionTab(false)}
|
| 822 |
-
>
|
| 823 |
-
Custom Model
|
| 824 |
-
</button>
|
| 825 |
-
</div>
|
| 826 |
-
<div>
|
| 827 |
-
{modelSelectionTab ? (
|
| 828 |
-
<>
|
| 829 |
-
<label htmlFor="model">Select a Model</label>
|
| 830 |
-
<select
|
| 831 |
-
id="model"
|
| 832 |
-
className="calculator-select"
|
| 833 |
-
onChange={(e) => {
|
| 834 |
-
setModelParams(Number(e.target.value));
|
| 835 |
-
setHiddenSize(
|
| 836 |
-
Number(
|
| 837 |
-
e.target.options[e.target.selectedIndex].getAttribute('data-hiddenSize')
|
| 838 |
-
)
|
| 839 |
-
);
|
| 840 |
-
setNumLayers(
|
| 841 |
-
Number(
|
| 842 |
-
e.target.options[e.target.selectedIndex].getAttribute('data-numLayers')
|
| 843 |
-
)
|
| 844 |
-
);
|
| 845 |
-
}}
|
| 846 |
>
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 854 |
>
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 894 |
</div>
|
| 895 |
-
</div>
|
| 896 |
-
</div>
|
| 897 |
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
>
|
| 912 |
-
Device Selection
|
| 913 |
-
</button>
|
| 914 |
-
<button
|
| 915 |
-
className={`${
|
| 916 |
-
deviceSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
| 917 |
-
}`}
|
| 918 |
-
onClick={() => {
|
| 919 |
-
setDeviceSelectionTab(false);
|
| 920 |
-
setDeviceMemory(null);
|
| 921 |
-
}}
|
| 922 |
-
>
|
| 923 |
-
Custom Device
|
| 924 |
-
</button>
|
| 925 |
-
</div>
|
| 926 |
-
<div>
|
| 927 |
-
{deviceSelectionTab ? (
|
| 928 |
-
<>
|
| 929 |
-
<label htmlFor="device">Select a Device</label>
|
| 930 |
-
<select
|
| 931 |
-
id="device"
|
| 932 |
-
className="calculator-select"
|
| 933 |
-
onChange={(e) => setDeviceMemory(Number(e.target.value))}
|
| 934 |
>
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
|
| 947 |
-
|
| 948 |
-
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 957 |
</div>
|
| 958 |
-
</div>
|
| 959 |
-
</div>
|
| 960 |
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
|
|
|
| 969 |
</div>
|
| 970 |
-
</div>
|
| 971 |
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
|
|
|
| 984 |
</div>
|
| 985 |
</div>
|
| 986 |
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1013 |
/>
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
|
| 1041 |
-
|
| 1042 |
-
|
| 1043 |
-
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1076 |
</div>
|
| 1077 |
</div>
|
| 1078 |
|
| 1079 |
-
{/*
|
| 1080 |
-
<div className=
|
| 1081 |
-
<div className=
|
| 1082 |
-
|
| 1083 |
-
modelSize={calculateMemory(modelParams, 'int4')}
|
| 1084 |
-
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
|
| 1085 |
-
modelPrecision="int4"
|
| 1086 |
-
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 1087 |
-
/>
|
| 1088 |
-
<div className="chart-row-size ml-8">
|
| 1089 |
-
{calculateMemory(modelParams, 'int4')} {deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
| 1090 |
</div>
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
-
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
| 1104 |
-
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
| 1105 |
-
}}
|
| 1106 |
-
memoryPerInput={calculateMemoryPerInput(hiddenSize, numLayers)}
|
| 1107 |
-
/>
|
| 1108 |
-
<div className="chart-side-panel ml-4 pt-4">
|
| 1109 |
-
<div className='mb-2'>
|
| 1110 |
Memory/token:{' '}
|
| 1111 |
{(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
|
| 1112 |
</div>
|
|
@@ -1411,17 +1425,14 @@ const Calculator = () => {
|
|
| 1411 |
</>
|
| 1412 |
) : null}
|
| 1413 |
</div>
|
|
|
|
|
|
|
| 1414 |
</div>
|
| 1415 |
-
|
| 1416 |
-
|
| 1417 |
-
|
| 1418 |
-
|
| 1419 |
-
|
| 1420 |
-
</div>
|
| 1421 |
-
</div>
|
| 1422 |
-
|
| 1423 |
-
|
| 1424 |
-
|
| 1425 |
)
|
| 1426 |
}
|
| 1427 |
|
|
|
|
| 614 |
)
|
| 615 |
|
| 616 |
return (
|
| 617 |
+
<>
|
| 618 |
+
{/* Model Footprint with Prefill Chunking */}
|
| 619 |
+
<div>
|
| 620 |
+
<div className='chart mb-8'>
|
| 621 |
+
<div className='flex flex-col items-center'>
|
| 622 |
+
<div className='text-2xl'>Model Footprint with Prefill Chunking</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 623 |
</div>
|
| 624 |
+
<div>
|
| 625 |
+
<div className='chart-row'>
|
| 626 |
+
<div className='chart-row-title'>FP32</div>
|
| 627 |
+
<PrefillChunkingModelSizeBarChart
|
| 628 |
+
modelSize={calculateMemory(modelParams, 'fp32')}
|
| 629 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
|
| 630 |
+
modelPrecision='fp32'
|
| 631 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 632 |
+
activationMemorySize={activationMemorySize}
|
| 633 |
+
/>
|
| 634 |
+
<div className='chart-row-size ml-8'>
|
| 635 |
+
{(calculateMemory(modelParams, 'fp32') + activationMemorySize).toFixed(2)}{' '}
|
| 636 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
| 637 |
+
</div>
|
| 638 |
+
</div>
|
| 639 |
|
| 640 |
+
<div className='chart-row my-8'>
|
| 641 |
+
<div className='chart-row-title'>FP16</div>
|
| 642 |
+
<PrefillChunkingModelSizeBarChart
|
| 643 |
+
modelSize={calculateMemory(modelParams, 'fp16')}
|
| 644 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
|
| 645 |
+
modelPrecision='fp16'
|
| 646 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 647 |
+
activationMemorySize={activationMemorySize}
|
| 648 |
+
/>
|
| 649 |
+
<div className='chart-row-size ml-8'>
|
| 650 |
+
{(calculateMemory(modelParams, 'fp16') + activationMemorySize).toFixed(2)}{' '}
|
| 651 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
| 652 |
+
</div>
|
| 653 |
</div>
|
|
|
|
| 654 |
|
| 655 |
+
<div className='chart-row my-8'>
|
| 656 |
+
<div className='chart-row-title'>INT8</div>
|
| 657 |
+
<PrefillChunkingModelSizeBarChart
|
| 658 |
+
modelSize={calculateMemory(modelParams, 'int8')}
|
| 659 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
|
| 660 |
+
modelPrecision='int8'
|
| 661 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 662 |
+
activationMemorySize={activationMemorySize}
|
| 663 |
+
/>
|
| 664 |
+
<div className='chart-row-size ml-8'>
|
| 665 |
+
{(calculateMemory(modelParams, 'int8') + activationMemorySize).toFixed(2)}{' '}
|
| 666 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
| 667 |
+
</div>
|
| 668 |
</div>
|
|
|
|
| 669 |
|
| 670 |
+
<div className='chart-row my-8'>
|
| 671 |
+
<div className='chart-row-title'>INT4</div>
|
| 672 |
+
<PrefillChunkingModelSizeBarChart
|
| 673 |
+
modelSize={calculateMemory(modelParams, 'int4')}
|
| 674 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
|
| 675 |
+
modelPrecision='int4'
|
| 676 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 677 |
+
activationMemorySize={activationMemorySize}
|
| 678 |
+
/>
|
| 679 |
+
<div className='chart-row-size ml-8'>
|
| 680 |
+
{(calculateMemory(modelParams, 'int4') + activationMemorySize).toFixed(2)}{' '}
|
| 681 |
+
{deviceMemory !== null && deviceMemory > 0 ? `/ ${deviceMemory} ` : null}GB
|
| 682 |
+
</div>
|
| 683 |
</div>
|
| 684 |
</div>
|
| 685 |
</div>
|
| 686 |
+
<div className='chart'>
|
| 687 |
+
<div className='flex flex-col items-center'>
|
| 688 |
+
<div className='text-2xl'>
|
| 689 |
+
Maximum Batch Size / Sequence Length with Prefill Chunking
|
| 690 |
+
</div>
|
| 691 |
+
</div>
|
| 692 |
+
<PrefillChunkingInferenceRuntimeLineChart
|
| 693 |
+
availableMemory={{
|
| 694 |
+
int4: deviceMemory - calculateMemory(modelParams, 'int4'),
|
| 695 |
+
int8: deviceMemory - calculateMemory(modelParams, 'int8'),
|
| 696 |
+
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
| 697 |
+
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
| 698 |
+
}}
|
| 699 |
+
memoryPerInput={memoryPerInput}
|
| 700 |
+
activationMemorySize={activationMemorySize}
|
| 701 |
+
/>
|
| 702 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
</div>
|
| 704 |
+
</>
|
| 705 |
)
|
| 706 |
}
|
| 707 |
|
|
|
|
| 774 |
}
|
| 775 |
|
| 776 |
return (
|
| 777 |
+
<div className='flex flex-col items-center justify-center min-h-screen px-4'>
|
| 778 |
+
{/* Toggle Button */}
|
| 779 |
+
<div className='mb-4 flex space-x-4'>
|
| 780 |
+
<button
|
| 781 |
+
className={`${
|
| 782 |
+
!isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
| 783 |
+
}`}
|
| 784 |
+
onClick={() => setIsPrefillChunking(false)}
|
| 785 |
+
>
|
| 786 |
+
Standard Calculator
|
| 787 |
+
</button>
|
| 788 |
+
<button
|
| 789 |
+
className={`${
|
| 790 |
+
isPrefillChunking ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
| 791 |
+
}`}
|
| 792 |
+
onClick={() => setIsPrefillChunking(true)}
|
| 793 |
+
>
|
| 794 |
+
Calculator with Prefill Chunking
|
| 795 |
+
</button>
|
| 796 |
+
</div>
|
| 797 |
|
| 798 |
+
{/* Model Memory Calculator */}
|
| 799 |
+
<div className='w-full max-w-4xl'>
|
| 800 |
+
<div className='text-4xl mb-4 text-center'>Model Memory Calculator</div>
|
| 801 |
+
<div className='mb-6 text-center'>
|
| 802 |
+
Use our Model Memory Calculator to help you estimate the memory footprint of your model
|
| 803 |
+
for different precisions and the maximum batch size / sequence length combination you can
|
| 804 |
+
run on your device.
|
| 805 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
|
| 807 |
+
{/* Model and Device Selection */}
|
| 808 |
+
<div className='grid grid-cols-1 sm:grid-cols-2 gap-4 mb-6'>
|
| 809 |
+
{/* Model Selection */}
|
| 810 |
+
<div className='calculator-input-box'>
|
| 811 |
+
<div className='text-2xl calculator-input-title'>Model</div>
|
| 812 |
+
<div className='calculator-input-content'>
|
| 813 |
+
<div className='mb-2'>
|
| 814 |
+
<button
|
| 815 |
+
className={`${
|
| 816 |
+
modelSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
| 817 |
+
}`}
|
| 818 |
+
onClick={() => setModelSelectionTab(true)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 819 |
>
|
| 820 |
+
Model Selection
|
| 821 |
+
</button>
|
| 822 |
+
<button
|
| 823 |
+
className={`${
|
| 824 |
+
modelSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
| 825 |
+
}`}
|
| 826 |
+
onClick={() => setModelSelectionTab(false)}
|
| 827 |
+
>
|
| 828 |
+
Custom Model
|
| 829 |
+
</button>
|
| 830 |
+
</div>
|
| 831 |
+
<div>
|
| 832 |
+
{modelSelectionTab ? (
|
| 833 |
+
<>
|
| 834 |
+
<label htmlFor='model'>Select a Model</label>
|
| 835 |
+
<select
|
| 836 |
+
id='model'
|
| 837 |
+
className='calculator-select'
|
| 838 |
+
onChange={(e) => {
|
| 839 |
+
setModelParams(Number(e.target.value))
|
| 840 |
+
setHiddenSize(
|
| 841 |
+
Number(
|
| 842 |
+
e.target.options[e.target.selectedIndex].getAttribute(
|
| 843 |
+
'data-hiddenSize',
|
| 844 |
+
),
|
| 845 |
+
),
|
| 846 |
+
)
|
| 847 |
+
setNumLayers(
|
| 848 |
+
Number(
|
| 849 |
+
e.target.options[e.target.selectedIndex].getAttribute('data-numLayers'),
|
| 850 |
+
),
|
| 851 |
+
)
|
| 852 |
+
}}
|
| 853 |
>
|
| 854 |
+
<option value=''>None selected</option>
|
| 855 |
+
{MODELS.map((model) => (
|
| 856 |
+
<option
|
| 857 |
+
key={model.name}
|
| 858 |
+
value={model.params}
|
| 859 |
+
data-hiddenSize={model.hidden_size}
|
| 860 |
+
data-numLayers={model.num_hidden_layers}
|
| 861 |
+
>
|
| 862 |
+
{model.name}
|
| 863 |
+
</option>
|
| 864 |
+
))}
|
| 865 |
+
</select>
|
| 866 |
+
</>
|
| 867 |
+
) : (
|
| 868 |
+
<>
|
| 869 |
+
<label htmlFor='modelParams'>Model Parameters (in billions)</label>
|
| 870 |
+
<input
|
| 871 |
+
type='number'
|
| 872 |
+
id='modelParams'
|
| 873 |
+
className='calculator-input mb-2'
|
| 874 |
+
placeholder='e.g. 7 (for LLaMA-7B)'
|
| 875 |
+
value={modelParams || ''}
|
| 876 |
+
min={0}
|
| 877 |
+
onChange={(e) => setModelParams(Number(e.target.value))}
|
| 878 |
+
/>
|
| 879 |
+
<label htmlFor='hiddenSize'>Hidden Size</label>
|
| 880 |
+
<input
|
| 881 |
+
type='number'
|
| 882 |
+
id='hiddenSize'
|
| 883 |
+
className='calculator-input mb-2'
|
| 884 |
+
placeholder='e.g. 4096 (for LLaMA-7B)'
|
| 885 |
+
value={hiddenSize || ''}
|
| 886 |
+
min={1}
|
| 887 |
+
onChange={(e) => setHiddenSize(Number(e.target.value))}
|
| 888 |
+
/>
|
| 889 |
+
<label htmlFor='numLayers'>Number of Layers</label>
|
| 890 |
+
<input
|
| 891 |
+
type='number'
|
| 892 |
+
id='numLayers'
|
| 893 |
+
className='calculator-input'
|
| 894 |
+
placeholder='e.g. 32 (for LLaMA-7B)'
|
| 895 |
+
value={numLayers || ''}
|
| 896 |
+
min={1}
|
| 897 |
+
onChange={(e) => setNumLayers(Number(e.target.value))}
|
| 898 |
+
/>
|
| 899 |
+
</>
|
| 900 |
+
)}
|
| 901 |
+
</div>
|
| 902 |
+
</div>
|
| 903 |
</div>
|
|
|
|
|
|
|
| 904 |
|
| 905 |
+
{/* Device Selection */}
|
| 906 |
+
<div className='calculator-input-box'>
|
| 907 |
+
<div className='text-2xl calculator-input-title'>Device</div>
|
| 908 |
+
<div className='calculator-input-content'>
|
| 909 |
+
<div className='mb-2'>
|
| 910 |
+
<button
|
| 911 |
+
className={`${
|
| 912 |
+
deviceSelectionTab ? 'calculator-input-tab-active' : 'calculator-input-tab'
|
| 913 |
+
}`}
|
| 914 |
+
onClick={() => {
|
| 915 |
+
setDeviceSelectionTab(true)
|
| 916 |
+
setDeviceMemory(null)
|
| 917 |
+
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 918 |
>
|
| 919 |
+
Device Selection
|
| 920 |
+
</button>
|
| 921 |
+
<button
|
| 922 |
+
className={`${
|
| 923 |
+
deviceSelectionTab ? 'calculator-input-tab' : 'calculator-input-tab-active'
|
| 924 |
+
}`}
|
| 925 |
+
onClick={() => {
|
| 926 |
+
setDeviceSelectionTab(false)
|
| 927 |
+
setDeviceMemory(null)
|
| 928 |
+
}}
|
| 929 |
+
>
|
| 930 |
+
Custom Device
|
| 931 |
+
</button>
|
| 932 |
+
</div>
|
| 933 |
+
<div>
|
| 934 |
+
{deviceSelectionTab ? (
|
| 935 |
+
<>
|
| 936 |
+
<label htmlFor='device'>Select a Device</label>
|
| 937 |
+
<select
|
| 938 |
+
id='device'
|
| 939 |
+
className='calculator-select'
|
| 940 |
+
onChange={(e) => setDeviceMemory(Number(e.target.value))}
|
| 941 |
+
>
|
| 942 |
+
<option value=''>None selected</option>
|
| 943 |
+
{DEVICES.map((device) => (
|
| 944 |
+
<option key={device.name} value={device.size}>
|
| 945 |
+
{device.name}
|
| 946 |
+
</option>
|
| 947 |
+
))}
|
| 948 |
+
</select>
|
| 949 |
+
</>
|
| 950 |
+
) : (
|
| 951 |
+
<>
|
| 952 |
+
<label htmlFor='deviceMemory'>Device RAM (in GB)</label>
|
| 953 |
+
<input
|
| 954 |
+
type='number'
|
| 955 |
+
id='deviceMemory'
|
| 956 |
+
className='calculator-input'
|
| 957 |
+
placeholder='e.g. 24'
|
| 958 |
+
value={deviceMemory || ''}
|
| 959 |
+
min={0}
|
| 960 |
+
onChange={(e) => setDeviceMemory(Number(e.target.value))}
|
| 961 |
+
/>
|
| 962 |
+
</>
|
| 963 |
+
)}
|
| 964 |
+
</div>
|
| 965 |
+
</div>
|
| 966 |
</div>
|
|
|
|
|
|
|
| 967 |
|
| 968 |
+
<div className='calculator-box'>
|
| 969 |
+
<div className='text-2xl ml-5 mb-4'>Backend Precision Table</div>
|
| 970 |
+
<div className='ml-5 mb-4'>
|
| 971 |
+
<BackendPrecisionTable />
|
| 972 |
+
</div>
|
| 973 |
+
<div className='ml-5'>
|
| 974 |
+
This table shows the precision used by each Takeoff backend for CPUs and GPUs, as well
|
| 975 |
+
as their accuracy preservation.
|
| 976 |
+
</div>
|
| 977 |
</div>
|
|
|
|
| 978 |
|
| 979 |
+
<div className='calculator-box'>
|
| 980 |
+
<div className='text-2xl ml-5 mb-4'>Input parameters</div>
|
| 981 |
+
<div className='ml-5 mb-4'>
|
| 982 |
+
<strong>Sequence Length</strong>: The combined length of input tokens and output
|
| 983 |
+
tokens. To restrict the maximum sequence length for inference on Takeoff, use the API
|
| 984 |
+
parameters <code>prompt_new_tokens</code> for input tokens and{' '}
|
| 985 |
+
<code>max_new_tokens</code> for output tokens when making a request.
|
| 986 |
+
</div>
|
| 987 |
+
<div className='ml-5'>
|
| 988 |
+
<strong>Batch Size</strong>: The number of sequences that can be processed in
|
| 989 |
+
parallel. To set a maximum batch size for inference on Takeoff, set the environment
|
| 990 |
+
variable <code>TAKEOFF_MAX_BATCH_SIZE</code> to your desired value.
|
| 991 |
+
</div>
|
| 992 |
</div>
|
| 993 |
</div>
|
| 994 |
|
| 995 |
+
{/* Prefill Chunking Settings */}
|
| 996 |
+
{isPrefillChunking && (
|
| 997 |
+
<div className='calculator-input-box mb-6'>
|
| 998 |
+
<div className='text-2xl calculator-input-title'>Prefill Chunking Settings</div>
|
| 999 |
+
<div className='calculator-input-content'>
|
| 1000 |
+
<label htmlFor='maxChunkSize'>Max Chunk Size</label>
|
| 1001 |
+
<input
|
| 1002 |
+
type='number'
|
| 1003 |
+
id='maxChunkSize'
|
| 1004 |
+
className='calculator-input mb-2'
|
| 1005 |
+
placeholder='e.g. 512'
|
| 1006 |
+
value={maxChunkSize || ''}
|
| 1007 |
+
min={1}
|
| 1008 |
+
onChange={(e) => setMaxChunkSize(Number(e.target.value))}
|
| 1009 |
+
/>
|
| 1010 |
+
<label htmlFor='intermediateSize'>Intermediate Size</label>
|
| 1011 |
+
<input
|
| 1012 |
+
type='number'
|
| 1013 |
+
id='intermediateSize'
|
| 1014 |
+
className='calculator-input'
|
| 1015 |
+
placeholder='e.g. 2048'
|
| 1016 |
+
value={intermediateSize || ''}
|
| 1017 |
+
min={1}
|
| 1018 |
+
onChange={(e) => setIntermediateSize(Number(e.target.value))}
|
| 1019 |
+
/>
|
| 1020 |
+
</div>
|
| 1021 |
+
</div>
|
| 1022 |
+
)}
|
| 1023 |
+
|
| 1024 |
+
{/* Charts Section */}
|
| 1025 |
+
{isPrefillChunking ? (
|
| 1026 |
+
<PrefillChunkingCalculator
|
| 1027 |
+
deviceMemory={deviceMemory!}
|
| 1028 |
+
modelParams={modelParams!}
|
| 1029 |
+
hiddenSize={hiddenSize!}
|
| 1030 |
+
numLayers={numLayers!}
|
| 1031 |
+
batchSize={batchSize}
|
| 1032 |
+
seqLength={seqLength}
|
| 1033 |
+
maxChunkSize={maxChunkSize}
|
| 1034 |
+
intermediateSize={intermediateSize}
|
| 1035 |
/>
|
| 1036 |
+
) : (
|
| 1037 |
+
hiddenSize &&
|
| 1038 |
+
numLayers &&
|
| 1039 |
+
deviceMemory &&
|
| 1040 |
+
modelParams && (
|
| 1041 |
+
<>
|
| 1042 |
+
{/* Model Footprint Chart */}
|
| 1043 |
+
<div className='chart mb-8'>
|
| 1044 |
+
<div className='text-2xl text-center mb-4'>Model Footprint</div>
|
| 1045 |
+
<div className='space-y-8'>
|
| 1046 |
+
<div className='chart-row'>
|
| 1047 |
+
<div className='chart-row-title'>FP32</div>
|
| 1048 |
+
<ModelSizeBarChart
|
| 1049 |
+
modelSize={calculateMemory(modelParams, 'fp32')}
|
| 1050 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp32')}
|
| 1051 |
+
modelPrecision='fp32'
|
| 1052 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 1053 |
+
/>
|
| 1054 |
+
<div className='chart-row-size ml-8'>
|
| 1055 |
+
{calculateMemory(modelParams, 'fp32')}{' '}
|
| 1056 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
| 1057 |
+
</div>
|
| 1058 |
+
</div>
|
| 1059 |
+
|
| 1060 |
+
{/* FP16 */}
|
| 1061 |
+
<div className='chart-row'>
|
| 1062 |
+
<div className='chart-row-title'>FP16</div>
|
| 1063 |
+
<ModelSizeBarChart
|
| 1064 |
+
modelSize={calculateMemory(modelParams, 'fp16')}
|
| 1065 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'fp16')}
|
| 1066 |
+
modelPrecision='fp16'
|
| 1067 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 1068 |
+
/>
|
| 1069 |
+
<div className='chart-row-size ml-8'>
|
| 1070 |
+
{calculateMemory(modelParams, 'fp16')}{' '}
|
| 1071 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
| 1072 |
+
</div>
|
| 1073 |
+
</div>
|
| 1074 |
+
|
| 1075 |
+
{/* INT8 */}
|
| 1076 |
+
<div className='chart-row'>
|
| 1077 |
+
<div className='chart-row-title'>INT8</div>
|
| 1078 |
+
<ModelSizeBarChart
|
| 1079 |
+
modelSize={calculateMemory(modelParams, 'int8')}
|
| 1080 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int8')}
|
| 1081 |
+
modelPrecision='int8'
|
| 1082 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 1083 |
+
/>
|
| 1084 |
+
<div className='chart-row-size ml-8'>
|
| 1085 |
+
{calculateMemory(modelParams, 'int8')}{' '}
|
| 1086 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
| 1087 |
+
</div>
|
| 1088 |
+
</div>
|
| 1089 |
+
|
| 1090 |
+
{/* INT4 */}
|
| 1091 |
+
<div className='chart-row'>
|
| 1092 |
+
<div className='chart-row-title'>INT4</div>
|
| 1093 |
+
<ModelSizeBarChart
|
| 1094 |
+
modelSize={calculateMemory(modelParams, 'int4')}
|
| 1095 |
+
largestModelSize={deviceMemory || calculateMemory(modelParams, 'int4')}
|
| 1096 |
+
modelPrecision='int4'
|
| 1097 |
+
deviceMemorySet={deviceMemory !== null && deviceMemory > 0}
|
| 1098 |
+
/>
|
| 1099 |
+
<div className='chart-row-size ml-8'>
|
| 1100 |
+
{calculateMemory(modelParams, 'int4')}{' '}
|
| 1101 |
+
{deviceMemory ? `/ ${deviceMemory} ` : null}GB
|
| 1102 |
+
</div>
|
| 1103 |
+
</div>
|
| 1104 |
</div>
|
| 1105 |
</div>
|
| 1106 |
|
| 1107 |
+
{/* Maximum Batch Size / Sequence Length Chart */}
|
| 1108 |
+
<div className='chart mb-8'>
|
| 1109 |
+
<div className='text-2xl text-center mb-4'>
|
| 1110 |
+
Maximum Batch Size / Sequence Length
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1111 |
</div>
|
| 1112 |
+
<div className='flex flex-row items-left'>
|
| 1113 |
+
<InferenceRuntimeLineChart
|
| 1114 |
+
availableMemory={{
|
| 1115 |
+
int4: deviceMemory - calculateMemory(modelParams, 'int4'),
|
| 1116 |
+
int8: deviceMemory - calculateMemory(modelParams, 'int8'),
|
| 1117 |
+
fp16: deviceMemory - calculateMemory(modelParams, 'fp16'),
|
| 1118 |
+
fp32: deviceMemory - calculateMemory(modelParams, 'fp32'),
|
| 1119 |
+
}}
|
| 1120 |
+
memoryPerInput={calculateMemoryPerInput(hiddenSize, numLayers)}
|
| 1121 |
+
/>
|
| 1122 |
+
<div className='chart-side-panel ml-4 pt-4'>
|
| 1123 |
+
<div className='mb-2'>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1124 |
Memory/token:{' '}
|
| 1125 |
{(calculateMemoryPerInput(hiddenSize, numLayers) * 1_000_000).toFixed(0)} KB
|
| 1126 |
</div>
|
|
|
|
| 1425 |
</>
|
| 1426 |
) : null}
|
| 1427 |
</div>
|
| 1428 |
+
</div>
|
| 1429 |
+
</div>
|
| 1430 |
</div>
|
| 1431 |
+
</>
|
| 1432 |
+
)
|
| 1433 |
+
)}
|
| 1434 |
+
</div>
|
| 1435 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1436 |
)
|
| 1437 |
}
|
| 1438 |
|