Fixed JS plots: added throughput_latency_smaller.js and shrank the chart containers in test.html

Vashistht · Vashistht · commit 24fb14df59b9 · 2024-08-09T16:41:44.000-04:00
diff --git a/static/js/plots/throughput_latency_smaller.js b/static/js/plots/throughput_latency_smaller.js
@@ -0,0 +1,183 @@
+document.addEventListener('DOMContentLoaded', function() {  // Once the DOM is parsed, build two scatter charts on the canvases with ids 'chart1' and 'chart2'.
+    // First chart: max throughput vs. average tokenwise latency for the '(AutoReg)' and '(SpecDec)' series, both labelled 'Prefill 8000'.
+    const ctx1 = document.getElementById('chart1').getContext('2d');  // no null check: throws if the page has no element with id 'chart1'
+    ctx1.canvas.width = 300;  // Canvas width attribute. NOTE(review): options1 sets no `responsive: false`, so the chart library may resize the canvas - confirm this value takes effect.
+    ctx1.canvas.height = 200;  // Canvas height attribute (same caveat as the width).
+
+    const data1 = {  // Two series; per the axis titles in options1, x is latency (ms) and y is throughput (tokens/s).
+        datasets: [
+            {
+                label: '(AutoReg) Prefill 8000',
+                data: [
+                    {x: 16.53, y: 1936},
+                    {x: 21.45, y: 2237},
+                    {x: 31.49, y: 2237},  // NOTE(review): y repeats the previous point; the second chart's 'Prefill 8000' ratios match SpecDec/AutoReg by index for the other three points but not this one (3186/2237 is about 1.42, plotted 1.57) - confirm the value.
+                    {x: 49.89, y: 2565}
+                ],
+                borderColor: 'rgb(153, 102, 51)',
+                backgroundColor: 'rgb(153, 102, 51)',
+                fill: false,
+                tension: 0.1,  // slight curve on the connecting line (0 would give straight segments)
+                pointStyle: 'rect',
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true  // connect the scatter points with a line
+            },
+            {
+                label: '(SpecDec) Prefill 8000',
+                data: [
+                    {x: 12.83, y: 2493},
+                    {x: 15.26, y: 3146},
+                    {x: 20.09, y: 3186},
+                    {x: 30.41, y: 4209}
+                ],
+                borderColor: 'rgb(102, 51, 0)',
+                backgroundColor: 'rgb(102, 51, 0)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'circle',
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            }
+        ]
+    };
+
+    const options1 = {  // Axis setup only: both axes linear with visible titles; no other chart options are set.
+        scales: {
+            y: {
+                type: 'linear',
+                title: {
+                    display: true,
+                    text: 'Max Throughput (tokens/s)',
+                }
+            },
+            x: {
+                title: {
+                    display: true,
+                    text: 'Avg. Tokenwise Latency (ms)'
+                },
+                type: 'linear',
+                position: 'bottom',
+            }
+        }
+    };
+
+    const config1 = {  // Bundle chart type, data and options for the Chart constructor.
+        type: 'scatter',
+        data: data1,
+        options: options1
+    };
+
+    new Chart(ctx1, config1);  // `Chart` is not defined in this file - presumably the Chart.js global loaded by the page (TODO confirm); the instance is not kept.
+
+    // Second chart: throughput ratio (SpecDec/AutoReg, per the y-axis title) vs. average tokenwise latency, one series per 'Prefill N' label.
+    const ctx2 = document.getElementById('chart2').getContext('2d');  // no null check: throws if the page has no element with id 'chart2'
+    ctx2.canvas.width = 375;  // Canvas width attribute. NOTE(review): options2 sets no `responsive: false`, so the chart library may resize the canvas - confirm this value takes effect.
+    ctx2.canvas.height = 250;  // Canvas height attribute (same caveat as the width).
+
+    const data2 = {  // Six series; per the axis titles in options2, x is latency (ms) and y is the throughput ratio.
+        datasets: [
+            {
+                label: 'Prefill 2048',
+                data: [{x: 9.19, y: 1.03}, {x: 10.27, y: 1.06}, {x: 12.19, y: 1.14}, {x: 16.96, y: 1.13}, {x: 26.9, y: 1.13}],
+                borderColor: 'rgb(153, 102, 51)',
+                backgroundColor: 'rgb(153, 102, 51)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'rect',
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            },
+            {
+                label: 'Prefill 4000',
+                data: [{x: 10.25, y: 1.16}, {x: 11.93, y: 1.21}, {x: 14.86, y: 1.3}, {x: 21.54, y: 1.34}, {x: 34.85, y: 1.4}],
+                borderColor: 'rgb(102, 51, 0)',
+                backgroundColor: 'rgb(102, 51, 0)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'circle',
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            },
+            {
+                label: 'Prefill 8000',
+                data: [{x: 12.83, y: 1.29}, {x: 15.26, y: 1.41}, {x: 20.09, y: 1.57}, {x: 30.41, y: 1.64}],  // x values are identical to the '(SpecDec) Prefill 8000' series in the first chart
+                borderColor: 'rgb(0, 51, 102)',
+                backgroundColor: 'rgb(0, 51, 102)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'triangle',
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            },
+            {
+                label: 'Prefill 16000',
+                data: [{x: 16.74, y: 1.57}, {x: 21.26, y: 1.69}, {x: 29.79, y: 1.85}],
+                borderColor: 'rgb(0, 102, 153)',
+                backgroundColor: 'rgb(0, 102, 153)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'rectRot',
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            },
+            {
+                label: 'Prefill 24000',
+                data: [{x: 20.76, y: 1.72}, {x: 26.95, y: 1.86}],
+                borderColor: 'rgb(0, 153, 204)',
+                backgroundColor: 'rgb(0, 153, 204)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'circle',  // same marker shape as 'Prefill 4000'; the two series differ only by color
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            },
+            {
+                label: 'Prefill 32000',
+                data: [{x: 24.04, y: 1.87}],  // single point, so there is nothing for showLine to connect
+                borderColor: 'rgb(0, 204, 255)',
+                backgroundColor: 'rgb(0, 204, 255)',
+                fill: false,
+                tension: 0.1,
+                pointStyle: 'triangle',  // same marker shape as 'Prefill 8000'; the two series differ only by color
+                pointRadius: 6,
+                borderWidth: 2,
+                showLine: true
+            }
+        ]
+    };
+
+    const options2 = {  // Axis setup only: both axes linear with visible titles; no other chart options are set.
+        scales: {
+            y: {
+                type: 'linear',
+                title: {
+                    display: true,
+                    text: 'Throughput Ratio (SpecDec/AutoReg)'
+                }
+            },
+            x: {
+                type: 'linear',
+                position: 'bottom',
+                title: {
+                    display: true,
+                    text: 'Avg. Tokenwise Latency (ms)'
+                }
+            }
+        }
+    };
+
+    const config2 = {  // Bundle chart type, data and options for the Chart constructor.
+        type: 'scatter',
+        data: data2,
+        options: options2
+    };
+
+    new Chart(ctx2, config2);  // Render the second chart; as with the first, the instance is not kept.
+});
diff --git a/test.html b/test.html
@@ -208,16 +208,46 @@ <h6>Key observations:</h6>
               <li>For moderate to long sequences, optimal speculation length also increases with batch
                   size.</li>
             </ol> -->
-             <p><strong>1. Speculative Decoding achieves better throughput-latency trade-off for moderate to long sequences.</strong></p>
-             <div style="display: flex; flex-wrap: wrap; justify-content: center;">
+             <!-- <p><strong>1. Speculative Decoding achieves better throughput-latency trade-off for moderate to long sequences.</strong></p> -->
+             <!-- <div style="display: flex; flex-wrap: wrap; justify-content: center;">
               <div style="width: 120%; min-width: 300px; margin: 5px;">
                   <canvas id="chart1"></canvas>
               </div>
               <div style="width: 120%; min-width: 300px; margin: 5px;">
                   <canvas id="chart2"></canvas>
               </div>
             </div>
-             <script src="static/js/plots/throughput_latency.js"></script>
+             <script src="static/js/plots/throughput_latency.js"></script> -->
+             <!-- <ol style="font-size: 0.9em;">
+              <li>Speculative Decoding is effective in improving throughput for long enough sequences.</li>
+              <li>For every model and hardware pair, there exists a critical sequence length, beyond which
+                  speedup increases with increasing batch size—the longer the sequence, the better the
+                  speedup scaling.</li>
+              <li>For moderate to long sequences, optimal speculation length also increases with batch
+                  size.</li>
+            </ol> -->
+            <p><strong>1. Speculative Decoding achieves better throughput-latency trade-off for moderate to long sequences.</strong></p>
+            <!-- <div style="display: flex; flex-wrap: wrap; justify-content: center;">
+            <div style="width: 60%; min-width: 250px; margin: 5px;">
+                <canvas id="chart1"></canvas>
+            </div>
+            <div style="width: 60%; min-width: 250px; margin: 5px;">
+                <canvas id="chart2"></canvas>
+            </div>
+          </div>
+            <script src="static/js/plots/throughput_latency_smaller.js"></script>
+          
+             -->
+             <div style="display: flex; flex-wrap: wrap; justify-content: center;">
+              <div style="width: 45%; min-width: 150px; margin: 5px;">
+                  <canvas id="chart1"></canvas>
+              </div>
+              <div style="width: 45%; min-width: 150px; margin: 5px;">
+                  <canvas id="chart2"></canvas>
+              </div>
+          </div>
+          <script src="static/js/plots/throughput_latency_smaller.js"></script>
+          
              <p><strong>2. For every model and hardware pair, there exists a critical sequence length, beyond which speedup increases with increasing batch size—the longer the sequence, the better the speedup scaling.</strong></p>
              <p><strong>3. Interestingly, the optimal speculation length also increases with batch size for sufficiently long sequences.</strong></p>
           </div>