Skip to content

Commit 24fb14d

Browse files
committed
fixed js plots
1 parent b39a773 commit 24fb14d

File tree

2 files changed

+216
-3
lines changed

2 files changed

+216
-3
lines changed
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/**
 * Draws the two throughput/latency scatter plots once the DOM has been parsed.
 *
 * Chart 1 (canvas #chart1): max throughput vs. average tokenwise latency for
 * the AutoReg and SpecDec series at prefill 8000.
 * Chart 2 (canvas #chart2): SpecDec/AutoReg throughput ratio vs. average
 * tokenwise latency for several prefill lengths.
 *
 * NOTE(review): `Chart` is not defined in this file; presumably it is the
 * Chart.js global loaded by the hosting page — confirm the script order there.
 */
document.addEventListener('DOMContentLoaded', function() {
  // Both charts share the same x-axis caption.
  const LATENCY_AXIS_TITLE = 'Avg. Tokenwise Latency (ms)';

  /**
   * Builds one dataset whose points are joined by a line.
   * All series share the same line/point styling; only the label, colour,
   * marker shape and data points differ.
   *
   * @param {string} label - Legend entry for the series.
   * @param {string} color - CSS colour used for both the line and the markers.
   * @param {string} pointStyle - Marker shape name passed through to the chart.
   * @param {Array<{x: number, y: number}>} data - Points of the series.
   * @returns {object} Dataset object for the chart's `data.datasets` array.
   */
  const makeSeries = (label, color, pointStyle, data) => ({
    label,
    data,
    borderColor: color,
    backgroundColor: color,
    fill: false,
    tension: 0.1,
    pointStyle,
    pointRadius: 6,
    borderWidth: 2,
    showLine: true, // connect the scatter points with a line
  });

  /**
   * Builds the `scales` option: linear x and y axes with visible titles.
   *
   * @param {string} yAxisTitle - Caption shown on the y axis.
   * @returns {object} Value for `options.scales`.
   */
  const linearAxes = (yAxisTitle) => ({
    y: {
      type: 'linear',
      title: {
        display: true,
        text: yAxisTitle,
      },
    },
    x: {
      type: 'linear',
      position: 'bottom',
      title: {
        display: true,
        text: LATENCY_AXIS_TITLE,
      },
    },
  });

  /**
   * Sizes the canvas with the given id and draws a scatter chart on it.
   * Throws if no element with that id exists (same as the inline version).
   *
   * @param {string} canvasId - id of the target <canvas> element.
   * @param {number} width - Value assigned to the canvas `width`.
   * @param {number} height - Value assigned to the canvas `height`.
   * @param {object[]} datasets - Datasets built with `makeSeries`.
   * @param {string} yAxisTitle - Caption shown on the y axis.
   * @returns {object} The created chart instance.
   */
  const drawScatter = (canvasId, width, height, datasets, yAxisTitle) => {
    const ctx = document.getElementById(canvasId).getContext('2d');
    // NOTE(review): if the chart library resizes the canvas responsively,
    // these values may only act as the initial size — confirm.
    ctx.canvas.width = width;
    ctx.canvas.height = height;

    return new Chart(ctx, {
      type: 'scatter',
      data: { datasets },
      options: { scales: linearAxes(yAxisTitle) },
    });
  };

  // First chart: absolute throughput, AutoReg vs. SpecDec at prefill 8000.
  drawScatter(
    'chart1',
    300,
    200,
    [
      makeSeries('(AutoReg) Prefill 8000', 'rgb(153, 102, 51)', 'rect', [
        {x: 16.53, y: 1936},
        {x: 21.45, y: 2237},
        {x: 31.49, y: 2237},
        {x: 49.89, y: 2565},
      ]),
      makeSeries('(SpecDec) Prefill 8000', 'rgb(102, 51, 0)', 'circle', [
        {x: 12.83, y: 2493},
        {x: 15.26, y: 3146},
        {x: 20.09, y: 3186},
        {x: 30.41, y: 4209},
      ]),
    ],
    'Max Throughput (tokens/s)'
  );

  // Second chart: throughput ratio for a range of prefill lengths.
  drawScatter(
    'chart2',
    375,
    250,
    [
      makeSeries('Prefill 2048', 'rgb(153, 102, 51)', 'rect', [
        {x: 9.19, y: 1.03},
        {x: 10.27, y: 1.06},
        {x: 12.19, y: 1.14},
        {x: 16.96, y: 1.13},
        {x: 26.9, y: 1.13},
      ]),
      makeSeries('Prefill 4000', 'rgb(102, 51, 0)', 'circle', [
        {x: 10.25, y: 1.16},
        {x: 11.93, y: 1.21},
        {x: 14.86, y: 1.3},
        {x: 21.54, y: 1.34},
        {x: 34.85, y: 1.4},
      ]),
      makeSeries('Prefill 8000', 'rgb(0, 51, 102)', 'triangle', [
        {x: 12.83, y: 1.29},
        {x: 15.26, y: 1.41},
        {x: 20.09, y: 1.57},
        {x: 30.41, y: 1.64},
      ]),
      makeSeries('Prefill 16000', 'rgb(0, 102, 153)', 'rectRot', [
        {x: 16.74, y: 1.57},
        {x: 21.26, y: 1.69},
        {x: 29.79, y: 1.85},
      ]),
      makeSeries('Prefill 24000', 'rgb(0, 153, 204)', 'circle', [
        {x: 20.76, y: 1.72},
        {x: 26.95, y: 1.86},
      ]),
      makeSeries('Prefill 32000', 'rgb(0, 204, 255)', 'triangle', [
        {x: 24.04, y: 1.87},
      ]),
    ],
    'Throughput Ratio (SpecDec/AutoReg)'
  );
});

test.html

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,16 +208,46 @@ <h6>Key observations:</h6>
208208
<li>For moderate to long sequences, optimal speculation length also increases with batch
209209
size.</li>
210210
</ol> -->
211-
<p><strong>1. Speculative Decoding achieves better throughput-latency trade-off for moderate to long sequences.</strong></p>
212-
<div style="display: flex; flex-wrap: wrap; justify-content: center;">
211+
<!-- <p><strong>1. Speculative Decoding achieves better throughput-latency trade-off for moderate to long sequences.</strong></p> -->
212+
<!-- <div style="display: flex; flex-wrap: wrap; justify-content: center;">
213213
<div style="width: 120%; min-width: 300px; margin: 5px;">
214214
<canvas id="chart1"></canvas>
215215
</div>
216216
<div style="width: 120%; min-width: 300px; margin: 5px;">
217217
<canvas id="chart2"></canvas>
218218
</div>
219219
</div>
220-
<script src="static/js/plots/throughput_latency.js"></script>
220+
<script src="static/js/plots/throughput_latency.js"></script> -->
221+
<!-- <ol style="font-size: 0.9em;">
222+
<li>Speculative Decoding is effective in improving throughput for long enough sequences.</li>
223+
<li>For every model and hardware pair, there exists a critical sequence length, beyond which
224+
speedup increases with increasing batch size—the longer the sequence, the better the
225+
speedup scaling.</li>
226+
<li>For moderate to long sequences, optimal speculation length also increases with batch
227+
size.</li>
228+
</ol> -->
229+
<p><strong>1. Speculative Decoding achieves better throughput-latency trade-off for moderate to long sequences.</strong></p>
230+
<!-- <div style="display: flex; flex-wrap: wrap; justify-content: center;">
231+
<div style="width: 60%; min-width: 250px; margin: 5px;">
232+
<canvas id="chart1"></canvas>
233+
</div>
234+
<div style="width: 60%; min-width: 250px; margin: 5px;">
235+
<canvas id="chart2"></canvas>
236+
</div>
237+
</div>
238+
<script src="static/js/plots/throughput_latency_smaller.js"></script>
239+
240+
-->
241+
<div style="display: flex; flex-wrap: wrap; justify-content: center;">
242+
<div style="width: 45%; min-width: 150px; margin: 5px;">
243+
<canvas id="chart1"></canvas>
244+
</div>
245+
<div style="width: 45%; min-width: 150px; margin: 5px;">
246+
<canvas id="chart2"></canvas>
247+
</div>
248+
</div>
249+
<script src="static/js/plots/throughput_latency_smaller.js"></script>
250+
221251
<p><strong>2. For every model and hardware pair, there exists a critical sequence length, beyond which speedup increases with increasing batch size—the longer the sequence, the better the speedup scaling.</strong></p>
222252
<p><strong>3. Interestingly, the optimal speculation length also increases with batch size for sufficiently long sequences.</strong></p>
223253
</div>

0 commit comments

Comments
 (0)