Skip to content

Commit

Permalink
finally seeing the 1.7x speed up
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidrains committed Sep 17, 2023
1 parent 07fa8a0 commit 3d8aa55
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = 'speculative-decoding',
packages = find_packages(exclude=[]),
version = '0.0.1',
version = '0.0.2',
license='MIT',
description = 'Speculative Decoding',
author = 'Phil Wang',
Expand All @@ -20,7 +20,7 @@
'beartype',
'einops>=0.6.1',
'rotary-embedding-torch>=0.3.0',
'torch>=2.0',
'torch>=1.12',
],
classifiers=[
'Development Status :: 4 - Beta',
Expand Down
18 changes: 12 additions & 6 deletions speculative_decoding/speculative_decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ def base_decoding(

return out[..., prompt_seq_len:]

def safe_div(num, den, eps = 1e-10):
    """Divide ``num`` by ``den`` with the denominator floored at ``eps``.

    The denominator actually used is ``max(den, eps)``, so any ``den``
    smaller than ``eps`` -- zero and negative values included -- is
    replaced by ``eps``. This keeps the division from raising or producing
    infinities when ``den`` is zero.

    Args:
        num: numerator; anything that supports ``/`` with a number.
        den: denominator; compared against ``eps`` with the builtin ``max``.
        eps: smallest denominator allowed (default ``1e-10``).

    Returns:
        ``num / max(den, eps)``.
    """
    # clamp first so the division below can never see a zero denominator
    clamped_den = max(den, eps)
    return num / clamped_den

def find_first_true_index(bool_tensor, dim = -1):
    """Return the position of the first True value along ``dim``.

    A running count of True values is taken along ``dim``; every position
    strictly before the first True still has a count of zero, so the number
    of zero-count positions equals the index of the first True.

    If there is no True along ``dim``, every position has a zero count and
    the result is the full length of that dimension.

    Args:
        bool_tensor: tensor of booleans to scan.
        dim: dimension to scan along (default: last); it is reduced away
            in the result.

    Returns:
        Integer tensor of first-True indices with ``dim`` removed.
    """
    # number of True values seen so far, inclusive of the current position
    true_seen = bool_tensor.cumsum(dim = dim)

    # positions before the first True are exactly those with nothing seen yet
    before_first_true = true_seen == 0

    return before_first_true.sum(dim = dim)

@torch.no_grad()
def speculative_decoding(
Expand Down Expand Up @@ -118,16 +123,17 @@ def speculative_decoding(

# prob and prob of small model (p(x) and q(x) in algorithm 1)

prob = (logits / temperature).softmax(dim = -1)
small_prob = (small_logits / temperature).softmax(dim = -1)
prob = safe_div(logits, temperature).softmax(dim = -1)
small_prob = safe_div(small_logits, temperature).softmax(dim = -1)

p, prob_next = prob[:, :-1], prob[:, -1]

p = prob[:, :-1].gather(-1, q_sampled_out)
p = p.gather(-1, q_sampled_out)
q = small_prob.gather(-1, q_sampled_out) * lenience
r = random_uniform = torch.zeros_like(q).float().uniform_(0, 1)

n = accepted = (((r > (p / q)).cumsum(dim = -1)) == 0).sum().item()

prob_next = prob[:, -1]
accepted = find_first_true_index(r > (p / q))
n = accepted[0][0] # need to handle batched spec decoding

if n < gamma:
adjusted_prob = F.relu(prob[:, n] - small_prob[:, n])
Expand Down

0 comments on commit 3d8aa55

Please sign in to comment.