cleanup

lucidrains · Sep 18, 2023 · 24e90c3
1 parent 082371c
commit 24e90c3
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 5 deletions.
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'speculative-decoding',
   packages = find_packages(exclude=[]),
-  version = '0.0.5',
+  version = '0.0.6',
   license='MIT',
   description = 'Speculative Decoding',
   author = 'Phil Wang',

diff --git a/speculative_decoding/speculative_decoding.py b/speculative_decoding/speculative_decoding.py
@@ -174,7 +174,6 @@ def speculative_decoding(
 @torch.no_grad()
 def speculative_decoding_with_same_model(
     net: Module,
-    small_net: Module,
     prompt: Tensor,
     seq_len: int,
     gamma: int = 5,
@@ -204,7 +203,7 @@ def speculative_decoding_with_same_model(
         q_sampled_out = []
 
         for _ in range(gamma):
-            small_logits, cache = small_net(out, cache = cache, return_cache = True)
+            small_logits, cache = net(out, cache = cache, return_cache = True, return_early_exit_only = True)
             small_logits = small_logits[:, -1]
 
             small_logits = top_k(small_logits, thres = filter_thres)

diff --git a/train_early_exit.py b/train_early_exit.py
@@ -144,8 +144,7 @@ def __len__(self):
 
         sampled, base_decode_elapsed = benchmark(base_decoding)(model, prompt, GENERATE_LENGTH)
 
-        small_model = partial(model, return_early_exit_only = True)
-        (spec_decode_sampled, num_accepted), spec_decode_elapsed = benchmark(speculative_decoding_with_same_model)(model,small_model, prompt, GENERATE_LENGTH, GAMMA)
+        (spec_decode_sampled, num_accepted), spec_decode_elapsed = benchmark(speculative_decoding_with_same_model)(model, prompt, GENERATE_LENGTH, GAMMA)
 
         base_decode_output = decode_tokens(sampled[0])
         spec_decode_output = decode_tokens(spec_decode_sampled[0])