Commit: update
Vermouth7 committed Nov 17, 2024
1 parent 199f2e0 commit 2a420b6
Showing 3 changed files with 119 additions and 64 deletions.
3 changes: 1 addition & 2 deletions bigcode_eval/utils.py
@@ -5,6 +5,7 @@
import warnings
from collections import defaultdict
from typing import List, Optional
from types import MethodType

import torch
from torch.utils.data import IterableDataset
@@ -349,10 +350,8 @@ def complete_code(
            generated_tokens = model.generate(
                input_ids=inputs,
                num_return_sequences=batch_size,
                mode=my_mode,
                use_cache=False,
                output_hidden_states=True,
                discriminator=discriminator,
                **gen_kwargs,
            )

39 changes: 39 additions & 0 deletions main.py
@@ -2,6 +2,7 @@
import json
import os
import warnings
from types import MethodType

import datasets
import torch
@@ -251,6 +252,11 @@ def parse_args():
        choices=['no', 'mask', 'random'],
        default='no',
    )
    parser.add_argument(
        "--activation_mask",
        type=str,
        default='/root/autodl-tmp/bigcode-evaluation-harness/ds-7b',
    )
    return parser.parse_args()


@@ -338,6 +344,21 @@ def main():
            args.model,
            **model_kwargs,
        )
        lang = ['cpp', 'go', 'java', 'humaneval', 'js', 'php']

        if args.mask == 'mask':
            # Pick the saved mask that matches the language of the requested task.
            indx = 0
            for idx, task in enumerate(lang):
                if task in args.tasks:
                    indx = idx
                    break
            activation_masks = torch.load(args.activation_mask)
            activation_mask = activation_masks[indx]

            # Replace each decoder layer's MLP forward so the masked neurons
            # are zeroed during generation.
            for i, layer_mask in enumerate(activation_mask):
                obj = model.model.layers[i].mlp
                obj.forward = MethodType(factory(layer_mask.to('cuda')), obj)

        # model = WrappedModel(model)

    elif args.modeltype == "seq2seq":
@@ -453,6 +474,24 @@ def main():
    with open(args.metric_output_path, "w") as f:
        f.write(dumped)


def factory(mask):
    # llama_forward targets a vLLM-style MLP with a fused gate_up_proj, where
    # activations are 2-D (num_tokens, 2*i). It also needs
    # `import torch.nn.functional as F`, and it is defined but never returned,
    # so only ds_forward is active in this commit.
    def llama_forward(self, x):
        gate_up, _ = self.gate_up_proj(x)  # (num_tokens, 2*i)
        i = gate_up.size(-1)
        activation = F.silu(gate_up[:, : i // 2])
        activation.index_fill_(1, mask, 0)  # zero the masked neurons
        x = activation * gate_up[:, i // 2 :]
        x, _ = self.down_proj(x)
        return x

    # ds_forward matches HF-style MLPs (gate_proj / up_proj / down_proj),
    # as used by the deepseek-coder models loaded above.
    def ds_forward(self, x):
        activation = self.act_fn(self.gate_proj(x)) * self.up_proj(x)
        activation.index_fill_(-1, mask, 0)  # zero the masked neurons
        down_proj = self.down_proj(activation)
        return down_proj

    return ds_forward


if __name__ == "__main__":
    main()
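
For reference, the MethodType patch above can be exercised in isolation. Below is a minimal, self-contained sketch: a toy HF-style gated MLP whose forward is swapped per-instance the same way main.py swaps model.model.layers[i].mlp. The class name, layer sizes, and mask indices here are illustrative assumptions, not the harness's real modules.

# Minimal sketch of the per-instance MethodType patch (toy sizes, not the real model).
from types import MethodType

import torch
import torch.nn as nn


class ToyMLP(nn.Module):
    """HF-style gated MLP with the attributes ds_forward expects."""

    def __init__(self, hidden=8, intermediate=16):
        super().__init__()
        self.gate_proj = nn.Linear(hidden, intermediate, bias=False)
        self.up_proj = nn.Linear(hidden, intermediate, bias=False)
        self.down_proj = nn.Linear(intermediate, hidden, bias=False)
        self.act_fn = nn.SiLU()

    def forward(self, x):
        return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))


def factory(mask):
    def ds_forward(self, x):
        activation = self.act_fn(self.gate_proj(x)) * self.up_proj(x)
        activation.index_fill_(-1, mask, 0)  # silence the masked neurons
        return self.down_proj(activation)

    return ds_forward


mlp = ToyMLP()
layer_mask = torch.tensor([3, 7])  # hypothetical neuron indices to silence
mlp.forward = MethodType(factory(layer_mask), mlp)  # instance attribute shadows the class method
out = mlp(torch.randn(2, 5, 8))  # (batch, seq_len, hidden) still flows through
print(out.shape)  # torch.Size([2, 5, 8])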
141 changes: 79 additions & 62 deletions run.sh
@@ -1,69 +1,86 @@
# CUDA_VISIBLE_DEVICES=5 accelerate launch --main_process_port 29502 --num_processes=1 main.py \
# --model /data1/chh/models/deepseek-ai/deepseek-coder-6.7b-base \
# --tasks multiple-js \
# --max_new_tokens 512 \
# --temperature 0.0 \
# --batch_size 1 \
# --precision bf16 \
# --allow_code_execution \
# --do_sample=False \
# --save_generations \
# --save_generations_path /home/chh/repos/my_ctg/results/multiple/gen_js.json

# CUDA_VISIBLE_DEVICES=5 accelerate launch --main_process_port 29502 --num_processes=1 main.py \
# --model /data1/chh/models/deepseek-ai/deepseek-coder-6.7b-base \
# --tasks multiple-py \
# --max_new_tokens 512 \
# --temperature 0.0 \
# --batch_size 1 \
# --precision bf16 \
# --allow_code_execution \
# --do_sample=False \
# --save_generations \
# --save_generations_path /home/chh/repos/my_ctg/results/multiple/gen_py.json

# CUDA_VISIBLE_DEVICES=5 accelerate launch --main_process_port 29502 --num_processes=1 main.py \
# --model /data1/chh/models/deepseek-ai/deepseek-coder-6.7b-base \
# --tasks multiple-cpp \
# --max_new_tokens 512 \
# --temperature 0.0 \
# --batch_size 1 \
# --precision bf16 \
# --allow_code_execution \
# --do_sample=False \
# --save_generations \
# --save_generations_path /home/chh/repos/my_ctg/results/multiple/gen_cpp.json

# CUDA_VISIBLE_DEVICES=5 accelerate launch --main_process_port 29502 --num_processes=1 main.py \
# --model /data1/chh/models/deepseek-ai/deepseek-coder-6.7b-base \
# --tasks multiple-php \
# --max_new_tokens 512 \
# --temperature 0.0 \
# --batch_size 1 \
# --precision bf16 \
# --allow_code_execution \
# --do_sample=False \
# --save_generations \
# --save_generations_path /home/chh/repos/my_ctg/results/multiple/gen_php.json
accelerate launch --main_process_port 29502 --num_processes=1 main.py \
--model /root/autodl-tmp/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks multiple-cpp \
--max_new_tokens 512 \
--temperature 0.0 \
--batch_size 1 \
--precision bf16 \
--allow_code_execution \
--do_sample=False \
--save_generations \
--save_generations_path ./multiple/gen_cpp.json \
--mask 'mask'


accelerate launch --main_process_port 29502 --num_processes=1 main.py \
--model /root/autodl-tmp/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks multiple-go \
--max_new_tokens 512 \
--temperature 0.0 \
--batch_size 1 \
--precision bf16 \
--allow_code_execution \
--do_sample=False \
--save_generations \
--save_generations_path ./multiple/gen_go.json \
--mask 'mask'

accelerate launch --main_process_port 29502 --num_processes=1 main.py \
--model /root/autodl-tmp/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks multiple-java \
--max_new_tokens 512 \
--temperature 0.0 \
--batch_size 1 \
--precision bf16 \
--allow_code_execution \
--do_sample=False \
--save_generations \
--save_generations_path ./multiple/gen_java.json \
--mask 'mask'

accelerate launch --main_process_port 29502 --num_processes=1 main.py \
--model /root/autodl-tmp/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks humaneval \
--max_new_tokens 512 \
--temperature 0.0 \
--batch_size 1 \
--precision bf16 \
--allow_code_execution \
--do_sample=False \
--save_generations \
--save_generations_path ./multiple/gen_py.json \
--mask 'mask'

accelerate launch --main_process_port 29502 --num_processes=1 main.py \
--model /root/autodl-tmp/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks multiple-js \
--max_new_tokens 512 \
--temperature 0.0 \
--batch_size 1 \
--precision bf16 \
--allow_code_execution \
--do_sample=False \
--save_generations \
--save_generations_path ./multiple/gen_js.json \
--mask 'mask'



# CUDA_VISIBLE_DEVICES=5 accelerate launch --main_process_port 29502 --num_processes=1 main.py \
# --model /data1/chh/models/deepseek-ai/deepseek-coder-6.7b-base \
# --tasks multiple-go \
# --max_new_tokens 512 \
# --temperature 0.0 \
# --batch_size 1 \
# --precision bf16 \
# --allow_code_execution \
# --do_sample=False \
# --save_generations \
# --save_generations_path /home/chh/repos/my_ctg/results/multiple/gen_go.json

accelerate launch --main_process_port 29502 --num_processes=1 main.py \
--model /root/autodl-tmp/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks multiple-php \
--max_new_tokens 512 \
--temperature 0.0 \
--batch_size 1 \
--precision bf16 \
--allow_code_execution \
--do_sample=False \
--save_generations \
--save_generations_path ./multiple/gen_php.json \
--mask 'mask'



docker run -v /home/chh/repos/my_ctg/results/multiple/gen_go_multiple-go.json:/app/generations_go.json:ro -it evaluation-harness-multiple \
--model /data1/chh/models/deepseek-ai/deepseek-coder-6.7b-base \
--tasks multiple-go \
--load_generations_path /app/generations_go.json \
--allow_code_execution \
--n_samples 1
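
Before mounting a generations file into the evaluation container, a quick pre-flight check can catch path and shape mistakes. A minimal sketch, assuming (as the mount above suggests) that the harness appends the task name to --save_generations_path and writes a JSON list with one list of completions per problem:

# Hypothetical pre-flight check for a saved generations file.
import json

# gen_go.json -> gen_go_multiple-go.json once the task name is appended on save.
path = "./multiple/gen_go_multiple-go.json"

with open(path) as f:
    gens = json.load(f)

# Expect one list of completions per problem.
assert isinstance(gens, list) and all(isinstance(g, list) for g in gens)
print(f"{len(gens)} problems, {len(gens[0])} sample(s) each")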
