modelutils.py
# Code taken from https://github.com/IST-DASLab/gptq, with modifications
import torch
import torch.nn as nn

# Default device used throughout the quantization utilities.
DEV = torch.device('cuda:0')
print(DEV)


def find_layers(module, layers=[nn.Conv2d, nn.Linear], name='', enable=True, num_layers_to_quantize=1000):
    """Recursively collect quantizable sub-modules, keyed by their dotted names.

    A module is returned if its type is in `layers` (or its class name contains
    "Linear" or "Conv2d"), and either `enable` is True or its dotted name
    contains one of the first `num_layers_to_quantize` block indices.
    """
    layer_ids = [str(i) for i in range(num_layers_to_quantize)]
    if (type(module) in layers or "Linear" in str(type(module)) or "Conv2d" in str(type(module))) \
            and (enable or any(s in name.split(".") for s in layer_ids)):
        print(name)
        return {name: module}
    res = {}
    for name1, child in module.named_children():
        res.update(find_layers(
            child, layers=layers, name=name + '.' + name1 if name != '' else name1, enable=enable,
            num_layers_to_quantize=num_layers_to_quantize
        ))
    return res
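
# Illustrative usage of find_layers (a sketch; `model` below is a hypothetical
# already-loaded transformer, not something defined in this module):
#
#     # Collect every Linear layer in the model:
#     layers = find_layers(model, layers=[nn.Linear])
#     # Restrict matching to the first 4 transformer blocks (dotted names
#     # containing the indices "0".."3") by disabling the global match:
#     layers = find_layers(model, enable=False, num_layers_to_quantize=4)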


def find_quantlinear_layers(module, layers=[nn.Conv2d, nn.Linear], name='', enable=True, num_layers_to_quantize=1000):
    """Like `find_layers`, but can match auto-gptq QuantLinear modules.

    When `layers == [QuantLinear]`, only exact QuantLinear instances are
    collected; otherwise the behaviour matches `find_layers`. If auto-gptq is
    not available, an empty dict is returned.
    """
    layer_ids = [str(i) for i in range(num_layers_to_quantize)]
    res = {}
    try:
        from auto_gptq.nn_modules.qlinear.qlinear_cuda import QuantLinear
        if layers == [QuantLinear]:
            if type(module) in layers and (enable or any(s in name.split(".") for s in layer_ids)):
                print(name)
                return {name: module}
            for name1, child in module.named_children():
                # Recurse with the same QuantLinear-only matching rule.
                res.update(find_quantlinear_layers(
                    child, layers=layers, name=name + '.' + name1 if name != '' else name1, enable=enable,
                    num_layers_to_quantize=num_layers_to_quantize
                ))
        else:
            if (type(module) in layers or "Linear" in str(type(module)) or "Conv2d" in str(type(module))) \
                    and (enable or any(s in name.split(".") for s in layer_ids)):
                print(name)
                return {name: module}
            for name1, child in module.named_children():
                res.update(find_layers(
                    child, layers=layers, name=name + '.' + name1 if name != '' else name1, enable=enable,
                    num_layers_to_quantize=num_layers_to_quantize
                ))
    except Exception as e:
        print(f"Cannot find QuantLinear layers due to exception: {e}. Returning empty dict.")
        res = {}
    return res
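
# Illustrative usage (a sketch; requires auto-gptq and assumes a hypothetical
# `quantized_model` produced by it):
#
#     from auto_gptq.nn_modules.qlinear.qlinear_cuda import QuantLinear
#     # Collect only QuantLinear modules from an already-quantized model:
#     qlayers = find_quantlinear_layers(quantized_model, layers=[QuantLinear])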


def get_model(model_name, cached=True):
    """Load a causal LM in float16 and attach the `seqlen` attribute used for calibration."""
    def skip(*args, **kwargs):
        pass
    # Skip random weight initialization; the weights are overwritten by the
    # pretrained checkpoint anyway, which makes model construction faster.
    torch.nn.init.kaiming_uniform_ = skip
    torch.nn.init.uniform_ = skip
    torch.nn.init.normal_ = skip
    from transformers import AutoModelForCausalLM
    print(f"model name: {model_name}")
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16)
    # Families whose calibration sequence length is fixed at 2048 tokens.
    models_with_seqlen_2048 = ["bloom", "falcon"]
    # Families whose sequence length comes from config.max_position_embeddings.
    models_with_seqlen_from_max_position_embeddings = ["opt"]
    if any(m in model_name for m in models_with_seqlen_2048):
        model.seqlen = 2048
    if any(m in model_name for m in models_with_seqlen_from_max_position_embeddings):
        model.seqlen = model.config.max_position_embeddings
    print(model)
    return model
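

if __name__ == "__main__":
    # Minimal smoke test on a toy network (illustrative only; the layer sizes
    # and the example Hugging Face model name below are assumptions, not part
    # of the original module).
    toy = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
    print(find_layers(toy))
    # Real usage downloads pretrained weights via transformers; uncomment to run:
    # model = get_model("facebook/opt-125m")
    # print(find_layers(model, layers=[nn.Linear]))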