 from __future__ import annotations

 from kimchima.pkg import logging
-from transformers import pipeline
+from transformers import (
+    pipeline,
+    AutoModel,
+    AutoTokenizer,
+    AutoModelForCausalLM,
+)

 logger = logging.get_logger(__name__)

@@ -31,8 +36,10 @@ def __init__(self):
         )

     @classmethod
-    def model_downloader(cls, *args, **kwargs)->str:
+    def model_downloader(cls, *args, **kwargs):
         r"""
+        Use the Hugging Face pipeline to download the model and save it
+        to the specified folder.
         """
         model_name = kwargs.pop("model_name", None)
         if model_name is None:
@@ -41,3 +48,60 @@ def model_downloader(cls, *args, **kwargs)->str:
         folder_name = kwargs.pop("folder_name", None)
         pipe = pipeline(model=model_name)
         pipe.save_pretrained(folder_name if folder_name is not None else model_name)
+        logger.info(f"Model {model_name} has been downloaded successfully")
+
+
+    @classmethod
+    def auto_downloader(cls, *args, **kwargs):
+        r"""
+        Use AutoModel from Hugging Face to download the model.
+        It supports a wider range of models beyond causal language models,
+        such as BERT, RoBERTa, BART, T5, and more.
+
+        It returns the base model without a task-specific head, so it does not
+        directly perform tasks like text generation or translation.
+        """
+
+        model_name = kwargs.pop("model_name", None)
+        if model_name is None:
+            raise ValueError("model_name is required")
+        folder_name = kwargs.pop("folder_name", None)
+
+        model = AutoModel.from_pretrained(model_name)
+        model.save_pretrained(folder_name if folder_name is not None else model_name)
+        logger.info(f"Model {model_name} has been downloaded successfully")
+
+
+    @classmethod
+    def casual_downloader(cls, *args, **kwargs):
+        r"""
+        Use AutoModelForCausalLM from Hugging Face to download a causal
+        language model, such as GPT-2 or XLNet.
+        It returns a model with a language modeling head that can generate text,
+        translate text, write content, and answer questions in an informative way.
+        """
+        model_name = kwargs.pop("model_name", None)
+        if model_name is None:
+            raise ValueError("model_name is required")
+
+        folder_name = kwargs.pop("folder_name", None)
+        # https://github.com/huggingface/transformers/issues/25296
+        # https://github.com/huggingface/accelerate/issues/661
+        model = AutoModelForCausalLM.from_pretrained(model_name)
+        model.save_pretrained(folder_name if folder_name is not None else model_name)
+        logger.info(f"Model {model_name} has been downloaded successfully")
+
+    @classmethod
+    def auto_token_downloader(cls, *args, **kwargs):
+        r"""
+        Use AutoTokenizer from Hugging Face to download the tokenizer configuration.
+        """
+        model_name = kwargs.pop("model_name", None)
+        if model_name is None:
+            raise ValueError("model_name is required")
+
+        folder_name = kwargs.pop("folder_name", None)
+
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.save_pretrained(folder_name if folder_name is not None else model_name)
+        logger.info(f"Tokenizer {model_name} has been downloaded successfully")