Skip to content

Commit 80d6656

Browse files
authored
Merge pull request PaddlePaddle#363 from ZhangHandi/my_branch
add BERT distillation course
2 parents e51702f + 824da06 commit 80d6656

File tree

160 files changed

+28846
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

160 files changed

+28846
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@
300300
| transformer在图像分类中的应用 | [notebook链接](https://aistudio.baidu.com/aistudio/projectdetail/2154618) | [Python实现](./transformer_courses/Application_of_transformer_in_image_classification) | 本章节将为大家详细介绍 Transformer 在 CV 领域中的两个经典算法:ViT 以及 DeiT。带领大家一起学习Transformer 结构在图像分类领域的具体应用。 |
301301
|经典的预训练语言模型 | [notebook链接](https://aistudio.baidu.com/aistudio/projectdetail/2110336) |[Python实现](./transformer_courses/Transformer_Machine_Translation)|本章节将为大家详细介绍NLP领域 Transformer。Transformer的前世今生,包括ELMo,GPT,Transformer,BERT等经典模型,还会介绍Transformer在机器翻译里面的应用 |
302302
| 预训练模型的瘦身策略 – – 高效结构 | [notebook链接](https://aistudio.baidu.com/aistudio/projectdetail/2138857)| [Python实现](./transformer_courses/Transformer_Punctuation_Restoration) | 本章节将为大家详细介绍NLP领域,基于Transformer模型的瘦身技巧。包括 Electra,AlBERT 以及 performer。还会介绍代码实现案例:基于Electra的语音识别后处理中文标点符号预测 |
303+
| BERT蒸馏 | [notebook链接](https://aistudio.baidu.com/aistudio/projectdetail/2177549)| [Python实现](./transformer_courses/BERT_distillation) | 本章节为大家详细介绍了针对BERT模型的蒸馏算法,包括:Patient-KD、DistilBERT、TinyBERT、DynaBERT等模型,同时以代码的形式为大家展现了如何使用DynaBERT的训练策略对TinyBERT进行蒸馏。 |
303304
| | | | |
304305

305306
# 五、 经典深度学习案例集(开发中)
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Packaging and build artifacts.
*.egg-info
build/
# gitignore has no "./" prefix syntax; a leading "/" anchors the pattern to
# the directory that contains this file.
/dist/
*.pyc
dist/
# Data, logs and archives.
*.data
*.log
*.tar
*.tar.gz
*.zip
# Generated documentation site.
docs/site
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import datetime
import io
import os
import platform
import re
import subprocess
import sys
10+
11+
COPYRIGHT = '''
12+
Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
13+
14+
Licensed under the Apache License, Version 2.0 (the "License");
15+
you may not use this file except in compliance with the License.
16+
You may obtain a copy of the License at
17+
18+
http://www.apache.org/licenses/LICENSE-2.0
19+
20+
Unless required by applicable law or agreed to in writing, software
21+
distributed under the License is distributed on an "AS IS" BASIS,
22+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23+
See the License for the specific language governing permissions and
24+
limitations under the License.
25+
'''
26+
27+
# Module-level default for the comment prefix. generate_copyright() assigns
# its own local value on every call, so this stays None.
LANG_COMMENT_MARK = None

# Separator used to split the COPYRIGHT template and to join the generated
# header lines. String literals in Python source always contain "\n" line
# endings, and text-mode file I/O translates "\n" to the native line ending on
# write, so "\n" is the right value on every platform. The previous Windows
# value "\r\n" never occurs in the template, so the template could not be
# split into lines there.
NEW_LINE_MARK = "\n"

# Second element of the split template, i.e. the
# "Copyright (c) <year> ..." line.
COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1]

# Swap the four-digit year in the template for the current year. The year is
# taken from the standard library instead of spawning the external `date`
# program, which is not available on every platform.
p = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0)
date = '{}'.format(datetime.date.today().year)
COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date)
43+
44+
45+
def generate_copyright(template, lang='C'):
    """Render *template* as a block of line comments for *lang*.

    Args:
        template: License text whose lines are separated by NEW_LINE_MARK.
        lang: 'Python' selects the "#" comment prefix; any other value
            selects "//".

    Returns:
        The commented header. Its first line is built from COPYRIGHT_HEADER;
        the template's first two lines and its last line are skipped. Empty
        template lines become a bare comment prefix, and the result ends
        with one extra "\\n".
    """
    marker = '#' if lang == 'Python' else "//"

    template_lines = template.split(NEW_LINE_MARK)
    skipped = (0, 1, len(template_lines) - 1)

    pieces = [marker + " " + COPYRIGHT_HEADER + NEW_LINE_MARK]
    for index, text in enumerate(template_lines):
        if index in skipped:
            continue
        # No space after the prefix on empty lines, which avoids trailing
        # whitespace in the generated header.
        gap = " " if text else ""
        pieces.append(marker + gap + text + NEW_LINE_MARK)
    pieces.append("\n")

    return "".join(pieces)
63+
64+
65+
def lang_type(filename):
    """Return the comment-style family for *filename* based on its suffix.

    Args:
        filename: Path or name of the file to classify.

    Returns:
        "Python" for ``.py`` files, or "C" for files whose line comments
        start with ``//`` (C/C++/CUDA headers and sources, Go, protobuf).

    Raises:
        SystemExit: With status 0, after printing a message, when the suffix
            is not supported (same exit status as before).
    """
    if filename.endswith(".py"):
        return "Python"

    # .cxx and .hxx are included because the copyright_checker hook's file
    # pattern selects them as well.
    c_style_suffixes = (".h", ".c", ".hpp", ".hxx", ".cc", ".cpp", ".cxx",
                        ".cu", ".cuh", ".go", ".proto")
    if filename.endswith(c_style_suffixes):
        return "C"

    # The filename is now interpolated into the message; previously it was
    # passed as a second print argument and the "%s" was printed literally.
    print("Unsupported filetype %s" % filename)
    sys.exit(0)
89+
90+
91+
# Matches a source-encoding declaration comment such as
# "# -*- coding: utf-8 -*-": optional leading whitespace, "#", then "coding:"
# or "coding=" followed by the encoding name (captured in group 1).
PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
92+
93+
94+
def main(argv=None):
    """Insert the copyright header into each listed file that lacks one.

    A file is left untouched when its first line already contains
    "Copyright (c)" (case-insensitive), when its first line is a shebang, or
    when either of its first two lines is an encoding declaration.

    Args:
        argv: List of command-line arguments (the filenames to check).
            ``None`` lets argparse read ``sys.argv``.

    Returns:
        1 if at least one file was rewritten, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description='Checker for copyright declaration.')
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        # Read the file once and make sure the handle is always closed, even
        # when the file is skipped.
        with io.open(filename, encoding="utf-8") as fd:
            first_line = fd.readline()
            second_line = fd.readline()
            if "COPYRIGHT (C)" in first_line.upper():
                continue
            if (first_line.startswith("#!") or
                    PYTHON_ENCODE.match(second_line) is not None or
                    PYTHON_ENCODE.match(first_line) is not None):
                continue
            original_contents = first_line + second_line + fd.read()

        new_contents = generate_copyright(
            COPYRIGHT, lang_type(filename)) + original_contents
        print('Auto Insert Copyright Header {}'.format(filename))
        retv = 1
        # Write with the same encoding used for reading so non-ASCII content
        # survives on platforms whose default encoding is not UTF-8.
        with io.open(filename, 'w', encoding="utf-8") as output_file:
            output_file.write(new_contents)

    return retv
118+
119+
120+
# Script entry point: main()'s return value becomes the process exit status,
# so a rewritten file (1) is reported as a failure to the caller.
# sys.exit is used because the bare `exit` helper is installed by the `site`
# module for interactive use and is not guaranteed to exist.
if __name__ == '__main__':
    sys.exit(main())
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
# Pre-commit hook: run pylint with only the docstring_checker checks enabled
# on every changed, non-deleted file and fail if any run reports a problem.

TOTAL_ERRORS=0

# Put this script's directory on PYTHONPATH, presumably so pylint can import
# the docstring_checker plugin from it.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export PYTHONPATH=$DIR:$PYTHONPATH

# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --name-status | awk '$1 != "D" {print $2}'); do
    pylint --disable=all --load-plugins=docstring_checker \
        --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises "$file"
    TOTAL_ERRORS=$((TOTAL_ERRORS + $?))
done

# Exit statuses wrap modulo 256, so a summed count could wrap around to 0 and
# hide failures; report any failure as status 1 instead.
if [ "$TOTAL_ERRORS" -ne 0 ]; then
    exit 1
fi
exit 0
#For now, just warning:
#exit 0
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# pre-commit configuration: whitespace/line-ending cleanup, yapf formatting,
# generic repository checks, and two local hooks (docstring lint, copyright
# header insertion).
repos:
-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
    sha: v1.0.1
    hooks:
    -   id: remove-crlf
        # NOTE(review): pattern restored from a garbled rendering; confirm
        # against the repository copy.
        files: .*
    -   id: forbid-tabs
        files: \.(md|yml)$
    -   id: remove-tabs
        files: \.(md|yml)$
-   repo: https://github.com/PaddlePaddle/mirrors-yapf.git
    sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
    hooks:
    -   id: yapf
        files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
-   repo: https://github.com/pre-commit/pre-commit-hooks
    sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
    hooks:
    -   id: check-added-large-files
    -   id: check-merge-conflict
    -   id: check-symlinks
    -   id: detect-private-key
        # NOTE(review): pattern restored from a garbled rendering; confirm
        # against the repository copy.
        files: .*
    -   id: end-of-file-fixer
        files: \.(md|yml)$
    -   id: trailing-whitespace
        files: \.(md|yml)$
-   repo: local
    hooks:
    -   id: pylint-doc-string
        name: pylint
        description: Check python docstring style using docstring_checker.
        entry: bash .hooks/pylint_pre_commit.hook
        language: system
        files: \.(py)$
-   repo: local
    hooks:
    -   id: copyright_checker
        name: copyright_checker
        entry: python .hooks/copyright.hook
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
        # NOTE(review): pattern reconstructed from a garbled rendering;
        # confirm against the repository copy.
        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[style]
2+
based_on_style = pep8
3+
column_limit = 80

0 commit comments

Comments
 (0)