From 53b7e3bd23bdaf1c95f6d4356a10f8329679d2f2 Mon Sep 17 00:00:00 2001 From: Joey Zhang Date: Mon, 16 Mar 2020 21:50:38 +0800 Subject: [PATCH 1/2] Keep shebang and encoding-declaration magic comments in the minified output --- src/python_minifier/__init__.py | 38 ++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/python_minifier/__init__.py b/src/python_minifier/__init__.py index 4c6f6027..4d28fa53 100644 --- a/src/python_minifier/__init__.py +++ b/src/python_minifier/__init__.py @@ -5,6 +5,7 @@ """ import ast +import re from python_minifier.ast_compare import CompareError, compare_ast from python_minifier.module_printer import ModulePrinter @@ -90,6 +91,32 @@ def minify( filename = filename or 'python_minifier.minify source' + # If shebang or encoding is declared in the source code, such magic comments will be kept in the output + shebang_line = None # e.g. '#!/usr/bin/env python' + encoding_line = None # e.g. '# -*- coding: UTF-8 -*-' + source_lines = source.splitlines() + + if isinstance(source, str): + # compatible with Python 2 + first_line = source_line[0] if len(source_lines) > 0 else "" + second_line = source_lines[1] if len(source_lines) > 1 else "" + else: + first_line = source_lines[0].decode() if len(source_lines) > 0 else "" + second_line = source_lines[1].decode() if len(source_lines) > 1 else "" + + # Defined in https://www.python.org/dev/peps/pep-0263/#defining-the-encoding + RE_PEP263 = r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)' + coding_pattern = re.compile(RE_PEP263) + + if first_line.startswith('#!'): + shebang_line = first_line + + if not shebang_line and coding_pattern.match(first_line): + encoding_line = first_line + elif coding_pattern.match(second_line): + encoding_line = second_line + + # This will raise if the source file can't be parsed module = ast.parse(source, filename) @@ -128,7 +155,16 @@ def minify( if convert_posargs_to_args: module = remove_posargs(module) - return 
unparse(module) + output_code = "" + + if shebang_line: + output_code += shebang_line + '\n' + + if encoding_line: + output_code += encoding_line + '\n' + + output_code += unparse(module) + return output_code def unparse(module): From a91171e2bc307988de6bf216350c2be6344e384c Mon Sep 17 00:00:00 2001 From: zhangz36 Date: Wed, 18 Mar 2020 14:03:27 +0800 Subject: [PATCH 2/2] Refactor code for extracting shebang and encoding comments --- src/python_minifier/__init__.py | 47 +++++++++++++-------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/src/python_minifier/__init__.py b/src/python_minifier/__init__.py index 4d28fa53..0bad376c 100644 --- a/src/python_minifier/__init__.py +++ b/src/python_minifier/__init__.py @@ -91,30 +91,29 @@ def minify( filename = filename or 'python_minifier.minify source' - # If shebang or encoding is declared in the source code, such magic comments will be kept in the output - shebang_line = None # e.g. '#!/usr/bin/env python' - encoding_line = None # e.g. 
'# -*- coding: UTF-8 -*-' - source_lines = source.splitlines() - - if isinstance(source, str): - # compatible with Python 2 - first_line = source_line[0] if len(source_lines) > 0 else "" - second_line = source_lines[1] if len(source_lines) > 1 else "" - else: - first_line = source_lines[0].decode() if len(source_lines) > 0 else "" - second_line = source_lines[1].decode() if len(source_lines) > 1 else "" - + output_code = "" + # Defined in https://www.python.org/dev/peps/pep-0263/#defining-the-encoding RE_PEP263 = r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)' coding_pattern = re.compile(RE_PEP263) - if first_line.startswith('#!'): - shebang_line = first_line - - if not shebang_line and coding_pattern.match(first_line): - encoding_line = first_line - elif coding_pattern.match(second_line): - encoding_line = second_line + # If shebang or encoding is declared in the source code, such magic comments will be kept in the output + for idx, raw_line in enumerate(source.splitlines()[0:2]): + if isinstance(raw_line, str): + # Python 2 + line = raw_line + else: + # Python 3 + line = raw_line.decode() + + if idx == 0 and line.startswith('#!'): + # Shebang comment found + # e.g. '#!/usr/bin/env python' + output_code += line + '\n' + elif coding_pattern.match(line): + # Encoding comment found + # e.g. '# -*- coding: UTF-8 -*-' + output_code += line + '\n' # This will raise if the source file can't be parsed @@ -155,14 +154,6 @@ def minify( if convert_posargs_to_args: module = remove_posargs(module) - output_code = "" - - if shebang_line: - output_code += shebang_line + '\n' - - if encoding_line: - output_code += encoding_line + '\n' - output_code += unparse(module) return output_code