From fb01de68347638b3d7e23ba9b24f07b456cd957a Mon Sep 17 00:00:00 2001 From: Cody Boisclair Date: Thu, 22 Aug 2013 14:04:48 -0400 Subject: [PATCH 1/2] Upgrade markdown_python library to version 2.3.1. This ensures compatibility with Python 3. --- markdown_python/__init__.py | 145 ++++++++------ markdown_python/__main__.py | 4 +- markdown_python/__version__.py | 28 +++ markdown_python/blockparser.py | 16 +- markdown_python/blockprocessors.py | 47 ++--- markdown_python/etree_loader.py | 31 --- markdown_python/extensions/__init__.py | 8 +- markdown_python/extensions/abbr.py | 20 +- markdown_python/extensions/admonition.py | 118 +++++++++++ markdown_python/extensions/attr_list.py | 38 ++-- markdown_python/extensions/codehilite.py | 56 ++++-- markdown_python/extensions/def_list.py | 30 +-- markdown_python/extensions/extra.py | 12 +- markdown_python/extensions/fenced_code.py | 30 ++- markdown_python/extensions/footnotes.py | 52 +++-- markdown_python/extensions/headerid.py | 42 ++-- markdown_python/extensions/html_tidy.py | 69 ------- markdown_python/extensions/meta.py | 21 +- markdown_python/extensions/nl2br.py | 14 +- markdown_python/extensions/rss.py | 114 ----------- markdown_python/extensions/sane_lists.py | 12 +- markdown_python/extensions/smart_strong.py | 13 +- markdown_python/extensions/tables.py | 12 +- markdown_python/extensions/toc.py | 219 ++++++++++++++------- markdown_python/extensions/wikilinks.py | 30 ++- markdown_python/inlinepatterns.py | 69 ++++--- markdown_python/odict.py | 116 +++++++---- markdown_python/postprocessors.py | 9 +- markdown_python/preprocessors.py | 48 +++-- markdown_python/serializers.py | 6 +- markdown_python/treeprocessors.py | 43 ++-- markdown_python/util.py | 52 +++-- 32 files changed, 843 insertions(+), 681 deletions(-) mode change 100644 => 100755 markdown_python/__init__.py mode change 100644 => 100755 markdown_python/__main__.py create mode 100755 markdown_python/__version__.py mode change 100644 => 100755 
markdown_python/blockparser.py mode change 100644 => 100755 markdown_python/blockprocessors.py delete mode 100644 markdown_python/etree_loader.py mode change 100644 => 100755 markdown_python/extensions/__init__.py mode change 100644 => 100755 markdown_python/extensions/abbr.py create mode 100755 markdown_python/extensions/admonition.py mode change 100644 => 100755 markdown_python/extensions/attr_list.py mode change 100644 => 100755 markdown_python/extensions/codehilite.py mode change 100644 => 100755 markdown_python/extensions/def_list.py mode change 100644 => 100755 markdown_python/extensions/extra.py mode change 100644 => 100755 markdown_python/extensions/fenced_code.py mode change 100644 => 100755 markdown_python/extensions/footnotes.py mode change 100644 => 100755 markdown_python/extensions/headerid.py delete mode 100644 markdown_python/extensions/html_tidy.py mode change 100644 => 100755 markdown_python/extensions/meta.py mode change 100644 => 100755 markdown_python/extensions/nl2br.py delete mode 100644 markdown_python/extensions/rss.py mode change 100644 => 100755 markdown_python/extensions/sane_lists.py mode change 100644 => 100755 markdown_python/extensions/smart_strong.py mode change 100644 => 100755 markdown_python/extensions/tables.py mode change 100644 => 100755 markdown_python/extensions/toc.py mode change 100644 => 100755 markdown_python/extensions/wikilinks.py mode change 100644 => 100755 markdown_python/inlinepatterns.py mode change 100644 => 100755 markdown_python/odict.py mode change 100644 => 100755 markdown_python/postprocessors.py mode change 100644 => 100755 markdown_python/preprocessors.py mode change 100644 => 100755 markdown_python/serializers.py mode change 100644 => 100755 markdown_python/treeprocessors.py mode change 100644 => 100755 markdown_python/util.py diff --git a/markdown_python/__init__.py b/markdown_python/__init__.py old mode 100644 new mode 100755 index 013fdc8..1dbaddd --- a/markdown_python/__init__.py +++ 
b/markdown_python/__init__.py @@ -10,7 +10,7 @@ import markdown html = markdown.markdown(your_text_string) -See for more +See for more information and instructions on how to extend the functionality of Python Markdown. Read that before you try modifying this file. @@ -22,7 +22,7 @@ Contact: markdown@freewisdom.org -Copyright 2007-2012 The Python Markdown Project (v. 1.7 and later) +Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later) Copyright 200? Django Software Foundation (OrderedDict implementation) Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) Copyright 2004 Manfred Stienstra (the original version) @@ -30,32 +30,32 @@ License: BSD (see LICENSE for details). """ -version = "2.1.1" -version_info = (2,1,1, "final") - +from __future__ import absolute_import +from __future__ import unicode_literals +from .__version__ import version, version_info import re import codecs import sys import logging -import util -from preprocessors import build_preprocessors -from blockprocessors import build_block_parser -from treeprocessors import build_treeprocessors -from inlinepatterns import build_inlinepatterns -from postprocessors import build_postprocessors -from extensions import Extension -from serializers import to_html_string, to_xhtml_string +from . 
import util +from .preprocessors import build_preprocessors +from .blockprocessors import build_block_parser +from .treeprocessors import build_treeprocessors +from .inlinepatterns import build_inlinepatterns +from .postprocessors import build_postprocessors +from .extensions import Extension +from .serializers import to_html_string, to_xhtml_string __all__ = ['Markdown', 'markdown', 'markdownFromFile'] logger = logging.getLogger('MARKDOWN') -class Markdown: +class Markdown(object): """Convert Markdown to HTML.""" doc_tag = "div" # Element used to wrap document - later removed - + option_defaults = { 'html_replacement_text' : '[HTML_REMOVED]', 'tab_length' : 4, @@ -63,7 +63,7 @@ class Markdown: 'smart_emphasis' : True, 'lazy_ol' : True, } - + output_formats = { 'html' : to_html_string, 'html4' : to_html_string, @@ -73,9 +73,9 @@ class Markdown: 'xhtml5': to_xhtml_string, } - ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', + ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!'] - + def __init__(self, *args, **kwargs): """ Creates a new Markdown instance. @@ -86,7 +86,7 @@ def __init__(self, *args, **kwargs): If they are of type string, the module mdx_name.py will be loaded. If they are a subclass of markdown.Extension, they will be used as-is. - * extension-configs: Configuration settingis for extensions. + * extension_configs: Configuration settingis for extensions. * output_format: Format of output. Supported formats are: * "xhtml1": Outputs XHTML 1.x. Default. 
* "xhtml5": Outputs XHTML style tags of HTML 5 @@ -106,11 +106,11 @@ def __init__(self, *args, **kwargs): """ - # For backward compatability, loop through old positional args + # For backward compatibility, loop through old positional args pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format'] c = 0 for arg in args: - if not kwargs.has_key(pos[c]): + if pos[c] not in kwargs: kwargs[pos[c]] = arg c += 1 if c == len(pos): @@ -119,9 +119,13 @@ def __init__(self, *args, **kwargs): # Loop through kwargs and assign defaults for option, default in self.option_defaults.items(): - setattr(self, option, kwargs.get(option, default)) + setattr(self, option, kwargs.get(option, default)) self.safeMode = kwargs.get('safe_mode', False) + if self.safeMode and 'enable_attributes' not in kwargs: + # Disable attributes in safeMode when not explicitly set + self.enable_attributes = False + self.registeredExtensions = [] self.docType = "" self.stripTopLevelTags = True @@ -130,15 +134,15 @@ def __init__(self, *args, **kwargs): self.references = {} self.htmlStash = util.HtmlStash() + self.set_output_format(kwargs.get('output_format', 'xhtml1')) self.registerExtensions(extensions=kwargs.get('extensions', []), configs=kwargs.get('extension_configs', {})) - self.set_output_format(kwargs.get('output_format', 'xhtml1')) self.reset() def build_parser(self): """ Build the parser from the various parts. 
""" self.preprocessors = build_preprocessors(self) - self.parser = build_block_parser(self) + self.parser = build_block_parser(self) self.inlinePatterns = build_inlinepatterns(self) self.treeprocessors = build_treeprocessors(self) self.postprocessors = build_postprocessors(self) @@ -156,13 +160,13 @@ def registerExtensions(self, extensions, configs): """ for ext in extensions: - if isinstance(ext, basestring): + if isinstance(ext, util.string_type): ext = self.build_extension(ext, configs.get(ext, [])) if isinstance(ext, Extension): - # might raise NotImplementedError, but that's the extension author's problem ext.extendMarkdown(self, globals()) - else: - raise ValueError('Extension "%s.%s" must be of type: "markdown.Extension".' \ + elif ext is not None: + raise TypeError( + 'Extension "%s.%s" must be of type: "markdown.Extension"' % (ext.__class__.__module__, ext.__class__.__name__)) return self @@ -196,20 +200,23 @@ def build_extension(self, ext_name, configs = []): module_name_old_style = '_'.join(['mdx', ext_name]) try: # Old style (mdx_) module = __import__(module_name_old_style) - except ImportError: - logger.warn("Failed loading extension '%s' from '%s' or '%s'" - % (ext_name, module_name, module_name_old_style)) - # Return None so we don't try to initiate none-existant extension - return None + except ImportError as e: + message = "Failed loading extension '%s' from '%s' or '%s'" \ + % (ext_name, module_name, module_name_old_style) + e.args = (message,) + e.args[1:] + raise # If the module is loaded successfully, we expect it to define a # function called makeExtension() try: return module.makeExtension(configs.items()) - except AttributeError, e: - logger.warn("Failed to initiate extension '%s': %s" % (ext_name, e)) - return None - + except AttributeError as e: + message = e.args[0] + message = "Failed to initiate extension " \ + "'%s': %s" % (ext_name, message) + e.args = (message,) + e.args[1:] + raise + def registerExtension(self, extension): """ This 
gets called by the extension """ self.registeredExtensions.append(extension) @@ -230,11 +237,17 @@ def reset(self): def set_output_format(self, format): """ Set the output format for the class instance. """ + self.output_format = format.lower() try: - self.serializer = self.output_formats[format.lower()] - except KeyError: - raise KeyError('Invalid Output Format: "%s". Use one of %s.' \ - % (format, self.output_formats.keys())) + self.serializer = self.output_formats[self.output_format] + except KeyError as e: + valid_formats = list(self.output_formats.keys()) + valid_formats.sort() + message = 'Invalid Output Format: "%s". Use one of %s.' \ + % (self.output_format, + '"' + '", "'.join(valid_formats) + '"') + e.args = (message,) + e.args[1:] + raise return self def convert(self, source): @@ -250,10 +263,10 @@ def convert(self, source): 1. A bunch of "preprocessors" munge the input text. 2. BlockParser() parses the high-level structural elements of the pre-processed text into an ElementTree. - 3. A bunch of "treeprocessors" are run against the ElementTree. One - such treeprocessor runs InlinePatterns against the ElementTree, + 3. A bunch of "treeprocessors" are run against the ElementTree. One + such treeprocessor runs InlinePatterns against the ElementTree, detecting inline markup. - 4. Some post-processors are run against the text after the ElementTree + 4. Some post-processors are run against the text after the ElementTree has been serialized into text. 5. The output is written to a string. @@ -261,20 +274,15 @@ def convert(self, source): # Fixup the source text if not source.strip(): - return u"" # a blank unicode string - + return '' # a blank unicode string + try: - source = unicode(source) - except UnicodeDecodeError, e: + source = util.text_type(source) + except UnicodeDecodeError as e: # Customise error message while maintaining original trackback e.reason += '. -- Note: Markdown only accepts unicode input!' 
raise - source = source.replace(util.STX, "").replace(util.ETX, "") - source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" - source = re.sub(r'\n\s+\n', '\n\n', source) - source = source.expandtabs(self.tab_length) - # Split into lines and run the line preprocessors. self.lines = source.split("\n") for prep in self.preprocessors.values(): @@ -335,7 +343,7 @@ def convertFile(self, input=None, output=None, encoding=None): # Read the source if input: - if isinstance(input, str): + if isinstance(input, util.string_type): input_file = codecs.open(input, mode="r", encoding=encoding) else: input_file = codecs.getreader(encoding)(input) @@ -343,7 +351,7 @@ def convertFile(self, input=None, output=None, encoding=None): input_file.close() else: text = sys.stdin.read() - if not isinstance(text, unicode): + if not isinstance(text, util.text_type): text = text.decode(encoding) text = text.lstrip('\ufeff') # remove the byte-order mark @@ -353,9 +361,9 @@ def convertFile(self, input=None, output=None, encoding=None): # Write to file or stdout if output: - if isinstance(output, str): - output_file = codecs.open(output, "w", - encoding=encoding, + if isinstance(output, util.string_type): + output_file = codecs.open(output, "w", + encoding=encoding, errors="xmlcharrefreplace") output_file.write(html) output_file.close() @@ -365,7 +373,14 @@ def convertFile(self, input=None, output=None, encoding=None): output_file.write(html) # Don't close here. User may want to write more. else: - sys.stdout.write(html) + # Encode manually and write bytes to stdout. + html = html.encode(encoding, "xmlcharrefreplace") + try: + # Write bytes directly to buffer (Python 3). + sys.stdout.buffer.write(html) + except AttributeError: + # Probably Python 2, which works with bytes by default. + sys.stdout.write(html) return self @@ -399,30 +414,30 @@ def markdown(text, *args, **kwargs): def markdownFromFile(*args, **kwargs): """Read markdown code from a file and write it to a file or a stream. 
- + This is a shortcut function which initializes an instance of Markdown, and calls the convertFile method rather than convert. - + Keyword arguments: - + * input: a file name or readable object. * output: a file name or writable object. * encoding: Encoding of input and output. * Any arguments accepted by the Markdown class. - + """ # For backward compatibility loop through positional args pos = ['input', 'output', 'extensions', 'encoding'] c = 0 for arg in args: - if not kwargs.has_key(pos[c]): + if pos[c] not in kwargs: kwargs[pos[c]] = arg c += 1 if c == len(pos): break md = Markdown(**kwargs) - md.convertFile(kwargs.get('input', None), + md.convertFile(kwargs.get('input', None), kwargs.get('output', None), kwargs.get('encoding', None)) diff --git a/markdown_python/__main__.py b/markdown_python/__main__.py old mode 100644 new mode 100755 index b6a2e23..8ee8c82 --- a/markdown_python/__main__.py +++ b/markdown_python/__main__.py @@ -20,11 +20,11 @@ def parse_options(): usage = """%prog [options] [INPUTFILE] (STDIN is assumed if no INPUTFILE is given)""" desc = "A Python implementation of John Gruber's Markdown. " \ - "http://www.freewisdom.org/projects/python-markdown/" + "http://packages.python.org/Markdown/" ver = "%%prog %s" % markdown.version parser = optparse.OptionParser(usage=usage, description=desc, version=ver) - parser.add_option("-f", "--file", dest="filename", default=sys.stdout, + parser.add_option("-f", "--file", dest="filename", default=None, help="Write output to OUTPUT_FILE. 
Defaults to STDOUT.", metavar="OUTPUT_FILE") parser.add_option("-e", "--encoding", dest="encoding", diff --git a/markdown_python/__version__.py b/markdown_python/__version__.py new file mode 100755 index 0000000..a13559c --- /dev/null +++ b/markdown_python/__version__.py @@ -0,0 +1,28 @@ +# +# markdown/__version__.py +# +# version_info should conform to PEP 386 +# (major, minor, micro, alpha/beta/rc/final, #) +# (1, 1, 2, 'alpha', 0) => "1.1.2.dev" +# (1, 2, 0, 'beta', 2) => "1.2b2" +version_info = (2, 3, 1, 'final', 0) + +def _get_version(): + " Returns a PEP 386-compliant version number from version_info. " + assert len(version_info) == 5 + assert version_info[3] in ('alpha', 'beta', 'rc', 'final') + + parts = 2 if version_info[2] == 0 else 3 + main = '.'.join(map(str, version_info[:parts])) + + sub = '' + if version_info[3] == 'alpha' and version_info[4] == 0: + # TODO: maybe append some sort of git info here?? + sub = '.dev' + elif version_info[3] != 'final': + mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'c'} + sub = mapping[version_info[3]] + str(version_info[4]) + + return str(main + sub) + +version = _get_version() diff --git a/markdown_python/blockparser.py b/markdown_python/blockparser.py old mode 100644 new mode 100755 index fae136c..4504a16 --- a/markdown_python/blockparser.py +++ b/markdown_python/blockparser.py @@ -1,6 +1,7 @@ - -import util -import odict +from __future__ import unicode_literals +from __future__ import absolute_import +from . import util +from . import odict class State(list): """ Track the current and nested state of the parser. 
@@ -89,9 +90,10 @@ def parseBlocks(self, parent, blocks): """ while blocks: - for processor in self.blockprocessors.values(): - if processor.test(parent, blocks[0]): - processor.run(parent, blocks) - break + for processor in self.blockprocessors.values(): + if processor.test(parent, blocks[0]): + if processor.run(parent, blocks) is not False: + # run returns True or None + break diff --git a/markdown_python/blockprocessors.py b/markdown_python/blockprocessors.py old mode 100644 new mode 100755 index 7b14a85..61977b4 --- a/markdown_python/blockprocessors.py +++ b/markdown_python/blockprocessors.py @@ -1,21 +1,23 @@ """ CORE MARKDOWN BLOCKPARSER -============================================================================= +=========================================================================== This parser handles basic parsing of Markdown blocks. It doesn't concern itself -with inline elements such as **bold** or *italics*, but rather just catches +with inline elements such as **bold** or *italics*, but rather just catches blocks, lists, quotes, etc. -The BlockParser is made up of a bunch of BlockProssors, each handling a +The BlockParser is made up of a bunch of BlockProssors, each handling a different type of block. Extensions may add/replace/remove BlockProcessors as they need to alter how markdown blocks are parsed. - """ +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals import logging import re -import util -from blockparser import BlockParser +from . import util +from .blockparser import BlockParser logger = logging.getLogger('MARKDOWN') @@ -485,7 +487,7 @@ def run(self, parent, blocks): # Recursively parse lines before hr so they get parsed first. self.parser.parseBlocks(parent, [prelines]) # create hr - hr = util.etree.SubElement(parent, 'hr') + util.etree.SubElement(parent, 'hr') # check for lines in block after hr. 
postlines = block[self.match.end():].lstrip('\n') if postlines: @@ -495,26 +497,27 @@ def run(self, parent, blocks): class EmptyBlockProcessor(BlockProcessor): - """ Process blocks and start with an empty line. """ - - # Detect a block that only contains whitespace - # or only whitespace on the first line. - RE = re.compile(r'^\s*\n') + """ Process blocks that are empty or start with an empty line. """ def test(self, parent, block): - return bool(self.RE.match(block)) + return not block or block.startswith('\n') def run(self, parent, blocks): block = blocks.pop(0) - m = self.RE.match(block) - if m: - # Add remaining line to master blocks for later. - blocks.insert(0, block[m.end():]) - sibling = self.lastChild(parent) - if sibling and sibling.tag == 'pre' and sibling[0] and \ - sibling[0].tag == 'code': - # Last block is a codeblock. Append to preserve whitespace. - sibling[0].text = util.AtomicString('%s/n/n/n' % sibling[0].text ) + filler = '\n\n' + if block: + # Starts with empty line + # Only replace a single line. + filler = '\n' + # Save the rest for later. + theRest = block[1:] + if theRest: + # Add remaining lines to master blocks for later. + blocks.insert(0, theRest) + sibling = self.lastChild(parent) + if sibling and sibling.tag == 'pre' and len(sibling) and sibling[0].tag == 'code': + # Last block is a codeblock. Append to preserve whitespace. + sibling[0].text = util.AtomicString('%s%s' % (sibling[0].text, filler)) class ParagraphProcessor(BlockProcessor): diff --git a/markdown_python/etree_loader.py b/markdown_python/etree_loader.py deleted file mode 100644 index f605aa2..0000000 --- a/markdown_python/etree_loader.py +++ /dev/null @@ -1,31 +0,0 @@ - -## Import -def importETree(): - """Import the best implementation of ElementTree, return a module object.""" - etree_in_c = None - try: # Is it Python 2.5+ with C implemenation of ElementTree installed? 
- import xml.etree.cElementTree as etree_in_c - from xml.etree.ElementTree import Comment - except ImportError: - try: # Is it Python 2.5+ with Python implementation of ElementTree? - import xml.etree.ElementTree as etree - except ImportError: - try: # An earlier version of Python with cElementTree installed? - import cElementTree as etree_in_c - from elementtree.ElementTree import Comment - except ImportError: - try: # An earlier version of Python with Python ElementTree? - import elementtree.ElementTree as etree - except ImportError: - raise ImportError("Failed to import ElementTree") - if etree_in_c: - if etree_in_c.VERSION < "1.0.5": - raise RuntimeError("cElementTree version 1.0.5 or higher is required.") - # Third party serializers (including ours) test with non-c Comment - etree_in_c.test_comment = Comment - return etree_in_c - elif etree.VERSION < "1.1": - raise RuntimeError("ElementTree version 1.1 or higher is required") - else: - return etree - diff --git a/markdown_python/extensions/__init__.py b/markdown_python/extensions/__init__.py old mode 100644 new mode 100755 index 0222c91..184c4d1 --- a/markdown_python/extensions/__init__.py +++ b/markdown_python/extensions/__init__.py @@ -3,7 +3,9 @@ ----------------------------------------------------------------------------- """ -class Extension: +from __future__ import unicode_literals + +class Extension(object): """ Base class for extensions to subclass. """ def __init__(self, configs = {}): """Create an instance of an Extention. @@ -46,6 +48,6 @@ def extendMarkdown(self, md, md_globals): * md_globals: Global variables in the markdown module namespace. """ - raise NotImplementedError, 'Extension "%s.%s" must define an "extendMarkdown"' \ - 'method.' % (self.__class__.__module__, self.__class__.__name__) + raise NotImplementedError('Extension "%s.%s" must define an "extendMarkdown"' \ + 'method.' 
% (self.__class__.__module__, self.__class__.__name__)) diff --git a/markdown_python/extensions/abbr.py b/markdown_python/extensions/abbr.py old mode 100644 new mode 100755 index 45663c0..5e46f1d --- a/markdown_python/extensions/abbr.py +++ b/markdown_python/extensions/abbr.py @@ -23,14 +23,18 @@ ''' +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +from ..inlinepatterns import Pattern +from ..util import etree import re -import markdown -from markdown.util import etree # Global Vars ABBR_REF_RE = re.compile(r'[*]\[(?P[^\]]*)\][ ]?:\s*(?P.*)') -class AbbrExtension(markdown.Extension): +class AbbrExtension(Extension): """ Abbreviation Extension for Python-Markdown. """ def extendMarkdown(self, md, md_globals): @@ -38,7 +42,7 @@ def extendMarkdown(self, md, md_globals): md.preprocessors.add('abbr', AbbrPreprocessor(md), '<reference') -class AbbrPreprocessor(markdown.preprocessors.Preprocessor): +class AbbrPreprocessor(Preprocessor): """ Abbreviation Preprocessor - parse text for abbr references. """ def run(self, lines): @@ -75,11 +79,11 @@ def _generate_pattern(self, text): return r'(?P<abbr>\b%s\b)' % (r''.join(chars)) -class AbbrPattern(markdown.inlinepatterns.Pattern): +class AbbrPattern(Pattern): """ Abbreviation inline pattern. 
""" def __init__(self, pattern, title): - markdown.inlinepatterns.Pattern.__init__(self, pattern) + super(AbbrPattern, self).__init__(pattern) self.title = title def handleMatch(self, m): @@ -90,7 +94,3 @@ def handleMatch(self, m): def makeExtension(configs=None): return AbbrExtension(configs=configs) - -if __name__ == "__main__": - import doctest - doctest.testmod() diff --git a/markdown_python/extensions/admonition.py b/markdown_python/extensions/admonition.py new file mode 100755 index 0000000..9a45b92 --- /dev/null +++ b/markdown_python/extensions/admonition.py @@ -0,0 +1,118 @@ +""" +Admonition extension for Python-Markdown +======================================== + +Adds rST-style admonitions. Inspired by [rST][] feature with the same name. + +The syntax is (followed by an indented block with the contents): + !!! [type] [optional explicit title] + +Where `type` is used as a CSS class name of the div. If not present, `title` +defaults to the capitalized `type`, so "note" -> "Note". + +rST suggests the following `types`, but you're free to use whatever you want: + attention, caution, danger, error, hint, important, note, tip, warning + + +A simple example: + !!! note + This is the first line inside the box. + +Outputs: + <div class="admonition note"> + <p class="admonition-title">Note</p> + <p>This is the first line inside the box</p> + </div> + +You can also specify the title and CSS class of the admonition: + !!! custom "Did you know?" + Another line here. + +Outputs: + <div class="admonition custom"> + <p class="admonition-title">Did you know?</p> + <p>Another line here.</p> + </div> + +[rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions + +By [Tiago Serafim](http://www.tiagoserafim.com/). + +""" + +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import Extension +from ..blockprocessors import BlockProcessor +from ..util import etree +import re + + +class AdmonitionExtension(Extension): + """ Admonition extension for Python-Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Add Admonition to Markdown instance. """ + md.registerExtension(self) + + md.parser.blockprocessors.add('admonition', + AdmonitionProcessor(md.parser), + '_begin') + + +class AdmonitionProcessor(BlockProcessor): + + CLASSNAME = 'admonition' + CLASSNAME_TITLE = 'admonition-title' + RE = re.compile(r'(?:^|\n)!!!\ ?([\w\-]+)(?:\ "(.*?)")?') + + def test(self, parent, block): + sibling = self.lastChild(parent) + return self.RE.search(block) or \ + (block.startswith(' ' * self.tab_length) and sibling and \ + sibling.get('class', '').find(self.CLASSNAME) != -1) + + def run(self, parent, blocks): + sibling = self.lastChild(parent) + block = blocks.pop(0) + m = self.RE.search(block) + + if m: + block = block[m.end() + 1:] # removes the first line + + block, theRest = self.detab(block) + + if m: + klass, title = self.get_class_and_title(m) + div = etree.SubElement(parent, 'div') + div.set('class', '%s %s' % (self.CLASSNAME, klass)) + if title: + p = etree.SubElement(div, 'p') + p.text = title + p.set('class', self.CLASSNAME_TITLE) + else: + div = sibling + + self.parser.parseChunk(div, block) + + if theRest: + # This block contained unindented line(s) after the first indented + # line. Insert these lines as the first block of the master blocks + # list for future processing. + blocks.insert(0, theRest) + + def get_class_and_title(self, match): + klass, title = match.group(1).lower(), match.group(2) + if title is None: + # no title was provided, use the capitalized classname as title + # e.g.: `!!! note` will render `<p class="admonition-title">Note</p>` + title = klass.capitalize() + elif title == '': + # an explicit blank title should not be rendered + # e.g.: `!!! 
warning ""` will *not* render `p` with a title + title = None + return klass, title + + +def makeExtension(configs={}): + return AdmonitionExtension(configs=configs) diff --git a/markdown_python/extensions/attr_list.py b/markdown_python/extensions/attr_list.py old mode 100644 new mode 100755 index 60287fe..c98aa85 --- a/markdown_python/extensions/attr_list.py +++ b/markdown_python/extensions/attr_list.py @@ -10,17 +10,20 @@ Contact: markdown@freewisdom.org -License: BSD (see ../../LICENSE for details) +License: BSD (see ../LICENSE.md for details) Dependencies: * [Python 2.4+](http://python.org) -* [Markdown 2.1+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.1+](http://packages.python.org/Markdown/) """ -import markdown +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import isBlockLevel import re -from markdown.util import isBlockLevel try: Scanner = re.Scanner @@ -41,9 +44,9 @@ def _handle_key_value(s, t): def _handle_word(s, t): if t.startswith('.'): - return u'.', t[1:] + return '.', t[1:] if t.startswith('#'): - return u'id', t[1:] + return 'id', t[1:] return t, t _scanner = Scanner([ @@ -61,16 +64,19 @@ def get_attrs(str): def isheader(elem): return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] -class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): +class AttrListTreeprocessor(Treeprocessor): BASE_RE = r'\{\:?([^\}]*)\}' HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE) BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE) INLINE_RE = re.compile(r'^%s' % BASE_RE) + NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d' + r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef' + r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd' + r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') def run(self, doc): for elem in doc.getiterator(): - #import pdb; pdb.set_trace() if isBlockLevel(elem.tag): # Block 
level: check for attrs on last line of text RE = self.BLOCK_RE @@ -114,14 +120,20 @@ def assign_attrs(self, elem, attrs): else: elem.set('class', v) else: - # assing attr k with v - elem.set(k, v) + # assign attr k with v + elem.set(self.sanitize_name(k), v) + def sanitize_name(self, name): + """ + Sanitize name as 'an XML Name, minus the ":"'. + See http://www.w3.org/TR/REC-xml-names/#NT-NCName + """ + return self.NAME_RE.sub('_', name) -class AttrListExtension(markdown.extensions.Extension): + +class AttrListExtension(Extension): def extendMarkdown(self, md, md_globals): - # insert after 'inline' treeprocessor - md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>inline') + md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify') def makeExtension(configs={}): diff --git a/markdown_python/extensions/codehilite.py b/markdown_python/extensions/codehilite.py old mode 100644 new mode 100755 index 5df820f..72b40fd --- a/markdown_python/extensions/codehilite.py +++ b/markdown_python/extensions/codehilite.py @@ -1,5 +1,3 @@ -#!/usr/bin/python - """ CodeHilite Extension for Python-Markdown ======================================== @@ -8,19 +6,23 @@ Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). -Project website: <http://www.freewisdom.org/project/python-markdown/CodeHilite> +Project website: <http://packages.python.org/Markdown/extensions/code_hilite.html> Contact: markdown@freewisdom.org -License: BSD (see ../docs/LICENSE for details) +License: BSD (see ../LICENSE.md for details) Dependencies: * [Python 2.3+](http://python.org/) -* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.0+](http://packages.python.org/Markdown/) * [Pygments](http://pygments.org/) """ -import markdown +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import Extension +from ..treeprocessors import Treeprocessor +import warnings try: from pygments import highlight from pygments.lexers import get_lexer_by_name, guess_lexer, TextLexer @@ -30,7 +32,7 @@ pygments = False # ------------------ The Main CodeHilite Class ---------------------- -class CodeHilite: +class CodeHilite(object): """ Determine language of source code, and pass it into the pygments hilighter. @@ -40,9 +42,10 @@ class CodeHilite: * src: Source string or any object with a .readline attribute. - * linenos: (Boolen) Turn line numbering 'on' or 'off' (off by default). + * linenums: (Boolean) Set line numbering to 'on' (True), 'off' (False) or 'auto'(None). + Set to 'auto' by default. - * guess_lang: (Boolen) Turn language auto-detection 'on' or 'off' (on by default). + * guess_lang: (Boolean) Turn language auto-detection 'on' or 'off' (on by default). * css_class: Set class name of wrapper div ('codehilite' by default). @@ -54,12 +57,12 @@ class CodeHilite: """ - def __init__(self, src=None, linenos=False, guess_lang=True, + def __init__(self, src=None, linenums=None, guess_lang=True, css_class="codehilite", lang=None, style='default', noclasses=False, tab_length=4): self.src = src self.lang = lang - self.linenos = linenos + self.linenums = linenums self.guess_lang = guess_lang self.css_class = css_class self.style = style @@ -93,7 +96,7 @@ def hilite(self): lexer = TextLexer() except ValueError: lexer = TextLexer() - formatter = HtmlFormatter(linenos=self.linenos, + formatter = HtmlFormatter(linenos=self.linenums, cssclass=self.css_class, style=self.style, noclasses=self.noclasses) @@ -107,7 +110,7 @@ def hilite(self): classes = [] if self.lang: classes.append('language-%s' % self.lang) - if self.linenos: + if self.linenums: classes.append('linenums') class_str = '' if classes: @@ -138,7 +141,7 @@ def _getLang(self): fl = lines.pop(0) c = re.compile(r''' - (?:(?:::+)|(?P<shebang>[#]!)) # Shebang or 2 or more colons. 
+ (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons. (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path (?P<lang>[\w+-]*) # The language ''', re.VERBOSE) @@ -153,9 +156,9 @@ def _getLang(self): if m.group('path'): # path exists - restore first line lines.insert(0, fl) - if m.group('shebang'): - # shebang exists - use line numbers - self.linenos = True + if self.linenums is None and m.group('shebang'): + # Overridable and Shebang exists - use line numbers + self.linenums = True else: # No match lines.insert(0, fl) @@ -165,7 +168,7 @@ def _getLang(self): # ------------------ The Markdown Extension ------------------------------- -class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor): +class HiliteTreeprocessor(Treeprocessor): """ Hilight source code in code blocks. """ def run(self, root): @@ -175,7 +178,7 @@ def run(self, root): children = block.getchildren() if len(children) == 1 and children[0].tag == 'code': code = CodeHilite(children[0].text, - linenos=self.config['force_linenos'], + linenums=self.config['linenums'], guess_lang=self.config['guess_lang'], css_class=self.config['css_class'], style=self.config['pygments_style'], @@ -191,13 +194,14 @@ def run(self, root): block.text = placeholder -class CodeHiliteExtension(markdown.Extension): +class CodeHiliteExtension(Extension): """ Add source code hilighting to markdown codeblocks. """ def __init__(self, configs): # define default configs self.config = { - 'force_linenos' : [False, "Force line numbers - Default: False"], + 'linenums': [None, "Use lines numbers. True=yes, False=no, None=auto"], + 'force_linenos' : [False, "Depreciated! Use 'linenums' instead. 
Force line numbers - Default: False"], 'guess_lang' : [True, "Automatic language detection - Default: True"], 'css_class' : ["codehilite", "Set class name for wrapper <div> - Default: codehilite"], @@ -210,6 +214,16 @@ def __init__(self, configs): # convert strings to booleans if value == 'True': value = True if value == 'False': value = False + if value == 'None': value = None + + if key == 'force_linenos': + warnings.warn('The "force_linenos" config setting' + ' to the CodeHilite extension is deprecrecated.' + ' Use "linenums" instead.', PendingDeprecationWarning) + if value: + # Carry 'force_linenos' over to new 'linenos'. + self.setConfig('linenums', True) + self.setConfig(key, value) def extendMarkdown(self, md, md_globals): diff --git a/markdown_python/extensions/def_list.py b/markdown_python/extensions/def_list.py old mode 100644 new mode 100755 index da1726a..8684652 --- a/markdown_python/extensions/def_list.py +++ b/markdown_python/extensions/def_list.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ Definition List Extension for Python-Markdown ============================================= @@ -19,12 +18,15 @@ """ +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import BlockProcessor, ListIndentProcessor +from ..util import etree import re -import markdown -from markdown.util import etree -class DefListProcessor(markdown.blockprocessors.BlockProcessor): +class DefListProcessor(BlockProcessor): """ Process Definition Lists. 
""" RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)') @@ -34,10 +36,11 @@ def test(self, parent, block): return bool(self.RE.search(block)) def run(self, parent, blocks): - block = blocks.pop(0) - m = self.RE.search(block) - terms = [l.strip() for l in block[:m.start()].split('\n') if l.strip()] - block = block[m.end():] + + raw_block = blocks.pop(0) + m = self.RE.search(raw_block) + terms = [l.strip() for l in raw_block[:m.start()].split('\n') if l.strip()] + block = raw_block[m.end():] no_indent = self.NO_INDENT_RE.match(block) if no_indent: d, theRest = (block, None) @@ -48,6 +51,11 @@ def run(self, parent, blocks): else: d = m.group(2) sibling = self.lastChild(parent) + if not terms and sibling is None: + # This is not a definition item. Most likely a paragraph that + # starts with a colon at the begining of a document or list. + blocks.insert(0, raw_block) + return False if not terms and sibling.tag == 'p': # The previous paragraph contains the terms state = 'looselist' @@ -79,7 +87,7 @@ def run(self, parent, blocks): if theRest: blocks.insert(0, theRest) -class DefListIndentProcessor(markdown.blockprocessors.ListIndentProcessor): +class DefListIndentProcessor(ListIndentProcessor): """ Process indented children of definition list items. """ ITEM_TYPES = ['dd'] @@ -87,12 +95,12 @@ class DefListIndentProcessor(markdown.blockprocessors.ListIndentProcessor): def create_item(self, parent, block): """ Create a new dd and parse the block with it as the parent. """ - dd = markdown.etree.SubElement(parent, 'dd') + dd = etree.SubElement(parent, 'dd') self.parser.parseBlocks(dd, [block]) -class DefListExtension(markdown.Extension): +class DefListExtension(Extension): """ Add definition lists to Markdown. 
""" def extendMarkdown(self, md, md_globals): diff --git a/markdown_python/extensions/extra.py b/markdown_python/extensions/extra.py old mode 100644 new mode 100755 index 2c7915e..e6a1e82 --- a/markdown_python/extensions/extra.py +++ b/markdown_python/extensions/extra.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ Python-Markdown Extra Extension =============================== @@ -27,7 +26,9 @@ """ -import markdown +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension extensions = ['smart_strong', 'fenced_code', @@ -39,14 +40,15 @@ ] -class ExtraExtension(markdown.Extension): +class ExtraExtension(Extension): """ Add various extensions to Markdown class.""" def extendMarkdown(self, md, md_globals): """ Register extension instances. """ md.registerExtensions(extensions, self.config) - # Turn on processing of markdown text within raw html - md.preprocessors['html_block'].markdown_in_raw = True + if not md.safeMode: + # Turn on processing of markdown text within raw html + md.preprocessors['html_block'].markdown_in_raw = True def makeExtension(configs={}): return ExtraExtension(configs=dict(configs)) diff --git a/markdown_python/extensions/fenced_code.py b/markdown_python/extensions/fenced_code.py old mode 100644 new mode 100755 index 95fe3b4..ecdb20d --- a/markdown_python/extensions/fenced_code.py +++ b/markdown_python/extensions/fenced_code.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - """ Fenced Code Extension for Python Markdown ========================================= @@ -63,31 +61,34 @@ Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/). 
-Project website: <http://www.freewisdom.org/project/python-markdown/Fenced__Code__Blocks> +Project website: <http://packages.python.org/Markdown/extensions/fenced_code_blocks.html> Contact: markdown@freewisdom.org License: BSD (see ../docs/LICENSE for details) Dependencies: * [Python 2.4+](http://python.org) -* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.0+](http://packages.python.org/Markdown/) * [Pygments (optional)](http://pygments.org) """ +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +from .codehilite import CodeHilite, CodeHiliteExtension import re -import markdown -from markdown.extensions.codehilite import CodeHilite, CodeHiliteExtension # Global vars FENCED_BLOCK_RE = re.compile( \ - r'(?P<fence>^(?:~{3,}|`{3,}))[ ]*(\{?\.?(?P<lang>[a-zA-Z0-9_-]*)\}?)?[ ]*\n(?P<code>.*?)(?<=\n)(?P=fence)[ ]*$', + r'(?P<fence>^(?:~{3,}|`{3,}))[ ]*(\{?\.?(?P<lang>[a-zA-Z0-9_+-]*)\}?)?[ ]*\n(?P<code>.*?)(?<=\n)(?P=fence)[ ]*$', re.MULTILINE|re.DOTALL ) CODE_WRAP = '<pre><code%s>%s</code></pre>' LANG_TAG = ' class="%s"' -class FencedCodeExtension(markdown.Extension): +class FencedCodeExtension(Extension): def extendMarkdown(self, md, md_globals): """ Add FencedBlockPreprocessor to the Markdown instance. 
""" @@ -95,13 +96,13 @@ def extendMarkdown(self, md, md_globals): md.preprocessors.add('fenced_code_block', FencedBlockPreprocessor(md), - "_begin") + ">normalize_whitespace") -class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): +class FencedBlockPreprocessor(Preprocessor): def __init__(self, md): - markdown.preprocessors.Preprocessor.__init__(self, md) + super(FencedBlockPreprocessor, self).__init__(md) self.checked_for_codehilite = False self.codehilite_conf = {} @@ -130,7 +131,7 @@ def run(self, lines): # is enabled, so we call it to highlite the code if self.codehilite_conf: highliter = CodeHilite(m.group('code'), - linenos=self.codehilite_conf['force_linenos'][0], + linenums=self.codehilite_conf['linenums'][0], guess_lang=self.codehilite_conf['guess_lang'][0], css_class=self.codehilite_conf['css_class'][0], style=self.codehilite_conf['pygments_style'][0], @@ -158,8 +159,3 @@ def _escape(self, txt): def makeExtension(configs=None): return FencedCodeExtension(configs=configs) - - -if __name__ == "__main__": - import doctest - doctest.testmod() diff --git a/markdown_python/extensions/footnotes.py b/markdown_python/extensions/footnotes.py old mode 100644 new mode 100755 index 3d83807..65ed597 --- a/markdown_python/extensions/footnotes.py +++ b/markdown_python/extensions/footnotes.py @@ -23,16 +23,23 @@ """ +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import Extension +from ..preprocessors import Preprocessor +from ..inlinepatterns import Pattern +from ..treeprocessors import Treeprocessor +from ..postprocessors import Postprocessor +from ..util import etree, text_type +from ..odict import OrderedDict import re -import markdown -from markdown.util import etree FN_BACKLINK_TEXT = "zz1337820767766393qq" NBSP_PLACEHOLDER = "qq3936677670287331zz" DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)') TABBED_RE = re.compile(r'((\t)|( ))(.*)') -class FootnoteExtension(markdown.Extension): +class FootnoteExtension(Extension): """ Footnote Extension. """ def __init__ (self, configs): @@ -61,6 +68,10 @@ def extendMarkdown(self, md, md_globals): """ Add pieces to Markdown. """ md.registerExtension(self) self.parser = md.parser + self.md = md + self.sep = ':' + if self.md.output_format in ['html5', 'xhtml5']: + self.sep = '-' # Insert a preprocessor before ReferencePreprocessor md.preprocessors.add("footnote", FootnotePreprocessor(self), "<reference") @@ -79,7 +90,7 @@ def extendMarkdown(self, md, md_globals): def reset(self): """ Clear the footnotes on reset, and prepare for a distinct document. """ - self.footnotes = markdown.odict.OrderedDict() + self.footnotes = OrderedDict() self.unique_prefix += 1 def findFootnotesPlaceholder(self, root): @@ -105,26 +116,26 @@ def setFootnote(self, id, text): def makeFootnoteId(self, id): """ Return footnote link id. """ if self.getConfig("UNIQUE_IDS"): - return 'fn:%d-%s' % (self.unique_prefix, id) + return 'fn%s%d-%s' % (self.sep, self.unique_prefix, id) else: - return 'fn:%s' % id + return 'fn%s%s' % (self.sep, id) def makeFootnoteRefId(self, id): """ Return footnote back-link id. """ if self.getConfig("UNIQUE_IDS"): - return 'fnref:%d-%s' % (self.unique_prefix, id) + return 'fnref%s%d-%s' % (self.sep, self.unique_prefix, id) else: - return 'fnref:%s' % id + return 'fnref%s%s' % (self.sep, id) def makeFootnotesDiv(self, root): """ Return div of footnotes as et Element. 
""" - if not self.footnotes.keys(): + if not list(self.footnotes.keys()): return None div = etree.Element("div") div.set('class', 'footnote') - hr = etree.SubElement(div, "hr") + etree.SubElement(div, "hr") ol = etree.SubElement(div, "ol") for id in self.footnotes.keys(): @@ -133,7 +144,9 @@ def makeFootnotesDiv(self, root): self.parser.parseChunk(li, self.footnotes[id]) backlink = etree.Element("a") backlink.set("href", "#" + self.makeFootnoteRefId(id)) - backlink.set("rev", "footnote") + if self.md.output_format not in ['html5', 'xhtml5']: + backlink.set("rev", "footnote") # Invalid in HTML5 + backlink.set("class", "footnote-backref") backlink.set("title", "Jump back to footnote %d in the text" % \ (self.footnotes.index(id)+1)) backlink.text = FN_BACKLINK_TEXT @@ -149,7 +162,7 @@ def makeFootnotesDiv(self, root): return div -class FootnotePreprocessor(markdown.preprocessors.Preprocessor): +class FootnotePreprocessor(Preprocessor): """ Find all footnote references and store for later use. """ def __init__ (self, footnotes): @@ -168,7 +181,6 @@ def run(self, lines): """ newlines = [] i = 0 - #import pdb; pdb.set_trace() #for i, line in enumerate(lines): while True: m = DEF_RE.match(lines[i]) if m: @@ -241,11 +253,11 @@ def detab(line): return items, i -class FootnotePattern(markdown.inlinepatterns.Pattern): +class FootnotePattern(Pattern): """ InlinePattern for footnote markers in a document's body text. 
""" def __init__(self, pattern, footnotes): - markdown.inlinepatterns.Pattern.__init__(self, pattern) + super(FootnotePattern, self).__init__(pattern) self.footnotes = footnotes def handleMatch(self, m): @@ -255,14 +267,16 @@ def handleMatch(self, m): a = etree.SubElement(sup, "a") sup.set('id', self.footnotes.makeFootnoteRefId(id)) a.set('href', '#' + self.footnotes.makeFootnoteId(id)) - a.set('rel', 'footnote') - a.text = unicode(self.footnotes.footnotes.index(id) + 1) + if self.footnotes.md.output_format not in ['html5', 'xhtml5']: + a.set('rel', 'footnote') # invalid in HTML5 + a.set('class', 'footnote-ref') + a.text = text_type(self.footnotes.footnotes.index(id) + 1) return sup else: return None -class FootnoteTreeprocessor(markdown.treeprocessors.Treeprocessor): +class FootnoteTreeprocessor(Treeprocessor): """ Build and append footnote div to end of document. """ def __init__ (self, footnotes): @@ -284,7 +298,7 @@ def run(self, root): else: root.append(footnotesDiv) -class FootnotePostprocessor(markdown.postprocessors.Postprocessor): +class FootnotePostprocessor(Postprocessor): """ Replace placeholders with html entities. """ def __init__(self, footnotes): self.footnotes = footnotes diff --git a/markdown_python/extensions/headerid.py b/markdown_python/extensions/headerid.py old mode 100644 new mode 100755 index b0e37e2..7681b8d --- a/markdown_python/extensions/headerid.py +++ b/markdown_python/extensions/headerid.py @@ -1,5 +1,3 @@ -#!/usr/bin/python - """ HeaderID Extension for Python-Markdown ====================================== @@ -65,21 +63,22 @@ Copyright 2007-2011 [Waylan Limberg](http://achinghead.com/). 
-Project website: <http://www.freewisdom.org/project/python-markdown/HeaderId> +Project website: <http://packages.python.org/Markdown/extensions/header_id.html> Contact: markdown@freewisdom.org License: BSD (see ../docs/LICENSE for details) Dependencies: * [Python 2.3+](http://python.org) -* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.0+](http://packages.python.org/Markdown/) """ -import markdown -from markdown.util import etree +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor import re -from string import ascii_lowercase, digits, punctuation import logging import unicodedata @@ -97,13 +96,13 @@ def slugify(value, separator): def unique(id, ids): """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ - while id in ids: + while id in ids or not id: m = IDCOUNT_RE.match(id) if m: id = '%s_%d'% (m.group(1), int(m.group(2))+1) else: id = '%s_%d'% (id, 1) - ids.append(id) + ids.add(id) return id @@ -122,7 +121,7 @@ def itertext(elem): yield e.tail -class HeaderIdTreeprocessor(markdown.treeprocessors.Treeprocessor): +class HeaderIdTreeprocessor(Treeprocessor): """ Assign IDs to headers. 
""" IDs = set() @@ -135,7 +134,7 @@ def run(self, doc): if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: if force_id: if "id" in elem.attrib: - id = elem.id + id = elem.get('id') else: id = slugify(''.join(itertext(elem)), sep) elem.set('id', unique(id, self.IDs)) @@ -151,9 +150,9 @@ def _get_meta(self): level = int(self.config['level']) - 1 force = self._str2bool(self.config['forceid']) if hasattr(self.md, 'Meta'): - if self.md.Meta.has_key('header_level'): + if 'header_level' in self.md.Meta: level = int(self.md.Meta['header_level'][0]) - 1 - if self.md.Meta.has_key('header_forceid'): + if 'header_forceid' in self.md.Meta: force = self._str2bool(self.md.Meta['header_forceid'][0]) return level, force @@ -167,7 +166,7 @@ def _str2bool(self, s, default=False): return default -class HeaderIdExtension (markdown.Extension): +class HeaderIdExtension(Extension): def __init__(self, configs): # set defaults self.config = { @@ -185,17 +184,16 @@ def extendMarkdown(self, md, md_globals): self.processor = HeaderIdTreeprocessor() self.processor.md = md self.processor.config = self.getConfigs() - # Replace existing hasheader in place. - md.treeprocessors.add('headerid', self.processor, '>inline') + if 'attr_list' in md.treeprocessors.keys(): + # insert after attr_list treeprocessor + md.treeprocessors.add('headerid', self.processor, '>attr_list') + else: + # insert after 'prettify' treeprocessor. 
+ md.treeprocessors.add('headerid', self.processor, '>prettify') def reset(self): - self.processor.IDs = [] + self.processor.IDs = set() def makeExtension(configs=None): return HeaderIdExtension(configs=configs) - -if __name__ == "__main__": - import doctest - doctest.testmod() - diff --git a/markdown_python/extensions/html_tidy.py b/markdown_python/extensions/html_tidy.py deleted file mode 100644 index 6aee083..0000000 --- a/markdown_python/extensions/html_tidy.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python - -""" -HTML Tidy Extension for Python-Markdown -======================================= - -Runs [HTML Tidy][] on the output of Python-Markdown using the [uTidylib][] -Python wrapper. Both libtidy and uTidylib must be installed on your system. - -Note than any Tidy [options][] can be passed in as extension configs. So, -for example, to output HTML rather than XHTML, set ``output_xhtml=0``. To -indent the output, set ``indent=auto`` and to have Tidy wrap the output in -``<html>`` and ``<body>`` tags, set ``show_body_only=0``. - -[HTML Tidy]: http://tidy.sourceforge.net/ -[uTidylib]: http://utidylib.berlios.de/ -[options]: http://tidy.sourceforge.net/docs/quickref.html - -Copyright (c)2008 [Waylan Limberg](http://achinghead.com) - -License: [BSD](http://www.opensource.org/licenses/bsd-license.php) - -Dependencies: -* [Python2.3+](http://python.org) -* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) -* [HTML Tidy](http://utidylib.berlios.de/) -* [uTidylib](http://utidylib.berlios.de/) - -""" - -import markdown -try: - import tidy -except ImportError: - tidy = None - -class TidyExtension(markdown.Extension): - - def __init__(self, configs): - # Set defaults to match typical markdown behavior. - self.config = dict(output_xhtml=1, - show_body_only=1, - char_encoding='utf8' - ) - # Merge in user defined configs overriding any present if nessecary. 
- for c in configs: - self.config[c[0]] = c[1] - - def extendMarkdown(self, md, md_globals): - # Save options to markdown instance - md.tidy_options = self.config - # Add TidyProcessor to postprocessors - if tidy: - md.postprocessors['tidy'] = TidyProcessor(md) - - -class TidyProcessor(markdown.postprocessors.Postprocessor): - - def run(self, text): - # Pass text to Tidy. As Tidy does not accept unicode we need to encode - # it and decode its return value. - enc = self.markdown.tidy_options.get('char_encoding', 'utf8') - return unicode(tidy.parseString(text.encode(enc), - **self.markdown.tidy_options), - encoding=enc) - - -def makeExtension(configs=None): - return TidyExtension(configs=configs) diff --git a/markdown_python/extensions/meta.py b/markdown_python/extensions/meta.py old mode 100644 new mode 100755 index a3407da..aaff436 --- a/markdown_python/extensions/meta.py +++ b/markdown_python/extensions/meta.py @@ -1,5 +1,3 @@ -#!usr/bin/python - """ Meta Data Extension for Python-Markdown ======================================= @@ -34,21 +32,24 @@ Copyright 2007-2008 [Waylan Limberg](http://achinghead.com). -Project website: <http://www.freewisdom.org/project/python-markdown/Meta-Data> +Project website: <http://packages.python.org/Markdown/meta_data.html> Contact: markdown@freewisdom.org -License: BSD (see ../docs/LICENSE for details) +License: BSD (see ../LICENSE.md for details) """ -import re -import markdown +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..preprocessors import Preprocessor +import re # Global Vars META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)') META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)') -class MetaExtension (markdown.Extension): +class MetaExtension (Extension): """ Meta-Data extension for Python-Markdown. 
""" def extendMarkdown(self, md, md_globals): @@ -57,7 +58,7 @@ def extendMarkdown(self, md, md_globals): md.preprocessors.add("meta", MetaPreprocessor(md), "_begin") -class MetaPreprocessor(markdown.preprocessors.Preprocessor): +class MetaPreprocessor(Preprocessor): """ Get Meta-Data. """ def run(self, lines): @@ -90,7 +91,3 @@ def run(self, lines): def makeExtension(configs={}): return MetaExtension(configs=configs) - -if __name__ == "__main__": - import doctest - doctest.testmod() diff --git a/markdown_python/extensions/nl2br.py b/markdown_python/extensions/nl2br.py old mode 100644 new mode 100755 index 5ba08a9..da4b339 --- a/markdown_python/extensions/nl2br.py +++ b/markdown_python/extensions/nl2br.py @@ -3,7 +3,7 @@ =============== A Python-Markdown extension to treat newlines as hard breaks; like -StackOverflow and GitHub flavored Markdown do. +GitHub-flavored Markdown does. Usage: @@ -16,21 +16,23 @@ Dependencies: * [Python 2.4+](http://python.org) -* [Markdown 2.1+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.1+](http://packages.python.org/Markdown/) """ -import markdown +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import Extension +from ..inlinepatterns import SubstituteTagPattern BR_RE = r'\n' -class Nl2BrExtension(markdown.Extension): +class Nl2BrExtension(Extension): def extendMarkdown(self, md, md_globals): - br_tag = markdown.inlinepatterns.SubstituteTagPattern(BR_RE, 'br') + br_tag = SubstituteTagPattern(BR_RE, 'br') md.inlinePatterns.add('nl', br_tag, '_end') def makeExtension(configs=None): return Nl2BrExtension(configs) - diff --git a/markdown_python/extensions/rss.py b/markdown_python/extensions/rss.py deleted file mode 100644 index ae43220..0000000 --- a/markdown_python/extensions/rss.py +++ /dev/null @@ -1,114 +0,0 @@ -import markdown -from markdown.util import etree - -DEFAULT_URL = "http://www.freewisdom.org/projects/python-markdown/" -DEFAULT_CREATOR = "Yuri Takhteyev" -DEFAULT_TITLE = "Markdown in Python" -GENERATOR = "http://www.freewisdom.org/projects/python-markdown/markdown2rss" - -month_map = { "Jan" : "01", - "Feb" : "02", - "March" : "03", - "April" : "04", - "May" : "05", - "June" : "06", - "July" : "07", - "August" : "08", - "September" : "09", - "October" : "10", - "November" : "11", - "December" : "12" } - -def get_time(heading): - - heading = heading.split("-")[0] - heading = heading.strip().replace(",", " ").replace(".", " ") - - month, date, year = heading.split() - month = month_map[month] - - return rdftime(" ".join((month, date, year, "12:00:00 AM"))) - -def rdftime(time): - - time = time.replace(":", " ") - time = time.replace("/", " ") - time = time.split() - return "%s-%s-%sT%s:%s:%s-08:00" % (time[0], time[1], time[2], - time[3], time[4], time[5]) - - -def get_date(text): - return "date" - -class RssExtension (markdown.Extension): - - def extendMarkdown(self, md, md_globals): - - self.config = { 'URL' : [DEFAULT_URL, "Main URL"], - 'CREATOR' : [DEFAULT_CREATOR, "Feed creator's name"], - 'TITLE' : [DEFAULT_TITLE, "Feed title"] } - - md.xml_mode = True - - # Insert a tree-processor that would actually add the title tag - treeprocessor = 
RssTreeProcessor(md) - treeprocessor.ext = self - md.treeprocessors['rss'] = treeprocessor - md.stripTopLevelTags = 0 - md.docType = '<?xml version="1.0" encoding="utf-8"?>\n' - -class RssTreeProcessor(markdown.treeprocessors.Treeprocessor): - - def run (self, root): - - rss = etree.Element("rss") - rss.set("version", "2.0") - - channel = etree.SubElement(rss, "channel") - - for tag, text in (("title", self.ext.getConfig("TITLE")), - ("link", self.ext.getConfig("URL")), - ("description", None)): - - element = etree.SubElement(channel, tag) - element.text = text - - for child in root: - - if child.tag in ["h1", "h2", "h3", "h4", "h5"]: - - heading = child.text.strip() - item = etree.SubElement(channel, "item") - link = etree.SubElement(item, "link") - link.text = self.ext.getConfig("URL") - title = etree.SubElement(item, "title") - title.text = heading - - guid = ''.join([x for x in heading if x.isalnum()]) - guidElem = etree.SubElement(item, "guid") - guidElem.text = guid - guidElem.set("isPermaLink", "false") - - elif child.tag in ["p"]: - try: - description = etree.SubElement(item, "description") - except UnboundLocalError: - # Item not defined - moving on - pass - else: - if len(child): - content = "\n".join([etree.tostring(node) - for node in child]) - else: - content = child.text - pholder = self.markdown.htmlStash.store( - "<![CDATA[ %s]]>" % content) - description.text = pholder - - return rss - - -def makeExtension(configs): - - return RssExtension(configs) diff --git a/markdown_python/extensions/sane_lists.py b/markdown_python/extensions/sane_lists.py old mode 100644 new mode 100755 index dce04ea..23e9a7f --- a/markdown_python/extensions/sane_lists.py +++ b/markdown_python/extensions/sane_lists.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ Sane List Extension for Python-Markdown ======================================= @@ -19,23 +18,26 @@ """ +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import Extension +from ..blockprocessors import OListProcessor, UListProcessor import re -import markdown -class SaneOListProcessor(markdown.blockprocessors.OListProcessor): +class SaneOListProcessor(OListProcessor): CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.))[ ]+(.*)') SIBLING_TAGS = ['ol'] -class SaneUListProcessor(markdown.blockprocessors.UListProcessor): +class SaneUListProcessor(UListProcessor): CHILD_RE = re.compile(r'^[ ]{0,3}(([*+-]))[ ]+(.*)') SIBLING_TAGS = ['ul'] -class SaneListExtension(markdown.Extension): +class SaneListExtension(Extension): """ Add sane lists to Markdown. """ def extendMarkdown(self, md, md_globals): diff --git a/markdown_python/extensions/smart_strong.py b/markdown_python/extensions/smart_strong.py old mode 100644 new mode 100755 index 3ed3560..4818cf9 --- a/markdown_python/extensions/smart_strong.py +++ b/markdown_python/extensions/smart_strong.py @@ -22,14 +22,15 @@ ''' -import re -import markdown -from markdown.inlinepatterns import SimpleTagPattern +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import Extension +from ..inlinepatterns import SimpleTagPattern SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\2(?!\w)' STRONG_RE = r'(\*{2})(.+?)\2' -class SmartEmphasisExtension(markdown.extensions.Extension): +class SmartEmphasisExtension(Extension): """ Add smart_emphasis extension to Markdown class.""" def extendMarkdown(self, md, md_globals): @@ -39,7 +40,3 @@ def extendMarkdown(self, md, md_globals): def makeExtension(configs={}): return SmartEmphasisExtension(configs=dict(configs)) - -if __name__ == '__main__': - import doctest - doctest.testmod() diff --git a/markdown_python/extensions/tables.py b/markdown_python/extensions/tables.py old mode 100644 new mode 100755 index 1388cb5..ad52ec1 --- a/markdown_python/extensions/tables.py +++ b/markdown_python/extensions/tables.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ Tables Extension for Python-Markdown ==================================== @@ -14,11 +13,14 @@ Copyright 2009 - [Waylan Limberg](http://achinghead.com) """ -import markdown -from markdown.util import etree +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..blockprocessors import BlockProcessor +from ..util import etree -class TableProcessor(markdown.blockprocessors.BlockProcessor): +class TableProcessor(BlockProcessor): """ Process Tables. """ def test(self, parent, block): @@ -84,7 +86,7 @@ def _split_row(self, row, border): return row.split('|') -class TableExtension(markdown.Extension): +class TableExtension(Extension): """ Add tables to Markdown. 
""" def extendMarkdown(self, md, md_globals): diff --git a/markdown_python/extensions/toc.py b/markdown_python/extensions/toc.py old mode 100644 new mode 100755 index f00a249..73b0844 --- a/markdown_python/extensions/toc.py +++ b/markdown_python/extensions/toc.py @@ -5,46 +5,138 @@ (c) 2008 [Jack Miller](http://codezen.org) Dependencies: -* [Markdown 2.1+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.1+](http://packages.python.org/Markdown/) """ -import markdown -from markdown.util import etree -from markdown.extensions.headerid import slugify, unique, itertext +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import etree +from .headerid import slugify, unique, itertext import re -class TocTreeprocessor(markdown.treeprocessors.Treeprocessor): +def order_toc_list(toc_list): + """Given an unsorted list with errors and skips, return a nested one. + [{'level': 1}, {'level': 2}] + => + [{'level': 1, 'children': [{'level': 2, 'children': []}]}] + + A wrong list is also converted: + [{'level': 2}, {'level': 1}] + => + [{'level': 2, 'children': []}, {'level': 1, 'children': []}] + """ + + def build_correct(remaining_list, prev_elements=[{'level': 1000}]): + + if not remaining_list: + return [], [] + + current = remaining_list.pop(0) + if not 'children' in current.keys(): + current['children'] = [] + + if not prev_elements: + # This happens for instance with [8, 1, 1], ie. when some + # header level is outside a scope. We treat it as a + # top-level + next_elements, children = build_correct(remaining_list, [current]) + current['children'].append(children) + return [current] + next_elements, [] + + prev_element = prev_elements.pop() + children = [] + next_elements = [] + # Is current part of the child list or next list? 
+ if current['level'] > prev_element['level']: + #print "%d is a child of %d" % (current['level'], prev_element['level']) + prev_elements.append(prev_element) + prev_elements.append(current) + prev_element['children'].append(current) + next_elements2, children2 = build_correct(remaining_list, prev_elements) + children += children2 + next_elements += next_elements2 + else: + #print "%d is ancestor of %d" % (current['level'], prev_element['level']) + if not prev_elements: + #print "No previous elements, so appending to the next set" + next_elements.append(current) + prev_elements = [current] + next_elements2, children2 = build_correct(remaining_list, prev_elements) + current['children'].extend(children2) + else: + #print "Previous elements, comparing to those first" + remaining_list.insert(0, current) + next_elements2, children2 = build_correct(remaining_list, prev_elements) + children.extend(children2) + next_elements += next_elements2 + + return next_elements, children + + ordered_list, __ = build_correct(toc_list) + return ordered_list + + +class TocTreeprocessor(Treeprocessor): + # Iterator wrapper to get parent and child all at once def iterparent(self, root): for parent in root.getiterator(): for child in parent: yield parent, child - - def run(self, doc): - marker_found = False - - div = etree.Element("div") - div.attrib["class"] = "toc" - last_li = None - + + def add_anchor(self, c, elem_id): #@ReservedAssignment + if self.use_anchors: + anchor = etree.Element("a") + anchor.text = c.text + anchor.attrib["href"] = "#" + elem_id + anchor.attrib["class"] = "toclink" + c.text = "" + for elem in c.getchildren(): + anchor.append(elem) + c.remove(elem) + c.append(anchor) + + def build_toc_etree(self, div, toc_list): # Add title to the div if self.config["title"]: header = etree.SubElement(div, "span") header.attrib["class"] = "toctitle" header.text = self.config["title"] - level = 0 - list_stack=[div] - header_rgx = re.compile("[Hh][123456]") + def 
build_etree_ul(toc_list, parent): + ul = etree.SubElement(parent, "ul") + for item in toc_list: + # List item link, to be inserted into the toc div + li = etree.SubElement(ul, "li") + link = etree.SubElement(li, "a") + link.text = item.get('name', '') + link.attrib["href"] = '#' + item.get('id', '') + if item['children']: + build_etree_ul(item['children'], li) + return ul + + return build_etree_ul(toc_list, div) + + def run(self, doc): + div = etree.Element("div") + div.attrib["class"] = "toc" + header_rgx = re.compile("[Hh][123456]") + + self.use_anchors = self.config["anchorlink"] in [1, '1', True, 'True', 'true'] + # Get a list of id attributes - used_ids = [] + used_ids = set() for c in doc.getiterator(): if "id" in c.attrib: - used_ids.append(c.attrib["id"]) + used_ids.add(c.attrib["id"]) + toc_list = [] + marker_found = False for (p, c) in self.iterparent(doc): text = ''.join(itertext(c)).strip() if not text: @@ -56,7 +148,6 @@ def run(self, doc): # We do not allow the marker inside a header as that # would causes an enless loop of placing a new TOC # inside previously generated TOC. 
- if c.text and c.text.strip() == self.config["marker"] and \ not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']: for i in range(len(p)): @@ -64,66 +155,41 @@ def run(self, doc): p[i] = div break marker_found = True - + if header_rgx.match(c.tag): - try: - tag_level = int(c.tag[-1]) - - while tag_level < level: - list_stack.pop() - level -= 1 - - if tag_level > level: - newlist = etree.Element("ul") - if last_li: - last_li.append(newlist) - else: - list_stack[-1].append(newlist) - list_stack.append(newlist) - if level == 0: - level = tag_level - else: - level += 1 - - # Do not override pre-existing ids - if not "id" in c.attrib: - id = unique(self.config["slugify"](text, '-'), used_ids) - c.attrib["id"] = id - else: - id = c.attrib["id"] - - # List item link, to be inserted into the toc div - last_li = etree.Element("li") - link = etree.SubElement(last_li, "a") - link.text = text - link.attrib["href"] = '#' + id - - if self.config["anchorlink"] in [1, '1', True, 'True', 'true']: - anchor = etree.Element("a") - anchor.text = c.text - anchor.attrib["href"] = "#" + id - anchor.attrib["class"] = "toclink" - c.text = "" - for elem in c.getchildren(): - anchor.append(elem) - c.remove(elem) - c.append(anchor) - - list_stack[-1].append(last_li) - except IndexError: - # We have bad ordering of headers. Just move on. - pass + + # Do not override pre-existing ids + if not "id" in c.attrib: + elem_id = unique(self.config["slugify"](text, '-'), used_ids) + c.attrib["id"] = elem_id + else: + elem_id = c.attrib["id"] + + tag_level = int(c.tag[-1]) + + toc_list.append({'level': tag_level, + 'id': elem_id, + 'name': text}) + + self.add_anchor(c, elem_id) + + toc_list_nested = order_toc_list(toc_list) + self.build_toc_etree(div, toc_list_nested) + prettify = self.markdown.treeprocessors.get('prettify') + if prettify: prettify.run(div) if not marker_found: - # searialize and attach to markdown instance. 
- prettify = self.markdown.treeprocessors.get('prettify') - if prettify: prettify.run(div) + # serialize and attach to markdown instance. toc = self.markdown.serializer(div) for pp in self.markdown.postprocessors.values(): toc = pp.run(toc) self.markdown.toc = toc -class TocExtension(markdown.Extension): - def __init__(self, configs): + +class TocExtension(Extension): + + TreeProcessorClass = TocTreeprocessor + + def __init__(self, configs=[]): self.config = { "marker" : ["[TOC]", "Text to find and replace with Table of Contents -" "Defaults to \"[TOC]\""], @@ -141,14 +207,15 @@ def __init__(self, configs): self.setConfig(key, value) def extendMarkdown(self, md, md_globals): - tocext = TocTreeprocessor(md) + tocext = self.TreeProcessorClass(md) tocext.config = self.getConfigs() - # Headerid ext is set to '>inline'. With this set to '<prettify', + # Headerid ext is set to '>prettify'. With this set to '_end', # it should always come after headerid ext (and honor ids assinged # by the header id extension) if both are used. Same goes for # attr_list extension. This must come last because we don't want # to redefine ids after toc is created. But we do want toc prettified. 
- md.treeprocessors.add("toc", tocext, "<prettify") - + md.treeprocessors.add("toc", tocext, "_end") + + def makeExtension(configs={}): return TocExtension(configs=configs) diff --git a/markdown_python/extensions/wikilinks.py b/markdown_python/extensions/wikilinks.py old mode 100644 new mode 100755 index af43bba..877890b --- a/markdown_python/extensions/wikilinks.py +++ b/markdown_python/extensions/wikilinks.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - ''' WikiLinks Extension for Python-Markdown ====================================== @@ -75,10 +73,14 @@ Dependencies: * [Python 2.3+](http://python.org) -* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/) +* [Markdown 2.0+](http://packages.python.org/Markdown/) ''' -import markdown +from __future__ import absolute_import +from __future__ import unicode_literals +from . import Extension +from ..inlinepatterns import Pattern +from ..util import etree import re def build_url(label, base, end): @@ -87,7 +89,7 @@ def build_url(label, base, end): return '%s%s%s'% (base, clean_label, end) -class WikiLinkExtension(markdown.Extension): +class WikiLinkExtension(Extension): def __init__(self, configs): # set extension defaults self.config = { @@ -111,9 +113,9 @@ def extendMarkdown(self, md, md_globals): md.inlinePatterns.add('wikilink', wikilinkPattern, "<not_strong") -class WikiLinks(markdown.inlinepatterns.Pattern): +class WikiLinks(Pattern): def __init__(self, pattern, config): - markdown.inlinepatterns.Pattern.__init__(self, pattern) + super(WikiLinks, self).__init__(pattern) self.config = config def handleMatch(self, m): @@ -121,7 +123,7 @@ def handleMatch(self, m): base_url, end_url, html_class = self._getMeta() label = m.group(2).strip() url = self.config['build_url'](label, base_url, end_url) - a = markdown.util.etree.Element('a') + a = etree.Element('a') a.text = label a.set('href', url) if html_class: @@ -136,20 +138,14 @@ def _getMeta(self): end_url = self.config['end_url'] html_class = 
self.config['html_class'] if hasattr(self.md, 'Meta'): - if self.md.Meta.has_key('wiki_base_url'): + if 'wiki_base_url' in self.md.Meta: base_url = self.md.Meta['wiki_base_url'][0] - if self.md.Meta.has_key('wiki_end_url'): + if 'wiki_end_url' in self.md.Meta: end_url = self.md.Meta['wiki_end_url'][0] - if self.md.Meta.has_key('wiki_html_class'): + if 'wiki_html_class' in self.md.Meta: html_class = self.md.Meta['wiki_html_class'][0] return base_url, end_url, html_class def makeExtension(configs=None) : return WikiLinkExtension(configs=configs) - - -if __name__ == "__main__": - import doctest - doctest.testmod() - diff --git a/markdown_python/inlinepatterns.py b/markdown_python/inlinepatterns.py old mode 100644 new mode 100755 index 6cb4bdc..de957ef --- a/markdown_python/inlinepatterns.py +++ b/markdown_python/inlinepatterns.py @@ -41,18 +41,19 @@ * finally we apply strong and emphasis """ -import util -import odict +from __future__ import absolute_import +from __future__ import unicode_literals +from . import util +from . import odict import re -from urlparse import urlparse, urlunparse -import sys -# If you see an ImportError for htmlentitydefs after using 2to3 to convert for -# use by Python3, then you are probably using the buggy version from Python 3.0. -# We recomend using the tool from Python 3.1 even if you will be running the -# code on Python 3.0. The following line should be converted by the tool to: -# `from html import entities` and later calls to `htmlentitydefs` should be -# changed to call `entities`. Python 3.1's tool does this but 3.0's does not. 
-import htmlentitydefs +try: + from urllib.parse import urlparse, urlunparse +except ImportError: + from urlparse import urlparse, urlunparse +try: + from html import entities +except ImportError: + import htmlentitydefs as entities def build_inlinepatterns(md_instance, **kwargs): @@ -69,7 +70,6 @@ def build_inlinepatterns(md_instance, **kwargs): ReferencePattern(SHORT_REF_RE, md_instance) inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance) inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance) - inlinePatterns["linebreak2"] = SubstituteTagPattern(LINE_BREAK_2_RE, 'br') inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br') if md_instance.safeMode != 'escape': inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance) @@ -119,7 +119,6 @@ def build_inlinepatterns(md_instance, **kwargs): HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...> ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # & LINE_BREAK_RE = r' \n' # two spaces at end of line -LINE_BREAK_2_RE = r' $' # two spaces at end of text def dequote(string): @@ -144,7 +143,7 @@ def attributeCallback(match): ----------------------------------------------------------------------------- """ -class Pattern: +class Pattern(object): """Base class that inline patterns subclass. 
""" def __init__(self, pattern, markdown_instance=None): @@ -191,10 +190,27 @@ def unescape(self, text): stash = self.markdown.treeprocessors['inline'].stashed_nodes except KeyError: return text + def itertext(el): + ' Reimplement Element.itertext for older python versions ' + tag = el.tag + if not isinstance(tag, util.string_type) and tag is not None: + return + if el.text: + yield el.text + for e in el: + for s in itertext(e): + yield s + if e.tail: + yield e.tail def get_stash(m): id = m.group(1) if id in stash: - return stash.get(id) + value = stash.get(id) + if isinstance(value, util.string_type): + return value + else: + # An etree Element - return text content only + return ''.join(itertext(value)) return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) @@ -235,7 +251,7 @@ def handleMatch(self, m): class SubstituteTagPattern(SimpleTagPattern): - """ Return a eLement of type `tag` with no children. """ + """ Return an element of type `tag` with no children. """ def handleMatch (self, m): return util.etree.Element(self.tag) @@ -328,6 +344,7 @@ def sanitize_url(self, url): `username:password@host:port`. """ + url = url.replace(' ', '%20') if not self.markdown.safeMode: # Return immediately bipassing parsing. return url @@ -339,14 +356,18 @@ def sanitize_url(self, url): return '' locless_schemes = ['', 'mailto', 'news'] + allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps'] + if scheme not in allowed_schemes: + # Not a known (allowed) scheme. Not safe. + return '' + if netloc == '' and scheme not in locless_schemes: - # This fails regardless of anything else. - # Return immediately to save additional proccessing + # This should not happen. Treat as suspect. return '' for part in url[2:]: if ":" in part: - # Not a safe url + # A colon in "path", "parameters", "query" or "fragment" is suspect. return '' # Url passes all tests. Return url as-is. 
@@ -372,7 +393,7 @@ def handleMatch(self, m): else: truealt = m.group(2) - el.set('alt', truealt) + el.set('alt', self.unescape(truealt)) return el class ReferencePattern(LinkPattern): @@ -417,7 +438,11 @@ def makeTag(self, href, title, text): el.set("src", self.sanitize_url(href)) if title: el.set("title", title) - el.set("alt", text) + + if self.markdown.enable_attributes: + text = handleAttributes(text, el) + + el.set("alt", self.unescape(text)) return el @@ -441,7 +466,7 @@ def handleMatch(self, m): def codepoint2name(code): """Return entity definition by code, or the code if not defined.""" - entity = htmlentitydefs.codepoint2name.get(code) + entity = entities.codepoint2name.get(code) if entity: return "%s%s;" % (util.AMP_SUBSTITUTE, entity) else: diff --git a/markdown_python/odict.py b/markdown_python/odict.py old mode 100644 new mode 100755 index bf3ef07..8089ece --- a/markdown_python/odict.py +++ b/markdown_python/odict.py @@ -1,3 +1,14 @@ +from __future__ import unicode_literals +from __future__ import absolute_import +from . import util + +from copy import deepcopy + +def iteritems_compat(d): + """Return an iterator over the (key, value) pairs of a dictionary. + Copied from `six` module.""" + return iter(getattr(d, _iteritems)()) + class OrderedDict(dict): """ A dictionary that keeps its keys in the order in which they're inserted. 
@@ -11,34 +22,44 @@ def __new__(cls, *args, **kwargs): return instance def __init__(self, data=None): - if data is None: - data = {} - super(OrderedDict, self).__init__(data) - if isinstance(data, dict): - self.keyOrder = data.keys() + if data is None or isinstance(data, dict): + data = data or [] + super(OrderedDict, self).__init__(data) + self.keyOrder = list(data) if data else [] else: - self.keyOrder = [] + super(OrderedDict, self).__init__() + super_set = super(OrderedDict, self).__setitem__ for key, value in data: - if key not in self.keyOrder: + # Take the ordering from first key + if key not in self: self.keyOrder.append(key) + # But override with last value in data (dict() does this) + super_set(key, value) def __deepcopy__(self, memo): - from copy import deepcopy return self.__class__([(key, deepcopy(value, memo)) - for key, value in self.iteritems()]) + for key, value in self.items()]) + + def __copy__(self): + # The Python's default copy implementation will alter the state + # of self. The reason for this seems complex but is likely related to + # subclassing dict. 
+ return self.copy() def __setitem__(self, key, value): - super(OrderedDict, self).__setitem__(key, value) - if key not in self.keyOrder: + if key not in self: self.keyOrder.append(key) + super(OrderedDict, self).__setitem__(key, value) def __delitem__(self, key): super(OrderedDict, self).__delitem__(key) self.keyOrder.remove(key) def __iter__(self): - for k in self.keyOrder: - yield k + return iter(self.keyOrder) + + def __reversed__(self): + return reversed(self.keyOrder) def pop(self, k, *args): result = super(OrderedDict, self).pop(k, *args) @@ -54,41 +75,51 @@ def popitem(self): self.keyOrder.remove(result[0]) return result - def items(self): - return zip(self.keyOrder, self.values()) + def _iteritems(self): + for key in self.keyOrder: + yield key, self[key] - def iteritems(self): + def _iterkeys(self): for key in self.keyOrder: - yield key, super(OrderedDict, self).__getitem__(key) + yield key - def keys(self): - return self.keyOrder[:] + def _itervalues(self): + for key in self.keyOrder: + yield self[key] - def iterkeys(self): - return iter(self.keyOrder) + if util.PY3: + items = _iteritems + keys = _iterkeys + values = _itervalues + else: + iteritems = _iteritems + iterkeys = _iterkeys + itervalues = _itervalues - def values(self): - return [super(OrderedDict, self).__getitem__(k) for k in self.keyOrder] + def items(self): + return [(k, self[k]) for k in self.keyOrder] - def itervalues(self): - for key in self.keyOrder: - yield super(OrderedDict, self).__getitem__(key) + def keys(self): + return self.keyOrder[:] + + def values(self): + return [self[k] for k in self.keyOrder] def update(self, dict_): - for k, v in dict_.items(): - self.__setitem__(k, v) + for k, v in iteritems_compat(dict_): + self[k] = v def setdefault(self, key, default): - if key not in self.keyOrder: + if key not in self: self.keyOrder.append(key) return super(OrderedDict, self).setdefault(key, default) def value_for_index(self, index): - """Return the value of the item at the given 
zero-based index.""" + """Returns the value of the item at the given zero-based index.""" return self[self.keyOrder[index]] def insert(self, index, key, value): - """Insert the key, value pair before the item with the given index.""" + """Inserts the key, value pair before the item with the given index.""" if key in self.keyOrder: n = self.keyOrder.index(key) del self.keyOrder[n] @@ -98,18 +129,16 @@ def insert(self, index, key, value): super(OrderedDict, self).__setitem__(key, value) def copy(self): - """Return a copy of this object.""" + """Returns a copy of this object.""" # This way of initializing the copy means it works for subclasses, too. - obj = self.__class__(self) - obj.keyOrder = self.keyOrder[:] - return obj + return self.__class__(self) def __repr__(self): """ - Replace the normal dict.__repr__ with a version that returns the keys - in their sorted order. + Replaces the normal dict.__repr__ with a version that returns the keys + in their Ordered order. """ - return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self.items()]) + return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in iteritems_compat(self)]) def clear(self): super(OrderedDict, self).clear() @@ -117,7 +146,10 @@ def clear(self): def index(self, key): """ Return the index of a given key. """ - return self.keyOrder.index(key) + try: + return self.keyOrder.index(key) + except ValueError: + raise ValueError("Element '%s' was not found in OrderedDict" % key) def index_for_location(self, location): """ Return index or None for a given location. """ @@ -150,13 +182,13 @@ def link(self, key, location): """ Change location of an existing item. 
""" n = self.keyOrder.index(key) del self.keyOrder[n] - i = self.index_for_location(location) try: + i = self.index_for_location(location) if i is not None: self.keyOrder.insert(i, key) else: self.keyOrder.append(key) - except Error: + except Exception as e: # restore to prevent data loss and reraise self.keyOrder.insert(n, key) - raise Error + raise e diff --git a/markdown_python/postprocessors.py b/markdown_python/postprocessors.py old mode 100644 new mode 100755 index 071791a..5f3f032 --- a/markdown_python/postprocessors.py +++ b/markdown_python/postprocessors.py @@ -8,9 +8,12 @@ """ +from __future__ import absolute_import +from __future__ import unicode_literals +from . import util +from . import odict import re -import util -import odict + def build_postprocessors(md_instance, **kwargs): """ Build the default postprocessors for Markdown. """ @@ -95,7 +98,7 @@ class UnescapePostprocessor(Postprocessor): RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX)) def unescape(self, m): - return unichr(int(m.group(1))) + return util.int2str(int(m.group(1))) def run(self, text): return self.RE.sub(self.unescape, text) diff --git a/markdown_python/preprocessors.py b/markdown_python/preprocessors.py old mode 100644 new mode 100755 index 55dd9ab..72b2ed6 --- a/markdown_python/preprocessors.py +++ b/markdown_python/preprocessors.py @@ -6,14 +6,17 @@ complicated. """ +from __future__ import absolute_import +from __future__ import unicode_literals +from . import util +from . import odict import re -import util -import odict def build_preprocessors(md_instance, **kwargs): """ Build the default set of preprocessors used by Markdown. 
""" preprocessors = odict.OrderedDict() + preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) if md_instance.safeMode != 'escape': preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) preprocessors["reference"] = ReferencePreprocessor(md_instance) @@ -41,6 +44,18 @@ def run(self, lines): pass +class NormalizeWhitespace(Preprocessor): + """ Normalize whitespace for consistant parsing. """ + + def run(self, lines): + source = '\n'.join(lines) + source = source.replace(util.STX, "").replace(util.ETX, "") + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + source = source.expandtabs(self.markdown.tab_length) + source = re.sub(r'(?<=\n) +\n', '\n', source) + return source.split('\n') + + class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" @@ -127,7 +142,7 @@ def _is_oneliner(self, tag): def run(self, lines): text = "\n".join(lines) new_blocks = [] - text = text.split("\n\n") + text = text.rsplit("\n\n") items = [] left_tag = '' right_tag = '' @@ -257,25 +272,26 @@ def run(self, lines): class ReferencePreprocessor(Preprocessor): """ Remove reference definitions from text and store for later use. 
""" - RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL) + TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*' + RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL) + TITLE_RE = re.compile(r'^%s$' % TITLE) def run (self, lines): new_text = []; - for line in lines: + while lines: + line = lines.pop(0) m = self.RE.match(line) if m: - id = m.group(2).strip().lower() - link = m.group(3).lstrip('<').rstrip('>') - t = m.group(4).strip() # potential title + id = m.group(1).strip().lower() + link = m.group(2).lstrip('<').rstrip('>') + t = m.group(5) or m.group(6) or m.group(7) if not t: - self.markdown.references[id] = (link, t) - elif (len(t) >= 2 - and (t[0] == t[-1] == "\"" - or t[0] == t[-1] == "\'" - or (t[0] == "(" and t[-1] == ")") ) ): - self.markdown.references[id] = (link, t[1:-1]) - else: - new_text.append(line) + # Check next line for title + tm = self.TITLE_RE.match(lines[0]) + if tm: + lines.pop(0) + t = tm.group(2) or tm.group(3) or tm.group(4) + self.markdown.references[id] = (link, t) else: new_text.append(line) diff --git a/markdown_python/serializers.py b/markdown_python/serializers.py old mode 100644 new mode 100755 index 22a83d4..b19d61c --- a/markdown_python/serializers.py +++ b/markdown_python/serializers.py @@ -37,7 +37,9 @@ # -------------------------------------------------------------------- -import util +from __future__ import absolute_import +from __future__ import unicode_literals +from . 
import util ElementTree = util.etree.ElementTree QName = util.etree.QName if hasattr(util.etree, 'test_comment'): @@ -251,7 +253,7 @@ def add_qname(qname): tag = elem.tag if isinstance(tag, QName) and tag.text not in qnames: add_qname(tag.text) - elif isinstance(tag, basestring): + elif isinstance(tag, util.string_type): if tag not in qnames: add_qname(tag) elif tag is not None and tag is not Comment and tag is not PI: diff --git a/markdown_python/treeprocessors.py b/markdown_python/treeprocessors.py old mode 100644 new mode 100755 index 3340554..e6d3dc9 --- a/markdown_python/treeprocessors.py +++ b/markdown_python/treeprocessors.py @@ -1,7 +1,8 @@ -import re -import inlinepatterns -import util -import odict +from __future__ import unicode_literals +from __future__ import absolute_import +from . import util +from . import odict +from . import inlinepatterns def build_treeprocessors(md_instance, **kwargs): @@ -15,17 +16,11 @@ def build_treeprocessors(md_instance, **kwargs): def isString(s): """ Check if it's string """ if not isinstance(s, util.AtomicString): - return isinstance(s, basestring) + return isinstance(s, util.string_type) return False -class Processor: - def __init__(self, markdown_instance=None): - if markdown_instance: - self.markdown = markdown_instance - - -class Treeprocessor(Processor): +class Treeprocessor(util.Processor): """ Treeprocessors are run on the ElementTree object before serialization. 
@@ -304,25 +299,26 @@ def run(self, tree): if child.getchildren(): stack.append(child) - if self.markdown.enable_attributes: - for element, lst in insertQueue: - if element.text: + for element, lst in insertQueue: + if self.markdown.enable_attributes: + if element.text and isString(element.text): element.text = \ inlinepatterns.handleAttributes(element.text, element) - i = 0 - for newChild in lst: + i = 0 + for newChild in lst: + if self.markdown.enable_attributes: # Processing attributes - if newChild.tail: + if newChild.tail and isString(newChild.tail): newChild.tail = \ inlinepatterns.handleAttributes(newChild.tail, element) - if newChild.text: + if newChild.text and isString(newChild.text): newChild.text = \ inlinepatterns.handleAttributes(newChild.text, newChild) - element.insert(i, newChild) - i += 1 + element.insert(i, newChild) + i += 1 return tree @@ -357,3 +353,8 @@ def run(self, root): br.tail = '\n' else: br.tail = '\n%s' % br.tail + # Clean up extra empty lines at end of code blocks. 
+ pres = root.getiterator('pre') + for pre in pres: + if len(pre) and pre[0].tag == 'code': + pre[0].text = pre[0].text.rstrip() + '\n' diff --git a/markdown_python/util.py b/markdown_python/util.py old mode 100644 new mode 100755 index 998211e..1036197 --- a/markdown_python/util.py +++ b/markdown_python/util.py @@ -1,14 +1,24 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals import re -from logging import CRITICAL - -import etree_loader +import sys """ -CONSTANTS +Python 3 Stuff ============================================================================= """ +PY3 = sys.version_info[0] == 3 + +if PY3: + string_type = str + text_type = str + int2str = chr +else: + string_type = basestring + text_type = unicode + int2str = unichr + """ Constants you might want to modify @@ -17,13 +27,13 @@ BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" "|script|noscript|form|fieldset|iframe|math" - "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody" + "|hr|hr/|style|li|dt|dd|thead|tbody" "|tr|th|td|section|footer|header|group|figure" "|figcaption|aside|article|canvas|output" - "|progress|video)$") + "|progress|video)$", re.IGNORECASE) # Placeholders -STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder -ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder +STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder +ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})') @@ -34,17 +44,29 @@ ----------------------------------------------------------------------------- """ -RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), +RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'), # Hebrew (0590-05FF), Arabic (0600-06FF), # Syriac (0700-074F), Arabic supplement (0750-077F), # Thaana (0780-07BF), Nko (07C0-07FF). 
- (u'\u2D30', u'\u2D7F'), # Tifinagh + ('\u2D30', '\u2D7F'), # Tifinagh ) # Extensions should use "markdown.util.etree" instead of "etree" (or do `from # markdown.util import etree`). Do not import it by yourself. -etree = etree_loader.importETree() +try: # Is the C implementation of ElementTree available? + import xml.etree.cElementTree as etree + from xml.etree.ElementTree import Comment + # Serializers (including ours) test with non-c Comment + etree.test_comment = Comment + if etree.VERSION < "1.0.5": + raise RuntimeError("cElementTree version 1.0.5 or higher is required.") +except (ImportError, RuntimeError): + # Use the Python implementation of ElementTree? + import xml.etree.ElementTree as etree + if etree.VERSION < "1.1": + raise RuntimeError("ElementTree version 1.1 or higher is required") + """ AUXILIARY GLOBAL FUNCTIONS @@ -54,7 +76,7 @@ def isBlockLevel(tag): """Check if the tag is a block level HTML tag.""" - if isinstance(tag, basestring): + if isinstance(tag, string_type): return BLOCK_LEVEL_ELEMENTS.match(tag) # Some ElementTree tags are not strings, so return False. return False @@ -64,18 +86,18 @@ def isBlockLevel(tag): ============================================================================= """ -class AtomicString(unicode): +class AtomicString(text_type): """A string which should not be further processed.""" pass -class Processor: +class Processor(object): def __init__(self, markdown_instance=None): if markdown_instance: self.markdown = markdown_instance -class HtmlStash: +class HtmlStash(object): """ This class is used for stashing HTML objects that we extract in the beginning and replace with place-holders. From 646213c69c2f97a8ace81ecce69c19b65b18342a Mon Sep 17 00:00:00 2001 From: Cody Boisclair <cody@zone38.net> Date: Thu, 22 Aug 2013 14:06:10 -0400 Subject: [PATCH 2/2] Use relative import for markdown_python for Sublime 3 compatibility.
--- MarkdownBuild.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MarkdownBuild.py b/MarkdownBuild.py index 3a8372f..175277c 100644 --- a/MarkdownBuild.py +++ b/MarkdownBuild.py @@ -1,6 +1,6 @@ import sublime import sublime_plugin -import markdown_python +from .markdown_python import markdown import os import tempfile import webbrowser @@ -27,7 +27,7 @@ def run(self): if not file_name: return contents = view.substr(sublime.Region(0, view.size())) - md = markdown_python.markdown(contents) + md = markdown(contents) html = '<html><meta charset="' + charset + '">' if use_css: css = os.path.join(sublime.packages_path(), 'MarkdownBuild', 'markdown.css')