From b4a09947dc0f6e5ad52a9857fed59f0f6f3916ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= <mosra@centrum.cz>
Date: Tue, 6 Jun 2017 23:39:13 +0200
Subject: [PATCH] Introduce Jinja2 filters for converting links in additional
 fields.

Until now, the link replacing worked only on article and page
contents or summaries. With this patch, if one needs to replace
links in custom fields, there are two new Jinja2 filters that can do
that. For fields that are referenced in the `FORMATTED_FIELDS` setting,
one can use the `expand_links` Jinja2 filter in the template, passing
the field name as a parameter:

    {{ article|expand_links('legal') }}

If the custom field consists of just one link (for example a link to
article cover image for a social meta tag), one can use the
`expand_link` Jinja2 filter:

    {{ article|expand_link('cover') }}

With the above in a template and the `FORMATTED_FIELDS` setting
containing the `'legal'` field, an RST article making use of both fields
could look like this:

    An article
    ##########

    :date: 2017-06-22
    :legal: This article is released under `CC0 <{filename}/license.rst>`_.
    :cover: {filename}/img/article-cover.jpg
---
 docs/changelog.rst          |   2 +
 docs/content.rst            |  23 +++++++
 pelican/contents.py         | 126 ++++++++++++++++++------------------
 pelican/generators.py       |   9 ++-
 pelican/tests/test_utils.py |  42 ++++++++++++
 pelican/utils.py            |  34 ++++++++++
 6 files changed, 171 insertions(+), 65 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index aa594a2c9..aa036cd8b 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -5,6 +5,8 @@ Next release
 ============
 
 * New signal: ``feed_generated``
+* Introduced ``expand_link`` and ``expand_links`` Jinja2 filters to allow URL
+  replacement in user-defined metadata fields.
 
 3.7.1 (2017-01-10)
 ==================
diff --git a/docs/content.rst b/docs/content.rst
index 507593bf4..9c5a6742e 100644
--- a/docs/content.rst
+++ b/docs/content.rst
@@ -202,6 +202,29 @@ and ``article2.md``::
     [a link relative to the current file]({filename}category/article1.rst)
     [a link relative to the content root]({filename}/category/article1.rst)
 
+The link replacing works by default on article and page contents as well as
+summaries. If you need to replace links in custom formatted fields that are
+referenced in the ``FORMATTED_FIELDS`` setting, use the ``expand_links``
+Jinja2 filter in your template, passing the field name as a parameter::
+
+    {{ article|expand_links('legal') }}
+
+If your custom field consists of just one link (for example a link to article
+cover image for a social meta tag), use the ``expand_link`` Jinja2 filter::
+
+    {{ article|expand_link('cover') }}
+
+With the above in a template and the ``FORMATTED_FIELDS`` setting containing
+the ``'legal'`` field, an RST article making use of both fields could look like
+this::
+
+    An article
+    ##########
+
+    :date: 2017-06-22
+    :legal: This article is released under `CC0 <{filename}/license.rst>`_.
+    :cover: {filename}/img/article-cover.jpg
+
 Linking to static files
 -----------------------
 
diff --git a/pelican/contents.py b/pelican/contents.py
index 3d1128c9b..f29783eeb 100644
--- a/pelican/contents.py
+++ b/pelican/contents.py
@@ -204,6 +204,68 @@ def get_url_setting(self, key):
         key = key if self.in_default_lang else 'lang_%s' % key
         return self._expand_settings(key)
 
+    def _link_replacer(self, siteurl, m):
+        what = m.group('what')
+        value = urlparse(m.group('value'))
+        path = value.path
+        origin = m.group('path')
+
+        # XXX Put this in a different location.
+        if what in {'filename', 'attach'}:
+            if path.startswith('/'):
+                path = path[1:]
+            else:
+                # relative to the source path of this content
+                path = self.get_relative_source_path(
+                    os.path.join(self.relative_dir, path)
+                )
+
+            if path not in self._context['filenames']:
+                unquoted_path = path.replace('%20', ' ')
+
+                if unquoted_path in self._context['filenames']:
+                    path = unquoted_path
+
+            linked_content = self._context['filenames'].get(path)
+            if linked_content:
+                if what == 'attach':
+                    if isinstance(linked_content, Static):
+                        linked_content.attach_to(self)
+                    else:
+                        logger.warning(
+                            "%s used {attach} link syntax on a "
+                            "non-static file. Use {filename} instead.",
+                            self.get_relative_source_path())
+                origin = '/'.join((siteurl, linked_content.url))
+                origin = origin.replace('\\', '/')  # for Windows paths.
+            else:
+                logger.warning(
+                    "Unable to find '%s', skipping url replacement.",
+                    value.geturl(), extra={
+                        'limit_msg': ("Other resources were not found "
+                                      "and their urls not replaced")})
+        elif what == 'category':
+            origin = '/'.join((siteurl, Category(path, self.settings).url))
+        elif what == 'tag':
+            origin = '/'.join((siteurl, Tag(path, self.settings).url))
+        elif what == 'index':
+            origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
+        elif what == 'author':
+            origin = '/'.join((siteurl, Author(path, self.settings).url))
+        else:
+            logger.warning(
+                "Replacement Indicator '%s' not recognized, "
+                "skipping replacement",
+                what)
+
+        # keep all other parts, such as query, fragment, etc.
+        parts = list(value)
+        parts[2] = origin
+        origin = urlunparse(parts)
+
+        return ''.join((m.group('markup'), m.group('quote'), origin,
+                        m.group('quote')))
+
     def _update_content(self, content, siteurl):
         """Update the content attribute.
 
@@ -227,69 +289,7 @@ def _update_content(self, content, siteurl):
             \2""".format(instrasite_link_regex)
         hrefs = re.compile(regex, re.X)
 
-        def replacer(m):
-            what = m.group('what')
-            value = urlparse(m.group('value'))
-            path = value.path
-            origin = m.group('path')
-
-            # XXX Put this in a different location.
-            if what in {'filename', 'attach'}:
-                if path.startswith('/'):
-                    path = path[1:]
-                else:
-                    # relative to the source path of this content
-                    path = self.get_relative_source_path(
-                        os.path.join(self.relative_dir, path)
-                    )
-
-                if path not in self._context['filenames']:
-                    unquoted_path = path.replace('%20', ' ')
-
-                    if unquoted_path in self._context['filenames']:
-                        path = unquoted_path
-
-                linked_content = self._context['filenames'].get(path)
-                if linked_content:
-                    if what == 'attach':
-                        if isinstance(linked_content, Static):
-                            linked_content.attach_to(self)
-                        else:
-                            logger.warning(
-                                "%s used {attach} link syntax on a "
-                                "non-static file. Use {filename} instead.",
-                                self.get_relative_source_path())
-                    origin = '/'.join((siteurl, linked_content.url))
-                    origin = origin.replace('\\', '/')  # for Windows paths.
-                else:
-                    logger.warning(
-                        "Unable to find '%s', skipping url replacement.",
-                        value.geturl(), extra={
-                            'limit_msg': ("Other resources were not found "
-                                          "and their urls not replaced")})
-            elif what == 'category':
-                origin = '/'.join((siteurl, Category(path, self.settings).url))
-            elif what == 'tag':
-                origin = '/'.join((siteurl, Tag(path, self.settings).url))
-            elif what == 'index':
-                origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
-            elif what == 'author':
-                origin = '/'.join((siteurl, Author(path, self.settings).url))
-            else:
-                logger.warning(
-                    "Replacement Indicator '%s' not recognized, "
-                    "skipping replacement",
-                    what)
-
-            # keep all other parts, such as query, fragment, etc.
-            parts = list(value)
-            parts[2] = origin
-            origin = urlunparse(parts)
-
-            return ''.join((m.group('markup'), m.group('quote'), origin,
-                            m.group('quote')))
-
-        return hrefs.sub(replacer, content)
+        return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content)
 
     def get_siteurl(self):
         return self._context.get('localsiteurl', '')
diff --git a/pelican/generators.py b/pelican/generators.py
index f3590155f..ec5c520be 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -21,8 +21,9 @@
 from pelican.cache import FileStampDataCacher
 from pelican.contents import Article, Draft, Page, Static, is_valid_content
 from pelican.readers import Readers
-from pelican.utils import (DateFormatter, copy, mkdir_p, posixize_path,
-                           process_translations, python_2_unicode_compatible)
+from pelican.utils import (DateFormatter, HtmlLinkExpander, LinkExpander, copy,
+                           mkdir_p, posixize_path, process_translations,
+                           python_2_unicode_compatible)
 
 
 logger = logging.getLogger(__name__)
@@ -74,6 +75,10 @@ def __init__(self, context, settings, path, theme, output_path,
         # provide utils.strftime as a jinja filter
         self.env.filters.update({'strftime': DateFormatter()})
 
+        # provide link expansion as a jinja filter
+        self.env.filters.update({'expand_link': LinkExpander(settings)})
+        self.env.filters.update({'expand_links': HtmlLinkExpander()})
+
         # get custom Jinja filters from user settings
         custom_filters = self.settings['JINJA_FILTERS']
         self.env.filters.update(custom_filters)
diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py
index 9a7109d6d..4455bc9eb 100644
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@@ -9,11 +9,13 @@
 from sys import platform
 from tempfile import mkdtemp
 
+from jinja2 import DictLoader, Environment
 import pytz
 
 import six
 
 from pelican import utils
+from pelican.contents import Article, Page, Static
 from pelican.generators import TemplatePagesGenerator
 from pelican.settings import read_settings
 from pelican.tests.support import (LoggedTestCase, get_article,
@@ -670,6 +672,46 @@ def test_turkish_locale(self):
                              utils.strftime(self.date, 'date = %A, %d %B %Y'))
 
 
+class TestLinkExpanders(unittest.TestCase):
+    """Tests Jinja2 expand_link() and expand_links() filters."""
+
+    def test_expand_link(self):
+        settings = read_settings()
+        env = Environment(
+            loader=DictLoader({'a.html': "{{article|expand_link('cover')}}"})
+        )
+        env.filters.update({'expand_link': utils.LinkExpander(settings)})
+
+        linked_image = Static('', source_path='image.png')
+        context = {'filenames': {'image.png': linked_image},
+                   'localsiteurl': 'https://my.cool.site'}
+        content_mock = Article('', metadata={
+            'title': 'Article',
+            'cover': "{filename}/image.png"}, context=context)
+        result = env.get_template('a.html').render(article=content_mock)
+        self.assertEqual('https://my.cool.site/image.png', result)
+
+    def test_expand_links(self):
+        env = Environment(
+            loader=DictLoader({'a.html': "{{article|expand_links('legal')}}"})
+        )
+        env.filters.update({'expand_links': utils.HtmlLinkExpander()})
+
+        linked_page = Page('', source_path='legal.rst',
+                           metadata={'slug': 'license'})
+        context = {'filenames': {'legal.rst': linked_page},
+                   'localsiteurl': 'https://my.cool.site'}
+        content_mock = Article(
+            '', metadata={
+                'title': 'Article',
+                'legal': "<a href=\"{filename}/legal.rst\">License</a>"
+            }, context=context)
+        result = env.get_template('a.html').render(article=content_mock)
+        self.assertEqual(
+            '<a href="https://my.cool.site/pages/license.html">License</a>',
+            result)
+
+
 class TestSanitisedJoin(unittest.TestCase):
     def test_detect_parent_breakout(self):
         with six.assertRaisesRegex(
diff --git a/pelican/utils.py b/pelican/utils.py
index ef9da23b3..95d4c2244 100644
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -147,6 +147,40 @@ def __call__(self, date, date_format):
         return formatted
 
 
+class LinkExpander(object):
+    """Link expander object used as a jinja filter
+
+    Expands a custom field that contains just a link to internal content. The
+    same rules as when links are expanded in article/page contents and
+    summaries apply.
+    """
+
+    def __init__(self, settings):
+        self.intrasite_link_regex = settings['INTRASITE_LINK_REGEX']
+
+    def __call__(self, content, attr):
+        link_regex = r"""^
+            (?P<markup>)(?P<quote>)
+            (?P<path>{0}(?P<value>.*))
+            $""".format(self.intrasite_link_regex)
+        links = re.compile(link_regex, re.X)
+        return links.sub(
+            lambda m: content._link_replacer(content.get_siteurl(), m),
+            getattr(content, attr))
+
+
+class HtmlLinkExpander(object):
+    """HTML link expander object used as a jinja filter
+
+    Expands links to internal contents in a custom HTML field. The same rules
+    as when links are expanded in article/page contents and summaries apply.
+    """
+
+    def __call__(self, content, attr):
+        return content._update_content(getattr(content, attr),
+                                       content.get_siteurl())
+
+
 def python_2_unicode_compatible(klass):
     """
     A decorator that defines __unicode__ and __str__ methods under Python 2.