Make URL part joining aware of absolute URLs.

Previously, when both SITEURL and STATIC_URL were absolute, the final generated data URLs were wrong: the two absolute URLs were simply joined by `/`, for example:

    http://your.site/http://static.your.site/image.png

With this patch, the data URL is generated correctly:

    http://static.your.site/image.png

This also applies to all *_URL configuration options (making it possible, for example, to have pages and articles on different domains), and it behaves as one would expect even with URLs starting with just `//`.
getpelican · Oct 24, 2017 · 7eaae71 · 7eaae71
1 parent 1a96d37
commit 7eaae71
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 10 deletions.
diff --git a/pelican/contents.py b/pelican/contents.py
@@ -11,7 +11,7 @@
 import pytz
 
 import six
-from six.moves.urllib.parse import urlparse, urlunparse
+from six.moves.urllib.parse import urljoin, urlparse, urlunparse
 
 from pelican import signals
 from pelican.settings import DEFAULT_CONFIG
@@ -210,6 +210,12 @@ def _link_replacer(self, siteurl, m):
         path = value.path
         origin = m.group('path')
 
+        # In order to stay compatible with behavior of Pelican 3.7.1, where
+        # having empty SITEURL would still produce proper absolute links
+        # starting with /, I'm adding a trailing slash to it so urljoin does
+        # the right thing.
+        if not siteurl.endswith('/'): siteurl += '/'
+
         # XXX Put this in a different location.
         if what in {'filename', 'attach'}:
             if path.startswith('/'):
@@ -236,7 +242,7 @@ def _link_replacer(self, siteurl, m):
                             "%s used {attach} link syntax on a "
                             "non-static file. Use {filename} instead.",
                             self.get_relative_source_path())
-                origin = '/'.join((siteurl, linked_content.url))
+                origin = urljoin(siteurl, linked_content.url)
                 origin = origin.replace('\\', '/')  # for Windows paths.
             else:
                 logger.warning(
@@ -245,13 +251,13 @@ def _link_replacer(self, siteurl, m):
                         'limit_msg': ("Other resources were not found "
                                       "and their urls not replaced")})
         elif what == 'category':
-            origin = '/'.join((siteurl, Category(path, self.settings).url))
+            origin = urljoin(siteurl, Category(path, self.settings).url)
         elif what == 'tag':
-            origin = '/'.join((siteurl, Tag(path, self.settings).url))
+            origin = urljoin(siteurl, Tag(path, self.settings).url)
         elif what == 'index':
-            origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
+            origin = urljoin(siteurl, self.settings['INDEX_SAVE_AS'])
         elif what == 'author':
-            origin = '/'.join((siteurl, Author(path, self.settings).url))
+            origin = urljoin(siteurl, Author(path, self.settings).url)
         else:
             logger.warning(
                 "Replacement Indicator '%s' not recognized, "

diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py
@@ -10,6 +10,7 @@
 from jinja2.utils import generate_lorem_ipsum
 
 import six
+from six.moves.urllib.parse import urljoin
 
 from pelican.contents import Article, Author, Category, Page, Static, Tag
 from pelican.settings import DEFAULT_CONFIG
@@ -395,6 +396,54 @@ def test_intrasite_link_more(self):
             '</blockquote>'
         )
 
+    def test_intrasite_link_absolute(self):
+        """Test that absolute URLs are merged properly."""
+
+        args = self.page_kwargs.copy()
+        args['settings'] = get_settings(
+            STATIC_URL='http://static.cool.site/{path}',
+            ARTICLE_URL='http://blog.cool.site/{slug}.html')
+        args['source_path'] = 'content'
+        args['context']['filenames'] = {
+            'images/poster.jpg': Static('',
+                                        settings=args['settings'],
+                                        source_path='images/poster.jpg'),
+            'article.rst': Article('',
+                                   settings=args['settings'],
+                                   metadata={'slug': 'article',
+                                             'title': 'Article'})
+        }
+
+        # Article link will go to blog
+        args['content'] = (
+            '<a href="{filename}article.rst">Article</a>'
+        )
+        content = Page(**args).get_content('http://cool.site')
+        self.assertEqual(
+            content,
+            '<a href="http://blog.cool.site/article.html">Article</a>'
+        )
+
+        # Page link will go to the main site
+        args['content'] = (
+            '<a href="{index}">Index</a>'
+        )
+        content = Page(**args).get_content('http://cool.site')
+        self.assertEqual(
+            content,
+            '<a href="http://cool.site/index.html">Index</a>'
+        )
+
+        # Image link will go to static
+        args['content'] = (
+            '<img src="{filename}/images/poster.jpg"/>'
+        )
+        content = Page(**args).get_content('http://cool.site')
+        self.assertEqual(
+            content,
+            '<img src="http://static.cool.site/images/poster.jpg"/>'
+        )
+
     def test_intrasite_link_markdown_spaces(self):
         # Markdown introduces %20 instead of spaces, this tests that
         # we support markdown doing this.
@@ -734,8 +783,8 @@ def test_index_link_syntax(self):
         self.assertNotEqual(content, html)
 
         expected_html = ('<a href="' +
-                         '/'.join((self.settings['SITEURL'],
-                                   self.settings['INDEX_SAVE_AS'])) +
+                         urljoin(self.settings['SITEURL'],
+                                 self.settings['INDEX_SAVE_AS']) +
                          '">link</a>')
         self.assertEqual(content, expected_html)
 
@@ -788,7 +837,7 @@ def test_index_link_syntax_with_spaces(self):
         self.assertNotEqual(content, html)
 
         expected_html = ('<a href = "' +
-                         '/'.join((self.settings['SITEURL'],
-                                   self.settings['INDEX_SAVE_AS'])) +
+                         urljoin(self.settings['SITEURL'],
+                                 self.settings['INDEX_SAVE_AS']) +
                          '">link</a>')
         self.assertEqual(content, expected_html)