Skip to content

Commit

Permalink
Make URL part joining aware of absolute URLs.
Browse files Browse the repository at this point in the history
Previously, when both SITEURL and STATIC_URL were absolute, the final
generated data URLs were wrong (two absolute URLs joined by
`/`), like this:

    http://your.site/http://static.your.site/image.png

With this patch, the data URLs are correct:

    http://static.your.site/image.png

This also applies to all *_URL configuration options (for example, the
ability to have pages and articles on different domains) and behaves
as one expects even with URLs starting with just `//`.
  • Loading branch information
mosra committed Oct 24, 2017
1 parent 1a96d37 commit 7eaae71
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 10 deletions.
18 changes: 12 additions & 6 deletions pelican/contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pytz

import six
from six.moves.urllib.parse import urlparse, urlunparse
from six.moves.urllib.parse import urljoin, urlparse, urlunparse

from pelican import signals
from pelican.settings import DEFAULT_CONFIG
Expand Down Expand Up @@ -210,6 +210,12 @@ def _link_replacer(self, siteurl, m):
path = value.path
origin = m.group('path')

# In order to stay compatible with behavior of Pelican 3.7.1, where
# having empty SITEURL would still produce proper absolute links
# starting with /, I'm adding a trailing slash to it so urljoin does
# the right thing.
if not siteurl.endswith('/'): siteurl += '/'

# XXX Put this in a different location.
if what in {'filename', 'attach'}:
if path.startswith('/'):
Expand All @@ -236,7 +242,7 @@ def _link_replacer(self, siteurl, m):
"%s used {attach} link syntax on a "
"non-static file. Use {filename} instead.",
self.get_relative_source_path())
origin = '/'.join((siteurl, linked_content.url))
origin = urljoin(siteurl, linked_content.url)
origin = origin.replace('\\', '/') # for Windows paths.
else:
logger.warning(
Expand All @@ -245,13 +251,13 @@ def _link_replacer(self, siteurl, m):
'limit_msg': ("Other resources were not found "
"and their urls not replaced")})
elif what == 'category':
origin = '/'.join((siteurl, Category(path, self.settings).url))
origin = urljoin(siteurl, Category(path, self.settings).url)
elif what == 'tag':
origin = '/'.join((siteurl, Tag(path, self.settings).url))
origin = urljoin(siteurl, Tag(path, self.settings).url)
elif what == 'index':
origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS']))
origin = urljoin(siteurl, self.settings['INDEX_SAVE_AS'])
elif what == 'author':
origin = '/'.join((siteurl, Author(path, self.settings).url))
origin = urljoin(siteurl, Author(path, self.settings).url)
else:
logger.warning(
"Replacement Indicator '%s' not recognized, "
Expand Down
57 changes: 53 additions & 4 deletions pelican/tests/test_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from jinja2.utils import generate_lorem_ipsum

import six
from six.moves.urllib.parse import urljoin

from pelican.contents import Article, Author, Category, Page, Static, Tag
from pelican.settings import DEFAULT_CONFIG
Expand Down Expand Up @@ -395,6 +396,54 @@ def test_intrasite_link_more(self):
'</blockquote>'
)

def test_intrasite_link_absolute(self):
    """Check intrasite links when STATIC_URL and ARTICLE_URL are absolute.

    Links whose target URL is already absolute must be emitted as-is,
    while links with a relative target are still resolved against the
    site URL passed to ``get_content``.
    """
    page_args = self.page_kwargs.copy()
    settings = get_settings(
        STATIC_URL='http://static.cool.site/{path}',
        ARTICLE_URL='http://blog.cool.site/{slug}.html')
    page_args['settings'] = settings
    page_args['source_path'] = 'content'

    # Register one static file and one article as link targets.
    poster = Static('',
                    settings=settings,
                    source_path='images/poster.jpg')
    article = Article('',
                      settings=settings,
                      metadata={'slug': 'article',
                                'title': 'Article'})
    page_args['context']['filenames'] = {
        'images/poster.jpg': poster,
        'article.rst': article,
    }

    # (source markup, expected rendered markup), checked in this order.
    cases = (
        # An article link ends up on the blog domain.
        ('<a href="{filename}article.rst">Article</a>',
         '<a href="http://blog.cool.site/article.html">Article</a>'),
        # The index link stays on the main site.
        ('<a href="{index}">Index</a>',
         '<a href="http://cool.site/index.html">Index</a>'),
        # An image link ends up on the static domain.
        ('<img src="{filename}/images/poster.jpg"/>',
         '<img src="http://static.cool.site/images/poster.jpg"/>'),
    )
    for markup, expected in cases:
        page_args['content'] = markup
        rendered = Page(**page_args).get_content('http://cool.site')
        self.assertEqual(rendered, expected)


def test_intrasite_link_markdown_spaces(self):
# Markdown introduces %20 instead of spaces, this tests that
# we support markdown doing this.
Expand Down Expand Up @@ -734,8 +783,8 @@ def test_index_link_syntax(self):
self.assertNotEqual(content, html)

expected_html = ('<a href="' +
'/'.join((self.settings['SITEURL'],
self.settings['INDEX_SAVE_AS'])) +
urljoin(self.settings['SITEURL'],
self.settings['INDEX_SAVE_AS']) +
'">link</a>')
self.assertEqual(content, expected_html)

Expand Down Expand Up @@ -788,7 +837,7 @@ def test_index_link_syntax_with_spaces(self):
self.assertNotEqual(content, html)

expected_html = ('<a href = "' +
'/'.join((self.settings['SITEURL'],
self.settings['INDEX_SAVE_AS'])) +
urljoin(self.settings['SITEURL'],
self.settings['INDEX_SAVE_AS']) +
'">link</a>')
self.assertEqual(content, expected_html)

0 comments on commit 7eaae71

Please sign in to comment.