diff --git a/docs/specs/escape.yml b/docs/specs/escape.yml
index dd7cc76b1eb..18bc623d957 100644
--- a/docs/specs/escape.yml
+++ b/docs/specs/escape.yml
@@ -13,6 +13,7 @@ repos:
'a b.md':
a&b.md:
a&b.md:
+ a – b c/ab.md:
a++.png:
outputs:
links.json: |
@@ -30,6 +31,7 @@ outputs:
.publish.json: |
{
"files": [
+ { "url": "/a – b c/ab", "path": "a – b c/ab.json" },
{ "url": "/a b", "path": "a b.json" },
{ "url": "/a&b", "path": "a&b.json" },
{ "url": "/a&b", "path": "a&b.json" },
@@ -39,8 +41,8 @@ outputs:
}
'a b.json': |
{
- "canonical_url": "https://docs.com/en-us/a b",
- "gitcommit": "https://github.com/escape/markdown/blob/c4c07cb69d114fa9ba16e55763d47f2344083dad/a b.md",
+ "canonical_url": "https://docs.com/en-us/a%20b",
+ "gitcommit": "https://github.com/escape/markdown/blob/03cb6e2fa19c69303dd589df63df0c40448b241b/a b.md",
"content_git_url": "https://github.com/escape/markdown/blob/main/a b.md",
"original_content_git_url": "https://github.com/escape/markdown/blob/main/a b.md",
"original_content_git_url_template": "{repo}/blob/{branch}/a b.md"
@@ -48,7 +50,7 @@ outputs:
a&b.json: |
{
"canonical_url": "https://docs.com/en-us/a&b",
- "gitcommit": "https://github.com/escape/markdown/blob/c4c07cb69d114fa9ba16e55763d47f2344083dad/a&b.md",
+ "gitcommit": "https://github.com/escape/markdown/blob/03cb6e2fa19c69303dd589df63df0c40448b241b/a&b.md",
"content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md",
"original_content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md",
"original_content_git_url_template": "{repo}/blob/{branch}/a&b.md"
@@ -56,11 +58,15 @@ outputs:
a&b.json: |
{
"canonical_url": "https://docs.com/en-us/a&b",
- "gitcommit": "https://github.com/escape/markdown/blob/c4c07cb69d114fa9ba16e55763d47f2344083dad/a&b.md",
+ "gitcommit": "https://github.com/escape/markdown/blob/03cb6e2fa19c69303dd589df63df0c40448b241b/a&b.md",
"content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md",
"original_content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md",
"original_content_git_url_template": "{repo}/blob/{branch}/a&b.md"
}
+ a – b c/ab.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/a%20%e2%80%93%20b%20c/ab"
+ }
a++.png:
.errors.log: |
{ "message_severity":"warning", "code":"file-not-found", "file":"links.md", "line":5 }
@@ -184,3 +190,40 @@ outputs:
{
"conceptual": "
the ServiceFuture<String> object\nObservable<Void>
\n",
}
+---
+# Canonical url for ugly url
+inputs:
+ docfx.yml: |
+ urlType: Ugly
+ a/index.md: |
+ a/main.md: |
+outputs:
+ a/index.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/a/index.html",
+ }
+ a/main.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/a/main.html",
+ }
+---
+# Escape Reserved Characters and Do not escape Unreserved Characters in Canonical url
+inputs:
+ docfx.yml:
+ a/reserved-#[]@.md: |
+ a/reserved2-!$&'()+,;=.md: |
+ a/unreserved-._~%.md: |
+outputs:
+ a/reserved-#[]@.json : |
+ {
+ "canonical_url": "https://docs.com/en-us/a/reserved-%23%5b%5d@",
+ }
+ a/reserved2-!$&'()+,;=.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/a/reserved2-!$&'()+,;=",
+ }
+ a/unreserved-._~%.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/a/unreserved-._~%25",
+ }
+---
diff --git a/docs/specs/moniker.yml b/docs/specs/moniker.yml
index 2fde488e032..1b508f5fa8a 100644
--- a/docs/specs/moniker.yml
+++ b/docs/specs/moniker.yml
@@ -2217,4 +2217,175 @@ repos:
outputs:
10b6c9f144d519f0f8a48bc02c54ca34/a.json: |
{"conceptual": "\n"}
-
+---
+# Append latest moniker to canonical url if configured
+inputs:
+ docfx.yml: |
+ isCanonicalUrlWithMoniker: true
+ monikerRange:
+ 'docs/v1/**': '< netcore-2.0'
+ 'docs/v2/**': '>= netcore-2.0'
+ hostName: docs.com
+ basePath: /docs
+ routes:
+ docs/: .
+ docs/v1/: .
+ docs/v2/: .
+ monikerDefinition:
+ monikers:
+ - { moniker_name: netcore-1.0, product_name: .NET Core }
+ - { moniker_name: netcore-1.1, product_name: .NET Core }
+ - { moniker_name: netcore-2.0, product_name: .NET Core }
+ - { moniker_name: netcore-2.1, product_name: .NET Core }
+ docs/v1/a.md: |
+ Moniker: netcore-1.0, netcore-1.1
+ docs/v2/b.md: |
+ Moniker: netcore-2.0, netcore-2.1
+ docs/v2/c.yml: |
+ ### YamlMime:TestData
+ metadata:
+ monikers:
+ - netcore-2.0
+ - netcore-2.1
+ _themes/ContentTemplate/schemas/TestData.schema.json: |
+ {
+ "properties": {
+ "metadata": {
+ "properties": {
+ "monikers": { "type": "array" }
+ }
+ }
+ }
+ }
+ _themes/ContentTemplate/TestData.html.primary.tmpl:
+outputs:
+ docs/17b9fe681514513cbf7d5c90e32f107a/a.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1"
+ }
+ docs/ed8f7746ec932ae7c9f595c1f2c97d5a/b.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/docs/b?view=netcore-2.1"
+ }
+ docs/ed8f7746ec932ae7c9f595c1f2c97d5a/c.json: |
+ {
+ "metadata": {
+ "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1"
+ }
+ }
+ .publish.json: |
+ {
+ "files":[
+ {
+ "url":"/docs/a",
+ "moniker_group":"17b9fe681514513cbf7d5c90e32f107a",
+ "config_moniker_range": "< netcore-2.0",
+ "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1"
+ },
+ {
+ "url":"/docs/b",
+ "moniker_group":"ed8f7746ec932ae7c9f595c1f2c97d5a",
+ "config_moniker_range": ">= netcore-2.0",
+ "canonical_url": "https://docs.com/en-us/docs/b?view=netcore-2.1"
+ },
+ {
+ "url":"/docs/c",
+ "moniker_group":"ed8f7746ec932ae7c9f595c1f2c97d5a",
+ "config_moniker_range": ">= netcore-2.0",
+ "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1"
+ }
+ ],
+ "moniker_groups": {
+ "ed8f7746ec932ae7c9f595c1f2c97d5a": ["netcore-2.0","netcore-2.1"],
+ "17b9fe681514513cbf7d5c90e32f107a": ["netcore-1.0","netcore-1.1"]
+ }
+ }
+---
+# Overwrite canonical url for page json output
+inputs:
+ docfx.yml: |
+ outputType: pageJson
+ isCanonicalUrlWithMoniker: true
+ monikerRange:
+ 'docs/v1/**': '< netcore-2.0'
+ 'docs/v2/**': '>= netcore-2.0'
+ hostName: docs.com
+ basePath: /docs
+ routes:
+ docs/: .
+ docs/v1/: .
+ docs/v2/: .
+ monikerDefinition:
+ monikers:
+ - { moniker_name: netcore-1.0, product_name: .NET Core }
+ - { moniker_name: netcore-1.1, product_name: .NET Core }
+ - { moniker_name: netcore-2.0, product_name: .NET Core }
+ - { moniker_name: netcore-2.1, product_name: .NET Core }
+ docs/v1/a.md: |
+ Moniker: netcore-1.0, netcore-1.1
+ docs/v2/c.yml: |
+ ### YamlMime:TestData
+ metadata:
+ monikers:
+ - netcore-2.0
+ - netcore-2.1
+ _themes/ContentTemplate/schemas/TestData.schema.json: |
+ {
+ "properties": {
+ "metadata": {
+ "properties": {
+ "monikers": { "type": "array" }
+ }
+ }
+ }
+ }
+ _themes/ContentTemplate/TestData.html.primary.tmpl:
+outputs:
+ docs/17b9fe681514513cbf7d5c90e32f107a/a.raw.page.json: |
+ {
+ "rawMetadata": {
+ "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1",
+ "_op_canonicalUrl": "https://docs.com/en-us/docs/a?view=netcore-1.1",
+ }
+ }
+ docs/17b9fe681514513cbf7d5c90e32f107a/a.mta.json: |
+ {
+ "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1",
+ }
+ docs/ed8f7746ec932ae7c9f595c1f2c97d5a/c.raw.page.json: |
+ {
+ "rawMetadata": {
+ "metadata": {
+ "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1",
+ },
+ "_op_canonicalUrl": "https://docs.com/en-us/docs/c?view=netcore-2.1"
+ }
+ }
+ docs/ed8f7746ec932ae7c9f595c1f2c97d5a/c.mta.json: |
+ {
+ "metadata": {
+ "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1",
+ }
+ }
+ .publish.json: |
+ {
+ "files":[
+ {
+ "url":"/docs/a",
+ "moniker_group":"17b9fe681514513cbf7d5c90e32f107a",
+ "config_moniker_range": "< netcore-2.0",
+ "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1"
+ },
+ {
+ "url":"/docs/c",
+ "moniker_group":"ed8f7746ec932ae7c9f595c1f2c97d5a",
+ "config_moniker_range": ">= netcore-2.0",
+ "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1"
+ }
+ ],
+ "moniker_groups": {
+ "ed8f7746ec932ae7c9f595c1f2c97d5a": ["netcore-2.0","netcore-2.1"],
+ "17b9fe681514513cbf7d5c90e32f107a": ["netcore-1.0","netcore-1.1"]
+ }
+ }
+---
diff --git a/src/docfx/build/DocsetBuilder.cs b/src/docfx/build/DocsetBuilder.cs
index 57cd989a24d..34214a45cfe 100644
--- a/src/docfx/build/DocsetBuilder.cs
+++ b/src/docfx/build/DocsetBuilder.cs
@@ -161,7 +161,7 @@ public void Build(string[]? files)
var publishModelBuilder = new PublishModelBuilder(_config, _errors, _monikerProvider, _buildOptions, _sourceMap, _documentProvider, _contributionProvider);
var resourceBuilder = new ResourceBuilder(_input, _documentProvider, _config, output, publishModelBuilder);
var learnHierarchyBuilder = new LearnHierarchyBuilder(_contentValidator);
- var pageBuilder = new PageBuilder(_config, _buildOptions, _input, output, _documentProvider, _metadataProvider, _monikerProvider, _templateEngine, _tocMap, _linkResolver, _xrefResolver, _contributionProvider, _bookmarkValidator, publishModelBuilder, _contentValidator, _metadataValidator, _markdownEngine, _redirectionProvider, _jsonSchemaTransformer, learnHierarchyBuilder);
+ var pageBuilder = new PageBuilder(_config, _buildOptions, _input, output, _documentProvider, _metadataProvider, _monikerProvider, _publishUrlMap, _templateEngine, _tocMap, _linkResolver, _xrefResolver, _contributionProvider, _bookmarkValidator, publishModelBuilder, _contentValidator, _metadataValidator, _markdownEngine, _redirectionProvider, _jsonSchemaTransformer, learnHierarchyBuilder);
var tocBuilder = new TocBuilder(_config, _tocLoader, _contentValidator, _metadataProvider, _metadataValidator, _documentProvider, _monikerProvider, publishModelBuilder, _templateEngine, output);
var redirectionBuilder = new RedirectionBuilder(publishModelBuilder, _redirectionProvider, _documentProvider);
diff --git a/src/docfx/build/document/DocumentProvider.cs b/src/docfx/build/document/DocumentProvider.cs
index c922811ba21..b7621f919e2 100644
--- a/src/docfx/build/document/DocumentProvider.cs
+++ b/src/docfx/build/document/DocumentProvider.cs
@@ -256,6 +256,7 @@ private static string PathToRelativeUrl(string path, ContentType contentType, Ur
}
if (urlType == UrlType.Docs && contentType != ContentType.Toc)
{
+ // remove extension
var i = url.LastIndexOf('.');
return i >= 0 ? url[..i] : url;
}
@@ -263,13 +264,14 @@ private static string PathToRelativeUrl(string path, ContentType contentType, Ur
return url;
}
- ///
- /// In docs, canonical URL is later overwritten by template JINT code.
- /// TODO: need to handle the logic difference when template code is removed.
- ///
- private string GetCanonicalUrl(string siteUrl)
+ private string GetCanonicalUrl(string? siteUrl)
{
- return $"https://{_config.HostName}/{_buildOptions.Locale}{siteUrl}";
+ if (siteUrl == null)
+ {
+ return "";
+ }
+
+ return $"https://{_config.HostName}/{_buildOptions.Locale}{UrlUtility.EscapeUrlPath(siteUrl).ToLowerInvariant()}";
}
private PathString ApplyRoutes(PathString path)
diff --git a/src/docfx/build/page/PageBuilder.cs b/src/docfx/build/page/PageBuilder.cs
index cc658a709c3..6463d14a38c 100644
--- a/src/docfx/build/page/PageBuilder.cs
+++ b/src/docfx/build/page/PageBuilder.cs
@@ -16,6 +16,7 @@ internal class PageBuilder
private readonly DocumentProvider _documentProvider;
private readonly MetadataProvider _metadataProvider;
private readonly MonikerProvider _monikerProvider;
+ private readonly PublishUrlMap _publishUrlMap;
private readonly TemplateEngine _templateEngine;
private readonly TocMap _tocMap;
private readonly LinkResolver _linkResolver;
@@ -38,6 +39,7 @@ public PageBuilder(
DocumentProvider documentProvider,
MetadataProvider metadataProvider,
MonikerProvider monikerProvider,
+ PublishUrlMap publishUrlMap,
TemplateEngine templateEngine,
TocMap tocMap,
LinkResolver linkResolver,
@@ -59,6 +61,7 @@ public PageBuilder(
_documentProvider = documentProvider;
_metadataProvider = metadataProvider;
_monikerProvider = monikerProvider;
+ _publishUrlMap = publishUrlMap;
_templateEngine = templateEngine;
_tocMap = tocMap;
_linkResolver = linkResolver;
@@ -232,7 +235,9 @@ private SystemMetadata CreateSystemMetadata(ErrorBuilder errors, FilePath file,
_contributionProvider.GetContributionInfo(errors, file, userMetadata.Author);
systemMetadata.Locale = _buildOptions.Locale;
- systemMetadata.CanonicalUrl = userMetadata.PageType != "profile" ? _documentProvider.GetCanonicalUrl(file) : null;
+
+ systemMetadata.CanonicalUrl = GetCanonicalUrlWithMonikerIfNecessary(userMetadata.PageType, file);
+
systemMetadata.Path = _documentProvider.GetSitePath(file);
systemMetadata.Rel = PathUtility.GetRelativePathToRoot(systemMetadata.Path);
systemMetadata.CanonicalUrlPrefix = UrlUtility.Combine($"https://{_config.HostName}", systemMetadata.Locale, _config.BasePath) + "/";
@@ -261,6 +266,27 @@ private SystemMetadata CreateSystemMetadata(ErrorBuilder errors, FilePath file,
return systemMetadata;
}
+ private string? GetCanonicalUrlWithMonikerIfNecessary(string? pageType, FilePath file)
+ {
+ if (pageType == "profile")
+ {
+ return null;
+ }
+
+ var canonicalUrl = _documentProvider.GetCanonicalUrl(file);
+
+ if (_config.IsCanonicalUrlWithMoniker)
+ {
+ var canonicalVersion = _publishUrlMap.GetCanonicalVersion(file);
+ if (!string.IsNullOrEmpty(canonicalVersion))
+ {
+ return $"{canonicalUrl}?view={UrlUtility.EscapeUrlQueryOrFragment(canonicalVersion)}";
+ }
+ }
+
+ return canonicalUrl;
+ }
+
private JObject LoadMarkdown(ErrorBuilder errors, FilePath file)
{
var content = _input.ReadString(file);
diff --git a/src/docfx/config/Config.cs b/src/docfx/config/Config.cs
index 6afb78dd689..564e8a5bbea 100644
--- a/src/docfx/config/Config.cs
+++ b/src/docfx/config/Config.cs
@@ -351,6 +351,8 @@ internal class Config : PreloadConfig
public string AlternativeHostName { get; init; } = string.Empty;
+ public bool IsCanonicalUrlWithMoniker { get; init; }
+
public IEnumerable> GetFileReferences()
{
foreach (var url in Xref)
diff --git a/src/docfx/lib/UrlUtility.cs b/src/docfx/lib/UrlUtility.cs
index da25bb02ecc..03ef1eb1a95 100644
--- a/src/docfx/lib/UrlUtility.cs
+++ b/src/docfx/lib/UrlUtility.cs
@@ -29,6 +29,68 @@ public static string SanitizeUrl(string? url)
return Regex.Replace(url ?? "", @"^(https:\/\/.+?.blob.core.windows.net\/)(.*)\?(.*)$", match => $"{match.Groups[1]}{match.Groups[2]}");
}
+ /// <summary>
+ /// Escapes each segment of a URL path while keeping most of the characters allowed in a path unescaped.
+ /// refer to https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
+ /// refer to workitem: https://dev.azure.com/ceapex/Engineering/_workitems/edit/126389
+ /// The following reserved characters are allowed in path:
+ /// %21 %24 %26 %27 %28 %29 %2A %2B %2C %3B %3D %3A %40
+ /// ! $ &amp; ' ( ) * + , ; = : @
+ /// </summary>
+ /// <param name="urlPath">path of URL; segments may be separated by '/' or '\' and are re-joined with '/'</param>
+ /// <returns>escaped path</returns>
+ public static string EscapeUrlPath(string urlPath)
+ {
+ var segments = urlPath.Split(new char[] { '\\', '/' });
+ for (var i = 0; i < segments.Length; i++)
+ {
+ segments[i] = Uri.EscapeDataString(segments[i])
+ .Replace("%21", "!")
+ .Replace("%24", "$")
+ .Replace("%26", "&")
+ .Replace("%27", "'")
+ .Replace("%28", "(")
+ .Replace("%29", ")")
+ .Replace("%2A", "*")
+ .Replace("%2B", "+")
+ .Replace("%2C", ",")
+ .Replace("%3B", ";")
+ .Replace("%3D", "=")
+ .Replace("%3A", ":")
+ .Replace("%40", "@");
+ }
+ return string.Join('/', segments);
+ }
+
+ /// <summary>
+ /// Escapes the characters of a URL query or fragment while keeping most of the allowable characters unescaped.
+ /// refer to https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
+ /// The following reserved characters are allowed in query:
+ /// %21 %24 %26 %27 %28 %29 %2A %2B %2C %3B %3D %3A %40 %2F %3F
+ /// ! $ &amp; ' ( ) * + , ; = : @ / ?
+ /// </summary>
+ /// <param name="urlQuery">query or fragment of URL without ? and #</param>
+ /// <returns>escaped query or fragment</returns>
+ public static string EscapeUrlQueryOrFragment(string urlQuery)
+ {
+ return Uri.EscapeDataString(urlQuery)
+ .Replace("%21", "!")
+ .Replace("%24", "$")
+ .Replace("%26", "&")
+ .Replace("%27", "'")
+ .Replace("%28", "(")
+ .Replace("%29", ")")
+ .Replace("%2A", "*")
+ .Replace("%2B", "+")
+ .Replace("%2C", ",")
+ .Replace("%3B", ";")
+ .Replace("%3D", "=")
+ .Replace("%3A", ":")
+ .Replace("%40", "@")
+ .Replace("%2F", "/")
+ .Replace("%3F", "?");
+ }
+
///
/// Split href to path, fragment and query
///
@@ -352,9 +414,9 @@ public static string GetBookmark(string uid)
var ch = char.ToLowerInvariant(uid[i]);
switch (ch)
{
- case '"'or '\'' or '%' or '^' or '\\':
+ case '"' or '\'' or '%' or '^' or '\\':
continue;
- case '<'or '[':
+ case '<' or '[':
sb.Append('(');
break;
case '>' or ']':
diff --git a/src/docfx/template/TemplateEngine.cs b/src/docfx/template/TemplateEngine.cs
index 73135c4a2d3..822f100e0cb 100644
--- a/src/docfx/template/TemplateEngine.cs
+++ b/src/docfx/template/TemplateEngine.cs
@@ -173,6 +173,8 @@ public void CopyAssetsToOutput(Output output, bool selfContained = true)
templateMetadata["xrefs"] = pageModel["metadata"]!["xrefs"];
}
+ OverWriteCanonicalUrl(pageModel, templateMetadata);
+
if (JsonSchemaProvider.IsLandingData(mime))
{
templateMetadata.Remove("conceptual");
@@ -192,6 +194,18 @@ public void CopyAssetsToOutput(Output output, bool selfContained = true)
return (model, metadata);
}
+ // Overwrites the canonical_url generated by the JINT template: copies pageModel's canonical_url (top level, else under "metadata") into templateMetadata's canonical_url and _op_canonicalUrl.
+ // Remove the following code after docs.ui removes its canonical_url generation logic.
+ private static void OverWriteCanonicalUrl(JObject pageModel, JObject templateMetadata)
+ {
+ var canonicalUrlByDocFx = pageModel["canonical_url"] ?? pageModel["metadata"]?["canonical_url"];
+ if (canonicalUrlByDocFx != null)
+ {
+ templateMetadata["canonical_url"] = canonicalUrlByDocFx;
+ templateMetadata["_op_canonicalUrl"] = canonicalUrlByDocFx;
+ }
+ }
+
private string ProcessHtml(FilePath file, string html)
{
var bookmarks = new HashSet();
diff --git a/test/docfx.Test/lib/UrlUtilityTest.cs b/test/docfx.Test/lib/UrlUtilityTest.cs
index a6b368aaed5..f50fe11b310 100644
--- a/test/docfx.Test/lib/UrlUtilityTest.cs
+++ b/test/docfx.Test/lib/UrlUtilityTest.cs
@@ -217,4 +217,24 @@ public static void RemoveHostName(string url, string hostName, bool removeLocale
var result = UrlUtility.RemoveLeadingHostName(url, hostName, removeLocale);
Assert.Equal(expected, result);
}
+
+ [Theory]
+ [InlineData("/abc123-._~/def", "/abc123-._~/def")]
+ [InlineData("/en-us/-._~!$&'()*+,;=:@", "/en-us/-._~!$&'()*+,;=:@")]
+ [InlineData("/en-us/%[]", "/en-us/%25%5B%5D")]
+ public static void EscapeUrlPathTest(string path, string expected)
+ {
+ var result = UrlUtility.EscapeUrlPath(path);
+ Assert.Equal(expected, result);
+ }
+
+ [Theory]
+ [InlineData("abc123-._~def", "abc123-._~def")]
+ [InlineData("-._~!$&'()*+,;=:@/?", "-._~!$&'()*+,;=:@/?")]
+ [InlineData("%[]", "%25%5B%5D")]
+ public static void EscapeUrlQueryTest(string queryOrFragment, string expected)
+ {
+ var result = UrlUtility.EscapeUrlQueryOrFragment(queryOrFragment);
+ Assert.Equal(expected, result);
+ }
}