diff --git a/docs/specs/escape.yml b/docs/specs/escape.yml index dd7cc76b1eb..18bc623d957 100644 --- a/docs/specs/escape.yml +++ b/docs/specs/escape.yml @@ -13,6 +13,7 @@ repos: 'a b.md': a&b.md: a&b.md: + a – b c/ab.md: a++.png: outputs: links.json: | @@ -30,6 +31,7 @@ outputs: .publish.json: | { "files": [ + { "url": "/a – b c/ab", "path": "a – b c/ab.json" }, { "url": "/a b", "path": "a b.json" }, { "url": "/a&b", "path": "a&b.json" }, { "url": "/a&b", "path": "a&b.json" }, @@ -39,8 +41,8 @@ outputs: } 'a b.json': | { - "canonical_url": "https://docs.com/en-us/a b", - "gitcommit": "https://github.com/escape/markdown/blob/c4c07cb69d114fa9ba16e55763d47f2344083dad/a b.md", + "canonical_url": "https://docs.com/en-us/a%20b", + "gitcommit": "https://github.com/escape/markdown/blob/03cb6e2fa19c69303dd589df63df0c40448b241b/a b.md", "content_git_url": "https://github.com/escape/markdown/blob/main/a b.md", "original_content_git_url": "https://github.com/escape/markdown/blob/main/a b.md", "original_content_git_url_template": "{repo}/blob/{branch}/a b.md" @@ -48,7 +50,7 @@ outputs: a&b.json: | { "canonical_url": "https://docs.com/en-us/a&b", - "gitcommit": "https://github.com/escape/markdown/blob/c4c07cb69d114fa9ba16e55763d47f2344083dad/a&b.md", + "gitcommit": "https://github.com/escape/markdown/blob/03cb6e2fa19c69303dd589df63df0c40448b241b/a&b.md", "content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md", "original_content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md", "original_content_git_url_template": "{repo}/blob/{branch}/a&b.md" @@ -56,11 +58,15 @@ outputs: a&b.json: | { "canonical_url": "https://docs.com/en-us/a&b", - "gitcommit": "https://github.com/escape/markdown/blob/c4c07cb69d114fa9ba16e55763d47f2344083dad/a&b.md", + "gitcommit": "https://github.com/escape/markdown/blob/03cb6e2fa19c69303dd589df63df0c40448b241b/a&b.md", "content_git_url": "https://github.com/escape/markdown/blob/main/a&b.md", "original_content_git_url": 
"https://github.com/escape/markdown/blob/main/a&b.md", "original_content_git_url_template": "{repo}/blob/{branch}/a&b.md" } + a – b c/ab.json: | + { + "canonical_url": "https://docs.com/en-us/a%20%e2%80%93%20b%20c/ab" + } a++.png: .errors.log: | { "message_severity":"warning", "code":"file-not-found", "file":"links.md", "line":5 } @@ -184,3 +190,40 @@ outputs: { "conceptual": "

the ServiceFuture<String> object\nObservable<Void>

\n", } +--- +# Canonical url for ugly url +inputs: + docfx.yml: | + urlType: Ugly + a/index.md: | + a/main.md: | +outputs: + a/index.json: | + { + "canonical_url": "https://docs.com/en-us/a/index.html", + } + a/main.json: | + { + "canonical_url": "https://docs.com/en-us/a/main.html", + } +--- +# Escape Reserved Characters and Do not escape Unreserved Characters in Canonical url +inputs: + docfx.yml: + a/reserved-#[]@.md: | + a/reserved2-!$&'()+,;=.md: | + a/unreserved-._~%.md: | +outputs: + a/reserved-#[]@.json : | + { + "canonical_url": "https://docs.com/en-us/a/reserved-%23%5b%5d@", + } + a/reserved2-!$&'()+,;=.json: | + { + "canonical_url": "https://docs.com/en-us/a/reserved2-!$&'()+,;=", + } + a/unreserved-._~%.json: | + { + "canonical_url": "https://docs.com/en-us/a/unreserved-._~%25", + } +--- diff --git a/docs/specs/moniker.yml b/docs/specs/moniker.yml index 2fde488e032..1b508f5fa8a 100644 --- a/docs/specs/moniker.yml +++ b/docs/specs/moniker.yml @@ -2217,4 +2217,175 @@ repos: outputs: 10b6c9f144d519f0f8a48bc02c54ca34/a.json: | {"conceptual": "
\n

moniker: tfs-2013

\n
\n"} - +--- +# Append latest moniker to canonical url if configured +inputs: + docfx.yml: | + isCanonicalUrlWithMoniker: true + monikerRange: + 'docs/v1/**': '< netcore-2.0' + 'docs/v2/**': '>= netcore-2.0' + hostName: docs.com + basePath: /docs + routes: + docs/: . + docs/v1/: . + docs/v2/: . + monikerDefinition: + monikers: + - { moniker_name: netcore-1.0, product_name: .NET Core } + - { moniker_name: netcore-1.1, product_name: .NET Core } + - { moniker_name: netcore-2.0, product_name: .NET Core } + - { moniker_name: netcore-2.1, product_name: .NET Core } + docs/v1/a.md: | + Moniker: netcore-1.0, netcore-1.1 + docs/v2/b.md: | + Moniker: netcore-2.0, netcore-2.1 + docs/v2/c.yml: | + ### YamlMime:TestData + metadata: + monikers: + - netcore-2.0 + - netcore-2.1 + _themes/ContentTemplate/schemas/TestData.schema.json: | + { + "properties": { + "metadata": { + "properties": { + "monikers": { "type": "array" } + } + } + } + } + _themes/ContentTemplate/TestData.html.primary.tmpl: +outputs: + docs/17b9fe681514513cbf7d5c90e32f107a/a.json: | + { + "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1" + } + docs/ed8f7746ec932ae7c9f595c1f2c97d5a/b.json: | + { + "canonical_url": "https://docs.com/en-us/docs/b?view=netcore-2.1" + } + docs/ed8f7746ec932ae7c9f595c1f2c97d5a/c.json: | + { + "metadata": { + "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1" + } + } + .publish.json: | + { + "files":[ + { + "url":"/docs/a", + "moniker_group":"17b9fe681514513cbf7d5c90e32f107a", + "config_moniker_range": "< netcore-2.0", + "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1" + }, + { + "url":"/docs/b", + "moniker_group":"ed8f7746ec932ae7c9f595c1f2c97d5a", + "config_moniker_range": ">= netcore-2.0", + "canonical_url": "https://docs.com/en-us/docs/b?view=netcore-2.1" + }, + { + "url":"/docs/c", + "moniker_group":"ed8f7746ec932ae7c9f595c1f2c97d5a", + "config_moniker_range": ">= netcore-2.0", + "canonical_url": 
"https://docs.com/en-us/docs/c?view=netcore-2.1" + } + ], + "moniker_groups": { + "ed8f7746ec932ae7c9f595c1f2c97d5a": ["netcore-2.0","netcore-2.1"], + "17b9fe681514513cbf7d5c90e32f107a": ["netcore-1.0","netcore-1.1"] + } + } +--- +# Overwrite canonical url for page json output +inputs: + docfx.yml: | + outputType: pageJson + isCanonicalUrlWithMoniker: true + monikerRange: + 'docs/v1/**': '< netcore-2.0' + 'docs/v2/**': '>= netcore-2.0' + hostName: docs.com + basePath: /docs + routes: + docs/: . + docs/v1/: . + docs/v2/: . + monikerDefinition: + monikers: + - { moniker_name: netcore-1.0, product_name: .NET Core } + - { moniker_name: netcore-1.1, product_name: .NET Core } + - { moniker_name: netcore-2.0, product_name: .NET Core } + - { moniker_name: netcore-2.1, product_name: .NET Core } + docs/v1/a.md: | + Moniker: netcore-1.0, netcore-1.1 + docs/v2/c.yml: | + ### YamlMime:TestData + metadata: + monikers: + - netcore-2.0 + - netcore-2.1 + _themes/ContentTemplate/schemas/TestData.schema.json: | + { + "properties": { + "metadata": { + "properties": { + "monikers": { "type": "array" } + } + } + } + } + _themes/ContentTemplate/TestData.html.primary.tmpl: +outputs: + docs/17b9fe681514513cbf7d5c90e32f107a/a.raw.page.json: | + { + "rawMetadata": { + "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1", + "_op_canonicalUrl": "https://docs.com/en-us/docs/a?view=netcore-1.1", + } + } + docs/17b9fe681514513cbf7d5c90e32f107a/a.mta.json: | + { + "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1", + } + docs/ed8f7746ec932ae7c9f595c1f2c97d5a/c.raw.page.json: | + { + "rawMetadata": { + "metadata": { + "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1", + }, + "_op_canonicalUrl": "https://docs.com/en-us/docs/c?view=netcore-2.1" + } + } + docs/ed8f7746ec932ae7c9f595c1f2c97d5a/c.mta.json: | + { + "metadata": { + "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1", + } + } + .publish.json: | + { + "files":[ + { + "url":"/docs/a", 
+ "moniker_group":"17b9fe681514513cbf7d5c90e32f107a", + "config_moniker_range": "< netcore-2.0", + "canonical_url": "https://docs.com/en-us/docs/a?view=netcore-1.1" + }, + { + "url":"/docs/c", + "moniker_group":"ed8f7746ec932ae7c9f595c1f2c97d5a", + "config_moniker_range": ">= netcore-2.0", + "canonical_url": "https://docs.com/en-us/docs/c?view=netcore-2.1" + } + ], + "moniker_groups": { + "ed8f7746ec932ae7c9f595c1f2c97d5a": ["netcore-2.0","netcore-2.1"], + "17b9fe681514513cbf7d5c90e32f107a": ["netcore-1.0","netcore-1.1"] + } + } +--- diff --git a/src/docfx/build/DocsetBuilder.cs b/src/docfx/build/DocsetBuilder.cs index 57cd989a24d..34214a45cfe 100644 --- a/src/docfx/build/DocsetBuilder.cs +++ b/src/docfx/build/DocsetBuilder.cs @@ -161,7 +161,7 @@ public void Build(string[]? files) var publishModelBuilder = new PublishModelBuilder(_config, _errors, _monikerProvider, _buildOptions, _sourceMap, _documentProvider, _contributionProvider); var resourceBuilder = new ResourceBuilder(_input, _documentProvider, _config, output, publishModelBuilder); var learnHierarchyBuilder = new LearnHierarchyBuilder(_contentValidator); - var pageBuilder = new PageBuilder(_config, _buildOptions, _input, output, _documentProvider, _metadataProvider, _monikerProvider, _templateEngine, _tocMap, _linkResolver, _xrefResolver, _contributionProvider, _bookmarkValidator, publishModelBuilder, _contentValidator, _metadataValidator, _markdownEngine, _redirectionProvider, _jsonSchemaTransformer, learnHierarchyBuilder); + var pageBuilder = new PageBuilder(_config, _buildOptions, _input, output, _documentProvider, _metadataProvider, _monikerProvider, _publishUrlMap, _templateEngine, _tocMap, _linkResolver, _xrefResolver, _contributionProvider, _bookmarkValidator, publishModelBuilder, _contentValidator, _metadataValidator, _markdownEngine, _redirectionProvider, _jsonSchemaTransformer, learnHierarchyBuilder); var tocBuilder = new TocBuilder(_config, _tocLoader, _contentValidator, _metadataProvider, 
_metadataValidator, _documentProvider, _monikerProvider, publishModelBuilder, _templateEngine, output); var redirectionBuilder = new RedirectionBuilder(publishModelBuilder, _redirectionProvider, _documentProvider); diff --git a/src/docfx/build/document/DocumentProvider.cs b/src/docfx/build/document/DocumentProvider.cs index c922811ba21..b7621f919e2 100644 --- a/src/docfx/build/document/DocumentProvider.cs +++ b/src/docfx/build/document/DocumentProvider.cs @@ -256,6 +256,7 @@ private static string PathToRelativeUrl(string path, ContentType contentType, Ur } if (urlType == UrlType.Docs && contentType != ContentType.Toc) { + // remove extension var i = url.LastIndexOf('.'); return i >= 0 ? url[..i] : url; } @@ -263,13 +264,14 @@ private static string PathToRelativeUrl(string path, ContentType contentType, Ur return url; } - /// - /// In docs, canonical URL is later overwritten by template JINT code. - /// TODO: need to handle the logic difference when template code is removed. - /// - private string GetCanonicalUrl(string siteUrl) + private string GetCanonicalUrl(string? 
siteUrl) { - return $"https://{_config.HostName}/{_buildOptions.Locale}{siteUrl}"; + if (siteUrl == null) + { + return ""; + } + + return $"https://{_config.HostName}/{_buildOptions.Locale}{UrlUtility.EscapeUrlPath(siteUrl).ToLowerInvariant()}"; } private PathString ApplyRoutes(PathString path) diff --git a/src/docfx/build/page/PageBuilder.cs b/src/docfx/build/page/PageBuilder.cs index cc658a709c3..6463d14a38c 100644 --- a/src/docfx/build/page/PageBuilder.cs +++ b/src/docfx/build/page/PageBuilder.cs @@ -16,6 +16,7 @@ internal class PageBuilder private readonly DocumentProvider _documentProvider; private readonly MetadataProvider _metadataProvider; private readonly MonikerProvider _monikerProvider; + private readonly PublishUrlMap _publishUrlMap; private readonly TemplateEngine _templateEngine; private readonly TocMap _tocMap; private readonly LinkResolver _linkResolver; @@ -38,6 +39,7 @@ public PageBuilder( DocumentProvider documentProvider, MetadataProvider metadataProvider, MonikerProvider monikerProvider, + PublishUrlMap publishUrlMap, TemplateEngine templateEngine, TocMap tocMap, LinkResolver linkResolver, @@ -59,6 +61,7 @@ public PageBuilder( _documentProvider = documentProvider; _metadataProvider = metadataProvider; _monikerProvider = monikerProvider; + _publishUrlMap = publishUrlMap; _templateEngine = templateEngine; _tocMap = tocMap; _linkResolver = linkResolver; @@ -232,7 +235,9 @@ private SystemMetadata CreateSystemMetadata(ErrorBuilder errors, FilePath file, _contributionProvider.GetContributionInfo(errors, file, userMetadata.Author); systemMetadata.Locale = _buildOptions.Locale; - systemMetadata.CanonicalUrl = userMetadata.PageType != "profile" ? 
_documentProvider.GetCanonicalUrl(file) : null; + + systemMetadata.CanonicalUrl = GetCanonicalUrlWithMonikerIfNecessary(userMetadata.PageType, file); + systemMetadata.Path = _documentProvider.GetSitePath(file); systemMetadata.Rel = PathUtility.GetRelativePathToRoot(systemMetadata.Path); systemMetadata.CanonicalUrlPrefix = UrlUtility.Combine($"https://{_config.HostName}", systemMetadata.Locale, _config.BasePath) + "/"; @@ -261,6 +266,27 @@ private SystemMetadata CreateSystemMetadata(ErrorBuilder errors, FilePath file, return systemMetadata; } + private string? GetCanonicalUrlWithMonikerIfNecessary(string? pageType, FilePath file) + { + if (pageType == "profile") + { + return null; + } + + var canonicalUrl = _documentProvider.GetCanonicalUrl(file); + + if (_config.IsCanonicalUrlWithMoniker) + { + var canonicalVersion = _publishUrlMap.GetCanonicalVersion(file); + if (!string.IsNullOrEmpty(canonicalVersion)) + { + return $"{canonicalUrl}?view={UrlUtility.EscapeUrlQueryOrFragment(canonicalVersion)}"; + } + } + + return canonicalUrl; + } + private JObject LoadMarkdown(ErrorBuilder errors, FilePath file) { var content = _input.ReadString(file); diff --git a/src/docfx/config/Config.cs b/src/docfx/config/Config.cs index 6afb78dd689..564e8a5bbea 100644 --- a/src/docfx/config/Config.cs +++ b/src/docfx/config/Config.cs @@ -351,6 +351,8 @@ internal class Config : PreloadConfig public string AlternativeHostName { get; init; } = string.Empty; + public bool IsCanonicalUrlWithMoniker { get; init; } + public IEnumerable> GetFileReferences() { foreach (var url in Xref) diff --git a/src/docfx/lib/UrlUtility.cs b/src/docfx/lib/UrlUtility.cs index da25bb02ecc..03ef1eb1a95 100644 --- a/src/docfx/lib/UrlUtility.cs +++ b/src/docfx/lib/UrlUtility.cs @@ -29,6 +29,68 @@ public static string SanitizeUrl(string? url) return Regex.Replace(url ?? 
"", @"^(https:\/\/.+?.blob.core.windows.net\/)(.*)\?(.*)$", match => $"{match.Groups[1]}{match.Groups[2]}"); } + /// + /// Escapse the characters of URL path and keep the most of allowable characters unescaped. + /// refer to https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3 + /// refer to workitem: https://dev.azure.com/ceapex/Engineering/_workitems/edit/126389 + /// The following reserved characters are allowed in path: + /// %21 %24 %26 %27 %28 %29 %2A %2B %2C %3B %3D %3A %40 + /// ! $ & ' ( ) * + , ; = : @ + /// + /// path of URL + /// escapsed path + public static string EscapeUrlPath(string urlPath) + { + var segments = urlPath.Split(new char[] { '\\', '/' }); + for (var i = 0; i < segments.Length; i++) + { + segments[i] = Uri.EscapeDataString(segments[i]) + .Replace("%21", "!") + .Replace("%24", "$") + .Replace("%26", "&") + .Replace("%27", "'") + .Replace("%28", "(") + .Replace("%29", ")") + .Replace("%2A", "*") + .Replace("%2B", "+") + .Replace("%2C", ",") + .Replace("%3B", ";") + .Replace("%3D", "=") + .Replace("%3A", ":") + .Replace("%40", "@"); + } + return string.Join('/', segments); + } + + /// + /// Escapse the characters of URL query or fragment and keep the most of allowable characters unescaped. + /// refer to https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4 + /// The following reserved characters are allowed in query: + /// %21 %24 %26 %27 %28 %29 %2A %2B %2C %3B %3D %3A %40 %2F %3F + /// ! $ & ' ( ) * + , ; = : @ / ? + /// + /// query or fragment of URL without ? 
and # + /// escaped query or fragment + public static string EscapeUrlQueryOrFragment(string urlQuery) + { + return Uri.EscapeDataString(urlQuery) + .Replace("%21", "!") + .Replace("%24", "$") + .Replace("%26", "&") + .Replace("%27", "'") + .Replace("%28", "(") + .Replace("%29", ")") + .Replace("%2A", "*") + .Replace("%2B", "+") + .Replace("%2C", ",") + .Replace("%3B", ";") + .Replace("%3D", "=") + .Replace("%3A", ":") + .Replace("%40", "@") + .Replace("%2F", "/") + .Replace("%3F", "?"); + } + /// /// Split href to path, fragment and query /// @@ -352,9 +414,9 @@ public static string GetBookmark(string uid) var ch = char.ToLowerInvariant(uid[i]); switch (ch) { - case '"'or '\'' or '%' or '^' or '\\': + case '"' or '\'' or '%' or '^' or '\\': continue; - case '<'or '[': + case '<' or '[': sb.Append('('); break; case '>' or ']': diff --git a/src/docfx/template/TemplateEngine.cs b/src/docfx/template/TemplateEngine.cs index 73135c4a2d3..822f100e0cb 100644 --- a/src/docfx/template/TemplateEngine.cs +++ b/src/docfx/template/TemplateEngine.cs @@ -173,6 +173,8 @@ public void CopyAssetsToOutput(Output output, bool selfContained = true) templateMetadata["xrefs"] = pageModel["metadata"]!["xrefs"]; } + OverWriteCanonicalUrl(pageModel, templateMetadata); + if (JsonSchemaProvider.IsLandingData(mime)) { templateMetadata.Remove("conceptual"); @@ -192,6 +194,18 @@ public void CopyAssetsToOutput(Output output, bool selfContained = true) return (model, metadata); } + // overwrite canonical_url generated by JINT template + // remove the following code after docs.ui removes its canonical_url generation logic + private static void OverWriteCanonicalUrl(JObject pageModel, JObject templateMetadata) + { + var canonicalUrlByDocFx = pageModel["canonical_url"] ?? 
pageModel["metadata"]?["canonical_url"]; + if (canonicalUrlByDocFx != null) + { + templateMetadata["canonical_url"] = canonicalUrlByDocFx; + templateMetadata["_op_canonicalUrl"] = canonicalUrlByDocFx; + } + } + private string ProcessHtml(FilePath file, string html) { var bookmarks = new HashSet(); diff --git a/test/docfx.Test/lib/UrlUtilityTest.cs b/test/docfx.Test/lib/UrlUtilityTest.cs index a6b368aaed5..f50fe11b310 100644 --- a/test/docfx.Test/lib/UrlUtilityTest.cs +++ b/test/docfx.Test/lib/UrlUtilityTest.cs @@ -217,4 +217,24 @@ public static void RemoveHostName(string url, string hostName, bool removeLocale var result = UrlUtility.RemoveLeadingHostName(url, hostName, removeLocale); Assert.Equal(expected, result); } + + [Theory] + [InlineData("/abc123-._~/def", "/abc123-._~/def")] + [InlineData("/en-us/-._~!$&'()*+,;=:@", "/en-us/-._~!$&'()*+,;=:@")] + [InlineData("/en-us/%[]", "/en-us/%25%5B%5D")] + public static void EscapeUrlPathTest(string path, string expected) + { + var result = UrlUtility.EscapeUrlPath(path); + Assert.Equal(expected, result); + } + + [Theory] + [InlineData("abc123-._~def", "abc123-._~def")] + [InlineData("-._~!$&'()*+,;=:@/?", "-._~!$&'()*+,;=:@/?")] + [InlineData("%[]", "%25%5B%5D")] + public static void EscapeUrlQueryTest(string queryOrFragment, string expected) + { + var result = UrlUtility.EscapeUrlQueryOrFragment(queryOrFragment); + Assert.Equal(expected, result); + } }