From 7c5132afecf42b916aa6b0c9aba03e7da21e2464 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Mon, 18 Nov 2024 08:06:35 +0100 Subject: [PATCH 01/22] Separate React 17 --- lib/docs/scrapers/react.rb | 47 ++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb index eeececf621..7ef60cc19b 100644 --- a/lib/docs/scrapers/react.rb +++ b/lib/docs/scrapers/react.rb @@ -2,37 +2,40 @@ module Docs class React < UrlScraper self.name = 'React' self.type = 'simple' - self.release = '18.2.0' - self.base_url = 'https://reactjs.org/docs/' - self.root_path = 'hello-world.html' self.links = { - home: 'https://reactjs.org/', + home: 'https://react.dev/', code: 'https://github.com/facebook/react' } - html_filters.push 'react/entries', 'react/clean_html' + version '17' do + self.release = '17.0.2' + self.base_url = 'https://17.reactjs.org/docs/' + self.root_path = 'hello-world.html' + html_filters.push 'react/entries', 'react/clean_html' - options[:skip] = %w( - codebase-overview.html - design-principles.html - how-to-contribute.html - implementation-notes.html - ) + options[:skip] = %w( + codebase-overview.html + design-principles.html + how-to-contribute.html + implementation-notes.html + ) - options[:replace_paths] = { - 'more-about-refs.html' => 'refs-and-the-dom.html', - 'interactivity-and-dynamic-uis.html' => 'state-and-lifecycle.html', - 'working-with-the-browser.html' => 'refs-and-the-dom.html', - 'top-level-api.html' => 'react-api.html', - } + options[:replace_paths] = { + 'more-about-refs.html' => 'refs-and-the-dom.html', + 'interactivity-and-dynamic-uis.html' => 'state-and-lifecycle.html', + 'working-with-the-browser.html' => 'refs-and-the-dom.html', + 'top-level-api.html' => 'react-api.html', + } + + options[:attribution] = <<-HTML + © 2013–present Facebook Inc.
+ Licensed under the Creative Commons Attribution 4.0 International Public License. + HTML + end - options[:attribution] = <<-HTML - © 2013–present Facebook Inc.
- Licensed under the Creative Commons Attribution 4.0 International Public License. - HTML def get_latest_version(opts) - doc = fetch_doc('https://reactjs.org/docs/getting-started.html', opts) + doc = fetch_doc('https://react.dev/', opts) doc.at_css('a[href="/versions"]').content.strip[1..-1] end end From 75cce72b3f51307ae2f7ae642e06b333a039c1de Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 13:11:35 +0100 Subject: [PATCH 02/22] basic scraping for Reference --- lib/docs/filters/react/clean_html_react_dev.rb | 11 +++++++++++ lib/docs/filters/react/entries_react_dev.rb | 13 +++++++++++++ lib/docs/scrapers/react.rb | 14 +++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 lib/docs/filters/react/clean_html_react_dev.rb create mode 100644 lib/docs/filters/react/entries_react_dev.rb diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb new file mode 100644 index 0000000000..6e1e9f5cdd --- /dev/null +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -0,0 +1,11 @@ +module Docs + class React + class CleanHtmlReactDevFilter < Filter + def call + @doc = at_css('article') + + doc + end + end + end +end diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb new file mode 100644 index 0000000000..5807985de0 --- /dev/null +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -0,0 +1,13 @@ +module Docs + class React + class EntriesReactDevFilter < Docs::EntriesFilter + def get_name + at_css('article h1').content + end + + def get_type + return 'TODO add types' + end + end + end +end diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb index 7ef60cc19b..02133b177e 100644 --- a/lib/docs/scrapers/react.rb +++ b/lib/docs/scrapers/react.rb @@ -7,6 +7,19 @@ class React < UrlScraper code: 'https://github.com/facebook/react' } + version do + self.release = '18.3.1' + # TODO add /learn + self.base_url = 'https://react.dev/reference' + + html_filters.push 'react/entries_react_dev', 'react/clean_html_react_dev' + + options[:attribution] = <<-HTML + © 2013–present Facebook Inc.
+ Licensed under the Creative Commons Attribution 4.0 International Public License. + HTML + end + version '17' do self.release = '17.0.2' self.base_url = 'https://17.reactjs.org/docs/' @@ -33,7 +46,6 @@ class React < UrlScraper HTML end - def get_latest_version(opts) doc = fetch_doc('https://react.dev/', opts) doc.at_css('a[href="/versions"]').content.strip[1..-1] From 60c02cf69f65b8e1506cd701dd7917bf7363f7a4 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 14:00:20 +0100 Subject: [PATCH 03/22] remove stylings, breadcrumbs --- lib/docs/filters/react/clean_html_react_dev.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 6e1e9f5cdd..07c47726fc 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -4,6 +4,21 @@ class CleanHtmlReactDevFilter < Filter def call @doc = at_css('article') + # Remove breadcrumbs before h1 + css('h1').each do |node| + node.previous.remove + end + + # Remove prev-next links + css('div.grid > a').each do |node| + node.remove + end + + # Remove styling divs + css('div[class*="ps-0"]', 'div[class*="mx-"]', 'div[class*="px-"]', 'div[class=""]', 'div.cm-line').each do |node| + node.before(node.children).remove + end + doc end end From 6cff40e729360f3f883556bb438dbb54c09f956c Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 20:21:09 +0100 Subject: [PATCH 04/22] more styling removal --- .../filters/react/clean_html_react_dev.rb | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 07c47726fc..fb77fa144a 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -9,13 +9,27 @@ def call node.previous.remove end - # Remove prev-next links - css('div.grid > a').each do |node| + remove_selectors = [ + 'div.grid > a', # prev-next links + 'button', # "show more" etc. buttons + 'div.order-last', # code iframe containers + 'a[title="Open in CodeSandbox"]', # codesandbox links + ] + css(*remove_selectors).each do |node| node.remove end - # Remove styling divs - css('div[class*="ps-0"]', 'div[class*="mx-"]', 'div[class*="px-"]', 'div[class=""]', 'div.cm-line').each do |node| + # Remove recipe blocks - TODO transform to outgoing link to docs + css('h4[id^="examples-"]').each do |node| + node.parent.parent.parent.remove + end + + # Remove styling divs while lifting children + styling_prefixes = [ + 'ps-', 'mx-', 'my-', 'px-', 'py-', 'mb-', 'sp-', 'rounded-' + ] + selectors = styling_prefixes.map { |prefix| "div[class*=\"#{prefix}\"]" } + css(*selectors, 'div[class=""]', 'div.cm-line').each do |node| node.before(node.children).remove end From 42ec919cd8f0feaf07b34b82ee16d61da1fdf83d Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 20:28:11 +0100 Subject: [PATCH 05/22] syntax highlighting --- lib/docs/filters/react/clean_html_react_dev.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index fb77fa144a..dc9dcae1c2 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -33,6 +33,14 @@ def call node.before(node.children).remove end + # Syntax highlighting + css('pre br').each do |node| + node.replace("\n") + end + css('pre').each do |node| + node['data-language'] = 'jsx' + end + doc end end From 30ddc7bbaf106320090def89f40f952be7733574 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 20:36:04 +0100 Subject: [PATCH 06/22] sanitize titles - experimental tag --- lib/docs/filters/react/entries_react_dev.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 5807985de0..2264985bc8 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -2,7 +2,9 @@ module Docs class React class EntriesReactDevFilter < Docs::EntriesFilter def get_name - at_css('article h1').content + canary_copy = '- This feature is available in the latest Canary' + name = at_css('article h1').content + return name.sub(canary_copy, ' (experimental)') end def get_type From ef1cc8a8bbe4540a9cc201475f12bf4690d746ca Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 20:40:52 +0100 Subject: [PATCH 07/22] update icon --- public/icons/docs/react/16.png | Bin 495 -> 872 bytes public/icons/docs/react/16@2x.png | Bin 830 -> 1674 bytes public/icons/docs/react/SOURCE | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/icons/docs/react/16.png b/public/icons/docs/react/16.png index 2d384517115ac7b1e6ea4af427549d3fe34d01b1..d24cb4f7655c83c8ed5fa830839780edd75500c5 100644 GIT binary patch literal 872 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstU$g(vPY0F z14ES>14Ba#1H&(%P{RubhEf9thF1v;3|2E37{m+a>%l@(4XEVL4Kex0|Isn2N83og1P8=rA(V=kID z-DO42{Ohbft1q=5wv;&3dU2IEOa3hD*$KRv{4;ZR%9R~V`TMV2en;Z(tUT^(%L5oK zcGdvRVodUOck#~TZVdn#T;l2L%Kn0tQAk~?Zr1Z?pwMwo7sn8Z%f9_D#TpYtT;H3v z2z_0s#G5u#HAPP8)n9p!__nA2?quIB{>I3VVsuj`N$<1~$3?~I&$(S4HN<|{xN~+b znW7P{^)#ik!*D`M$STvVs}|_EGHhJ;vTbFi+QqF_N+z?`mMy>i`g`@Y1Pdwu1gqJ% zQy%Jkz1no+^H%QJy6?W7dstC4_phXW_>CBi4|&Ibv;WBc_2~nj-=9Z^Q*5M8g~@0g zU-zDAkeUA@+FxQ)P{@>Yp#+!4ct}^7zv;}%ewZt`| zBqgyV)hf9t6-Y4{85o)98kp)D7={>HSQ%Sd8JTGt7+4t?IA2`ihoT`jKP5A*61N7U z9+^)-4dGQG5hX#19-8f8yaN4Aam!<$wB&=hGkt zW@T=@WNu+)VeiQz%)$yT4JLkOjJc9BO@XrA|fIpA|fIpA|fIpB1w~{EI>vrLPsq^NG(H2 zF-=uATxK_4X*y?gJ#l|QeT_nakw}oFBPJ$Fm8wgcuT-?pDmp$hQ(Q=rrZre%N|vij znXWclWlWs1P^iEuH#QWJGV_g*Q z23gSTM8)~_cn5Vtp{HY$=Nsrf3XV%U(`kqNITdiY8PBkM9UAOE1o;x#yYBlt00000 LNkvXXu0mjfzNW+s diff --git a/public/icons/docs/react/16@2x.png b/public/icons/docs/react/16@2x.png index 016a0c3c87d04e2e08cad9d60860f3068812290a..953ae4cc3aba0f0cc7cb93bdf5578d4caeb88bfd 100644 GIT binary patch literal 1674 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dyEa{HEjtmSN`?>!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+081LM~KpAc7|f`}t`B9GjO zIC4As$er+`cf$|gi8y-4Y0rhwJ7>c7-EuzoAmYf~$iqN2cf&!-M$r%mfykqGamCdC z|NkYOqmzLVzqBOCFBnLp0A}r|1P2k$su$lrf4NX3Ui>wExr=q$&f~7BR?9@s?-Sw< zzx>WVgKfe@J6o30#Y^LJ-y3{fAh1Z^M8@LrkJYnguCX-!5V2(I{ZD@%-oEi*n_%3r zX;)KZC8ivSWxT}Y_-56`?|Yu#S*M`RuA;U@{#oeLgE~1g3g$jJK;wr}zG1 zmAa)W$>~Zosp|yS@!;`OGl8&KMyen85=Bx?|(GI;N<(k_Y5*ogKhoh#VyF+u-;#Q># zrmtqt`qFE~a3Sl81;2rSo8F$6YKQL_F@9IOc!t@}lQbfnoJp zSpy5@P4^2etllizoKbi^?J1*M_3{btCRy)}%{#w0Z}~^N+`0#*PI{ekE7xy7e_y{^ zf`g4&F>=4X$%Dz?<}6|S@S<>r;vxTeLCwyBGW|SP1AnET|Laq6gYo%=$}N$XZ6B5H zt+0G$SQvibiJYm&u|1DBR+Nft$T+{c#l~o1o$Z7fEESV$v>9jk?3o?pKjD;1R#d~) zb9?-m&)j2OC7#SFu=o*;n8W@HcT38udS{a#X8yHv_ z7&u>C;)kLkH$NpatrE8eqaK-0Kn>wlArU1(iRB6fMfqu&IjIUIl?AB^nFS@loLw>J z@h2XR!Y~buQ~syVcs>ncU{>bVOXe0<7WSSj!Yr)d(qM8pg;{xXh{EX`S56!`b42C{ f`{@Rc1zvg#ufzpQJ~^3!au|cBtDnm{r-UW|tMN4s delta 808 zcmV+@1K0eD4Za4DBYy!wP)t-sA|fIoAt53nA^-pYA|fIpA|fClAR;0nA|fIpA|hYZ z^jN>#SijptgOy2>s9nzOSisz0)%9T5_%BINR=UN9RpduqAConQpwa`9wgk8_>KzWE=&Fo2(s8FfFRI|+_Cn#FSftHfZ}_%TmcE=5dBn66vN=U>zEJ#m0lxYI*| zlT4knT+HfKw|~+iBO_M2)i6p@MTnX)O;kpVpFMDXJ8O78bb>&7ieT3FJa2tasK8LF z!bFCbUeWMKkfba>L`adPCND8kugEV*PDO{AKzWHVovC~P000SeQchE?C+`69^C%_q z3{BU70004_Nkl?x2XmY?`X(A?`CHgL=7OEk>ENChmTw& zKNGYyoPVgHDtGMmH|{3@oa8k?!sh9o2ZKRHQIGSk$F}!BaGBk)MWV;TaGzxRL*Y0Y zvF&6p88E0xxsZ4;;!lCpMg;bHDj{zZY`UU3_skp8RI z5R(oc)f%2i4cn}SMHd|Ff@58La1K;)b&@SM$rhR9^0#=k+dEv9zQt!>Q!H!uL2siJ zosFV9*-PR~3V-P>+evpr@D+LP8sM7aiaw#C-D*fU@I}l|5`XLhI}*SEGpv;5{jX Date: Sun, 1 Dec 2024 20:51:49 +0100 Subject: [PATCH 08/22] add categories --- lib/docs/filters/react/entries_react_dev.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 2264985bc8..0c49a6d8cb 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -4,11 +4,12 @@ class EntriesReactDevFilter < Docs::EntriesFilter def get_name canary_copy = '- This feature is available in the latest Canary' name = at_css('article h1').content - return name.sub(canary_copy, ' (experimental)') + return name.sub(canary_copy, ' (Experimental)') end def get_type - return 'TODO add types' + breadcrumb_nodes = css('a.tracking-wide') + return breadcrumb_nodes.last.content || 'Miscellaneous' end end end From fefde4e15bf2a77d35cb58912c74659e4659b117 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 22:49:46 +0100 Subject: [PATCH 09/22] add Learn pages --- lib/docs/filters/react/clean_html_react_dev.rb | 4 +++- lib/docs/filters/react/entries_react_dev.rb | 7 +++++-- lib/docs/scrapers/react.rb | 10 ++++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index dc9dcae1c2..954dd9b37c 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -6,7 +6,9 @@ def call # Remove breadcrumbs before h1 css('h1').each do |node| - node.previous.remove + if (node.previous) + node.previous.remove + end end remove_selectors = [ diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 0c49a6d8cb..0fda0c5aba 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -4,12 +4,15 @@ class EntriesReactDevFilter < Docs::EntriesFilter def get_name canary_copy = '- This feature is available in the latest Canary' name = at_css('article h1').content - return name.sub(canary_copy, ' (Experimental)') + return name.sub(canary_copy, ' (Canary)') end def get_type breadcrumb_nodes = css('a.tracking-wide') - return breadcrumb_nodes.last.content || 'Miscellaneous' + category = breadcrumb_nodes.last.content + is_learn_page = base_url.to_s.end_with?('learn') + prefix = is_learn_page ? 'Learn: ' : '' + return prefix + (category || 'Miscellaneous') end end end diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb index 02133b177e..a24798da7e 100644 --- a/lib/docs/scrapers/react.rb +++ b/lib/docs/scrapers/react.rb @@ -1,5 +1,7 @@ module Docs class React < UrlScraper + include MultipleBaseUrls + self.name = 'React' self.type = 'simple' self.links = { @@ -9,8 +11,12 @@ class React < UrlScraper version do self.release = '18.3.1' - # TODO add /learn - self.base_url = 'https://react.dev/reference' + host = 'https://react.dev' + self.base_urls = [ + "#{host}/reference", + "#{host}/learn", + ] + self.initial_paths = %w(/react) html_filters.push 'react/entries_react_dev', 'react/clean_html_react_dev' From 33aba064c175c61274c329033cc64cc39b85aef8 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 23:18:53 +0100 Subject: [PATCH 10/22] use single base URL for working crosslinks --- lib/docs/filters/react/entries_react_dev.rb | 2 +- lib/docs/scrapers/react.rb | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 0fda0c5aba..358d6a8d36 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -10,7 +10,7 @@ def get_name def get_type breadcrumb_nodes = css('a.tracking-wide') category = breadcrumb_nodes.last.content - is_learn_page = base_url.to_s.end_with?('learn') + is_learn_page = path.start_with?('learn/') prefix = is_learn_page ? 'Learn: ' : '' return prefix + (category || 'Miscellaneous') end diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb index a24798da7e..a4f0e0fe74 100644 --- a/lib/docs/scrapers/react.rb +++ b/lib/docs/scrapers/react.rb @@ -1,6 +1,5 @@ module Docs class React < UrlScraper - include MultipleBaseUrls self.name = 'React' self.type = 'simple' @@ -11,15 +10,13 @@ class React < UrlScraper version do self.release = '18.3.1' - host = 'https://react.dev' - self.base_urls = [ - "#{host}/reference", - "#{host}/learn", - ] - self.initial_paths = %w(/react) + self.base_url = 'https://react.dev' + self.initial_paths = %w(/reference/react /learn) html_filters.push 'react/entries_react_dev', 'react/clean_html_react_dev' + options[:only_patterns] = [/\A\/learn/, /\A\/reference/] + options[:attribution] = <<-HTML © 2013–present Facebook Inc.
Licensed under the Creative Commons Attribution 4.0 International Public License. From 7be6b3e7c462526d351e0f92d2683facb7ba5b53 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Sun, 1 Dec 2024 23:19:00 +0100 Subject: [PATCH 11/22] strip styling --- lib/docs/filters/react/clean_html_react_dev.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 954dd9b37c..2a5ce8a2a8 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -43,6 +43,9 @@ def call node['data-language'] = 'jsx' end + # Remove styling + css('*').remove_attr('class').remove_attr('style') + doc end end From 2e1f0a046292f6eb087fc6421e633044a8fd2331 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Mon, 2 Dec 2024 08:07:04 +0100 Subject: [PATCH 12/22] Address top-level page categories --- lib/docs/filters/react/entries_react_dev.rb | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 358d6a8d36..6979448980 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -2,17 +2,27 @@ module Docs class React class EntriesReactDevFilter < Docs::EntriesFilter def get_name - canary_copy = '- This feature is available in the latest Canary' name = at_css('article h1').content - return name.sub(canary_copy, ' (Canary)') + return update_canary_copy(name) end def get_type breadcrumb_nodes = css('a.tracking-wide') - category = breadcrumb_nodes.last.content + is_top_level_page = breadcrumb_nodes.length == 1 + category = if is_top_level_page + # Category is the opened category in the sidebar + css('aside a.text-link div').first.content + else + breadcrumb_nodes.last.content + end is_learn_page = path.start_with?('learn/') prefix = is_learn_page ? 'Learn: ' : '' - return prefix + (category || 'Miscellaneous') + return update_canary_copy(prefix + (category || 'Miscellaneous')) + end + + def update_canary_copy(string) + canary_copy = '- This feature is available in the latest Canary' + return string.sub(canary_copy, ' (Canary)') end end end From 7a510f6d8b127dd696492992b5079b12b8ef43c9 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Tue, 10 Dec 2024 21:56:00 +0100 Subject: [PATCH 13/22] remove dark images --- lib/docs/filters/react/clean_html_react_dev.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 2a5ce8a2a8..3371089aca 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -13,14 +13,18 @@ def call remove_selectors = [ 'div.grid > a', # prev-next links - 'button', # "show more" etc. buttons - 'div.order-last', # code iframe containers - 'a[title="Open in CodeSandbox"]', # codesandbox links + 'button', # "show more" etc. buttons + 'div.order-last', # code iframe containers + 'div.dark-image', # dark images + 'a[title="Open in CodeSandbox"]', # codesandbox links ] css(*remove_selectors).each do |node| node.remove end + # Fix images not loading + css('img').remove_attr('srcset') + # Remove recipe blocks - TODO transform to outgoing link to docs css('h4[id^="examples-"]').each do |node| node.parent.parent.parent.remove From 94a7aa3c3447918caa94ddbad73a87eb8a686184 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Tue, 10 Dec 2024 23:13:26 +0100 Subject: [PATCH 14/22] use better top-level categories --- .../filters/react/clean_html_react_dev.rb | 4 +--- lib/docs/filters/react/entries_react_dev.rb | 22 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 3371089aca..f8a27bcc78 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -31,9 +31,7 @@ def call end # Remove styling divs while lifting children - styling_prefixes = [ - 'ps-', 'mx-', 'my-', 'px-', 'py-', 'mb-', 'sp-', 'rounded-' - ] + styling_prefixes = %w[ps- mx- my- px- py- mb- sp- rounded-] selectors = styling_prefixes.map { |prefix| "div[class*=\"#{prefix}\"]" } css(*selectors, 'div[class=""]', 'div.cm-line').each do |node| node.before(node.children).remove diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 6979448980..59dcd33438 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -2,21 +2,21 @@ module Docs class React class EntriesReactDevFilter < Docs::EntriesFilter def get_name - name = at_css('article h1').content + name = at_css('article h1')&.content return update_canary_copy(name) end def get_type - breadcrumb_nodes = css('a.tracking-wide') - is_top_level_page = breadcrumb_nodes.length == 1 - category = if is_top_level_page - # Category is the opened category in the sidebar - css('aside a.text-link div').first.content - else - breadcrumb_nodes.last.content - end - is_learn_page = path.start_with?('learn/') - prefix = is_learn_page ? 'Learn: ' : '' + # Category is the opened category in the sidebar + category = css('a:has(> span.text-link) > div').first&.content + # The grey category in the sidebar + top_category = css('h3:has(~ li a.text-link)') + .last&.content + &.sub(/@.*$/, '') # remove version tag + &.sub(/^./, &:upcase) # capitalize first letter + &.concat(": ") + is_learn_page = path.start_with?('learn/') || slug == 'learn' + prefix = is_learn_page ? 'Learn: ' : top_category return update_canary_copy(prefix + (category || 'Miscellaneous')) end From afd18f66af52e9dd0e51ab4834ea276d5ddad51b Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 00:22:23 +0100 Subject: [PATCH 15/22] treat properly callouts --- assets/stylesheets/application.css.scss | 1 + assets/stylesheets/pages/_react.scss | 20 ++++++++++++++ docs/adding-docs.md | 2 +- .../filters/react/clean_html_react_dev.rb | 27 ++++++++++++++++--- lib/docs/filters/react/entries_react_dev.rb | 2 +- lib/docs/scrapers/react.rb | 2 +- 6 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 assets/stylesheets/pages/_react.scss diff --git a/assets/stylesheets/application.css.scss b/assets/stylesheets/application.css.scss index dc2ee648da..3823cdc3d8 100644 --- a/assets/stylesheets/application.css.scss +++ b/assets/stylesheets/application.css.scss @@ -109,6 +109,7 @@ 'pages/qt', 'pages/ramda', 'pages/rdoc', + 'pages/react', 'pages/react_native', 'pages/reactivex', 'pages/redis', diff --git a/assets/stylesheets/pages/_react.scss b/assets/stylesheets/pages/_react.scss new file mode 100644 index 0000000000..e9f79b8487 --- /dev/null +++ b/assets/stylesheets/pages/_react.scss @@ -0,0 +1,20 @@ +._react { + @extend %simple; + + .note { + @extend %note; + } + + .note-orange { + @extend %note-orange; + } + + .note-blue { + @extend %note-blue; + } + + .note-green { + @extend %note-green; + } + +} diff --git a/docs/adding-docs.md b/docs/adding-docs.md index 971b91bece..5051d5ee31 100644 --- a/docs/adding-docs.md +++ b/docs/adding-docs.md @@ -13,7 +13,7 @@ Adding a documentation may look like a daunting task but once you get the hang o 6. Generate the full documentation using the `thor docs:generate [my_doc] --force` command. Additionally, you can use the `--verbose` option to see which files are being created/updated/deleted (useful to see what changed since the last run), and the `--debug` option to see which URLs are being requested and added to the queue (useful to pin down which page adds unwanted URLs to the queue). 7. Start the server, open the app, enable the documentation, and see how everything plays out. 8. Tweak the scraper/filters and repeat 5) and 6) until the pages and metadata are ok. -9. To customize the pages' styling, create an SCSS file in the `assets/stylesheets/pages/` directory and import it in both `application.css.scss` AND `application-dark.css.scss`. Both the file and CSS class should be named `_[type]` where [type] is equal to the scraper's `type` attribute (documentations with the same type share the same custom CSS and JS). Setting the type to `simple` will apply the general styling rules in `assets/stylesheets/pages/_simple.scss`, which can be used for documentations where little to no CSS changes are needed. +9. To customize the pages' styling, create an SCSS file in the `assets/stylesheets/pages/` directory and import it in `application.css.scss`. Both the file and CSS class should be named `_[type]` where [type] is equal to the scraper's `type` attribute (documentations with the same type share the same custom CSS and JS). Setting the type to `simple` will apply the general styling rules in `assets/stylesheets/pages/_simple.scss`, which can be used for documentations where little to no CSS changes are needed. 10. To add syntax highlighting or execute custom JavaScript on the pages, create a file in the `assets/javascripts/views/pages/` directory (take a look at the other files to see how it works). 11. Add the documentation's icon in the `public/icons/docs/[my_doc]/` directory, in both 16x16 and 32x32-pixels formats. The icon spritesheet is automatically generated when you (re)start your local DevDocs instance. 12. Add the documentation's copyright details to `options[:attribution]`. This is the data shown in the table on the [about](https://devdocs.io/about) page, and is ordered alphabetically. Please see an existing scraper for the typesetting. diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index f8a27bcc78..77c2ae9f65 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -30,9 +30,30 @@ def call node.parent.parent.parent.remove end + # Transform callout blocks + class_transform = { + '.expandable-callout[class*=yellow]' => 'note note-orange', # pitfalls, experimental + '.expandable-callout[class*=green]' => 'note note-green', # note + '.bg-card' => 'note', # you will learn + 'details' => 'note note-blue' # deep dive + } + + class_transform.each do |old_class, new_class| + css(old_class).each do |node| + node.set_attribute('class', new_class) + end + end + + # Transform h3 to h4 inside callouts + css('.note h3').each do |node| + new_node = Nokogiri::XML::Node.new('h4', @doc) + new_node.content = node.content + node.replace(new_node) + end + # Remove styling divs while lifting children styling_prefixes = %w[ps- mx- my- px- py- mb- sp- rounded-] - selectors = styling_prefixes.map { |prefix| "div[class*=\"#{prefix}\"]" } + selectors = styling_prefixes.map { |prefix| "div[class*=\"#{prefix}\"]:not(.note)" } css(*selectors, 'div[class=""]', 'div.cm-line').each do |node| node.before(node.children).remove end @@ -45,8 +66,8 @@ def call node['data-language'] = 'jsx' end - # Remove styling - css('*').remove_attr('class').remove_attr('style') + # Remove styling except for callouts and images + css('*:not([class*=image]):not(.note)').remove_attr('class').remove_attr('style') doc end diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 59dcd33438..80b5aad064 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -16,7 +16,7 @@ def get_type &.sub(/^./, &:upcase) # capitalize first letter &.concat(": ") is_learn_page = path.start_with?('learn/') || slug == 'learn' - prefix = is_learn_page ? 'Learn: ' : top_category + prefix = is_learn_page ? 'Learn: ' : top_category || '' return update_canary_copy(prefix + (category || 'Miscellaneous')) end diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb index a4f0e0fe74..d89cf6cb28 100644 --- a/lib/docs/scrapers/react.rb +++ b/lib/docs/scrapers/react.rb @@ -2,7 +2,7 @@ module Docs class React < UrlScraper self.name = 'React' - self.type = 'simple' + self.type = 'react' self.links = { home: 'https://react.dev/', code: 'https://github.com/facebook/react' From 70c5489f52fb29a7b280f817baff9cde874b3952 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 00:45:43 +0100 Subject: [PATCH 16/22] add search entries --- lib/docs/filters/react/entries_react_dev.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 80b5aad064..f3efff4bf3 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -24,6 +24,18 @@ def update_canary_copy(string) canary_copy = '- This feature is available in the latest Canary' return string.sub(canary_copy, ' (Canary)') end + + def additional_entries + excluded = %w(usage troubleshooting recap reference challenges alternatives) + entries = [] + css('article h2, article h3').map do |node| + next unless node.has_attribute?('id') + name = node.content.strip + next if excluded.include?(name.downcase) + entries << [name, node['id'], type] + end + return entries + end end end end From ceaf48b4ba568bb711f02e4e647d718ee53c9ec4 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 00:48:27 +0100 Subject: [PATCH 17/22] fix Canary callouts --- lib/docs/filters/react/clean_html_react_dev.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 77c2ae9f65..28cff5a4f8 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -34,6 +34,7 @@ def call class_transform = { '.expandable-callout[class*=yellow]' => 'note note-orange', # pitfalls, experimental '.expandable-callout[class*=green]' => 'note note-green', # note + '.expandable-callout[class*=gray]' => 'note', # canary '.bg-card' => 'note', # you will learn 'details' => 'note note-blue' # deep dive } From d6df6b5135bcbc33da292672d41fb0ddc66e8a64 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 00:50:04 +0100 Subject: [PATCH 18/22] nice callout margins --- assets/stylesheets/pages/_react.scss | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/assets/stylesheets/pages/_react.scss b/assets/stylesheets/pages/_react.scss index e9f79b8487..6bb32bdfe2 100644 --- a/assets/stylesheets/pages/_react.scss +++ b/assets/stylesheets/pages/_react.scss @@ -3,6 +3,10 @@ .note { @extend %note; + h4 { + margin-top: .25rem; + margin-bottom: .5rem; + } } .note-orange { From d8eb56811e2abe3dfbcbc18921f7ee6e1adc8ed5 Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 00:51:10 +0100 Subject: [PATCH 19/22] fix warnings --- lib/docs/filters/react/clean_html_react_dev.rb | 2 +- lib/docs/filters/react/entries_react_dev.rb | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index 28cff5a4f8..b593b09f3f 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -6,7 +6,7 @@ def call # Remove breadcrumbs before h1 css('h1').each do |node| - if (node.previous) + if node.previous node.previous.remove end end diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index f3efff4bf3..39edd7c987 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -3,7 +3,8 @@ class React class EntriesReactDevFilter < Docs::EntriesFilter def get_name name = at_css('article h1')&.content - return update_canary_copy(name) + + update_canary_copy(name) end def get_type @@ -17,12 +18,14 @@ def get_type &.concat(": ") is_learn_page = path.start_with?('learn/') || slug == 'learn' prefix = is_learn_page ? 'Learn: ' : top_category || '' - return update_canary_copy(prefix + (category || 'Miscellaneous')) + + update_canary_copy(prefix + (category || 'Miscellaneous')) end def update_canary_copy(string) canary_copy = '- This feature is available in the latest Canary' - return string.sub(canary_copy, ' (Canary)') + + string.sub(canary_copy, ' (Canary)') end def additional_entries @@ -34,7 +37,8 @@ def additional_entries next if excluded.include?(name.downcase) entries << [name, node['id'], type] end - return entries + + entries end end end From e67408b63ef6ebf32c6e271c616567300a3e49bd Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 00:55:37 +0100 Subject: [PATCH 20/22] remove additional entries --- lib/docs/filters/react/entries_react_dev.rb | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/lib/docs/filters/react/entries_react_dev.rb b/lib/docs/filters/react/entries_react_dev.rb index 39edd7c987..ce6da0e056 100644 --- a/lib/docs/filters/react/entries_react_dev.rb +++ b/lib/docs/filters/react/entries_react_dev.rb @@ -27,19 +27,6 @@ def update_canary_copy(string) string.sub(canary_copy, ' (Canary)') end - - def additional_entries - excluded = %w(usage troubleshooting recap reference challenges alternatives) - entries = [] - css('article h2, article h3').map do |node| - next unless node.has_attribute?('id') - name = node.content.strip - next if excluded.include?(name.downcase) - entries << [name, node['id'], type] - end - - entries - end end end end From 0a1b28762201db26f836372db08f27a276d0fdef Mon Sep 17 00:00:00 2001 From: Gergely Gombos Date: Wed, 11 Dec 2024 01:00:01 +0100 Subject: [PATCH 21/22] fix Ready to learn --- lib/docs/filters/react/clean_html_react_dev.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/docs/filters/react/clean_html_react_dev.rb b/lib/docs/filters/react/clean_html_react_dev.rb index b593b09f3f..1ad201cd34 100644 --- a/lib/docs/filters/react/clean_html_react_dev.rb +++ b/lib/docs/filters/react/clean_html_react_dev.rb @@ -46,7 +46,7 @@ def call end # Transform h3 to h4 inside callouts - css('.note h3').each do |node| + css('.note h3', '.note h2').each do |node| new_node = Nokogiri::XML::Node.new('h4', @doc) new_node.content = node.content node.replace(new_node) From 7d2643260d1b9623624e454e8ae70603602230a3 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Thu, 12 Dec 2024 08:53:09 +0100 Subject: [PATCH 22/22] Update React documentation (19) --- lib/docs/scrapers/react.rb | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/docs/scrapers/react.rb b/lib/docs/scrapers/react.rb index d89cf6cb28..9bc1249cd5 100644 --- a/lib/docs/scrapers/react.rb +++ b/lib/docs/scrapers/react.rb @@ -8,19 +8,27 @@ class React < UrlScraper code: 'https://github.com/facebook/react' } + options[:attribution] = <<-HTML + © 2013–present Facebook Inc.
+ Licensed under the Creative Commons Attribution 4.0 International Public License. + HTML + version do - self.release = '18.3.1' + self.release = '19' self.base_url = 'https://react.dev' self.initial_paths = %w(/reference/react /learn) - html_filters.push 'react/entries_react_dev', 'react/clean_html_react_dev' options[:only_patterns] = [/\A\/learn/, /\A\/reference/] + end + + version '18' do + self.release = '18.3.1' + self.base_url = 'https://18.react.dev' + self.initial_paths = %w(/reference/react /learn) + html_filters.push 'react/entries_react_dev', 'react/clean_html_react_dev' - options[:attribution] = <<-HTML - © 2013–present Facebook Inc.
- Licensed under the Creative Commons Attribution 4.0 International Public License. - HTML + options[:only_patterns] = [/\A\/learn/, /\A\/reference/] end version '17' do @@ -42,11 +50,6 @@ class React < UrlScraper 'working-with-the-browser.html' => 'refs-and-the-dom.html', 'top-level-api.html' => 'react-api.html', } - - options[:attribution] = <<-HTML - © 2013–present Facebook Inc.
- Licensed under the Creative Commons Attribution 4.0 International Public License. - HTML end def get_latest_version(opts)