diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 33cac6156a1..d33bfb2719c 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -2,7 +2,7 @@ ## Linter GitHub Actions ## ########################### # -# Documentation: https://github.com/github/super-linter/ +# Documentation: https://github.com/super-linter/super-linter/ # # Exception config files are in the .github/linters directory # @@ -27,11 +27,11 @@ jobs: # Only run the full workflow for manual runs or if upgrading the super linter if: | github.event_name != 'workflow_dispatch' && - startsWith(github.event.pull_request.title,'Bump github/super-linter') != true + startsWith(github.event.pull_request.title,'Bump super-linter/super-linter') != true run: | echo "VALIDATE_ALL_CODEBASE=false" >> $GITHUB_ENV - name: Lint Code Base - uses: github/super-linter/slim@v4.10.1 + uses: super-linter/super-linter/slim@v7.2.0 env: DEFAULT_BRANCH: main GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test_website.yml b/.github/workflows/test_website.yml index a6489abf9f4..6dcec52924c 100644 --- a/.github/workflows/test_website.yml +++ b/.github/workflows/test_website.yml @@ -40,7 +40,7 @@ jobs: - name: Use more complete checks for generated HTML linting run: cp -f .github/linters/.htmlhintrc_morechecks .github/linters/.htmlhintrc - name: Lint Generated HTML - uses: github/super-linter/slim@v4.10.1 + uses: super-linter/super-linter/slim@v7.2.0 env: DEFAULT_BRANCH: main FILTER_REGEX_INCLUDE: src/static/html/.* diff --git a/sql/.sqlfluff b/sql/.sqlfluff index c2b723a778e..03673851633 100644 --- a/sql/.sqlfluff +++ b/sql/.sqlfluff @@ -1,65 +1,33 @@ [sqlfluff] -# verbose is an integer (0-2) indicating the level of log output -verbose = 0 -# Turn off color formatting of output -nocolor = False -# Supported dialects https://docs.sqlfluff.com/en/stable/dialects.html -# Or run 'sqlfluff dialects' dialect = bigquery -# One of [raw|jinja|python|placeholder] -templater = jinja -# Comma separated list of rules to check, default to all -rules = all # Comma separated list of rules to exclude, or None -exclude_rules = L011,L014,L015,L016,L020,L022,L026,L027,L028,L029,L030,L031,L032,L034,L035,L036,L037,L042,L043,L051,L060,L071 -# L011 - We don't always alias tables with AS ("FROM table1 AS tb1" instead of "FROM table1 tb1"). Do for columns but not for tables. -# L014 - Unquoted identifiers (e.g. column names) will be mixed case so don't enforce case -# L015 - Sometimes clearer to include brackets for complex COUNT(DISTINCT) cases -# L016 - We allow longer lines as some of our queries are complex. Maybe should limit in future? -# L020 - Asks for unique table aliases meaning it complains if selecting from two 2021_07_01 tables as implicit alias is table name (not fully qualified) so same. -# L022 - CTEs may be chained and do not require a blank line separator, only the last one. -# L026 - BigQuery uses STRUCTS which can look like incorrect table references -# L027 - Asks for qualified columns for ambiguous ones, but we not qualify our columns, and they are not really ambiguous (or BigQuery would complain) -# L028 - Insists on references in column names even if not ambiguous. Bit OTT. -# L029 - Avoids keywords as identifiers but we use this a lot (e.g. AS count, AS max...etc.) -# L030 - Function names will be mixed case so don't enforce case -# L031 - Avoid aliases in from and join - why? -# L032 - Uses joins instead of USING - why? -# L034 - Insists on wildcards (*) in certain SELECT order - why? -# L035 - Do not use ELSE NULL as redundant. But it's clearer!? -# L036 - Select targets should be on new lines but sub clauses don't always obey this. Maybe revisit in future? -# L037 - if using DESC in one ORDER BY column, then insist on ASC/DESC for all. -# L042 - Use CTEs instead of subqueries. We don't use this consistently and big rewrite to do that. -# L043 - Use coalesce instead of case if you can. But it's clearer!? -# L051 - INNER JOIN must be fully qualified. Probably should use this but not our style. -# L060 - Use COALESCE instead of IFNULL or NVL. We think ISNULL is clearer. -# L071 - Turn off until https://github.com/sqlfluff/sqlfluff/issues/4188 is addressed -# The depth to recursively parse to (0 for unlimited) -recurse = 0 -# Below controls SQLFluff output, see max_line_length for SQL output -output_line_length = 80 -# Number of passes to run before admitting defeat -runaway_limit = 10 -# Ignore errors by category (one or more of the following, separated by commas: lexing,linting,parsing,templating) -ignore = None -# Ignore linting errors found within sections of code coming directly from -# templated code (e.g. from within Jinja curly braces. Note that it does not -# ignore errors from literal code found within template loops. -ignore_templated_areas = True -# can either be autodetect or a valid encoding e.g. utf-8, utf-8-sig -encoding = autodetect -# Ignore inline overrides (e.g. to test if still required) -disable_noqa = False -# Comma separated list of file extensions to lint -# NB: This config will only apply in the root folder -sql_file_exts = .sql,.sql.j2,.dml,.ddl -# Allow fix to run on files, even if they contain parsing errors -# Note altering this is NOT RECOMMENDED as can corrupt SQL -fix_even_unparsable = False -# Very large files can make the parser effectively hang. -# This limit skips files over a certain character length -# and warns the user what has happened. -# Set this to 0 to disable. +exclude_rules = AL01,AL04,AL07,AL09,AM03,AM05,AM08,CP02,CP03,CV02,CV12,LT05,LT09,LT14,RF01,RF02,RF03,RF04,ST01,ST02,ST05,ST06,ST07,ST08,ST11 +# AL01 - We don't always alias tables with AS ("FROM table1 AS tb1" instead of "FROM table1 tb1"). Do for columns but not for tables. +# AL04 - Asks for unique table aliases meaning it complains if selecting from two 2021_07_01 tables as implicit alias is table name (not fully qualified) so same. +# AL07 - Avoid aliases in from and join - why? +# AL09 - Allow self-aliasing, particularly for struct references +# AM03 - if using DESC in one ORDER BY column, then insist on ASC/DESC for all. +# AM05 - INNER JOIN must be fully qualified. Probably should use this but not our style. +# AM08 - CROSS JOIN detection. Doesn't work with UNNEST +# CP02 - Unquoted identifiers (e.g. column names) will be mixed case so don't enforce case +# CP03 - Function names will be mixed case so don't enforce case +# CV02 - Use COALESCE instead of IFNULL or NVL. We think ISNULL is clearer. +# CV12 - Doesn't work with UNNEST +# LT05 - We allow longer lines as some of our queries are complex. Maybe should limit in future? +# LT09 - Select targets should be on new lines but sub clauses don't always obey this. Maybe revisit in future? +# LT14 - We have some simple, single line joins +# RF01 - BigQuery uses STRUCTS which can look like incorrect table references +# RF02 - Asks for qualified columns for ambiguous ones, but we not qualify our columns, and they are not really ambiguous (or BigQuery would complain) +# RF03 - Insists on references in column names even if not ambiguous. Bit OTT. +# RF04 - Avoids keywords as identifiers but we use this a lot (e.g. AS count, AS max...etc.) +# ST01 - Do not use ELSE NULL as redundant. But it's clearer!? +# ST02 - Use coalesce instead of case if you can. But it's clearer!? +# ST05 - Use CTEs instead of subqueries. We don't use this consistently and big rewrite to do that. +# ST06 - Insists on wildcards (*) in certain SELECT order - why? +# ST07 - Uses joins instead of USING - why? +# ST08 - Sometimes clearer to include brackets for complex COUNT(DISTINCT) cases +# ST11 - Doesn't consider wildcards in SELECT. Issue: https://github.com/sqlfluff/sqlfluff/issues/6511 + large_file_skip_byte_limit = 40000 # CPU processes to use while linting. # If positive, just implies number of processes. @@ -68,202 +36,23 @@ large_file_skip_byte_limit = 40000 processes = -1 [sqlfluff:indentation] -# See https://docs.sqlfluff.com/en/stable/indentation.html -indented_joins = False -indented_ctes = False +# See https://docs.sqlfluff.com/en/stable/layout.html#configuring-indent-locations indented_using_on = False -indented_on_contents = True -template_blocks_indent = True - -# Layout configuration -# See https://docs.sqlfluff.com/en/stable/layout.html#configuring-layout-and-spacing -[sqlfluff:layout:type:comma] -spacing_before = touch -line_position = trailing +tab_space_size = 2 +allow_implicit_indents = True [sqlfluff:layout:type:binary_operator] line_position = trailing -[sqlfluff:layout:type:statement_terminator] -spacing_before = touch -line_position = trailing - -[sqlfluff:layout:type:end_of_file] -spacing_before = touch - -[sqlfluff:layout:type:set_operator] -line_position = alone:strict - -[sqlfluff:layout:type:start_bracket] -spacing_after = touch - -[sqlfluff:layout:type:end_bracket] -spacing_before = touch - -[sqlfluff:layout:type:start_square_bracket] -spacing_after = touch - -[sqlfluff:layout:type:end_square_bracket] -spacing_before = touch - -[sqlfluff:layout:type:casting_operator] -spacing_before = touch -spacing_after = touch - -[sqlfluff:layout:type:comparison_operator] -spacing_within = touch -line_position = leading - -[sqlfluff:layout:type:object_reference] -spacing_within = inline - -[sqlfluff:layout:type:comment] -spacing_before = any -spacing_after = any - -# Template loop tokens shouldn't dictate spacing around them. -[sqlfluff:layout:type:template_loop] -spacing_before = any -spacing_after = any - -[sqlfluff:templater] -unwrap_wrapped_queries = True - -[sqlfluff:templater:jinja] -apply_dbt_builtins = True - -# Some rules can be configured directly from the config common to other rules -[sqlfluff:rules] -tab_space_size = 2 -max_line_length = 80 -indent_unit = space -allow_scalar = True -single_table_references = consistent -unquoted_identifiers_policy = all - -# Some rules have their own specific config. -[sqlfluff:rules:L003] -hanging_indents = True - -[sqlfluff:rules:L010] -# Keywords +[sqlfluff:rules:capitalisation.keywords] capitalisation_policy = upper -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L011] -# Aliasing preference for tables -aliasing = explicit - -[sqlfluff:rules:L012] -# Aliasing preference for columns -aliasing = explicit -[sqlfluff:rules:L014] -# Unquoted identifiers -extended_capitalisation_policy = consistent -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L016] -# Line length -ignore_comment_lines = False -ignore_comment_clauses = False - -[sqlfluff:rules:L027] -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L026] -# References must be in FROM clause -# Disabled for some dialects (e.g. bigquery) -force_enable = False - -[sqlfluff:rules:L028] -# References must be consistently used -# Disabled for some dialects (e.g. bigquery) -force_enable = False - -[sqlfluff:rules:L029] -# Keywords should not be used as identifiers. -unquoted_identifiers_policy = aliases -quoted_identifiers_policy = none -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L030] -# Function names -extended_capitalisation_policy = consistent -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L031] -# Avoid table aliases in from clauses and join conditions. -# Disabled for some dialects (e.g. bigquery) -force_enable = False - -[sqlfluff:rules:L036] -wildcard_policy = single - -[sqlfluff:rules:L038] -# Trailing commas -select_clause_trailing_comma = forbid - -[sqlfluff:rules:L040] -# Null & Boolean Literals -capitalisation_policy = consistent -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L042] -# By default, allow subqueries in from clauses, but not join clauses -forbid_subquery_in = join - -[sqlfluff:rules:L047] -# Consistent syntax to count all rows -prefer_count_1 = False -prefer_count_0 = True - -[sqlfluff:rules:L051] -# Fully qualify JOIN clause -fully_qualify_join_types = inner - -[sqlfluff:rules:L052] -# Semi-colon formatting approach -multiline_newline = False -require_final_semicolon = False - -[sqlfluff:rules:L054] -# GROUP BY/ORDER BY column references -group_by_and_order_by_style = consistent - -[sqlfluff:rules:L057] -## Special characters in identifiers -unquoted_identifiers_policy = all -quoted_identifiers_policy = all -allow_space_in_identifier = False -additional_allowed_characters = "-." -ignore_words = None -ignore_words_regex = None - -[sqlfluff:rules:L059] -# Policy on quoted and unquoted identifiers -prefer_quoted_identifiers = False -ignore_words = None -ignore_words_regex = None -force_enable = False +[sqlfluff:rules:capitalisation.types] +extended_capitalisation_policy = upper -[sqlfluff:rules:L062] -# Comma separated list of blocked words that should not be used -blocked_words = None +[sqlfluff:rules:convention.blocked_words] # Regex of blocked SQL that should not be used. -# Can be overridden with `-- noqa: L062` for those chapters using secondary pages +# Can be overridden with `-- noqa: CV09` for those chapters using secondary pages # TABLESAMPLE - sometimes used for testing. Shouldn't be used in production as not random. # sample_data - sometimes used for testing. Shouldn't be used in production. # Block 2022_05_12 (contains secondary pages) @@ -272,19 +61,11 @@ blocked_words = None # Block 2021_06_01 (probably forgot to update month to July for 2021) blocked_regex = (TABLESAMPLE|sample_data|2022_?05_?12|2022_?06_?09|2022_?07_?01|2021_?06_?01) -[sqlfluff:rules:L063] -# Data Types -extended_capitalisation_policy = upper -# Comma separated list of words to ignore for this rule -ignore_words = None -ignore_words_regex = None +[sqlfluff:rules:convention.count_rows] +prefer_count_0 = True -[sqlfluff:rules:L064] -# Consistent usage of preferred quotes for quoted literals +[sqlfluff:rules:convention.quoted_literals] preferred_quoted_literal_style = single_quotes -# Disabled for dialects that do not support single and double quotes for quoted literals (e.g. Postgres) -force_enable = False -[sqlfluff:rules:L066] -min_alias_length = None -max_alias_length = None +[sqlfluff:rules:references.special_chars] +additional_allowed_characters = "-." diff --git a/sql/2019/accessibility/09_01.sql b/sql/2019/accessibility/09_01.sql index 948746324f7..59b8106fb3d 100644 --- a/sql/2019/accessibility/09_01.sql +++ b/sql/2019/accessibility/09_01.sql @@ -20,8 +20,7 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.pages.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX), UNNEST(getElements(payload)) AS element WHERE diff --git a/sql/2019/accessibility/09_02.sql b/sql/2019/accessibility/09_02.sql index bb7c28bbde6..61a2817930f 100644 --- a/sql/2019/accessibility/09_02.sql +++ b/sql/2019/accessibility/09_02.sql @@ -19,20 +19,15 @@ SELECT COUNT(DISTINCT page) AS pages, total, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT _TABLE_SUFFIX AS client, url AS page, getCompliantElements(payload) AS compliant_elements FROM `httparchive.pages.2019_07_01_*`) -JOIN - (SELECT client, page, REGEXP_CONTAINS(body, '(?i)role=[\'"]?main') AS has_role_main FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml) -USING - (client, page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +FROM (SELECT _TABLE_SUFFIX AS client, url AS page, getCompliantElements(payload) AS compliant_elements FROM `httparchive.pages.2019_07_01_*`) +JOIN (SELECT client, page, REGEXP_CONTAINS(body, '(?i)role=[\'"]?main') AS has_role_main FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml) +USING (client, page) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE 'header' IN UNNEST(compliant_elements) AND 'footer' IN UNNEST(compliant_elements) AND - 'nav' IN UNNEST(compliant_elements) AND - ('main' IN UNNEST(compliant_elements) OR has_role_main) + 'nav' IN UNNEST(compliant_elements) AND ('main' IN UNNEST(compliant_elements) OR has_role_main) GROUP BY client, total diff --git a/sql/2019/accessibility/09_03.sql b/sql/2019/accessibility/09_03.sql index 1973c7cbdde..95526644516 100644 --- a/sql/2019/accessibility/09_03.sql +++ b/sql/2019/accessibility/09_03.sql @@ -21,8 +21,7 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.pages.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX), UNNEST(getElements(payload)) AS element GROUP BY diff --git a/sql/2019/accessibility/09_04.sql b/sql/2019/accessibility/09_04.sql index 664c9ba7395..d6edee41a91 100644 --- a/sql/2019/accessibility/09_04.sql +++ b/sql/2019/accessibility/09_04.sql @@ -18,14 +18,10 @@ SELECT COUNT(DISTINCT page) AS pages, total, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT _TABLE_SUFFIX AS client, url AS page, getMainCount(payload) AS main_elements FROM `httparchive.pages.2019_07_01_*`) -JOIN - (SELECT client, page, ARRAY_LENGTH(REGEXP_EXTRACT_ALL(body, '(?i)role=[\'"]?main')) AS main_roles FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml) -USING - (client, page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +FROM (SELECT _TABLE_SUFFIX AS client, url AS page, getMainCount(payload) AS main_elements FROM `httparchive.pages.2019_07_01_*`) +JOIN (SELECT client, page, ARRAY_LENGTH(REGEXP_EXTRACT_ALL(body, '(?i)role=[\'"]?main')) AS main_roles FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml) +USING (client, page) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) GROUP BY client, diff --git a/sql/2019/accessibility/09_05.sql b/sql/2019/accessibility/09_05.sql index c14548b4eba..ebd9b7d98d7 100644 --- a/sql/2019/accessibility/09_05.sql +++ b/sql/2019/accessibility/09_05.sql @@ -9,10 +9,8 @@ SELECT FROM `httparchive.almanac.summary_response_bodies`, UNNEST(REGEXP_EXTRACT_ALL(LOWER(body), 'role=[\'"]?([\\w-]+)')) AS role -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND firstHtml diff --git a/sql/2019/accessibility/09_06.sql b/sql/2019/accessibility/09_06.sql index 4f826f3e237..b292963a125 100644 --- a/sql/2019/accessibility/09_06.sql +++ b/sql/2019/accessibility/09_06.sql @@ -12,17 +12,16 @@ FROM ( page, id, COUNT(0) AS freq - FROM - (SELECT client, page, body FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml), + FROM (SELECT client, page, body FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml), UNNEST(REGEXP_EXTRACT_ALL(body, '(?i)\\sid=[\'"]?([^\'"\\s]+)')) AS id - JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) + JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) GROUP BY client, total, page, - id) + id +) WHERE freq > 1 GROUP BY diff --git a/sql/2019/accessibility/09_07.sql b/sql/2019/accessibility/09_07.sql index 521dd0ac5f3..ef796b1e1f4 100644 --- a/sql/2019/accessibility/09_07.sql +++ b/sql/2019/accessibility/09_07.sql @@ -24,7 +24,8 @@ FROM ( _TABLE_SUFFIX AS client, getMediaElements(payload) AS media_elements FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) WHERE 'audio' IN UNNEST(media_elements) OR 'video' IN UNNEST(media_elements) diff --git a/sql/2019/accessibility/09_08.sql b/sql/2019/accessibility/09_08.sql index 1a230542df0..d3a454eed52 100644 --- a/sql/2019/accessibility/09_08.sql +++ b/sql/2019/accessibility/09_08.sql @@ -12,4 +12,5 @@ FROM ( JSON_EXTRACT_SCALAR(report, "$.audits['html-has-lang'].score") = '1' AS has_lang, JSON_EXTRACT_SCALAR(report, "$.audits['html-valid-lang'].score") = '1' AS valid_lang FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/accessibility/09_10.sql b/sql/2019/accessibility/09_10.sql index 3630d8a4a37..fdaee1b192f 100644 --- a/sql/2019/accessibility/09_10.sql +++ b/sql/2019/accessibility/09_10.sql @@ -18,8 +18,7 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.pages.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) WHERE getEarlyHash(payload) > 0 diff --git a/sql/2019/accessibility/09_11.sql b/sql/2019/accessibility/09_11.sql index 5c5020ee217..71d209f6a38 100644 --- a/sql/2019/accessibility/09_11.sql +++ b/sql/2019/accessibility/09_11.sql @@ -18,10 +18,8 @@ SELECT COUNT(DISTINCT page) AS pages, total, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT client, page, REGEXP_EXTRACT_ALL(body, '(?i)') AS headings FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +FROM (SELECT client, page, REGEXP_EXTRACT_ALL(body, '(?i)') AS headings FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND firstHtml) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE includesSkippedHeading(headings) diff --git a/sql/2019/accessibility/09_13.sql b/sql/2019/accessibility/09_13.sql index d840532cf7d..7db770e25eb 100644 --- a/sql/2019/accessibility/09_13.sql +++ b/sql/2019/accessibility/09_13.sql @@ -20,10 +20,8 @@ SELECT COUNT(DISTINCT page) AS pages, total, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT client, page, REGEXP_EXTRACT_ALL(LOWER(body), ']+(aria-invalid|aria-required)\\b'), REGEXP_EXTRACT_ALL(body, ']+[^-](required)\\b') )) AS attr -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/accessibility/09_17.sql b/sql/2019/accessibility/09_17.sql index 6edc6f820ae..2f7e83ca63f 100644 --- a/sql/2019/accessibility/09_17.sql +++ b/sql/2019/accessibility/09_17.sql @@ -40,28 +40,26 @@ SELECT ROUND(COUNTIF(table_info.has_table AND has_columnheader_role) * 100 / COUNTIF(table_info.has_table), 2) AS perc_with_columnheader, ROUND(COUNTIF(table_info.has_table AND has_rowheader_role) * 100 / COUNTIF(table_info.has_table), 2) AS perc_with_rowheader, ROUND(COUNTIF(table_info.has_table AND (table_info.has_th OR has_rowheader_role OR has_columnheader_role)) * 100 / COUNTIF(table_info.has_table), 2) AS perc_with_any -FROM - ( - SELECT - client, - page, - REGEXP_CONTAINS(body, r'(?i)\brole=[\'"]?(columnheader)\b') AS has_columnheader_role, - REGEXP_CONTAINS(body, r'(?i)\brole=[\'"]?(rowheader)\b') AS has_rowheader_role - FROM - `httparchive.almanac.summary_response_bodies` - WHERE - date = '2019-07-01' AND - firstHtml - ) -JOIN - ( - SELECT - _TABLE_SUFFIX AS client, - url AS page, - getTableInfo(payload) AS table_info - FROM - `httparchive.pages.2019_07_01_*` - ) +FROM ( + SELECT + client, + page, + REGEXP_CONTAINS(body, r'(?i)\brole=[\'"]?(columnheader)\b') AS has_columnheader_role, + REGEXP_CONTAINS(body, r'(?i)\brole=[\'"]?(rowheader)\b') AS has_rowheader_role + FROM + `httparchive.almanac.summary_response_bodies` + WHERE + date = '2019-07-01' AND + firstHtml +) +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url AS page, + getTableInfo(payload) AS table_info + FROM + `httparchive.pages.2019_07_01_*` +) USING (client, page) GROUP BY client diff --git a/sql/2019/accessibility/09_18.sql b/sql/2019/accessibility/09_18.sql index 8e296349378..1831ead61d5 100644 --- a/sql/2019/accessibility/09_18.sql +++ b/sql/2019/accessibility/09_18.sql @@ -26,7 +26,8 @@ FROM ( _TABLE_SUFFIX AS client, getTableElements(payload) AS table_elements FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) WHERE 'table' IN UNNEST(table_elements) GROUP BY diff --git a/sql/2019/accessibility/09_19a.sql b/sql/2019/accessibility/09_19a.sql index 1e63945d2f3..18b7ffbe8b5 100644 --- a/sql/2019/accessibility/09_19a.sql +++ b/sql/2019/accessibility/09_19a.sql @@ -10,8 +10,7 @@ SELECT FROM `httparchive.almanac.summary_response_bodies`, UNNEST(REGEXP_EXTRACT_ALL(LOWER(body), '<[^>]+\\b(aria-\\w+=[\'"]?[\\w-]+)')) AS attr -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/accessibility/09_19b.sql b/sql/2019/accessibility/09_19b.sql index bb00518dc20..c52ad395db0 100644 --- a/sql/2019/accessibility/09_19b.sql +++ b/sql/2019/accessibility/09_19b.sql @@ -9,8 +9,7 @@ SELECT FROM `httparchive.almanac.summary_response_bodies`, UNNEST(REGEXP_EXTRACT_ALL(LOWER(body), '<[^>]+\\b(aria-\\w+=[\'"]?[\\w-]+)')) AS attr -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/accessibility/09_19c.sql b/sql/2019/accessibility/09_19c.sql index f07745a2545..5bce1ad8c63 100644 --- a/sql/2019/accessibility/09_19c.sql +++ b/sql/2019/accessibility/09_19c.sql @@ -1,8 +1,7 @@ #standardSQL # 09_19c: % valid ARIA attributes # Valid attributes from https://github.com/dequelabs/axe-core/blob/master/lib/commons/aria/index.js -CREATE TEMPORARY FUNCTION isValidAttribute(attr STRING) RETURNS BOOLEAN AS -(attr IN ('aria-atomic', 'aria-busy', 'aria-controls', 'aria-current', 'aria-describedby', 'aria-disabled', 'aria-dropeffect', 'aria-flowto', 'aria-grabbed', 'aria-haspopup', 'aria-hidden', 'aria-invalid', 'aria-keyshortcuts', 'aria-label', 'aria-labelledby', 'aria-live', 'aria-owns', 'aria-relevant', 'aria-roledescription')); +CREATE TEMPORARY FUNCTION isValidAttribute(attr STRING) RETURNS BOOLEAN AS (attr IN ('aria-atomic', 'aria-busy', 'aria-controls', 'aria-current', 'aria-describedby', 'aria-disabled', 'aria-dropeffect', 'aria-flowto', 'aria-grabbed', 'aria-haspopup', 'aria-hidden', 'aria-invalid', 'aria-keyshortcuts', 'aria-label', 'aria-labelledby', 'aria-live', 'aria-owns', 'aria-relevant', 'aria-roledescription')); SELECT client, diff --git a/sql/2019/accessibility/09_20.sql b/sql/2019/accessibility/09_20.sql index efb1f7cc42a..3d148aac361 100644 --- a/sql/2019/accessibility/09_20.sql +++ b/sql/2019/accessibility/09_20.sql @@ -9,7 +9,8 @@ FROM ( SELECT JSON_EXTRACT_SCALAR(report, "$.audits['aria-valid-attr-value'].score") = '1' AS is_valid FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) WHERE # Ignore pages with no aria-* attributes is_valid IS NOT NULL diff --git a/sql/2019/accessibility/09_22.sql b/sql/2019/accessibility/09_22.sql index e2b1beb6a00..722f8ef5040 100644 --- a/sql/2019/accessibility/09_22.sql +++ b/sql/2019/accessibility/09_22.sql @@ -13,7 +13,8 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml) + firstHtml +) GROUP BY client ORDER BY diff --git a/sql/2019/accessibility/09_24.sql b/sql/2019/accessibility/09_24.sql index f08ce85a881..a378720e99f 100644 --- a/sql/2019/accessibility/09_24.sql +++ b/sql/2019/accessibility/09_24.sql @@ -14,7 +14,8 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml) + firstHtml +) GROUP BY client ORDER BY diff --git a/sql/2019/accessibility/09_27.sql b/sql/2019/accessibility/09_27.sql index c6a01c27e55..ffd80f16eee 100644 --- a/sql/2019/accessibility/09_27.sql +++ b/sql/2019/accessibility/09_27.sql @@ -40,8 +40,7 @@ SELECT FROM `httparchive.pages.2019_07_01_*`, UNNEST(getTagsWithTabIndex(payload)) AS tag_type -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) GROUP BY client, tag_type, total ORDER BY occurrences DESC diff --git a/sql/2019/accessibility/09_28.sql b/sql/2019/accessibility/09_28.sql index 5c884679772..8e2bc97fb32 100644 --- a/sql/2019/accessibility/09_28.sql +++ b/sql/2019/accessibility/09_28.sql @@ -38,8 +38,7 @@ FROM ( FROM `httparchive.pages.2019_07_01_*` ) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total_pages FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total_pages FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) GROUP BY client, total_pages ORDER BY occurrences DESC diff --git a/sql/2019/accessibility/09_30b.sql b/sql/2019/accessibility/09_30b.sql index 2ab9bc782ae..339288de1ac 100644 --- a/sql/2019/accessibility/09_30b.sql +++ b/sql/2019/accessibility/09_30b.sql @@ -29,8 +29,7 @@ SELECT FROM `httparchive.pages.2019_07_01_*`, UNNEST(getAriaLabelUsage(payload)) AS uses_aria_label -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) GROUP BY client, uses_aria_label, total ORDER BY occurrences DESC diff --git a/sql/2019/accessibility/09_32b.sql b/sql/2019/accessibility/09_32b.sql index d17c18fe327..d7c42061a02 100644 --- a/sql/2019/accessibility/09_32b.sql +++ b/sql/2019/accessibility/09_32b.sql @@ -38,7 +38,6 @@ JOIN ( date = '2019-07-01' AND firstHtml ) -USING - (client, page) +USING (client, page) GROUP BY client diff --git a/sql/2019/accessibility/09_35.sql b/sql/2019/accessibility/09_35.sql index 0c9694cb438..0a8d477bdd0 100644 --- a/sql/2019/accessibility/09_35.sql +++ b/sql/2019/accessibility/09_35.sql @@ -51,7 +51,8 @@ FROM ( url AS page, includesMotionElement(payload) AS motion FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) JOIN ( SELECT client, @@ -63,9 +64,9 @@ JOIN ( date = '2019-07-01' GROUP BY client, - page) -USING - (client, page) + page +) +USING (client, page) GROUP BY client ORDER BY diff --git a/sql/2019/caching/16_04a.sql b/sql/2019/caching/16_04a.sql index 13284920b89..d525291e71e 100644 --- a/sql/2019/caching/16_04a.sql +++ b/sql/2019/caching/16_04a.sql @@ -15,17 +15,16 @@ SELECT COUNT(0) AS total_req, COUNTIF(diff < 0) AS req_too_short_cache, ROUND(COUNTIF(diff < 0) * 100 / COUNT(0), 2) AS perc_req_too_short_cache -FROM - ( - SELECT - client, - expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' AND - resp_last_modified != '' AND - expAge > 0 - ) +FROM ( + SELECT + client, + expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' AND + resp_last_modified != '' AND + expAge > 0 +) GROUP BY client diff --git a/sql/2019/caching/16_04a_3rd_party.sql b/sql/2019/caching/16_04a_3rd_party.sql index 74059586cfd..e083d612012 100644 --- a/sql/2019/caching/16_04a_3rd_party.sql +++ b/sql/2019/caching/16_04a_3rd_party.sql @@ -16,19 +16,18 @@ SELECT COUNT(0) AS total_req, COUNTIF(diff < 0) AS req_too_short_cache, ROUND(COUNTIF(diff < 0) * 100 / COUNT(0), 2) AS perc_req_too_short_cache -FROM - ( - SELECT - client, - IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, - expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' AND - resp_last_modified != '' AND - expAge > 0 - ) +FROM ( + SELECT + client, + IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, + expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' AND + resp_last_modified != '' AND + expAge > 0 +) GROUP BY client, party diff --git a/sql/2019/caching/16_04b.sql b/sql/2019/caching/16_04b.sql index a923aeef7a9..8b937392021 100644 --- a/sql/2019/caching/16_04b.sql +++ b/sql/2019/caching/16_04b.sql @@ -14,17 +14,16 @@ SELECT client, percentile, APPROX_QUANTILES(diff_in_days, 1000)[OFFSET(percentile * 10)] AS diff_in_days -FROM - ( - SELECT - client, - ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / 86400, 2) AS diff_in_days - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' AND - resp_last_modified != '' AND expAge > 0 - ), +FROM ( + SELECT + client, + ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / 86400, 2) AS diff_in_days + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' AND + resp_last_modified != '' AND expAge > 0 +), UNNEST([10, 20, 30, 40, 50, 60, 70, 80, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/caching/16_04b_3rd_party.sql b/sql/2019/caching/16_04b_3rd_party.sql index 1588ae7c8a8..866e94ebbbf 100644 --- a/sql/2019/caching/16_04b_3rd_party.sql +++ b/sql/2019/caching/16_04b_3rd_party.sql @@ -15,19 +15,18 @@ SELECT party, percentile, APPROX_QUANTILES(diff_in_days, 1000)[OFFSET(percentile * 10)] AS diff_in_days -FROM - ( - SELECT - client, - IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, - ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / 86400, 2) AS diff_in_days - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' AND - resp_last_modified != '' AND - expAge > 0 - ), +FROM ( + SELECT + client, + IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, + ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / 86400, 2) AS diff_in_days + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' AND + resp_last_modified != '' AND + expAge > 0 +), UNNEST([10, 20, 30, 40, 50, 60, 70, 80, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/caching/16_07.sql b/sql/2019/caching/16_07.sql index ea80a9a9c6f..20be3b1afcf 100644 --- a/sql/2019/caching/16_07.sql +++ b/sql/2019/caching/16_07.sql @@ -16,8 +16,7 @@ SELECT ROUND(COUNT(DISTINCT pageid) * 100 / SUM(COUNT(DISTINCT pageid)) OVER (PARTITION BY client), 2) AS pct_of_all_pages FROM `httparchive.almanac.summary_requests` -JOIN - (SELECT requestid, reqCookieLen > 0 AS uses_cookies FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01') +JOIN (SELECT requestid, reqCookieLen > 0 AS uses_cookies FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01') USING (requestid) WHERE date = '2019-07-01' diff --git a/sql/2019/caching/16_07_3rd_party.sql b/sql/2019/caching/16_07_3rd_party.sql index 68c2627fa83..2555698627d 100644 --- a/sql/2019/caching/16_07_3rd_party.sql +++ b/sql/2019/caching/16_07_3rd_party.sql @@ -25,8 +25,7 @@ FROM ( FROM `httparchive.almanac.summary_requests` ) -JOIN - (SELECT requestid, reqCookieLen > 0 AS uses_cookies FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01') +JOIN (SELECT requestid, reqCookieLen > 0 AS uses_cookies FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01') USING (requestid) WHERE date = '2019-07-01' diff --git a/sql/2019/caching/16_09_3rd_party.sql b/sql/2019/caching/16_09_3rd_party.sql index 5c96c928edc..9184d182341 100644 --- a/sql/2019/caching/16_09_3rd_party.sql +++ b/sql/2019/caching/16_09_3rd_party.sql @@ -9,17 +9,16 @@ SELECT COUNT(0) AS occurrences, ROUND(COUNT(0) * 100 / total_with_vary, 2) AS pct_of_vary, ROUND(COUNT(0) * 100 / all_requests, 2) AS pct_all_requests -FROM - ( - SELECT - client, - IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, - resp_vary - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' - ), +FROM ( + SELECT + client, + IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, + resp_vary + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' +), UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS header_name JOIN ( SELECT diff --git a/sql/2019/caching/16_10_3rd_party.sql b/sql/2019/caching/16_10_3rd_party.sql index faaf215f4cb..f6689770e8b 100644 --- a/sql/2019/caching/16_10_3rd_party.sql +++ b/sql/2019/caching/16_10_3rd_party.sql @@ -9,17 +9,16 @@ SELECT COUNT(0) AS occurrences, ROUND(COUNT(0) * 100 / total_using_control, 2) AS pct_of_control, ROUND(COUNT(0) * 100 / all_requests, 2) AS pct_all_requests -FROM - ( - SELECT - client, - IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, - resp_cache_control - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' - ), +FROM ( + SELECT + client, + IF(STRPOS(NET.HOST(url), REGEXP_EXTRACT(NET.REG_DOMAIN(page), r'([\w-]+)')) > 0, 1, 3) AS party, + resp_cache_control + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' +), UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive JOIN ( SELECT diff --git a/sql/2019/cdn/17_01.sql b/sql/2019/cdn/17_01.sql index 619550b17f3..26564f76f68 100644 --- a/sql/2019/cdn/17_01.sql +++ b/sql/2019/cdn/17_01.sql @@ -15,15 +15,14 @@ SELECT ROUND((COUNTIF(NOT firstHtml AND NOT sameHost AND NOT sameDomain) * 100 / (0.001 + SUM(COUNTIF(NOT firstHtml AND NOT sameHost AND NOT sameDomain)) OVER (PARTITION BY client))), 2) AS thirdPartyHitsPct, SUM(COUNT(0)) OVER (PARTITION BY client) AS totalHits, ROUND((COUNT(0) * 100 / (0.001 + SUM(COUNT(0)) OVER (PARTITION BY client))), 2) AS hitsPct -FROM - ( - SELECT - client, page, url, firstHtml, respBodySize, - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, # sometimes _cdn provider detection includes multiple entries. we bias for the DNS detected entry which is the first entry - IF(NET.HOST(url) = NET.HOST(page), TRUE, FALSE) AS sameHost, - IF(NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page), TRUE, FALSE) AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain - FROM `httparchive.almanac.requests3` - ) +FROM ( + SELECT + client, page, url, firstHtml, respBodySize, + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, # sometimes _cdn provider detection includes multiple entries. we bias for the DNS detected entry which is the first entry + IF(NET.HOST(url) = NET.HOST(page), TRUE, FALSE) AS sameHost, + IF(NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page), TRUE, FALSE) AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain + FROM `httparchive.almanac.requests3` +) GROUP BY client, cdn diff --git a/sql/2019/cdn/17_02.sql b/sql/2019/cdn/17_02.sql index e37bc28f1e0..7b81a11a75b 100644 --- a/sql/2019/cdn/17_02.sql +++ b/sql/2019/cdn/17_02.sql @@ -17,16 +17,15 @@ SELECT ROUND((COUNTIF(cdn != 'ORIGIN') * 100) / COUNT(0), 2) AS hitsPct, SUM(CASE WHEN cdn != 'ORIGIN' THEN respBodySize ELSE 0 END) AS cdnBytes, ROUND((SUM(CASE WHEN _cdn_provider != '' THEN respBodySize ELSE 0 END) * 100) / SUM(respBodySize), 2) AS bytesPct -FROM - ( - SELECT - client, page, url, firstHtml, respBodySize, - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, - CASE WHEN NET.HOST(url) = NET.HOST(page) THEN TRUE ELSE FALSE END AS sameHost, - CASE WHEN NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page) THEN TRUE ELSE FALSE END AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain - FROM `httparchive.almanac.requests3` +FROM ( + SELECT + client, page, url, firstHtml, respBodySize, + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, + CASE WHEN NET.HOST(url) = NET.HOST(page) THEN TRUE ELSE FALSE END AS sameHost, + CASE WHEN NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page) THEN TRUE ELSE FALSE END AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain + FROM `httparchive.almanac.requests3` --GROUP BY client, pageid, requestid, page, url, firstHtml, _cdn_provider, respBodySize - ) +) GROUP BY client, hits diff --git a/sql/2019/cdn/17_02f.sql b/sql/2019/cdn/17_02f.sql index e4a9cac5955..16f40076618 100644 --- a/sql/2019/cdn/17_02f.sql +++ b/sql/2019/cdn/17_02f.sql @@ -6,28 +6,30 @@ SELECT COUNTIF(jscdnHits > 0) AS hasJSCDNHits, COUNT(0) AS hits, ROUND(100 * COUNTIF(jscdnHits > 0) / COUNT(0), 2) AS pct -FROM - ( - SELECT - client, - page, - COUNTIF( - NET.HOST(url) IN ('unpkg.com', - 'www.jsdelivr.net', - 'cdnjs.cloudflare.com', - 'ajax.aspnetcdn.com', - 'ajax.googleapis.com', - 'stackpath.bootstrapcdn.com', - 'maxcdn.bootstrapcdn.com', - 'use.fontawesome.com', - 'code.jquery.com', - 'fonts.googleapis.com') - ) AS jscdnHits - FROM `httparchive.almanac.requests3` - GROUP BY - client, - page - ) +FROM ( + SELECT + client, + page, + COUNTIF( + NET.HOST( + url) IN ( + 'unpkg.com', + 'www.jsdelivr.net', + 'cdnjs.cloudflare.com', + 'ajax.aspnetcdn.com', + 'ajax.googleapis.com', + 'stackpath.bootstrapcdn.com', + 'maxcdn.bootstrapcdn.com', + 'use.fontawesome.com', + 'code.jquery.com', + 'fonts.googleapis.com' + ) + ) AS jscdnHits + FROM `httparchive.almanac.requests3` + GROUP BY + client, + page +) GROUP BY client ORDER BY diff --git a/sql/2019/cdn/17_02g.sql b/sql/2019/cdn/17_02g.sql index df2e8e8ec98..86a4c25b4b3 100644 --- a/sql/2019/cdn/17_02g.sql +++ b/sql/2019/cdn/17_02g.sql @@ -3,18 +3,17 @@ SELECT *, ROUND(100 * pageUseCount / totalPagesCount, 2) AS Pct # doing the Pct calc causes memory problems with bigquery -FROM - ( - SELECT - client, - IF(respBodySize > 0 AND REGEXP_CONTAINS(resp_content_type, r'javascript|css|font'), NET.HOST(url), NULL) AS host, - COUNT(DISTINCT page) AS pageUseCount, - SUM(COUNTIF(firstHtml)) OVER (PARTITION BY client) AS totalPagesCount - FROM `httparchive.almanac.requests3` - GROUP BY - client, - host - ) +FROM ( + SELECT + client, + IF(respBodySize > 0 AND REGEXP_CONTAINS(resp_content_type, r'javascript|css|font'), NET.HOST(url), NULL) AS host, + COUNT(DISTINCT page) AS pageUseCount, + SUM(COUNTIF(firstHtml)) OVER (PARTITION BY client) AS totalPagesCount + FROM `httparchive.almanac.requests3` + GROUP BY + client, + host +) WHERE host IS NOT NULL AND pageUseCount > 1000 ORDER BY client DESC, diff --git a/sql/2019/cdn/17_02h.sql b/sql/2019/cdn/17_02h.sql index 5df1f85bd58..3d2bc0e841e 100644 --- a/sql/2019/cdn/17_02h.sql +++ b/sql/2019/cdn/17_02h.sql @@ -4,27 +4,30 @@ SELECT *, ROUND(100 * pageUseCount / totalPagesCount, 2) AS Pct -FROM - ( - SELECT - client, - IF(NET.HOST(url) IN ('unpkg.com', - 'cdn.jsdelivr.net', - 'cdnjs.cloudflare.com', - 'ajax.aspnetcdn.com', - 'ajax.googleapis.com', - 'stackpath.bootstrapcdn.com', - 'maxcdn.bootstrapcdn.com', - 'use.fontawesome.com', - 'code.jquery.com', - 'fonts.googleapis.com'), NET.HOST(url), 'OTHER') AS jsCDN, - COUNT(DISTINCT page) AS pageUseCount, - SUM(COUNTIF(firstHtml)) OVER (PARTITION BY client) AS totalPagesCount - FROM `httparchive.almanac.requests3` - GROUP BY - client, - jsCDN - ) +FROM ( + SELECT + client, + IF(NET.HOST( + url) IN ( + 'unpkg.com', + 'cdn.jsdelivr.net', + 'cdnjs.cloudflare.com', + 'ajax.aspnetcdn.com', + 'ajax.googleapis.com', + 'stackpath.bootstrapcdn.com', + 'maxcdn.bootstrapcdn.com', + 'use.fontawesome.com', + 'code.jquery.com', + 'fonts.googleapis.com' + ), NET.HOST(url + ), 'OTHER') AS jsCDN, + COUNT(DISTINCT page) AS pageUseCount, + SUM(COUNTIF(firstHtml)) OVER (PARTITION BY client) AS totalPagesCount + FROM `httparchive.almanac.requests3` + GROUP BY + client, + jsCDN +) ORDER BY client DESC, pageUseCount DESC diff --git a/sql/2019/cdn/17_03.sql b/sql/2019/cdn/17_03.sql index 15ba36ad0bc..a37ff2bfa20 100644 --- a/sql/2019/cdn/17_03.sql +++ b/sql/2019/cdn/17_03.sql @@ -18,7 +18,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2019-07-01' AND - _cdn_provider != '') + _cdn_provider != '' +) GROUP BY client, cdn diff --git a/sql/2019/cdn/17_04.sql b/sql/2019/cdn/17_04.sql index 06ab7d3b91e..8d99a7f7dca 100644 --- a/sql/2019/cdn/17_04.sql +++ b/sql/2019/cdn/17_04.sql @@ -18,7 +18,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2019-07-01' AND - _cdn_provider != '') + _cdn_provider != '' +) GROUP BY client, cdn diff --git a/sql/2019/cdn/17_12.sql b/sql/2019/cdn/17_12.sql index 84ab0054993..9b59c79b1f3 100644 --- a/sql/2019/cdn/17_12.sql +++ b/sql/2019/cdn/17_12.sql @@ -11,16 +11,16 @@ SELECT APPROX_QUANTILES(tlstime, 1000)[OFFSET(750)] AS p75, APPROX_QUANTILES(tlstime, 1000)[OFFSET(900)] AS p90 FROM ( - SELECT - client, requestid, page, url, firstHtml, - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, # sometimes _cdn provider detection includes multiple entries. we bias for the DNS detected entry which is the first entry - CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, - ARRAY_LENGTH(split(JSON_EXTRACT(payload, '$._securityDetails.sanList'), '')) AS sanLength, - IF(NET.HOST(url) = NET.HOST(page), TRUE, FALSE) AS sameHost, - IF(NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page), TRUE, FALSE) AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain - FROM `httparchive.almanac.requests` - WHERE date = '2019-07-01' - GROUP BY client, requestid, page, url, firstHtml, cdn, tlstime, sanLength + SELECT + client, requestid, page, url, firstHtml, + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, # sometimes _cdn provider detection includes multiple entries. we bias for the DNS detected entry which is the first entry + CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, + ARRAY_LENGTH(split(JSON_EXTRACT(payload, '$._securityDetails.sanList'), '')) AS sanLength, + IF(NET.HOST(url) = NET.HOST(page), TRUE, FALSE) AS sameHost, + IF(NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page), TRUE, FALSE) AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain + FROM `httparchive.almanac.requests` + WHERE date = '2019-07-01' + GROUP BY client, requestid, page, url, firstHtml, cdn, tlstime, sanLength ) WHERE tlstime != -1 AND diff --git a/sql/2019/cdn/17_13.sql b/sql/2019/cdn/17_13.sql index dc207a59f74..e48eef88a1f 100644 --- a/sql/2019/cdn/17_13.sql +++ b/sql/2019/cdn/17_13.sql @@ -11,17 +11,17 @@ SELECT APPROX_QUANTILES(sanLength, 1000)[OFFSET(750)] AS p75, APPROX_QUANTILES(sanLength, 1000)[OFFSET(900)] AS p90 FROM ( - SELECT - client, requestid, page, url, firstHtml, - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, # sometimes _cdn provider detection includes multiple entries. we bias for the DNS detected entry which is the first entry - CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, - ARRAY_LENGTH(split(JSON_EXTRACT(payload, '$._securityDetails.sanList'), '')) AS sanLength, - -- length(FROM_BASE64(REPLACE(REGEXP_REPLACE(JSON_EXTRACT_SCALAR(payload, '$._certificates[0]'), ""-----(BEGIN|END) CERTIFICATE-----"", """"), ""\n"", """"))) AS tlscertsize, - IF(NET.HOST(url) = NET.HOST(page), TRUE, FALSE) AS sameHost, - IF(NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page), TRUE, FALSE) AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain - FROM `httparchive.almanac.requests` - WHERE date = '2019-07-01' - GROUP BY client, requestid, page, url, firstHtml, cdn, tlstime, sanLength + SELECT + client, requestid, page, url, firstHtml, + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, # sometimes _cdn provider detection includes multiple entries. we bias for the DNS detected entry which is the first entry + CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, + ARRAY_LENGTH(split(JSON_EXTRACT(payload, '$._securityDetails.sanList'), '')) AS sanLength, + -- length(FROM_BASE64(REPLACE(REGEXP_REPLACE(JSON_EXTRACT_SCALAR(payload, '$._certificates[0]'), ""-----(BEGIN|END) CERTIFICATE-----"", """"), ""\n"", """"))) AS tlscertsize, + IF(NET.HOST(url) = NET.HOST(page), TRUE, FALSE) AS sameHost, + IF(NET.HOST(url) = NET.HOST(page) OR NET.REG_DOMAIN(url) = NET.REG_DOMAIN(page), TRUE, FALSE) AS sameDomain # if toplevel reg_domain will return NULL so we group this as sameDomain + FROM `httparchive.almanac.requests` + WHERE date = '2019-07-01' + GROUP BY client, requestid, page, url, firstHtml, cdn, tlstime, sanLength ) WHERE tlstime != -1 AND diff --git a/sql/2019/cdn/17_17.sql b/sql/2019/cdn/17_17.sql index 7f3fc88307c..7f45e79c8e4 100644 --- a/sql/2019/cdn/17_17.sql +++ b/sql/2019/cdn/17_17.sql @@ -17,6 +17,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/cdn/17_18.sql b/sql/2019/cdn/17_18.sql index 72fd505bf17..c3bbe4b2a42 100644 --- a/sql/2019/cdn/17_18.sql +++ b/sql/2019/cdn/17_18.sql @@ -14,7 +14,8 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2019-07-01') + date = '2019-07-01' +) GROUP BY client, cdn diff --git a/sql/2019/cdn/17_19.sql b/sql/2019/cdn/17_19.sql index 5c0400173bf..b8b9146e4db 100644 --- a/sql/2019/cdn/17_19.sql +++ b/sql/2019/cdn/17_19.sql @@ -15,42 +15,41 @@ SELECT ROUND(100 * COUNTIF(IFNULL(a.protocol, b.protocol) NOT IN ('HTTP/0.9', 'HTTP/1.0', 'HTTP/1.1', 'HTTP/2')) / COUNT(0), 2) AS http_other_pct, ROUND(100 * COUNTIF(isSecure OR IFNULL(a.protocol, b.protocol) = 'HTTP/2') / COUNT(0), 2) AS tls_pct, COUNT(0) AS total -FROM - ( - SELECT - client, page, url, firstHtml, - # WPT is inconsistent with protocol population. - upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/')))) AS protocol, - JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tlsVersion, +FROM ( + SELECT + client, page, url, firstHtml, + # WPT is inconsistent with protocol population. + upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/')))) AS protocol, + JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tlsVersion, - # WPT joins CDN detection but we bias to the DNS detection which is the first entry - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, - CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, + # WPT joins CDN detection but we bias to the DNS detection which is the first entry + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, + CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, - # isSecure reports what the browser thought it was going to use, but it can get upgraded with STS OR UpgradeInsecure: 1 - IF(STARTS_WITH(url, 'https') OR JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL OR CAST(JSON_EXTRACT(payload, '$._is_secure') AS INT64) = 1, TRUE, FALSE) AS isSecure, - CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket - FROM - `httparchive.almanac.requests3` - WHERE - # WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing insonsistencies in the counts, so we ignore them - resp_location = '' OR resp_location IS NULL - ) a -LEFT JOIN - ( - SELECT - client, page, - CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket, - ANY_VALUE(upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))))) AS protocol, - ANY_VALUE(JSON_EXTRACT_SCALAR(payload, '$._tls_version')) AS tlsVersion - FROM - `httparchive.almanac.requests3` - WHERE - JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL AND - IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))) IS NOT NULL AND - jSON_EXTRACT(payload, '$._socket') IS NOT NULL - GROUP BY client, page, socket - ) b ON (a.client = b.client AND a.page = b.page AND a.socket = b.socket) + # isSecure reports what the browser thought it was going to use, but it can get upgraded with STS OR UpgradeInsecure: 1 + IF(STARTS_WITH(url, 'https') OR JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL OR CAST(JSON_EXTRACT(payload, '$._is_secure') AS INT64) = 1, TRUE, FALSE) AS isSecure, + CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket + FROM + `httparchive.almanac.requests3` + WHERE + # WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing insonsistencies in the counts, so we ignore them + resp_location = '' OR resp_location IS NULL +) a +LEFT JOIN ( + SELECT + client, page, + CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket, + ANY_VALUE(upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))))) AS protocol, + ANY_VALUE(JSON_EXTRACT_SCALAR(payload, '$._tls_version')) AS tlsVersion + FROM + `httparchive.almanac.requests3` + WHERE + JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL AND + IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))) IS NOT NULL AND + jSON_EXTRACT(payload, '$._socket') IS NOT NULL + GROUP BY client, page, socket +) b +ON (a.client = b.client AND a.page = b.page AND a.socket = b.socket) GROUP BY client, diff --git a/sql/2019/cdn/17_19b.sql b/sql/2019/cdn/17_19b.sql index 79720fd02d2..70c0ff64e1d 100644 --- a/sql/2019/cdn/17_19b.sql +++ b/sql/2019/cdn/17_19b.sql @@ -11,27 +11,26 @@ SELECT ROUND(100 * COUNTIF(IFNULL(a.tlsVersion, b.tlsVersion) = 'TLS 1.2') / COUNT(0), 2) AS tls12_pct, ROUND(100 * COUNTIF(IFNULL(a.tlsVersion, b.tlsVersion) = 'TLS 1.3') / COUNT(0), 2) AS tls13_pct, COUNT(0) AS total -FROM - ( - SELECT - client, page, url, firstHtml, - # WPT is inconsistent with protocol population. - upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('TLS ', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'TLS ')))) AS protocol, - JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tlsVersion, +FROM ( + SELECT + client, page, url, firstHtml, + # WPT is inconsistent with protocol population. + upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('TLS ', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'TLS ')))) AS protocol, + JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tlsVersion, - # WPT joins CDN detection but we bias to the DNS detection which is the first entry - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, - CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, + # WPT joins CDN detection but we bias to the DNS detection which is the first entry + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, + CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, - # isSecure reports what the browser thought it was going to use, but it can get upgraded with STS OR UpgradeInsecure: 1 - IF(STARTS_WITH(url, 'https') OR JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL OR CAST(JSON_EXTRACT(payload, '$._is_secure') AS INT64) = 1, TRUE, FALSE) AS isSecure, - CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket - FROM - `httparchive.almanac.requests3` - WHERE - # WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing insonsistencies in the counts, so we ignore them - resp_location = '' OR resp_location IS NULL - ) a + # isSecure reports what the browser thought it was going to use, but it can get upgraded with STS OR UpgradeInsecure: 1 + IF(STARTS_WITH(url, 'https') OR JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL OR CAST(JSON_EXTRACT(payload, '$._is_secure') AS INT64) = 1, TRUE, FALSE) AS isSecure, + CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket + FROM + `httparchive.almanac.requests3` + WHERE + # WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing insonsistencies in the counts, so we ignore them + resp_location = '' OR resp_location IS NULL +) a LEFT JOIN ( SELECT client, page, diff --git a/sql/2019/cdn/17_19c.sql b/sql/2019/cdn/17_19c.sql index fdfae2e0e7c..8ed95444066 100644 --- a/sql/2019/cdn/17_19c.sql +++ b/sql/2019/cdn/17_19c.sql @@ -4,41 +4,40 @@ SELECT a.client, firstHtml, IFNULL(a.protocol, b.protocol) AS protocol, IFNULL(a.tlsVersion, b.tlsVersion) AS tlsVersion, isSecure, COUNT(0) AS total -FROM - ( - SELECT - client, page, url, firstHtml, - # WPT is inconsistent with protocol population. - upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/')))) AS protocol, - JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tlsVersion, +FROM ( + SELECT + client, page, url, firstHtml, + # WPT is inconsistent with protocol population. + upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/')))) AS protocol, + JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tlsVersion, - # WPT joins CDN detection but we bias to the DNS detection which is the first entry - IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, - CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, + # WPT joins CDN detection but we bias to the DNS detection which is the first entry + IFNULL(NULLIF(REGEXP_EXTRACT(_cdn_provider, r'^([^,]*).*'), ''), 'ORIGIN') AS cdn, + CAST(JSON_EXTRACT(payload, '$.timings.ssl') AS INT64) AS tlstime, - # isSecure reports what the browser thought it was going to use, but it can get upgraded with STS OR UpgradeInsecure: 1 - IF(STARTS_WITH(url, 'https') OR JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL OR CAST(JSON_EXTRACT(payload, '$._is_secure') AS INT64) = 1, TRUE, FALSE) AS isSecure, - CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket - FROM - `httparchive.almanac.requests3` - WHERE - # WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing insonsistencies in the counts, so we ignore them - resp_location = '' OR resp_location IS NULL - ) a -LEFT JOIN - ( - SELECT - client, page, - CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket, - ANY_VALUE(upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))))) AS protocol, - ANY_VALUE(JSON_EXTRACT_SCALAR(payload, '$._tls_version')) AS tlsVersion - FROM `httparchive.almanac.requests3` - WHERE - JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL AND - IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))) IS NOT NULL AND - jSON_EXTRACT(payload, '$._socket') IS NOT NULL - GROUP BY client, page, socket - ) b ON (a.client = b.client AND a.page = b.page AND a.socket = b.socket) + # isSecure reports what the browser thought it was going to use, but it can get upgraded with STS OR UpgradeInsecure: 1 + IF(STARTS_WITH(url, 'https') OR JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL OR CAST(JSON_EXTRACT(payload, '$._is_secure') AS INT64) = 1, TRUE, FALSE) AS isSecure, + CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket + FROM + `httparchive.almanac.requests3` + WHERE + # WPT changes the response fields based on a redirect (url becomes the Location path instead of the original) causing insonsistencies in the counts, so we ignore them + resp_location = '' OR resp_location IS NULL +) a +LEFT JOIN ( + SELECT + client, page, + CAST(jSON_EXTRACT(payload, '$._socket') AS INT64) AS socket, + ANY_VALUE(upper(IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))))) AS protocol, + ANY_VALUE(JSON_EXTRACT_SCALAR(payload, '$._tls_version')) AS tlsVersion + FROM `httparchive.almanac.requests3` + WHERE + JSON_EXTRACT_SCALAR(payload, '$._tls_version') IS NOT NULL AND + IFNULL(JSON_EXTRACT_SCALAR(payload, '$._protocol'), IFNULL(NULLIF(JSON_EXTRACT_SCALAR(payload, '$._tls_next_proto'), 'unknown'), NULLIF(concat('HTTP/', JSON_EXTRACT_SCALAR(payload, '$.response.httpVersion')), 'HTTP/'))) IS NOT NULL AND + jSON_EXTRACT(payload, '$._socket') IS NOT NULL + GROUP BY client, page, socket +) b +ON (a.client = b.client AND a.page = b.page AND a.socket = b.socket) GROUP BY client, diff --git a/sql/2019/cms/14_01.sql b/sql/2019/cms/14_01.sql index aadb4a47d18..9a7f4700292 100644 --- a/sql/2019/cms/14_01.sql +++ b/sql/2019/cms/14_01.sql @@ -8,10 +8,8 @@ SELECT ROUND(COUNT(0) * 100 / total, 2) AS pct FROM `httparchive.technologies.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'CMS' GROUP BY diff --git a/sql/2019/cms/14_01b.sql b/sql/2019/cms/14_01b.sql index 168845f4a04..2db75afe6ea 100644 --- a/sql/2019/cms/14_01b.sql +++ b/sql/2019/cms/14_01b.sql @@ -7,8 +7,7 @@ SELECT ROUND(COUNT(0) * 100 / total, 2) AS pct FROM `httparchive.technologies.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) WHERE category = 'CMS' diff --git a/sql/2019/cms/14_02.sql b/sql/2019/cms/14_02.sql index ad6f78b407d..05d7c02b877 100644 --- a/sql/2019/cms/14_02.sql +++ b/sql/2019/cms/14_02.sql @@ -15,11 +15,10 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml) -JOIN - (SELECT _TABLE_SUFFIX AS client, url FROM `httparchive.technologies.2019_07_01_*` WHERE app = 'WordPress') -USING - (client, url) + firstHtml +) +JOIN (SELECT _TABLE_SUFFIX AS client, url FROM `httparchive.technologies.2019_07_01_*` WHERE app = 'WordPress') +USING (client, url) GROUP BY client, amp_plugin_version diff --git a/sql/2019/cms/14_03.sql b/sql/2019/cms/14_03.sql index e766ce9daa2..476e8cbe275 100644 --- a/sql/2019/cms/14_03.sql +++ b/sql/2019/cms/14_03.sql @@ -15,11 +15,10 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml) -INNER JOIN - (SELECT _TABLE_SUFFIX AS client, url FROM `httparchive.technologies.2019_07_01_*` WHERE app = 'WordPress') -USING - (client, url) + firstHtml +) +INNER JOIN (SELECT _TABLE_SUFFIX AS client, url FROM `httparchive.technologies.2019_07_01_*` WHERE app = 'WordPress') +USING (client, url) GROUP BY client, amp_plugin_mode diff --git a/sql/2019/cms/14_10.sql b/sql/2019/cms/14_10.sql index 0455d926025..89743e5d726 100644 --- a/sql/2019/cms/14_10.sql +++ b/sql/2019/cms/14_10.sql @@ -22,7 +22,8 @@ JOIN ( GROUP BY client, url, - app) + app +) ON CONCAT(origin, '/') = url AND IF(form_factor.name = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2019/cms/14_11.sql b/sql/2019/cms/14_11.sql index 0e9cb3db393..9b51728f491 100644 --- a/sql/2019/cms/14_11.sql +++ b/sql/2019/cms/14_11.sql @@ -22,7 +22,8 @@ JOIN ( GROUP BY client, url, - app) + app +) ON CONCAT(origin, '/') = url AND IF(form_factor.name = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2019/cms/14_13b.sql b/sql/2019/cms/14_13b.sql index 0186d19ac83..68955dc3f8b 100644 --- a/sql/2019/cms/14_13b.sql +++ b/sql/2019/cms/14_13b.sql @@ -8,8 +8,7 @@ SELECT ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct FROM `httparchive.almanac.summary_requests` -JOIN - (SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` WHERE category = 'CMS') +JOIN (SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` WHERE category = 'CMS') USING (client, page) WHERE date = '2019-07-01' AND diff --git a/sql/2019/cms/14_15.sql b/sql/2019/cms/14_15.sql index 1c408374125..6dbb55ba95e 100644 --- a/sql/2019/cms/14_15.sql +++ b/sql/2019/cms/14_15.sql @@ -15,15 +15,16 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') - USING - (client, page) + WHERE category = 'CMS' + ) + USING (client, page) WHERE date = '2019-07-01' AND NET.HOST(url) IN (SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2019-07-01' AND category != 'hosting') GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/cms/14_15b.sql b/sql/2019/cms/14_15b.sql index 6caceb565c0..a161fdc74b9 100644 --- a/sql/2019/cms/14_15b.sql +++ b/sql/2019/cms/14_15b.sql @@ -18,16 +18,17 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, app FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') - USING - (client, page) + WHERE category = 'CMS' + ) + USING (client, page) WHERE date = '2019-07-01' AND NET.HOST(url) IN (SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2019-07-01') GROUP BY client, app, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY client, diff --git a/sql/2019/cms/14_15c.sql b/sql/2019/cms/14_15c.sql index d40355d9b5d..99ac935d035 100644 --- a/sql/2019/cms/14_15c.sql +++ b/sql/2019/cms/14_15c.sql @@ -17,9 +17,9 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') - USING - (client, page) + WHERE category = 'CMS' + ) + USING (client, page) JOIN `httparchive.almanac.third_parties` tp ON @@ -30,7 +30,8 @@ FROM ( GROUP BY client, category, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/cms/14_15d.sql b/sql/2019/cms/14_15d.sql index 8d2ba58f278..5a6def97118 100644 --- a/sql/2019/cms/14_15d.sql +++ b/sql/2019/cms/14_15d.sql @@ -15,14 +15,15 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') - USING - (client, page) + WHERE category = 'CMS' + ) + USING (client, page) WHERE date = '2019-07-01' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/cms/14_15e.sql b/sql/2019/cms/14_15e.sql index 08fdd90a006..35c42b5c5cb 100644 --- a/sql/2019/cms/14_15e.sql +++ b/sql/2019/cms/14_15e.sql @@ -17,15 +17,16 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') - USING - (client, page) + WHERE category = 'CMS' + ) + USING (client, page) WHERE date = '2019-07-01' GROUP BY client, type, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/cms/14_16.sql b/sql/2019/cms/14_16.sql index 7ae3e269560..234b68aaf53 100644 --- a/sql/2019/cms/14_16.sql +++ b/sql/2019/cms/14_16.sql @@ -14,9 +14,9 @@ FROM JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') -USING - (client, page) + WHERE category = 'CMS' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` tp ON @@ -24,9 +24,9 @@ ON JOIN ( SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE r.date = '2019-07-01' AND tp.date = '2019-07-01' AND diff --git a/sql/2019/cms/14_17.sql b/sql/2019/cms/14_17.sql index b2a44e00e55..9af3f5f7ddf 100644 --- a/sql/2019/cms/14_17.sql +++ b/sql/2019/cms/14_17.sql @@ -14,9 +14,9 @@ FROM JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'CMS') -USING - (client, page) + WHERE category = 'CMS' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` tp ON @@ -24,9 +24,9 @@ ON JOIN ( SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE r.date = '2019-07-01' AND tp.date = '2019-07-01' AND diff --git a/sql/2019/cms/14_18.sql b/sql/2019/cms/14_18.sql index 2f1fd7609df..5f4d76e10c6 100644 --- a/sql/2019/cms/14_18.sql +++ b/sql/2019/cms/14_18.sql @@ -6,8 +6,7 @@ SELECT SUM(COUNT(0)) OVER () AS total, ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (), 2) AS pct FROM - `httparchive.technologies.2019_07_01_mobile`, - (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_mobile`) + `httparchive.technologies.2019_07_01_mobile`, (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_mobile`) JOIN `httparchive.lighthouse.2019_07_01_mobile` USING (url) diff --git a/sql/2019/cms/14_18b.sql b/sql/2019/cms/14_18b.sql index 07b569dce9a..f9992f83f80 100644 --- a/sql/2019/cms/14_18b.sql +++ b/sql/2019/cms/14_18b.sql @@ -12,7 +12,8 @@ JOIN ( url, JSON_EXTRACT_SCALAR(report, '$.audits.is-crawlable.score') AS crawlable FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) USING (url) WHERE category = 'CMS' AND diff --git a/sql/2019/compression/15_01.sql b/sql/2019/compression/15_01.sql index 7e0f7667ab6..12908bfc437 100644 --- a/sql/2019/compression/15_01.sql +++ b/sql/2019/compression/15_01.sql @@ -1,5 +1,5 @@ #standardSQL - # 15_01: What compression formats are being used (gzip, brotli, etc) +# 15_01: What compression formats are being used (gzip, brotli, etc) SELECT _TABLE_SUFFIX AS client, resp_content_encoding, diff --git a/sql/2019/compression/15_04.sql b/sql/2019/compression/15_04.sql index abcfc37b9a5..c2016b0de05 100644 --- a/sql/2019/compression/15_04.sql +++ b/sql/2019/compression/15_04.sql @@ -1,5 +1,5 @@ #standardSQL - # 15_04: Compression by Content type +# 15_04: Compression by Content type SELECT _TABLE_SUFFIX AS client, mimeType, @@ -7,8 +7,8 @@ SELECT SUM(IF(resp_content_encoding = 'gzip', 1, 0)) AS gzip, SUM(IF(resp_content_encoding = 'br', 1, 0)) AS brotli, SUM(IF(resp_content_encoding = 'deflate', 1, 0)) AS deflate, - SUM(IF(resp_content_encoding IN('gzip', 'deflate', 'br'), 0, 1)) AS no_text_compression, - ROUND(SUM(IF(resp_content_encoding IN('gzip', 'deflate', 'br'), 1, 0)) / COUNT(0), 2) AS pct_compressed, + SUM(IF(resp_content_encoding IN ('gzip', 'deflate', 'br'), 0, 1)) AS no_text_compression, + ROUND(SUM(IF(resp_content_encoding IN ('gzip', 'deflate', 'br'), 1, 0)) / COUNT(0), 2) AS pct_compressed, ROUND(SUM(IF(resp_content_encoding = 'br', 1, 0)) / COUNT(0), 2) AS pct_compressed_brotli FROM `httparchive.summary_requests.2019_07_01_*` diff --git a/sql/2019/css/02_01.sql b/sql/2019/css/02_01.sql index 7b192fc6433..2c94717a338 100644 --- a/sql/2019/css/02_01.sql +++ b/sql/2019/css/02_01.sql @@ -36,11 +36,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_02.sql b/sql/2019/css/02_02.sql index 0f4d3accca9..0edf2544100 100644 --- a/sql/2019/css/02_02.sql +++ b/sql/2019/css/02_02.sql @@ -19,11 +19,10 @@ FROM ( feature IN ('CSSAtRuleImport', 'CSSAtRuleSupports') GROUP BY client, - feature) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + feature +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_03.sql b/sql/2019/css/02_03.sql index f4125909bfe..86f63bb9972 100644 --- a/sql/2019/css/02_03.sql +++ b/sql/2019/css/02_03.sql @@ -36,11 +36,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_04.sql b/sql/2019/css/02_04.sql index 0e928a9c8bf..bb9421a41c4 100644 --- a/sql/2019/css/02_04.sql +++ b/sql/2019/css/02_04.sql @@ -36,11 +36,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_05.sql b/sql/2019/css/02_05.sql index fce6f9d8900..97701e04d47 100644 --- a/sql/2019/css/02_05.sql +++ b/sql/2019/css/02_05.sql @@ -46,11 +46,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_06.sql b/sql/2019/css/02_06.sql index b5637d2be72..e6ae084c4c7 100644 --- a/sql/2019/css/02_06.sql +++ b/sql/2019/css/02_06.sql @@ -75,14 +75,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_07.sql b/sql/2019/css/02_07.sql index aef9ab3675e..7a3f460ea93 100644 --- a/sql/2019/css/02_07.sql +++ b/sql/2019/css/02_07.sql @@ -61,10 +61,9 @@ FROM ( date = '2019-07-01' GROUP BY client, - unit) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + unit +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) ORDER BY freq / total DESC diff --git a/sql/2019/css/02_08.sql b/sql/2019/css/02_08.sql index fe8bd3f5ed0..925eea6dd7f 100644 --- a/sql/2019/css/02_08.sql +++ b/sql/2019/css/02_08.sql @@ -49,14 +49,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_08b.sql b/sql/2019/css/02_08b.sql index 3646fb02b01..3535c4d72b8 100644 --- a/sql/2019/css/02_08b.sql +++ b/sql/2019/css/02_08b.sql @@ -49,6 +49,7 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' +) GROUP BY client diff --git a/sql/2019/css/02_10.sql b/sql/2019/css/02_10.sql index d9a68c29591..39ae6d8f67f 100644 --- a/sql/2019/css/02_10.sql +++ b/sql/2019/css/02_10.sql @@ -11,12 +11,15 @@ FROM JOIN ( SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` - GROUP BY _TABLE_SUFFIX) + GROUP BY _TABLE_SUFFIX +) USING (_TABLE_SUFFIX) WHERE - app IN ('animate.css', 'Ant Design', 'Bootstrap', 'Bulma', 'Clarity', 'ZURB Foundation', + app IN ( + 'animate.css', 'Ant Design', 'Bootstrap', 'Bulma', 'Clarity', 'ZURB Foundation', 'Angular Material', 'Material Design Lite', 'Materialize CSS', - 'Milligram', 'Pure CSS', 'Semantic-ui', 'Shapecss', 'tailwindcss', 'UIKit') + 'Milligram', 'Pure CSS', 'Semantic-ui', 'Shapecss', 'tailwindcss', 'UIKit' + ) GROUP BY client, total, diff --git a/sql/2019/css/02_11.sql b/sql/2019/css/02_11.sql index 1a9c330eb31..20861421d2d 100644 --- a/sql/2019/css/02_11.sql +++ b/sql/2019/css/02_11.sql @@ -11,10 +11,8 @@ SELECT COUNT(DISTINCT page) AS freq, total, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT client, page, body FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND type = 'css') -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +FROM (SELECT client, page, body FROM `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND type = 'css') +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) USING (client), # Search for reset util fingerprints in stylesheet comments. UNNEST(REGEXP_EXTRACT_ALL(body, '(?i)(normalize\\.css|pure\\-css|http://meyerweb\\.com/eric/tools/css/reset/)')) AS util diff --git a/sql/2019/css/02_12.sql b/sql/2019/css/02_12.sql index 71bd2972825..8c2e6ea1c74 100644 --- a/sql/2019/css/02_12.sql +++ b/sql/2019/css/02_12.sql @@ -49,10 +49,9 @@ FROM ( date = '2019-07-01' GROUP BY client, - direction) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + direction +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) ORDER BY freq / total DESC diff --git a/sql/2019/css/02_13.sql b/sql/2019/css/02_13.sql index 77708eafe18..59dd53160ad 100644 --- a/sql/2019/css/02_13.sql +++ b/sql/2019/css/02_13.sql @@ -6,10 +6,8 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.blink_features.features` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) WHERE yyyymmdd = '20190701' AND feature = 'CSSFlexibleBox' diff --git a/sql/2019/css/02_14.sql b/sql/2019/css/02_14.sql index 5bc4ee7531f..c732a5c16dd 100644 --- a/sql/2019/css/02_14.sql +++ b/sql/2019/css/02_14.sql @@ -6,10 +6,8 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.blink_features.features` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) WHERE yyyymmdd = '20190701' AND feature = 'CSSGridLayout' diff --git a/sql/2019/css/02_15b.sql b/sql/2019/css/02_15b.sql index cc539869a49..849cff1a901 100644 --- a/sql/2019/css/02_15b.sql +++ b/sql/2019/css/02_15b.sql @@ -45,14 +45,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_16.sql b/sql/2019/css/02_16.sql index 8810f679614..4b6a34d57be 100644 --- a/sql/2019/css/02_16.sql +++ b/sql/2019/css/02_16.sql @@ -45,14 +45,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_17.sql b/sql/2019/css/02_17.sql index 36d41a22ff6..8903636b16b 100644 --- a/sql/2019/css/02_17.sql +++ b/sql/2019/css/02_17.sql @@ -47,14 +47,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_18.sql b/sql/2019/css/02_18.sql index 168e8fbadc6..59db18eb03c 100644 --- a/sql/2019/css/02_18.sql +++ b/sql/2019/css/02_18.sql @@ -51,14 +51,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_20.sql b/sql/2019/css/02_20.sql index 98692f8245d..5a77f5cc402 100644 --- a/sql/2019/css/02_20.sql +++ b/sql/2019/css/02_20.sql @@ -13,7 +13,8 @@ FROM ( FROM `httparchive.summary_requests.2019_07_01_*` WHERE - type = 'css') + type = 'css' +) GROUP BY client, filename diff --git a/sql/2019/css/02_32.sql b/sql/2019/css/02_32.sql index d7293df438f..d6029f26c68 100644 --- a/sql/2019/css/02_32.sql +++ b/sql/2019/css/02_32.sql @@ -39,6 +39,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_33.sql b/sql/2019/css/02_33.sql index d9de30ec12f..5a2ec77aa5a 100644 --- a/sql/2019/css/02_33.sql +++ b/sql/2019/css/02_33.sql @@ -47,9 +47,11 @@ FROM ( GROUP BY client, page, - value) + value + ) GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_34.sql b/sql/2019/css/02_34.sql index 31004c7fc9f..496609cf662 100644 --- a/sql/2019/css/02_34.sql +++ b/sql/2019/css/02_34.sql @@ -27,6 +27,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_35.sql b/sql/2019/css/02_35.sql index e3b832c7480..67584af5f6e 100644 --- a/sql/2019/css/02_35.sql +++ b/sql/2019/css/02_35.sql @@ -47,9 +47,11 @@ FROM ( GROUP BY client, page, - value) + value + ) GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_36.sql b/sql/2019/css/02_36.sql index 1f4e38cdc5b..fffec864784 100644 --- a/sql/2019/css/02_36.sql +++ b/sql/2019/css/02_36.sql @@ -39,6 +39,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_37.sql b/sql/2019/css/02_37.sql index 640eacfa6f3..216b69bb26d 100644 --- a/sql/2019/css/02_37.sql +++ b/sql/2019/css/02_37.sql @@ -39,6 +39,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_39.sql b/sql/2019/css/02_39.sql index 2eb84f53644..ab3a7dcc702 100644 --- a/sql/2019/css/02_39.sql +++ b/sql/2019/css/02_39.sql @@ -34,6 +34,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_40.sql b/sql/2019/css/02_40.sql index c7777e7461f..ac14d086d98 100644 --- a/sql/2019/css/02_40.sql +++ b/sql/2019/css/02_40.sql @@ -39,6 +39,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_41.sql b/sql/2019/css/02_41.sql index acba82eb530..cf627303a41 100644 --- a/sql/2019/css/02_41.sql +++ b/sql/2019/css/02_41.sql @@ -37,6 +37,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_42.sql b/sql/2019/css/02_42.sql index 6abb1524960..c15cda1b61b 100644 --- a/sql/2019/css/02_42.sql +++ b/sql/2019/css/02_42.sql @@ -34,6 +34,7 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/css/02_43.sql b/sql/2019/css/02_43.sql index 43f56390471..0376c060e8a 100644 --- a/sql/2019/css/02_43.sql +++ b/sql/2019/css/02_43.sql @@ -46,11 +46,11 @@ SELECT FROM ( SELECT client, - COUNTIF(type.`=`) AS equals, -- noqa: L057 - COUNTIF(type.`*=`) AS star_equals, -- noqa: L057 - COUNTIF(type.`^=`) AS caret_equals, -- noqa: L057 - COUNTIF(type.`$=`) AS dollar_equals, -- noqa: L057 - COUNTIF(type.`~=`) AS tilde_equals -- noqa: L057 + COUNTIF(type.`=`) AS equals, -- noqa: RF05 + COUNTIF(type.`*=`) AS star_equals, -- noqa: RF05 + COUNTIF(type.`^=`) AS caret_equals, -- noqa: RF05 + COUNTIF(type.`$=`) AS dollar_equals, -- noqa: RF05 + COUNTIF(type.`~=`) AS tilde_equals -- noqa: RF05 FROM ( SELECT client, @@ -59,14 +59,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_44.sql b/sql/2019/css/02_44.sql index 9a58b406f36..232ec9645e6 100644 --- a/sql/2019/css/02_44.sql +++ b/sql/2019/css/02_44.sql @@ -44,11 +44,11 @@ SELECT FROM ( SELECT client, - COUNTIF(type.`=`) AS equals, -- noqa: L057 - COUNTIF(type.`*=`) AS star_equals, -- noqa: L057 - COUNTIF(type.`^=`) AS caret_equals, -- noqa: L057 - COUNTIF(type.`$=`) AS dollar_equals, -- noqa: L057 - COUNTIF(type.`~=`) AS tilde_equals -- noqa: L057 + COUNTIF(type.`=`) AS equals, -- noqa: RF05 + COUNTIF(type.`*=`) AS star_equals, -- noqa: RF05 + COUNTIF(type.`^=`) AS caret_equals, -- noqa: RF05 + COUNTIF(type.`$=`) AS dollar_equals, -- noqa: RF05 + COUNTIF(type.`~=`) AS tilde_equals -- noqa: RF05 FROM ( SELECT client, @@ -57,14 +57,14 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/css/02_45.sql b/sql/2019/css/02_45.sql index 70680953c6b..709291c37c8 100644 --- a/sql/2019/css/02_45.sql +++ b/sql/2019/css/02_45.sql @@ -16,6 +16,7 @@ FROM ( UNNEST(REGEXP_EXTRACT_ALL(body, '(?i)class=[\'"]([^\'"]+)')) AS value WHERE date = '2019-07-01' AND - firstHtml) + firstHtml +) GROUP BY client diff --git a/sql/2019/ecommerce/13_02.sql b/sql/2019/ecommerce/13_02.sql index e0a3f4e6d64..3059b1a2cc9 100644 --- a/sql/2019/ecommerce/13_02.sql +++ b/sql/2019/ecommerce/13_02.sql @@ -8,8 +8,7 @@ SELECT ROUND(COUNT(0) * 100 / total, 2) AS pct FROM `httparchive.technologies.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) WHERE category = 'Ecommerce' diff --git a/sql/2019/ecommerce/13_02b.sql b/sql/2019/ecommerce/13_02b.sql index a261e6de13a..7cea5242f1f 100644 --- a/sql/2019/ecommerce/13_02b.sql +++ b/sql/2019/ecommerce/13_02b.sql @@ -7,8 +7,7 @@ SELECT ROUND(COUNT(0) * 100 / total, 2) AS pct FROM `httparchive.technologies.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) WHERE category = 'Ecommerce' diff --git a/sql/2019/ecommerce/13_06b.sql b/sql/2019/ecommerce/13_06b.sql index 5ae68a784a7..8608e28a3e1 100644 --- a/sql/2019/ecommerce/13_06b.sql +++ b/sql/2019/ecommerce/13_06b.sql @@ -8,8 +8,7 @@ SELECT ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct FROM `httparchive.almanac.summary_requests` -JOIN - (SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` WHERE category = 'Ecommerce') +JOIN (SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` WHERE category = 'Ecommerce') USING (client, page) WHERE date = '2019-07-01' AND diff --git a/sql/2019/ecommerce/13_08.sql b/sql/2019/ecommerce/13_08.sql index c2d82f4c0be..32aee054691 100644 --- a/sql/2019/ecommerce/13_08.sql +++ b/sql/2019/ecommerce/13_08.sql @@ -22,7 +22,8 @@ JOIN ( GROUP BY client, url, - app) + app +) ON CONCAT(origin, '/') = url AND IF(form_factor.name = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2019/ecommerce/13_09.sql b/sql/2019/ecommerce/13_09.sql index ac4be191b11..6c818373765 100644 --- a/sql/2019/ecommerce/13_09.sql +++ b/sql/2019/ecommerce/13_09.sql @@ -15,15 +15,16 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2019-07-01' AND NET.HOST(url) IN (SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2019-07-01' AND category != 'hosting') GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/ecommerce/13_09b.sql b/sql/2019/ecommerce/13_09b.sql index 42a8fe6c532..d409e22b371 100644 --- a/sql/2019/ecommerce/13_09b.sql +++ b/sql/2019/ecommerce/13_09b.sql @@ -18,16 +18,17 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, app FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2019-07-01' AND NET.HOST(url) IN (SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2019-07-01') GROUP BY client, app, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY client, diff --git a/sql/2019/ecommerce/13_09c.sql b/sql/2019/ecommerce/13_09c.sql index afebfafd5da..bc8b6d2d649 100644 --- a/sql/2019/ecommerce/13_09c.sql +++ b/sql/2019/ecommerce/13_09c.sql @@ -17,9 +17,9 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) JOIN `httparchive.almanac.third_parties` tp ON @@ -30,7 +30,8 @@ FROM ( GROUP BY client, category, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/ecommerce/13_09d.sql b/sql/2019/ecommerce/13_09d.sql index 0426beb1f6b..e94ec1aadc1 100644 --- a/sql/2019/ecommerce/13_09d.sql +++ b/sql/2019/ecommerce/13_09d.sql @@ -15,14 +15,15 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2019-07-01' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/ecommerce/13_09e.sql b/sql/2019/ecommerce/13_09e.sql index f792511635b..cf0d0469178 100644 --- a/sql/2019/ecommerce/13_09e.sql +++ b/sql/2019/ecommerce/13_09e.sql @@ -17,15 +17,16 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2019-07-01' GROUP BY client, type, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/ecommerce/13_10.sql b/sql/2019/ecommerce/13_10.sql index 7cc5105724f..2679048cdb7 100644 --- a/sql/2019/ecommerce/13_10.sql +++ b/sql/2019/ecommerce/13_10.sql @@ -14,9 +14,9 @@ FROM JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') -USING - (client, page) + WHERE category = 'Ecommerce' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` tp ON @@ -24,9 +24,9 @@ ON JOIN ( SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE sr.date = '2019-07-01' AND sr.date = tp.date AND diff --git a/sql/2019/ecommerce/13_11.sql b/sql/2019/ecommerce/13_11.sql index 3c2ed9e26a6..64fe51475d4 100644 --- a/sql/2019/ecommerce/13_11.sql +++ b/sql/2019/ecommerce/13_11.sql @@ -14,9 +14,9 @@ FROM JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2019_07_01_*` - WHERE category = 'Ecommerce') -USING - (client, page) + WHERE category = 'Ecommerce' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` tp ON @@ -24,9 +24,9 @@ ON JOIN ( SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE sr.date = '2019-07-01' AND tp.date = sr.date AND diff --git a/sql/2019/ecommerce/13_12.sql b/sql/2019/ecommerce/13_12.sql index 7147c0bc088..271e432e538 100644 --- a/sql/2019/ecommerce/13_12.sql +++ b/sql/2019/ecommerce/13_12.sql @@ -6,8 +6,7 @@ SELECT SUM(COUNT(0)) OVER () AS total, ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (), 2) AS pct FROM - `httparchive.technologies.2019_07_01_mobile`, - (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_mobile`) + `httparchive.technologies.2019_07_01_mobile`, (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_mobile`) JOIN `httparchive.lighthouse.2019_07_01_mobile` USING (url) diff --git a/sql/2019/fonts/06_02.sql b/sql/2019/fonts/06_02.sql index 33035cb3122..4542dc8b6ce 100644 --- a/sql/2019/fonts/06_02.sql +++ b/sql/2019/fonts/06_02.sql @@ -18,7 +18,8 @@ FROM ( type = 'font' AND NET.HOST(url) != NET.HOST(page) GROUP BY - client), + client +), UNNEST(font_host) AS font_host WHERE font_host.count > 1000 diff --git a/sql/2019/fonts/06_04.sql b/sql/2019/fonts/06_04.sql index 464f6a34c51..02cadf9120c 100644 --- a/sql/2019/fonts/06_04.sql +++ b/sql/2019/fonts/06_04.sql @@ -9,7 +9,8 @@ FROM ( SELECT JSON_EXTRACT(report, '$.audits.font-display.score') AS score FROM - `httparchive.lighthouse.2019_07_01_*`) + `httparchive.lighthouse.2019_07_01_*` +) WHERE score IS NOT NULL GROUP BY diff --git a/sql/2019/fonts/06_09b.sql b/sql/2019/fonts/06_09b.sql index ddf4bb89ef0..e690ab1429c 100644 --- a/sql/2019/fonts/06_09b.sql +++ b/sql/2019/fonts/06_09b.sql @@ -39,7 +39,8 @@ FROM ( date = '2019-07-01' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/fonts/06_10.sql b/sql/2019/fonts/06_10.sql index c29b98975b5..401947f9787 100644 --- a/sql/2019/fonts/06_10.sql +++ b/sql/2019/fonts/06_10.sql @@ -68,11 +68,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) GROUP BY client, total diff --git a/sql/2019/fonts/06_14.sql b/sql/2019/fonts/06_14.sql index 69c4858b4d9..74081b59190 100644 --- a/sql/2019/fonts/06_14.sql +++ b/sql/2019/fonts/06_14.sql @@ -43,11 +43,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) GROUP BY client, total diff --git a/sql/2019/fonts/06_15.sql b/sql/2019/fonts/06_15.sql index 098ea655ddc..46c6af9025e 100644 --- a/sql/2019/fonts/06_15.sql +++ b/sql/2019/fonts/06_15.sql @@ -16,16 +16,11 @@ SELECT COUNT(DISTINCT page) AS freq, total, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT _TABLE_SUFFIX AS client, url AS page, payload FROM `httparchive.pages.2019_07_01_*`) -JOIN - (SELECT client, page, url FROM `httparchive.almanac.requests` WHERE date = '2019-07-01' AND type = 'font') -USING - (client, page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client), +FROM (SELECT _TABLE_SUFFIX AS client, url AS page, payload FROM `httparchive.pages.2019_07_01_*`) +JOIN (SELECT client, page, url FROM `httparchive.almanac.requests` WHERE date = '2019-07-01' AND type = 'font') +USING (client, page) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client), UNNEST(getPreconnectUrls(payload)) AS preconnect_url WHERE # hosts match diff --git a/sql/2019/fonts/06_16.sql b/sql/2019/fonts/06_16.sql index 6fd38eee038..d60b38dfcc7 100644 --- a/sql/2019/fonts/06_16.sql +++ b/sql/2019/fonts/06_16.sql @@ -43,11 +43,10 @@ FROM ( date = '2019-07-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) GROUP BY client, total diff --git a/sql/2019/fonts/06_18.sql b/sql/2019/fonts/06_18.sql index 81a4488fa1b..1c3b7c694bc 100644 --- a/sql/2019/fonts/06_18.sql +++ b/sql/2019/fonts/06_18.sql @@ -7,10 +7,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.requests` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND type = 'font' AND diff --git a/sql/2019/fonts/06_21.sql b/sql/2019/fonts/06_21.sql index da9a44a8edd..717c45189be 100644 --- a/sql/2019/fonts/06_21.sql +++ b/sql/2019/fonts/06_21.sql @@ -36,18 +36,17 @@ FROM ( client, page HAVING - SUM(ARRAY_LENGTH(usesFontVariationSettings(css))) > 0) -JOIN - (SELECT client, page - FROM `httparchive.almanac.requests` - WHERE date = '2019-07-01' AND type = 'font' AND JSON_EXTRACT_SCALAR(payload, '$._font_details.table_sizes.gvar') IS NOT NULL - GROUP BY client, page) -USING - (client, page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) + SUM(ARRAY_LENGTH(usesFontVariationSettings(css))) > 0 +) +JOIN ( + SELECT client, page + FROM `httparchive.almanac.requests` + WHERE date = '2019-07-01' AND type = 'font' AND JSON_EXTRACT_SCALAR(payload, '$._font_details.table_sizes.gvar') IS NOT NULL + GROUP BY client, page +) +USING (client, page) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2019/fonts/06_24.sql b/sql/2019/fonts/06_24.sql index 75bcad9d458..5cecc2c0c22 100644 --- a/sql/2019/fonts/06_24.sql +++ b/sql/2019/fonts/06_24.sql @@ -37,10 +37,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.parsed_css` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' GROUP BY diff --git a/sql/2019/fonts/06_25.sql b/sql/2019/fonts/06_25.sql index 98920d1ca3d..bbad7c5ec24 100644 --- a/sql/2019/fonts/06_25.sql +++ b/sql/2019/fonts/06_25.sql @@ -23,10 +23,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.parsed_css` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND ARRAY_LENGTH(checksSupports(css)) > 0 diff --git a/sql/2019/fonts/06_26.sql b/sql/2019/fonts/06_26.sql index 2345c07f33d..54220c55889 100644 --- a/sql/2019/fonts/06_26.sql +++ b/sql/2019/fonts/06_26.sql @@ -26,10 +26,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.parsed_css` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY client) +USING (client) WHERE date = '2019-07-01' AND ARRAY_LENGTH(usesFontStretch(css)) > 0 diff --git a/sql/2019/fonts/06_32.sql b/sql/2019/fonts/06_32.sql index aaef09bbf08..9d7f2207968 100644 --- a/sql/2019/fonts/06_32.sql +++ b/sql/2019/fonts/06_32.sql @@ -18,5 +18,6 @@ FROM ( client, host ORDER BY - freq / total DESC) + freq / total DESC +) LIMIT 100 diff --git a/sql/2019/fonts/06_35.sql b/sql/2019/fonts/06_35.sql index 01d685800e2..18ae478d66b 100644 --- a/sql/2019/fonts/06_35.sql +++ b/sql/2019/fonts/06_35.sql @@ -6,9 +6,7 @@ SELECT url FROM `httparchive.summary_pages.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, MAX(reqFont) AS reqFont FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX, reqFont) +JOIN (SELECT _TABLE_SUFFIX, MAX(reqFont) AS reqFont FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (_TABLE_SUFFIX, reqFont) ORDER BY fonts DESC diff --git a/sql/2019/fonts/06_42.sql b/sql/2019/fonts/06_42.sql index 9c1fb0f193a..a0c51f585e0 100644 --- a/sql/2019/fonts/06_42.sql +++ b/sql/2019/fonts/06_42.sql @@ -7,10 +7,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.requests` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND type = 'font' AND diff --git a/sql/2019/fonts/06_43.sql b/sql/2019/fonts/06_43.sql index 3104c254cd2..bd92ad2ade2 100644 --- a/sql/2019/fonts/06_43.sql +++ b/sql/2019/fonts/06_43.sql @@ -10,10 +10,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct_pages FROM `httparchive.almanac.requests` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client), +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client), # Color fonts have any of sbix, cbdt, svg or colr tables. UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)(sbix|cbdt|svg|colr)')) AS format WHERE diff --git a/sql/2019/fonts/06_44.sql b/sql/2019/fonts/06_44.sql index 8d851b580a9..beeab307be9 100644 --- a/sql/2019/fonts/06_44.sql +++ b/sql/2019/fonts/06_44.sql @@ -37,10 +37,8 @@ SELECT FROM `httparchive.almanac.parsed_css`, UNNEST(getFontDisplay(css)) AS font_display -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' GROUP BY diff --git a/sql/2019/http/20_07.sql b/sql/2019/http/20_07.sql index 22affaccb00..ebfc939babe 100644 --- a/sql/2019/http/20_07.sql +++ b/sql/2019/http/20_07.sql @@ -6,20 +6,19 @@ SELECT IF(prioritization_status IS NOT NULL, prioritization_status, 'Unknown') AS prioritizes_correctly, COUNT(0) AS num_pages, ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct -FROM - ( - SELECT - date, - client, - url, - JSON_EXTRACT_SCALAR(payload, '$._cdn_provider') AS cdn - FROM - `httparchive.almanac.requests` - WHERE - date = '2019-07-01' AND - JSON_EXTRACT_SCALAR(payload, '$._protocol') = 'HTTP/2' AND - firstHtml - ) AS pages +FROM ( + SELECT + date, + client, + url, + JSON_EXTRACT_SCALAR(payload, '$._cdn_provider') AS cdn + FROM + `httparchive.almanac.requests` + WHERE + date = '2019-07-01' AND + JSON_EXTRACT_SCALAR(payload, '$._protocol') = 'HTTP/2' AND + firstHtml +) AS pages LEFT JOIN `httparchive.almanac.h2_prioritization_cdns` AS h2_pri ON pages.date = h2_pri.date AND pages.cdn = h2_pri.cdn diff --git a/sql/2019/javascript/01_01c.sql b/sql/2019/javascript/01_01c.sql index b5f3ea450a3..293f4dfe6b3 100644 --- a/sql/2019/javascript/01_01c.sql +++ b/sql/2019/javascript/01_01c.sql @@ -16,6 +16,8 @@ FROM ( FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY - bin)) + bin + ) +) ORDER BY bin diff --git a/sql/2019/javascript/01_01d.sql b/sql/2019/javascript/01_01d.sql index b4325ebc5df..8fe5c98eee2 100644 --- a/sql/2019/javascript/01_01d.sql +++ b/sql/2019/javascript/01_01d.sql @@ -19,7 +19,9 @@ FROM ( `httparchive.summary_pages.2019_07_01_*` GROUP BY bin, - client)) + client + ) +) ORDER BY bin, client diff --git a/sql/2019/javascript/01_02a.sql b/sql/2019/javascript/01_02a.sql index 0f28b57a4b8..b1a9f2850d3 100644 --- a/sql/2019/javascript/01_02a.sql +++ b/sql/2019/javascript/01_02a.sql @@ -18,11 +18,13 @@ FROM ( FROM `httparchive.almanac.summary_requests` WHERE - date = '2019-07-01') + date = '2019-07-01' + ) WHERE type = 'script' GROUP BY - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile diff --git a/sql/2019/javascript/01_02b.sql b/sql/2019/javascript/01_02b.sql index 95ace18bc88..7d0437174d2 100644 --- a/sql/2019/javascript/01_02b.sql +++ b/sql/2019/javascript/01_02b.sql @@ -17,7 +17,8 @@ FROM ( type = 'script' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/javascript/01_04.sql b/sql/2019/javascript/01_04.sql index 5aad8756369..ba915f2a6e4 100644 --- a/sql/2019/javascript/01_04.sql +++ b/sql/2019/javascript/01_04.sql @@ -17,7 +17,8 @@ FROM ( type = 'script' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/javascript/01_07.sql b/sql/2019/javascript/01_07.sql index 25071393280..ce556247cdb 100644 --- a/sql/2019/javascript/01_07.sql +++ b/sql/2019/javascript/01_07.sql @@ -18,6 +18,7 @@ SELECT ROUND(APPROX_QUANTILES(v8_time, 1000)[OFFSET(750)], 3) AS p75, ROUND(APPROX_QUANTILES(v8_time, 1000)[OFFSET(900)], 3) AS p90 FROM ( - SELECT _TABLE_SUFFIX AS client, totalMainThreadTime(payload) AS v8_time FROM `httparchive.pages.2019_07_01_*`) + SELECT _TABLE_SUFFIX AS client, totalMainThreadTime(payload) AS v8_time FROM `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/javascript/01_08.sql b/sql/2019/javascript/01_08.sql index 99b82d624d7..8585a03a621 100644 --- a/sql/2019/javascript/01_08.sql +++ b/sql/2019/javascript/01_08.sql @@ -6,8 +6,7 @@ SELECT COUNT(DISTINCT url) AS freq, total, ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct -FROM - (SELECT _TABLE_SUFFIX, COUNT(DISTINCT url) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +FROM (SELECT _TABLE_SUFFIX, COUNT(DISTINCT url) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) JOIN `httparchive.technologies.2019_07_01_*` USING (_TABLE_SUFFIX) diff --git a/sql/2019/javascript/01_09.sql b/sql/2019/javascript/01_09.sql index 813e6068aec..4d0e2fc9be5 100644 --- a/sql/2019/javascript/01_09.sql +++ b/sql/2019/javascript/01_09.sql @@ -15,23 +15,22 @@ FROM ( _TABLE_SUFFIX AS client, COUNT(DISTINCT url) AS freq_2018, COUNT(DISTINCT url) / total AS pct_2018 - FROM - (SELECT _TABLE_SUFFIX, COUNT(url) AS total FROM `httparchive.summary_pages.2018_07_01_*` GROUP BY _TABLE_SUFFIX) + FROM (SELECT _TABLE_SUFFIX, COUNT(url) AS total FROM `httparchive.summary_pages.2018_07_01_*` GROUP BY _TABLE_SUFFIX) JOIN `httparchive.technologies.2018_07_01_*` USING (_TABLE_SUFFIX) GROUP BY app, client, - total) + total +) JOIN ( SELECT app, _TABLE_SUFFIX AS client, COUNT(DISTINCT url) AS freq_2019, COUNT(DISTINCT url) / total AS pct_2019 - FROM - (SELECT _TABLE_SUFFIX, COUNT(url) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) + FROM (SELECT _TABLE_SUFFIX, COUNT(url) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) JOIN `httparchive.technologies.2019_07_01_*` USING (_TABLE_SUFFIX) @@ -40,7 +39,8 @@ JOIN ( GROUP BY app, client, - total) + total +) USING (app, client) WHERE freq_2019 > 10 diff --git a/sql/2019/javascript/01_10.sql b/sql/2019/javascript/01_10.sql index 507376be2d1..111c5d44523 100644 --- a/sql/2019/javascript/01_10.sql +++ b/sql/2019/javascript/01_10.sql @@ -6,8 +6,7 @@ SELECT COUNT(0) AS freq, total, ROUND(COUNT(0) * 100 / total, 2) AS pct -FROM - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +FROM (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) JOIN `httparchive.technologies.2019_07_01_*` USING (_TABLE_SUFFIX) diff --git a/sql/2019/javascript/01_18.sql b/sql/2019/javascript/01_18.sql index 50cb835dc53..f0d17cfbd47 100644 --- a/sql/2019/javascript/01_18.sql +++ b/sql/2019/javascript/01_18.sql @@ -31,7 +31,8 @@ FROM ( type = 'script' GROUP BY client, - page) + page +) GROUP BY client ORDER BY diff --git a/sql/2019/javascript/01_24.sql b/sql/2019/javascript/01_24.sql index e5720cc9da0..518562ef758 100644 --- a/sql/2019/javascript/01_24.sql +++ b/sql/2019/javascript/01_24.sql @@ -7,10 +7,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.summary_response_bodies` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND type = 'script' AND diff --git a/sql/2019/javascript/01_25.sql b/sql/2019/javascript/01_25.sql index c01fea71813..7fa44735ed1 100644 --- a/sql/2019/javascript/01_25.sql +++ b/sql/2019/javascript/01_25.sql @@ -7,10 +7,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.summary_response_bodies` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND type = 'script' AND diff --git a/sql/2019/markup/03_02a.sql b/sql/2019/markup/03_02a.sql index b0a59f16fbb..6769417d9db 100644 --- a/sql/2019/markup/03_02a.sql +++ b/sql/2019/markup/03_02a.sql @@ -20,8 +20,7 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.pages.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX), UNNEST(getElements(payload)) AS element GROUP BY diff --git a/sql/2019/markup/03_03c.sql b/sql/2019/markup/03_03c.sql index 49c132a6898..0965ac3eed6 100644 --- a/sql/2019/markup/03_03c.sql +++ b/sql/2019/markup/03_03c.sql @@ -20,8 +20,7 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct FROM `httparchive.pages.2019_07_01_*` -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX), UNNEST(getCustomElements(payload)) AS element GROUP BY diff --git a/sql/2019/markup/03_06.sql b/sql/2019/markup/03_06.sql index 77cc245c12f..cdb7fd05a8a 100644 --- a/sql/2019/markup/03_06.sql +++ b/sql/2019/markup/03_06.sql @@ -26,7 +26,8 @@ FROM ( url, countElements(payload) AS elements FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/media/04_01a.sql b/sql/2019/media/04_01a.sql index d7456a9c212..3a99615317b 100644 --- a/sql/2019/media/04_01a.sql +++ b/sql/2019/media/04_01a.sql @@ -44,30 +44,28 @@ SELECT APPROX_QUANTILES(ROUND(100 * resourceBytes / IFNULL(NULLIF(pageBytes, 0), 0.1), 2), 1000)[OFFSET(750)] AS pct_p75, APPROX_QUANTILES(ROUND(100 * resourceBytes / IFNULL(NULLIF(pageBytes, 0), 0.1), 2), 1000)[OFFSET(990)] AS pct_p99, APPROX_QUANTILES(ROUND(100 * resourceBytes / IFNULL(NULLIF(pageBytes, 0), 0.1), 2), 1000)[OFFSET(900)] AS pct_p90 -FROM - ( +FROM ( + SELECT + type, + pageBytes, + IF(type = 'image', totalImageCount, totalImageCount + totalVideoCount) AS resourceCount, + IF(type = 'image', totalImageBytes, totalImageBytes + totalVideoBytes) AS resourceBytes + FROM ( SELECT - type, - pageBytes, - IF(type = 'image', totalImageCount, totalImageCount + totalVideoCount) AS resourceCount, - IF(type = 'image', totalImageBytes, totalImageBytes + totalVideoBytes) AS resourceBytes + url, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS pageBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].requestCount') AS INT64) AS totalImageCount, + getVideoBytes(report) AS totalVideoBytes, + getVideoCount(report) AS totalVideoCount FROM - ( - SELECT - url, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS pageBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].requestCount') AS INT64) AS totalImageCount, - getVideoBytes(report) AS totalVideoBytes, - getVideoCount(report) AS totalVideoCount - FROM - `httparchive.lighthouse.2019_07_01_mobile` - ) - # we to make this a little easier to read we unnest with just image and image+video - # it's important to remember that each of the results is mutually exclusive and should not imply addition - # that is, you cannot assume that image + video at the p75 and image at p75 are the same webpages being collected - # if we wanted to do more advanced percentile based on page size, we would need a different statistics engine (eg: R) - CROSS JOIN UNNEST(['image', 'image+video']) AS type + `httparchive.lighthouse.2019_07_01_mobile` ) +# we to make this a little easier to read we unnest with just image and image+video +# it's important to remember that each of the results is mutually exclusive and should not imply addition +# that is, you cannot assume that image + video at the p75 and image at p75 are the same webpages being collected + # if we wanted to do more advanced percentile based on page size, we would need a different statistics engine (eg: R) + CROSS JOIN UNNEST(['image', 'image+video']) AS type +) GROUP BY type diff --git a/sql/2019/media/04_01b.sql b/sql/2019/media/04_01b.sql index 37aae8c5793..43e66a71995 100644 --- a/sql/2019/media/04_01b.sql +++ b/sql/2019/media/04_01b.sql @@ -18,18 +18,21 @@ SELECT APPROX_QUANTILES(ROUND(100 * offScreenImagesBytes / (totalImageBytes + 0.1), 2), 1000)[OFFSET(500)] AS pctImageBytes_p50, APPROX_QUANTILES(ROUND(100 * offScreenImagesBytes / (totalImageBytes + 0.1), 2), 1000)[OFFSET(750)] AS pctImageBytes_p75, APPROX_QUANTILES(ROUND(100 * offScreenImagesBytes / (totalImageBytes + 0.1), 2), 1000)[OFFSET(900)] AS pctImageBytes_p90 -FROM - ( - SELECT - url, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS totalBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.offscreen-images.details.overallSavingsBytes') AS INT64) AS offScreenImagesBytes, - IF(REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.offscreen-images.details.items'), ','), - ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.offscreen-images.details.items'), ',')), 0) AS offScreenImagesCount, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-optimized-images.details.overallSavingsBytes') AS INT64) AS unoptimizedImagesBytes, - IF(REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ','), - ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ',')), 0) AS unoptimizedImagesCount - FROM - `httparchive.lighthouse.2019_07_01_mobile` - ) +FROM ( + SELECT + url, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS totalBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.offscreen-images.details.overallSavingsBytes') AS INT64) AS offScreenImagesBytes, + IF( + REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.offscreen-images.details.items'), ','), + ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.offscreen-images.details.items'), ',')), 0 + ) AS offScreenImagesCount, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-optimized-images.details.overallSavingsBytes') AS INT64) AS unoptimizedImagesBytes, + IF( + REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ','), + ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ',')), 0 + ) AS unoptimizedImagesCount + FROM + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/media/04_01c.sql b/sql/2019/media/04_01c.sql index 143d7a6a398..2ecf4b8f4cf 100644 --- a/sql/2019/media/04_01c.sql +++ b/sql/2019/media/04_01c.sql @@ -23,16 +23,17 @@ SELECT APPROX_QUANTILES(unoptimizedImagesSavingsMs, 1000)[OFFSET(500)] AS ms_p50, APPROX_QUANTILES(unoptimizedImagesSavingsMs, 1000)[OFFSET(750)] AS ms_p75, APPROX_QUANTILES(unoptimizedImagesSavingsMs, 1000)[OFFSET(900)] AS ms_p90 -FROM - ( - SELECT - url, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS totalBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-optimized-images.details.overallSavingsMs') AS INT64) AS unoptimizedImagesSavingsMs, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-optimized-images.details.overallSavingsBytes') AS INT64) AS unoptimizedImagesBytes, - IF(REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ','), - ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ',')), 0) AS unoptimizedImagesCount - FROM - `httparchive.lighthouse.2019_07_01_mobile` - ) +FROM ( + SELECT + url, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS totalBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-optimized-images.details.overallSavingsMs') AS INT64) AS unoptimizedImagesSavingsMs, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-optimized-images.details.overallSavingsBytes') AS INT64) AS unoptimizedImagesBytes, + IF( + REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ','), + ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.uses-optimized-images.details.items'), ',')), 0 + ) AS unoptimizedImagesCount + FROM + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/media/04_01d.sql b/sql/2019/media/04_01d.sql index 7222d16eab1..d3ab7519cc5 100644 --- a/sql/2019/media/04_01d.sql +++ b/sql/2019/media/04_01d.sql @@ -18,15 +18,16 @@ SELECT APPROX_QUANTILES(ROUND(100 * respImagesBytes / (totalImageBytes + 0.1), 2), 1000)[OFFSET(500)] AS pctImageBytes_p50, APPROX_QUANTILES(ROUND(100 * respImagesBytes / (totalImageBytes + 0.1), 2), 1000)[OFFSET(750)] AS pctImageBytes_p75, APPROX_QUANTILES(ROUND(100 * respImagesBytes / (totalImageBytes + 0.1), 2), 1000)[OFFSET(900)] AS pctImageBytes_p90 -FROM - ( - SELECT - url, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS totalBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, - CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-responsive-images.details.overallSavingsBytes') AS INT64) AS respImagesBytes, - IF(REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.uses-responsive-images.details.items'), ','), - ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.uses-responsive-images.details.items'), ',')), 0) AS respImagesCount - FROM - `httparchive.lighthouse.2019_07_01_mobile` - ) +FROM ( + SELECT + url, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[0].size') AS INT64) AS totalBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.resource-summary.details.items[1].size') AS INT64) AS totalImageBytes, + CAST(JSON_EXTRACT_SCALAR(report, '$.audits.uses-responsive-images.details.overallSavingsBytes') AS INT64) AS respImagesBytes, + IF( + REGEX_CONTAINS(JSON_EXTRACT(report, '$.audits.uses-responsive-images.details.items'), ','), + ARRAY_LENGTH(split(JSON_EXTRACT(report, '$.audits.uses-responsive-images.details.items'), ',')), 0 + ) AS respImagesCount + FROM + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/media/04_03.sql b/sql/2019/media/04_03.sql index 25b746abd47..3018014eeb0 100644 --- a/sql/2019/media/04_03.sql +++ b/sql/2019/media/04_03.sql @@ -28,11 +28,9 @@ WHERE status = 200 AND # we are trying to catch images. WPO populates the format for media but it uses a file extension guess. - #So we exclude mimetypes that aren't image or where the format couldn't be guessed by WPO - (format != '' OR mimetype LIKE 'image%') AND + #So we exclude mimetypes that aren't image or where the format couldn't be guessed by WPO (format != '' OR mimetype LIKE 'image%') AND - # many image/gifs are really beacons with 1x1 pixel, but svgs can get caught in the mix - (respSize > 1500 OR REGEXP_CONTAINS(mimetype, r'svg')) AND + # many image/gifs are really beacons with 1x1 pixel, but svgs can get caught in the mix (respSize > 1500 OR REGEXP_CONTAINS(mimetype, r'svg')) AND # strip favicon requests format != 'ico' AND diff --git a/sql/2019/media/04_03b.sql b/sql/2019/media/04_03b.sql index 2de4e9dd2cf..2fb5211b9bd 100644 --- a/sql/2019/media/04_03b.sql +++ b/sql/2019/media/04_03b.sql @@ -21,51 +21,47 @@ SELECT APPROX_QUANTILES(bytes, 1000)[OFFSET(750)] AS bytes_p75, APPROX_QUANTILES(bytes, 1000)[OFFSET(900)] AS bytes_p90, APPROX_QUANTILES(bytes, 1000)[OFFSET(990)] AS bytes_p99 -FROM - ( +FROM ( + SELECT + client, + page, + webImageType, + SUM(IF(LOWER(imageType) = LOWER(webImageType), hits, 0)) AS hits, + SUM(IF(LOWER(webImageType) = LOWER(imageType), bytes, 0)) AS bytes + FROM ( SELECT client, page, - webImageType, - SUM(IF(LOWER(imageType) = LOWER(webImageType), hits, 0)) AS hits, - SUM(IF(LOWER(webImageType) = LOWER(imageType), bytes, 0)) AS bytes - FROM - ( - SELECT - client, - page, - NULLIF(IF(REGEX_CONTAINS(mimetype, r'(?i)^application|^applicaton|^binary|^image$|^multipart|^media|^$|^text/html|^text/plain|\d|array|unknown|undefined|\*|string|^img|^images|^text|\%2f|\(|ipg$|jpe$|jfif'), format, LOWER(REGEXP_REPLACE(REGEXP_REPLACE(mimetype, r'(?is).*image[/\\](?:x-)?|[\."]|[ +,;]+.*$', ''), r'(?i)pjpeg|jpeg', 'jpg'))), '') AS imageType, - COUNT(0) AS hits, - SUM(respSize) AS bytes - FROM `httparchive.almanac.requests3` + NULLIF(IF(REGEX_CONTAINS(mimetype, r'(?i)^application|^applicaton|^binary|^image$|^multipart|^media|^$|^text/html|^text/plain|\d|array|unknown|undefined|\*|string|^img|^images|^text|\%2f|\(|ipg$|jpe$|jfif'), format, LOWER(REGEXP_REPLACE(REGEXP_REPLACE(mimetype, r'(?is).*image[/\\](?:x-)?|[\."]|[ +,;]+.*$', ''), r'(?i)pjpeg|jpeg', 'jpg'))), '') AS imageType, + COUNT(0) AS hits, + SUM(respSize) AS bytes + FROM `httparchive.almanac.requests3` - WHERE - # many 404s ANDredirects show up as image/gif - status = 200 AND + WHERE + # many 404s ANDredirects show up as image/gif + status = 200 AND - # we are trying to catch images. WPO populates the format for media but it uses a file extension guess. - #So we exclude mimetypes that aren't image or where the format couldn't be guessed by WPO - (format != '' OR mimetype LIKE 'image%') AND + # we are trying to catch images. WPO populates the format for media but it uses a file extension guess. + #So we exclude mimetypes that aren't image or where the format couldn't be guessed by WPO (format != '' OR mimetype LIKE 'image%') AND - # many image/gifs are really beacons with 1x1 pixel, but svgs can get caught in the mix - (respSize > 1500 OR REGEXP_CONTAINS(mimetype, r'svg')) AND + # many image/gifs are really beacons with 1x1 pixel, but svgs can get caught in the mix (respSize > 1500 OR REGEXP_CONTAINS(mimetype, r'svg')) AND - # strip favicon requests - format != 'ico' AND + # strip favicon requests + format != 'ico' AND - # strip video mimetypes ANDother favicons - NOT REGEXP_CONTAINS(mimetype, r'video|ico') - GROUP BY - client, - page, - imageType - ) - CROSS JOIN UNNEST(['jpg', 'png', 'webp', 'gif', 'svg']) AS webImageType + # strip video mimetypes ANDother favicons + NOT REGEXP_CONTAINS(mimetype, r'video|ico') GROUP BY client, page, - webImageType + imageType ) + CROSS JOIN UNNEST(['jpg', 'png', 'webp', 'gif', 'svg']) AS webImageType + GROUP BY + client, + page, + webImageType +) GROUP BY client, imageType diff --git a/sql/2019/media/04_04.sql b/sql/2019/media/04_04.sql index 0d5345bfed0..3bc9726f3b9 100644 --- a/sql/2019/media/04_04.sql +++ b/sql/2019/media/04_04.sql @@ -18,7 +18,8 @@ FROM ( firstHtml GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/media/04_08.sql b/sql/2019/media/04_08.sql index 7a3c9426d22..85e2d0d6e69 100644 --- a/sql/2019/media/04_08.sql +++ b/sql/2019/media/04_08.sql @@ -13,7 +13,8 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml) + firstHtml +) GROUP BY client ORDER BY diff --git a/sql/2019/media/04_09a.sql b/sql/2019/media/04_09a.sql index 7e110537f0c..3db155de652 100644 --- a/sql/2019/media/04_09a.sql +++ b/sql/2019/media/04_09a.sql @@ -1,6 +1,7 @@ #standardSQL # 04_09a: Client Hints -SELECT client, +SELECT + client, COUNTIF(chHTML) AS chHTMLCount, COUNTIF(chHeader) AS chHeaderCount, COUNTIF(chHTML AND chHeader) AS chBothCount, @@ -10,18 +11,17 @@ SELECT client, ROUND(100 * COUNTIF(chHeader) / COUNT(0), 2) AS chHeaderPct, ROUND(100 * COUNTIF(chHTML AND chHeader) / COUNT(0), 2) AS chBothPct, ROUND(100 * COUNTIF(chHTML OR chHeader) / COUNT(0), 2) AS chEitherPct -FROM - ( - SELECT - client, - page, - REGEXP_CONTAINS(body, r'(?is)<]*Accept-CH\b') AS chHTML, - REGEXP_CONTAINS(respOtherHeaders, r'(?is)Accept-CH = ') AS chHeader - FROM - `httparchive.almanac.summary_response_bodies` - WHERE - date = '2019-07-01' AND - firstHtml - ) +FROM ( + SELECT + client, + page, + REGEXP_CONTAINS(body, r'(?is)<]*Accept-CH\b') AS chHTML, + REGEXP_CONTAINS(respOtherHeaders, r'(?is)Accept-CH = ') AS chHeader + FROM + `httparchive.almanac.summary_response_bodies` + WHERE + date = '2019-07-01' AND + firstHtml +) GROUP BY client diff --git a/sql/2019/media/04_09b.sql b/sql/2019/media/04_09b.sql index ef93e83dee0..599bf95174b 100644 --- a/sql/2019/media/04_09b.sql +++ b/sql/2019/media/04_09b.sql @@ -5,20 +5,20 @@ SELECT chHTML, chHeader, COUNT(0) AS hits -FROM - ( - SELECT - client, - page, - replace(regexp_extract(regexp_extract(body, r'(?is)<]*Accept-CH\b[^><]*'), r'(?im).*content=["#32"\']*([^\'"><]*)'), '#32;', '') AS chHTML, - regexp_extract(regexp_extract(respOtherHeaders, r'(?is)Accept-CH = (.*)'), r'(?im)^([^=]*?)(?:, [a-z-]+ = .*)') AS chHeader - FROM `httparchive.almanac.summary_response_bodies` - WHERE - date = '2019-07-01' AND - firstHtml AND - (REGEXP_CONTAINS(body, r'(?is)<]*Accept-CH\b') OR - REGEXP_CONTAINS(respOtherHeaders, r'(?is)Accept-CH = ')) - ) +FROM ( + SELECT + client, + page, + replace(regexp_extract(regexp_extract(body, r'(?is)<]*Accept-CH\b[^><]*'), r'(?im).*content=["#32"\']*([^\'"><]*)'), '#32;', '') AS chHTML, + regexp_extract(regexp_extract(respOtherHeaders, r'(?is)Accept-CH = (.*)'), r'(?im)^([^=]*?)(?:, [a-z-]+ = .*)') AS chHeader + FROM `httparchive.almanac.summary_response_bodies` + WHERE + date = '2019-07-01' AND + firstHtml AND ( + REGEXP_CONTAINS(body, r'(?is)<]*Accept-CH\b') OR + REGEXP_CONTAINS(respOtherHeaders, r'(?is)Accept-CH = ') + ) +) GROUP BY client, chHTML, diff --git a/sql/2019/media/04_09c.sql b/sql/2019/media/04_09c.sql index 63384adadd4..8b2bc4c74e0 100644 --- a/sql/2019/media/04_09c.sql +++ b/sql/2019/media/04_09c.sql @@ -2,32 +2,31 @@ # 04_09c: Top Client Hints SELECT client, ch, SUM(hits) AS hits -FROM - ( +FROM ( + SELECT + client, + REGEXP_REPLACE(concat(IFNULL(chHTML, ''), ',', IFNULL(chHeader, '')), r'^,|,$| ', '') AS acceptCH, + COUNT(0) AS hits + FROM ( SELECT client, - REGEXP_REPLACE(concat(IFNULL(chHTML, ''), ',', IFNULL(chHeader, '')), r'^,|,$| ', '') AS acceptCH, - COUNT(0) AS hits + page, + replace(regexp_extract(regexp_extract(body, r'(?is)<]*Accept-CH\b[^><]*'), r'(?im).*content=["#32"\']*([^\'"><]*)'), ' ', '') AS chHTML, + regexp_extract(regexp_extract(respOtherHeaders, r'(?is)Accept-CH = (.*)'), r'(?im)^([^=]*?)(?:, [a-z-]+ = .*)') AS chHeader FROM - ( - SELECT - client, - page, - replace(regexp_extract(regexp_extract(body, r'(?is)<]*Accept-CH\b[^><]*'), r'(?im).*content=["#32"\']*([^\'"><]*)'), ' ', '') AS chHTML, - regexp_extract(regexp_extract(respOtherHeaders, r'(?is)Accept-CH = (.*)'), r'(?im)^([^=]*?)(?:, [a-z-]+ = .*)') AS chHeader - FROM - `httparchive.almanac.summary_response_bodies` - WHERE - date = '2019-07-01' AND - firstHtml AND - (REGEXP_CONTAINS(body, r'(?im)<]*Accept-CH\b') OR - REGEXP_CONTAINS(respOtherHeaders, r'(?im)Accept-CH = ')) + `httparchive.almanac.summary_response_bodies` + WHERE + date = '2019-07-01' AND + firstHtml AND ( + REGEXP_CONTAINS(body, r'(?im)<]*Accept-CH\b') OR + REGEXP_CONTAINS(respOtherHeaders, r'(?im)Accept-CH = ') ) - GROUP BY - client, - chHTML, - chHeader ) + GROUP BY + client, + chHTML, + chHeader +) CROSS JOIN UNNEST(SPLIT(LOWER(acceptCH), ',')) AS ch GROUP BY client, diff --git a/sql/2019/media/04_11a.sql b/sql/2019/media/04_11a.sql index c31f4a91b6b..2d6f77be60f 100644 --- a/sql/2019/media/04_11a.sql +++ b/sql/2019/media/04_11a.sql @@ -29,54 +29,50 @@ SELECT APPROX_QUANTILES(ROUND(bytes / IF(imageType = 'svg' AND pixels > 0, pixels, naturalPixels), 4), 1000)[OFFSET(500)] AS bpp_p50, APPROX_QUANTILES(ROUND(bytes / IF(imageType = 'svg' AND pixels > 0, pixels, naturalPixels), 4), 1000)[OFFSET(750)] AS bpp_p75, APPROX_QUANTILES(ROUND(bytes / IF(imageType = 'svg' AND pixels > 0, pixels, naturalPixels), 4), 1000)[OFFSET(900)] AS bpp_p90 -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - p.url AS page, - image.url AS url, - image.width AS width, - image.height AS height, - image.naturalWidth AS naturalWidth, - image.naturalHeight AS naturalHeight, - IFNULL(image.width, 0) * IFNULL(image.height, 0) AS pixels, - IFNULL(image.naturalWidth, 0) * IFNULL(image.naturalHeight, 0) AS naturalPixels - FROM - `httparchive.pages.2019_07_01_*` p - CROSS JOIN UNNEST(getImages(payload)) AS image - WHERE - image.naturalHeight > 0 AND - image.naturalWidth > 0 - -- LIMIT 1000 - ) a -LEFT JOIN - ( - SELECT - client, - page, - url, - NULLIF(IF(REGEX_CONTAINS(mimetype, r'(?i)^application|^applicaton|^binary|^image$|^multipart|^media|^$|^text/html|^text/plain|\d|array|unknown|undefined|\*|string|^img|^images|^text|\%2f|\(|ipg$|jpe$|jfif'), format, LOWER(REGEXP_REPLACE(REGEXP_REPLACE(mimetype, r'(?is).*image[/\\](?:x-)?|[\."]|[ +,;]+.*$', ''), r'(?i)pjpeg|jpeg', 'jpg'))), '') AS imageType, - respSize AS bytes - FROM `httparchive.almanac.requests3` +FROM ( + SELECT + _TABLE_SUFFIX AS client, + p.url AS page, + image.url AS url, + image.width AS width, + image.height AS height, + image.naturalWidth AS naturalWidth, + image.naturalHeight AS naturalHeight, + IFNULL(image.width, 0) * IFNULL(image.height, 0) AS pixels, + IFNULL(image.naturalWidth, 0) * IFNULL(image.naturalHeight, 0) AS naturalPixels + FROM + `httparchive.pages.2019_07_01_*` p + CROSS JOIN UNNEST(getImages(payload)) AS image + WHERE + image.naturalHeight > 0 AND + image.naturalWidth > 0 +-- LIMIT 1000 +) a +LEFT JOIN ( + SELECT + client, + page, + url, + NULLIF(IF(REGEX_CONTAINS(mimetype, r'(?i)^application|^applicaton|^binary|^image$|^multipart|^media|^$|^text/html|^text/plain|\d|array|unknown|undefined|\*|string|^img|^images|^text|\%2f|\(|ipg$|jpe$|jfif'), format, LOWER(REGEXP_REPLACE(REGEXP_REPLACE(mimetype, r'(?is).*image[/\\](?:x-)?|[\."]|[ +,;]+.*$', ''), r'(?i)pjpeg|jpeg', 'jpg'))), '') AS imageType, + respSize AS bytes + FROM `httparchive.almanac.requests3` - WHERE - # many 404s and redirects show up as image/gif - status = 200 AND + WHERE + # many 404s and redirects show up as image/gif + status = 200 AND - # we are trying to catch images. WPO populates the format for media but it uses a file extension guess. - #So we exclude mimetypes that aren't image or where the format couldn't be guessed by WPO - (format != '' OR mimetype LIKE 'image%') AND + # we are trying to catch images. WPO populates the format for media but it uses a file extension guess. + #So we exclude mimetypes that aren't image or where the format couldn't be guessed by WPO (format != '' OR mimetype LIKE 'image%') AND - # many image/gifs are really beacons with 1x1 pixel, but svgs can get caught in the mix - (respSize > 1500 OR REGEXP_CONTAINS(mimetype, r'svg')) AND + # many image/gifs are really beacons with 1x1 pixel, but svgs can get caught in the mix (respSize > 1500 OR REGEXP_CONTAINS(mimetype, r'svg')) AND - # strip favicon requests - format != 'ico' AND + # strip favicon requests + format != 'ico' AND - # strip video mimetypes and other favicons - NOT REGEXP_CONTAINS(mimetype, r'video|ico') - -- limit 1000 - ) + # strip video mimetypes and other favicons + NOT REGEXP_CONTAINS(mimetype, r'video|ico') +-- limit 1000 +) ON (b.client = a.client AND a.page = b.page AND a.url = b.url) WHERE diff --git a/sql/2019/media/04_11b.sql b/sql/2019/media/04_11b.sql index 6475261bbd8..67800f558f8 100644 --- a/sql/2019/media/04_11b.sql +++ b/sql/2019/media/04_11b.sql @@ -42,30 +42,28 @@ SELECT Round(APPROX_QUANTILES(naturalPixels, 1000)[OFFSET(500)] / (any_value(viewportHeight) * any_value(viewportWidth)), 2) AS pct_p50, Round(APPROX_QUANTILES(naturalPixels, 1000)[OFFSET(750)] / (any_value(viewportHeight) * any_value(viewportWidth)), 2) AS pct_p75, Round(APPROX_QUANTILES(naturalPixels, 1000)[OFFSET(900)] / (any_value(viewportHeight) * any_value(viewportWidth)), 2) AS pct_p90 -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - url AS page, - getCssPixels(json_extract_scalar(payload, '$._Images')) AS cssPixels, - getNaturalPixels(json_extract_scalar(payload, '$._Images')) AS naturalPixels, - CAST(json_extract_scalar(payload, '$._smallImageCount') AS INT64) AS smallImageCount, - CAST(json_extract_scalar(payload, '$._bigImageCount') AS INT64) AS bigImageCount, - CAST(json_extract_scalar(payload, '$._image_total') AS INT64) AS imageBytes, - CAST(json_extract_scalar(json_extract_scalar(payload, '$._Dpi'), '$.dppx') AS FLOAT64) AS dpr, - CAST(json_extract_scalar(json_extract_scalar(payload, '$._Resolution'), '$.absolute.height') AS INT64) AS viewportHeight, - CAST(json_extract_scalar(json_extract_scalar(payload, '$._Resolution'), '$.absolute.width') AS INT64) AS viewportWidth - FROM - `httparchive.pages.2019_07_01_*` - -- LIMIT 1000 - ) +FROM ( + SELECT + _TABLE_SUFFIX AS client, + url AS page, + getCssPixels(json_extract_scalar(payload, '$._Images')) AS cssPixels, + getNaturalPixels(json_extract_scalar(payload, '$._Images')) AS naturalPixels, + CAST(json_extract_scalar(payload, '$._smallImageCount') AS INT64) AS smallImageCount, + CAST(json_extract_scalar(payload, '$._bigImageCount') AS INT64) AS bigImageCount, + CAST(json_extract_scalar(payload, '$._image_total') AS INT64) AS imageBytes, + CAST(json_extract_scalar(json_extract_scalar(payload, '$._Dpi'), '$.dppx') AS FLOAT64) AS dpr, + CAST(json_extract_scalar(json_extract_scalar(payload, '$._Resolution'), '$.absolute.height') AS INT64) AS viewportHeight, + CAST(json_extract_scalar(json_extract_scalar(payload, '$._Resolution'), '$.absolute.width') AS INT64) AS viewportWidth + FROM + `httparchive.pages.2019_07_01_*` +-- LIMIT 1000 +) WHERE # it appears the _Images array is populated only from tag requests and not CSS or favicon # likewise the bigImageCount and smallImageCount only track images > 100,000 and < 10,000 respectively. # Meaning images between 10KB and 100KB won't show up in the count # https://github.com/WPO-Foundation/webpagetest/blob/master/www/breakdown.inc#L95 - cssPixels > 0 AND naturalPixels > 0 AND - (smallImageCount > 0 OR bigImageCount > 0) + cssPixels > 0 AND naturalPixels > 0 AND (smallImageCount > 0 OR bigImageCount > 0) GROUP BY client ORDER BY diff --git a/sql/2019/media/04_12.sql b/sql/2019/media/04_12.sql index 52c254284c6..7320c42decd 100644 --- a/sql/2019/media/04_12.sql +++ b/sql/2019/media/04_12.sql @@ -33,9 +33,9 @@ FROM ( type = 'image' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) GROUP BY client, diff --git a/sql/2019/media/04_12b.sql b/sql/2019/media/04_12b.sql index 45e69d55106..b2872e2e961 100644 --- a/sql/2019/media/04_12b.sql +++ b/sql/2019/media/04_12b.sql @@ -33,6 +33,7 @@ FROM ( type = 'image' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/media/04_13.sql b/sql/2019/media/04_13.sql index fa2e1eb6dac..c01337ae232 100644 --- a/sql/2019/media/04_13.sql +++ b/sql/2019/media/04_13.sql @@ -8,4 +8,5 @@ FROM ( SELECT JSON_EXTRACT_SCALAR(report, '$.audits.image-alt.score') = '1' AS pass FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/media/04_16.sql b/sql/2019/media/04_16.sql index 759463e7bb3..578f0b0ace8 100644 --- a/sql/2019/media/04_16.sql +++ b/sql/2019/media/04_16.sql @@ -7,10 +7,8 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.requests` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND type = 'video' AND diff --git a/sql/2019/media/04_17.sql b/sql/2019/media/04_17.sql index 2d06881e4d4..945b55f12f5 100644 --- a/sql/2019/media/04_17.sql +++ b/sql/2019/media/04_17.sql @@ -15,7 +15,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2019-07-01' AND - type = 'script'), + type = 'script' +), UNNEST([10, 25, 50, 75, 90]) AS percentile WHERE player IS NOT NULL diff --git a/sql/2019/media/04_19.sql b/sql/2019/media/04_19.sql index c11cff24ba8..bd430dd3d3f 100644 --- a/sql/2019/media/04_19.sql +++ b/sql/2019/media/04_19.sql @@ -14,10 +14,8 @@ FROM `httparchive.almanac.summary_response_bodies`, UNNEST(REGEXP_EXTRACT_ALL(body, '(?i)<(video[^>]*)')) AS video, UNNEST(REGEXP_EXTRACT_ALL(video, '(?i)(autoplay|autoPictureInPicture|buffered|controls|controlslist|crossorigin|use-credentials|currentTime|disablePictureInPicture|disableRemotePlayback|duration|height|intrinsicsize|loop|muted|playsinline|poster|preload|src|width)')) AS attr -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE date = '2019-07-01' AND firstHtml diff --git a/sql/2019/media/04_20.sql b/sql/2019/media/04_20.sql index 62bd2d36ac4..df299382925 100644 --- a/sql/2019/media/04_20.sql +++ b/sql/2019/media/04_20.sql @@ -24,7 +24,8 @@ FROM ( _TABLE_SUFFIX AS client, getMediaElements(payload) AS media_elements FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) WHERE 'audio' IN UNNEST(media_elements) OR 'video' IN UNNEST(media_elements) diff --git a/sql/2019/media/04_21.sql b/sql/2019/media/04_21.sql index 8604423556a..c9097d438ba 100644 --- a/sql/2019/media/04_21.sql +++ b/sql/2019/media/04_21.sql @@ -13,6 +13,7 @@ FROM ( JSON_EXTRACT_SCALAR(payload, "$['_heroElementTimes.Image']") IS NOT NULL AS has_hero_image, JSON_EXTRACT_SCALAR(payload, "$['_heroElementTimes.BackgroundImage']") IS NOT NULL AS has_hero_bgimage FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/media/04_25.sql b/sql/2019/media/04_25.sql index e5ef8a3d11d..91333eadd98 100644 --- a/sql/2019/media/04_25.sql +++ b/sql/2019/media/04_25.sql @@ -13,7 +13,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2019-07-01' AND - type = 'script') + type = 'script' +) WHERE framework IS NOT NULL GROUP BY diff --git a/sql/2019/mobile-web/12_04b.sql b/sql/2019/mobile-web/12_04b.sql index 8d092bb5c7d..51dbb22c06c 100755 --- a/sql/2019/mobile-web/12_04b.sql +++ b/sql/2019/mobile-web/12_04b.sql @@ -42,8 +42,7 @@ SELECT ROUND(COUNT(0) * 100 / SUM(COUNT(DISTINCT url)) OVER (PARTITION BY viewport_info.directive), 2) AS perc_value_in_directive, ROUND(COUNT(0) * 100 / total_pages, 2) AS perc_in_all_pages FROM - `httparchive.pages.2019_07_01_mobile`, - (SELECT COUNT(0) AS total_pages FROM `httparchive.pages.2019_07_01_mobile`), + `httparchive.pages.2019_07_01_mobile`, (SELECT COUNT(0) AS total_pages FROM `httparchive.pages.2019_07_01_mobile`), UNNEST(getViewportDirectiveData(payload)) AS viewport_info GROUP BY total_pages, diff --git a/sql/2019/mobile-web/12_08.sql b/sql/2019/mobile-web/12_08.sql index 5bf6a673ef2..e2bafb1dcc5 100644 --- a/sql/2019/mobile-web/12_08.sql +++ b/sql/2019/mobile-web/12_08.sql @@ -8,11 +8,10 @@ SELECT COUNTIF(CAST(password_score AS NUMERIC) = 1) AS total_allowing, ROUND(COUNTIF(CAST(password_score AS NUMERIC) = 1) * 100 / COUNTIF(password_score IS NOT NULL), 2) AS perc_allowing -FROM - ( - SELECT - url, - JSON_EXTRACT_SCALAR(report, '$.audits.password-inputs-can-be-pasted-into.score') AS password_score - FROM - `httparchive.lighthouse.2019_07_01_mobile` - ) +FROM ( + SELECT + url, + JSON_EXTRACT_SCALAR(report, '$.audits.password-inputs-can-be-pasted-into.score') AS password_score + FROM + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/mobile-web/12_12.sql b/sql/2019/mobile-web/12_12.sql index 93150abed33..d408ad7abe5 100644 --- a/sql/2019/mobile-web/12_12.sql +++ b/sql/2019/mobile-web/12_12.sql @@ -21,8 +21,7 @@ SELECT total AS total_pages, ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pages_perc FROM - `httparchive.pages.2019_07_01_mobile`, - (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_mobile`), + `httparchive.pages.2019_07_01_mobile`, (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_mobile`), UNNEST(getInputTypes(payload)) AS input_type GROUP BY input_type, total ORDER BY occurence DESC diff --git a/sql/2019/mobile-web/12_14.sql b/sql/2019/mobile-web/12_14.sql index 4a33d6a1790..ba7cd619eff 100644 --- a/sql/2019/mobile-web/12_14.sql +++ b/sql/2019/mobile-web/12_14.sql @@ -46,8 +46,7 @@ SELECT ROUND(COUNT(DISTINCT url) * 100 / total_pages_with_inputs, 2) AS perc_of_pages_using FROM `httparchive.pages.2019_07_01_mobile`, - UNNEST(getInputAttributes(payload)) AS input_attributes, - (SELECT COUNTIF(hasInputs(payload)) AS total_pages_with_inputs FROM `httparchive.pages.2019_07_01_mobile`) + UNNEST(getInputAttributes(payload)) AS input_attributes, (SELECT COUNTIF(hasInputs(payload)) AS total_pages_with_inputs FROM `httparchive.pages.2019_07_01_mobile`) GROUP BY input_attributes, total_pages_with_inputs ORDER BY perc_of_pages_using DESC LIMIT 1000 diff --git a/sql/2019/mobile-web/12_17.sql b/sql/2019/mobile-web/12_17.sql index 322ed13fdf9..89152776c2d 100644 --- a/sql/2019/mobile-web/12_17.sql +++ b/sql/2019/mobile-web/12_17.sql @@ -16,6 +16,8 @@ FROM ( FROM `httparchive.summary_pages.2019_07_01_mobile` GROUP BY - bin)) + bin + ) +) ORDER BY bin diff --git a/sql/2019/mobile-web/12_20.sql b/sql/2019/mobile-web/12_20.sql index ed522b89b67..0d63ac87f4f 100644 --- a/sql/2019/mobile-web/12_20.sql +++ b/sql/2019/mobile-web/12_20.sql @@ -14,6 +14,7 @@ FROM ( device = 'phone' AND yyyymm = '201907' ORDER BY - small_cls DESC) + small_cls DESC +) WHERE MOD(row, 5229) = 0 diff --git a/sql/2019/performance/07_01.sql b/sql/2019/performance/07_01.sql index c4dbd229d05..06533015a03 100644 --- a/sql/2019/performance/07_01.sql +++ b/sql/2019/performance/07_01.sql @@ -17,6 +17,7 @@ FROM ( date = '2019-07-01' AND fast_fcp + avg_fcp + slow_fcp > 0 ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_01b.sql b/sql/2019/performance/07_01b.sql index 72be0f4e500..1d919f19a58 100644 --- a/sql/2019/performance/07_01b.sql +++ b/sql/2019/performance/07_01b.sql @@ -19,6 +19,7 @@ FROM ( fast_fcp + avg_fcp + slow_fcp > 0 AND device = 'desktop' ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_01c.sql b/sql/2019/performance/07_01c.sql index 71356cfa0a4..6770011d868 100644 --- a/sql/2019/performance/07_01c.sql +++ b/sql/2019/performance/07_01c.sql @@ -19,6 +19,7 @@ FROM ( fast_fcp + avg_fcp + slow_fcp > 0 AND device = 'phone' ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_02.sql b/sql/2019/performance/07_02.sql index b40c815eff6..a0ab7345092 100644 --- a/sql/2019/performance/07_02.sql +++ b/sql/2019/performance/07_02.sql @@ -17,6 +17,7 @@ FROM ( date = '2019-07-01' AND fast_fid + avg_fid + slow_fid > 0 ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_02b.sql b/sql/2019/performance/07_02b.sql index 2e9bb3853b9..9ed026abcba 100644 --- a/sql/2019/performance/07_02b.sql +++ b/sql/2019/performance/07_02b.sql @@ -19,6 +19,7 @@ FROM ( fast_fid + avg_fid + slow_fid > 0 AND device = 'desktop' ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_02c.sql b/sql/2019/performance/07_02c.sql index 42224095f5e..7e292627410 100644 --- a/sql/2019/performance/07_02c.sql +++ b/sql/2019/performance/07_02c.sql @@ -19,6 +19,7 @@ FROM ( fast_fid + avg_fid + slow_fid > 0 AND device = 'phone' ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_03.sql b/sql/2019/performance/07_03.sql index b38ec07c64f..9ddbef213f2 100644 --- a/sql/2019/performance/07_03.sql +++ b/sql/2019/performance/07_03.sql @@ -2,7 +2,7 @@ # 07_03: % fast FCP per PSI SELECT ROUND(COUNTIF(fast_fcp >= .75) * 100 / COUNT(0), 2) AS pct_fast_fcp, - ROUND(COUNTIF(NOT(slow_fcp >= .25) AND NOT(fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, + ROUND(COUNTIF(NOT (slow_fcp >= .25) AND NOT (fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, ROUND(COUNTIF(slow_fcp >= .25) * 100 / COUNT(0), 2) AS pct_slow_fcp FROM `chrome-ux-report.materialized.metrics_summary` diff --git a/sql/2019/performance/07_03b.sql b/sql/2019/performance/07_03b.sql index 22e06350587..4c3c05f643d 100644 --- a/sql/2019/performance/07_03b.sql +++ b/sql/2019/performance/07_03b.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_fcp >= .75) * 100 / COUNT(0), 2) AS pct_fast_fcp, - ROUND(COUNTIF(NOT(slow_fcp >= .25) AND NOT(fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, + ROUND(COUNTIF(NOT (slow_fcp >= .25) AND NOT (fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, ROUND(COUNTIF(slow_fcp >= .25) * 100 / COUNT(0), 2) AS pct_slow_fcp FROM ( SELECT @@ -15,6 +15,7 @@ FROM ( `chrome-ux-report.materialized.device_summary` WHERE yyyymm = '201907' AND - device IN ('desktop', 'phone')) + device IN ('desktop', 'phone') +) GROUP BY device diff --git a/sql/2019/performance/07_03b2.sql b/sql/2019/performance/07_03b2.sql index c307d989494..48d956b7edc 100644 --- a/sql/2019/performance/07_03b2.sql +++ b/sql/2019/performance/07_03b2.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_fcp >= .75) * 100 / COUNT(0), 2) AS pct_fast_fcp, - ROUND(COUNTIF(NOT(slow_fcp >= .25) AND NOT(fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, + ROUND(COUNTIF(NOT (slow_fcp >= .25) AND NOT (fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, ROUND(COUNTIF(slow_fcp >= .25) * 100 / COUNT(0), 2) AS pct_slow_fcp FROM ( SELECT @@ -16,7 +16,8 @@ FROM ( UNNEST(first_contentful_paint.histogram.bin) AS bin GROUP BY origin, - device) + device +) GROUP BY device ORDER BY diff --git a/sql/2019/performance/07_03c.sql b/sql/2019/performance/07_03c.sql index af98f266153..5307efe7cc4 100644 --- a/sql/2019/performance/07_03c.sql +++ b/sql/2019/performance/07_03c.sql @@ -3,7 +3,7 @@ SELECT speed, ROUND(COUNTIF(fast_fcp >= .75) * 100 / COUNT(0), 2) AS pct_fast_fcp, - ROUND(COUNTIF(NOT(slow_fcp >= .25) AND NOT(fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, + ROUND(COUNTIF(NOT (slow_fcp >= .25) AND NOT (fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, ROUND(COUNTIF(slow_fcp >= .25) * 100 / COUNT(0), 2) AS pct_slow_fcp FROM ( SELECT @@ -16,7 +16,8 @@ FROM ( UNNEST(first_contentful_paint.histogram.bin) AS bin GROUP BY origin, - speed) + speed +) GROUP BY speed ORDER BY diff --git a/sql/2019/performance/07_03d.sql b/sql/2019/performance/07_03d.sql index e08f501ff5f..d7444c46e53 100644 --- a/sql/2019/performance/07_03d.sql +++ b/sql/2019/performance/07_03d.sql @@ -482,7 +482,7 @@ SELECT geo, COUNT(0) AS websites, ROUND(COUNTIF(fast_fcp >= .75) * 100 / COUNT(0), 2) AS pct_fast_fcp, - ROUND(COUNTIF(NOT(slow_fcp >= .25) AND NOT(fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, + ROUND(COUNTIF(NOT (slow_fcp >= .25) AND NOT (fast_fcp >= .75)) * 100 / COUNT(0), 2) AS pct_avg_fcp, ROUND(COUNTIF(slow_fcp >= .25) * 100 / COUNT(0), 2) AS pct_slow_fcp FROM ( SELECT @@ -495,7 +495,8 @@ FROM ( UNNEST(first_contentful_paint.histogram.bin) AS bin GROUP BY origin, - geo) + geo +) GROUP BY geo ORDER BY diff --git a/sql/2019/performance/07_04.sql b/sql/2019/performance/07_04.sql index ececc47ecda..fff06d9c237 100644 --- a/sql/2019/performance/07_04.sql +++ b/sql/2019/performance/07_04.sql @@ -2,7 +2,7 @@ # 07_04: % fast FID per PSI SELECT ROUND(COUNTIF(fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast_fid, - ROUND(COUNTIF(NOT(slow_fid >= 0.05) AND NOT(fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, + ROUND(COUNTIF(NOT (slow_fid >= 0.05) AND NOT (fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, ROUND(COUNTIF(slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow_fid FROM `chrome-ux-report.materialized.metrics_summary` diff --git a/sql/2019/performance/07_04b.sql b/sql/2019/performance/07_04b.sql index 7021f7ed64c..ff90d0515f9 100644 --- a/sql/2019/performance/07_04b.sql +++ b/sql/2019/performance/07_04b.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast_fid, - ROUND(COUNTIF(NOT(slow_fid >= .05) AND NOT(fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, + ROUND(COUNTIF(NOT (slow_fid >= .05) AND NOT (fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, ROUND(COUNTIF(slow_fid >= .05) * 100 / COUNT(0), 2) AS pct_slow_fid FROM ( SELECT @@ -16,6 +16,7 @@ FROM ( WHERE yyyymm = '201907' AND fast_fid + avg_fid + slow_fid > 0 AND - device IN ('desktop', 'phone')) + device IN ('desktop', 'phone') +) GROUP BY device diff --git a/sql/2019/performance/07_04c.sql b/sql/2019/performance/07_04c.sql index 512f763e65a..aaba0d95664 100644 --- a/sql/2019/performance/07_04c.sql +++ b/sql/2019/performance/07_04c.sql @@ -3,7 +3,7 @@ SELECT speed, ROUND(COUNTIF(fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast_fid, - ROUND(COUNTIF(NOT(slow_fid >= .05) AND NOT(fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, + ROUND(COUNTIF(NOT (slow_fid >= .05) AND NOT (fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, ROUND(COUNTIF(slow_fid >= .05) * 100 / COUNT(0), 2) AS pct_slow_fid FROM ( SELECT @@ -16,7 +16,8 @@ FROM ( UNNEST(experimental.first_input_delay.histogram.bin) AS bin GROUP BY origin, - speed) + speed +) WHERE fast_fid + avg_fid + slow_fid > 0 GROUP BY diff --git a/sql/2019/performance/07_04d.sql b/sql/2019/performance/07_04d.sql index 26f17433d9e..2b682da735f 100644 --- a/sql/2019/performance/07_04d.sql +++ b/sql/2019/performance/07_04d.sql @@ -482,22 +482,21 @@ SELECT geo, COUNT(0) AS websites, ROUND(COUNTIF(fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast_fid, - ROUND(COUNTIF(NOT(slow_fid >= .05) AND NOT(fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, + ROUND(COUNTIF(NOT (slow_fid >= .05) AND NOT (fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg_fid, ROUND(COUNTIF(slow_fid >= .05) * 100 / COUNT(0), 2) AS pct_slow_fid -FROM - ( - SELECT - geo, - ROUND(SAFE_DIVIDE(SUM(IF(bin.start < 100, bin.density, 0)), SUM(bin.density)), 4) AS fast_fid, - ROUND(SAFE_DIVIDE(SUM(IF(bin.start >= 100 AND bin.start < 300, bin.density, 0)), SUM(bin.density)), 4) AS avg_fid, - ROUND(SAFE_DIVIDE(SUM(IF(bin.start >= 300, bin.density, 0)), SUM(bin.density)), 4) AS slow_fid - FROM - geos, - UNNEST(experimental.first_input_delay.histogram.bin) AS bin - GROUP BY - origin, - geo - ) +FROM ( + SELECT + geo, + ROUND(SAFE_DIVIDE(SUM(IF(bin.start < 100, bin.density, 0)), SUM(bin.density)), 4) AS fast_fid, + ROUND(SAFE_DIVIDE(SUM(IF(bin.start >= 100 AND bin.start < 300, bin.density, 0)), SUM(bin.density)), 4) AS avg_fid, + ROUND(SAFE_DIVIDE(SUM(IF(bin.start >= 300, bin.density, 0)), SUM(bin.density)), 4) AS slow_fid + FROM + geos, + UNNEST(experimental.first_input_delay.histogram.bin) AS bin + GROUP BY + origin, + geo +) GROUP BY geo ORDER BY diff --git a/sql/2019/performance/07_05.sql b/sql/2019/performance/07_05.sql index a30c507bc89..cbcfcda883c 100644 --- a/sql/2019/performance/07_05.sql +++ b/sql/2019/performance/07_05.sql @@ -2,7 +2,7 @@ # 07_05: % fast FCP+FID per PSI SELECT ROUND(COUNTIF(fast_fcp >= .9 AND fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast, - ROUND(COUNTIF(NOT(slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT(fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, + ROUND(COUNTIF(NOT (slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT (fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, ROUND(COUNTIF(slow_fcp >= .1 OR slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow FROM `chrome-ux-report.materialized.metrics_summary` diff --git a/sql/2019/performance/07_05b.sql b/sql/2019/performance/07_05b.sql index 321710933aa..a41a2dad054 100644 --- a/sql/2019/performance/07_05b.sql +++ b/sql/2019/performance/07_05b.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_fcp >= .9 AND fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast, - ROUND(COUNTIF(NOT(slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT(fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, + ROUND(COUNTIF(NOT (slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT (fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, ROUND(COUNTIF(slow_fcp >= .1 OR slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow FROM ( SELECT @@ -19,6 +19,7 @@ FROM ( WHERE yyyymm = '201907' AND fast_fid + avg_fid + slow_fid > 0 AND - device IN ('desktop', 'phone')) + device IN ('desktop', 'phone') +) GROUP BY device diff --git a/sql/2019/performance/07_05c.sql b/sql/2019/performance/07_05c.sql index c9e94560f2d..bfe5ffabcdf 100644 --- a/sql/2019/performance/07_05c.sql +++ b/sql/2019/performance/07_05c.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_fcp >= .9 AND fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast, - ROUND(COUNTIF(NOT(slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT(fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, + ROUND(COUNTIF(NOT (slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT (fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, ROUND(COUNTIF(slow_fcp >= .1 OR slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow FROM ( SELECT @@ -19,6 +19,7 @@ FROM ( WHERE yyyymm = '201907' AND fast_fid + avg_fid + slow_fid > 0 AND - device IN ('desktop', 'phone')) + device IN ('desktop', 'phone') +) GROUP BY device diff --git a/sql/2019/performance/07_05d.sql b/sql/2019/performance/07_05d.sql index 937be52469e..5b681b4959e 100644 --- a/sql/2019/performance/07_05d.sql +++ b/sql/2019/performance/07_05d.sql @@ -482,7 +482,7 @@ SELECT geo, COUNT(0) AS websites, ROUND(COUNTIF(fast_fcp >= .9 AND fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast, - ROUND(COUNTIF(NOT(slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT(fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, + ROUND(COUNTIF(NOT (slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT (fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, ROUND(COUNTIF(slow_fcp >= .1 OR slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow FROM ( SELECT @@ -499,7 +499,8 @@ FROM ( UNNEST(experimental.first_input_delay.histogram.fid) AS fid GROUP BY origin, - geo) + geo +) WHERE fast_fid + avg_fid + slow_fid > 0 GROUP BY diff --git a/sql/2019/performance/07_07.sql b/sql/2019/performance/07_07.sql index 578400b218b..ea893cfb105 100644 --- a/sql/2019/performance/07_07.sql +++ b/sql/2019/performance/07_07.sql @@ -17,6 +17,7 @@ FROM ( date = '2019-07-01' AND fast_ttfb + avg_ttfb + slow_ttfb > 0 ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_07b.sql b/sql/2019/performance/07_07b.sql index 43425c922d5..4a2da6667c1 100644 --- a/sql/2019/performance/07_07b.sql +++ b/sql/2019/performance/07_07b.sql @@ -19,6 +19,7 @@ FROM ( fast_ttfb + avg_ttfb + slow_ttfb > 0 AND device = 'desktop' ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_07c.sql b/sql/2019/performance/07_07c.sql index c10d22a0fe3..87a644376b8 100644 --- a/sql/2019/performance/07_07c.sql +++ b/sql/2019/performance/07_07c.sql @@ -19,6 +19,7 @@ FROM ( fast_ttfb + avg_ttfb + slow_ttfb > 0 AND device = 'phone' ORDER BY - fast DESC) + fast DESC +) WHERE MOD(row, CAST(FLOOR(n / 1000) AS INT64)) = 0 diff --git a/sql/2019/performance/07_08.sql b/sql/2019/performance/07_08.sql index 73d23cee830..a6413884713 100644 --- a/sql/2019/performance/07_08.sql +++ b/sql/2019/performance/07_08.sql @@ -2,7 +2,7 @@ # 07_08: % fast TTFB using FCP-like thresholds SELECT ROUND(COUNTIF(fast_ttfb >= .75) * 100 / COUNT(0), 2) AS pct_fast_ttfb, - ROUND(COUNTIF(NOT(slow_ttfb >= .25) AND NOT(fast_ttfb >= .75)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, + ROUND(COUNTIF(NOT (slow_ttfb >= .25) AND NOT (fast_ttfb >= .75)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, ROUND(COUNTIF(slow_ttfb >= .25) * 100 / COUNT(0), 2) AS pct_slow_ttfb FROM `chrome-ux-report.materialized.metrics_summary` diff --git a/sql/2019/performance/07_08b.sql b/sql/2019/performance/07_08b.sql index 2ca823f1fc2..220e73d4687 100644 --- a/sql/2019/performance/07_08b.sql +++ b/sql/2019/performance/07_08b.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_ttfb >= .9) * 100 / COUNT(0), 2) AS pct_fast_ttfb, - ROUND(COUNTIF(NOT(slow_ttfb >= .1) AND NOT(fast_ttfb >= .9)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, + ROUND(COUNTIF(NOT (slow_ttfb >= .1) AND NOT (fast_ttfb >= .9)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, ROUND(COUNTIF(slow_ttfb >= .1) * 100 / COUNT(0), 2) AS pct_slow_ttfb FROM ( SELECT @@ -16,6 +16,7 @@ FROM ( WHERE yyyymm = '201907' AND fast_ttfb + avg_ttfb + slow_ttfb > 0 AND - device IN ('desktop', 'phone')) + device IN ('desktop', 'phone') +) GROUP BY device diff --git a/sql/2019/performance/07_08c.sql b/sql/2019/performance/07_08c.sql index c72b97a3f52..f03e7580558 100644 --- a/sql/2019/performance/07_08c.sql +++ b/sql/2019/performance/07_08c.sql @@ -3,7 +3,7 @@ SELECT speed, ROUND(COUNTIF(fast_ttfb >= .9) * 100 / COUNT(0), 2) AS pct_fast_ttfb, - ROUND(COUNTIF(NOT(slow_ttfb >= .1) AND NOT(fast_ttfb >= .9)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, + ROUND(COUNTIF(NOT (slow_ttfb >= .1) AND NOT (fast_ttfb >= .9)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, ROUND(COUNTIF(slow_ttfb >= .1) * 100 / COUNT(0), 2) AS pct_slow_ttfb FROM ( SELECT @@ -16,7 +16,8 @@ FROM ( UNNEST(experimental.time_to_first_byte.histogram.bin) AS bin GROUP BY origin, - speed) + speed +) GROUP BY speed ORDER BY diff --git a/sql/2019/performance/07_08d.sql b/sql/2019/performance/07_08d.sql index 1d07a65d33e..2d7594fab55 100644 --- a/sql/2019/performance/07_08d.sql +++ b/sql/2019/performance/07_08d.sql @@ -482,7 +482,7 @@ SELECT geo, COUNT(0) AS websites, ROUND(COUNTIF(fast_ttfb >= .75) * 100 / COUNT(0), 2) AS pct_fast_ttfb, - ROUND(COUNTIF(NOT(slow_ttfb >= .25) AND NOT(fast_ttfb >= .75)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, + ROUND(COUNTIF(NOT (slow_ttfb >= .25) AND NOT (fast_ttfb >= .75)) * 100 / COUNT(0), 2) AS pct_avg_ttfb, ROUND(COUNTIF(slow_ttfb >= .25) * 100 / COUNT(0), 2) AS pct_slow_ttfb FROM ( SELECT @@ -495,7 +495,8 @@ FROM ( UNNEST(experimental.time_to_first_byte.histogram.bin) AS bin GROUP BY origin, - geo) + geo +) GROUP BY geo ORDER BY diff --git a/sql/2019/performance/07_10.sql b/sql/2019/performance/07_10.sql index 614135c8b35..d0da85c37ca 100644 --- a/sql/2019/performance/07_10.sql +++ b/sql/2019/performance/07_10.sql @@ -11,7 +11,8 @@ FROM ( CAST(JSON_EXTRACT(payload, "$['_heroElementTimes.FirstPaintedHero']") AS INT64) AS first_painted_hero, CAST(JSON_EXTRACT(payload, "$['_heroElementTimes.LastPaintedHero']") AS INT64) AS last_painted_hero FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_11.sql b/sql/2019/performance/07_11.sql index 6681952bb43..9c3f7460da8 100644 --- a/sql/2019/performance/07_11.sql +++ b/sql/2019/performance/07_11.sql @@ -9,7 +9,8 @@ FROM ( _TABLE_SUFFIX AS client, CAST(JSON_EXTRACT(payload, "$['_heroElementTimes.Heading']") AS INT64) AS h1_rendered FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_12.sql b/sql/2019/performance/07_12.sql index 7bce12acbd2..e7476ae3f93 100644 --- a/sql/2019/performance/07_12.sql +++ b/sql/2019/performance/07_12.sql @@ -9,7 +9,8 @@ FROM ( _TABLE_SUFFIX AS client, CAST(JSON_EXTRACT(payload, "$['_heroElementTimes.Image']") AS INT64) AS largest_image FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_13.sql b/sql/2019/performance/07_13.sql index ec7910ba41d..d0409ead683 100644 --- a/sql/2019/performance/07_13.sql +++ b/sql/2019/performance/07_13.sql @@ -4,14 +4,13 @@ SELECT percentile, client, ROUND(APPROX_QUANTILES(largest_bg_image, 1000)[OFFSET(percentile * 10)] / 1000, 2) AS largest_bg_image -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - CAST(JSON_EXTRACT(payload, "$['_heroElementTimes.BackgroundImage']") AS INT64) AS largest_bg_image - FROM - `httparchive.pages.2019_07_01_*` - ), +FROM ( + SELECT + _TABLE_SUFFIX AS client, + CAST(JSON_EXTRACT(payload, "$['_heroElementTimes.BackgroundImage']") AS INT64) AS largest_bg_image + FROM + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_15.sql b/sql/2019/performance/07_15.sql index c719e9668ff..6afac20a7a4 100644 --- a/sql/2019/performance/07_15.sql +++ b/sql/2019/performance/07_15.sql @@ -8,7 +8,8 @@ FROM ( SELECT CAST(IFNULL(JSON_EXTRACT(report, '$.audits.first-interactive.numericValue'), JSON_EXTRACT(report, '$.audits.first-cpu-idle.numericValue')) AS FLOAT64) AS first_cpu_idle FROM - `httparchive.lighthouse.2019_07_01_mobile`), + `httparchive.lighthouse.2019_07_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile diff --git a/sql/2019/performance/07_16.sql b/sql/2019/performance/07_16.sql index df9abe289f6..7281fc4c7ca 100644 --- a/sql/2019/performance/07_16.sql +++ b/sql/2019/performance/07_16.sql @@ -8,7 +8,8 @@ FROM ( SELECT CAST(IFNULL(JSON_EXTRACT(report, '$.audits.consistently-interactive.numericValue'), JSON_EXTRACT(report, '$.audits.interactive.numericValue')) AS FLOAT64) AS tti FROM - `httparchive.lighthouse.2019_07_01_mobile`), + `httparchive.lighthouse.2019_07_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile diff --git a/sql/2019/performance/07_17.sql b/sql/2019/performance/07_17.sql index 8adbd05c03a..d79d3d1e6f7 100644 --- a/sql/2019/performance/07_17.sql +++ b/sql/2019/performance/07_17.sql @@ -25,7 +25,8 @@ SELECT ROUND(APPROX_QUANTILES(render_blocking_css.wasted_ms, 1000)[OFFSET(percentile * 10)] / 1000, 2) AS wasted_sec FROM ( SELECT renderBlockingCSS(report) AS render_blocking_css - FROM `httparchive.lighthouse.2019_07_01_mobile`), + FROM `httparchive.lighthouse.2019_07_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile diff --git a/sql/2019/performance/07_18.sql b/sql/2019/performance/07_18.sql index a360c09706c..3fe27dd3675 100644 --- a/sql/2019/performance/07_18.sql +++ b/sql/2019/performance/07_18.sql @@ -25,7 +25,8 @@ SELECT ROUND(APPROX_QUANTILES(render_blocking_js.wasted_ms, 1000)[OFFSET(percentile * 10)] / 1000, 2) AS wasted_sec FROM ( SELECT renderBlockingJS(report) AS render_blocking_js - FROM `httparchive.lighthouse.2019_07_01_mobile`), + FROM `httparchive.lighthouse.2019_07_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile diff --git a/sql/2019/performance/07_20.sql b/sql/2019/performance/07_20.sql index e571ccde596..e6d4d7e76b9 100644 --- a/sql/2019/performance/07_20.sql +++ b/sql/2019/performance/07_20.sql @@ -7,8 +7,7 @@ SELECT ROUND(APPROX_QUANTILES(script_cpu_time, 1000)[OFFSET(percentile * 10)] / 1000, 2) AS script_cpu_time FROM ( SELECT - _TABLE_SUFFIX AS client, - ( + _TABLE_SUFFIX AS client, ( CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.EvaluateScript']"), '0') AS INT64) + CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.XHRLoad']"), '0') AS INT64) + CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.XHRReadyStateChange']"), '0') AS INT64) + @@ -21,7 +20,8 @@ FROM ( CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.MajorGC']"), '0') AS INT64) ) AS script_cpu_time FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_21.sql b/sql/2019/performance/07_21.sql index 6db34c2b1c3..07182d2a4ad 100644 --- a/sql/2019/performance/07_21.sql +++ b/sql/2019/performance/07_21.sql @@ -7,14 +7,14 @@ SELECT ROUND(APPROX_QUANTILES(layout_cpu_time, 1000)[OFFSET(percentile * 10)] / 1000, 2) AS layout_cpu_time FROM ( SELECT - _TABLE_SUFFIX AS client, - ( + _TABLE_SUFFIX AS client, ( CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.ParseAuthorStyleSheet']"), '0') AS INT64) + CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.Layout']"), '0') AS INT64) + CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.UpdateLayoutTree']"), '0') AS INT64) ) AS layout_cpu_time FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_22.sql b/sql/2019/performance/07_22.sql index e648990594c..37a44d2ab00 100644 --- a/sql/2019/performance/07_22.sql +++ b/sql/2019/performance/07_22.sql @@ -7,13 +7,13 @@ SELECT APPROX_QUANTILES(paint_cpu_time, 1000)[OFFSET(percentile * 10)] AS paint_cpu_time FROM ( SELECT - _TABLE_SUFFIX AS client, - ( + _TABLE_SUFFIX AS client, ( CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.Paint']"), '0') AS INT64) + CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.UpdateLayerTree']"), '0') AS INT64) ) AS paint_cpu_time FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_23.sql b/sql/2019/performance/07_23.sql index eaf71c44ba0..94ee30b5cba 100644 --- a/sql/2019/performance/07_23.sql +++ b/sql/2019/performance/07_23.sql @@ -7,12 +7,12 @@ SELECT APPROX_QUANTILES(loading_cpu_time, 1000)[OFFSET(100)] AS loading_cpu_time FROM ( SELECT - _TABLE_SUFFIX AS client, - ( + _TABLE_SUFFIX AS client, ( CAST(IFNULL(JSON_EXTRACT(payload, "$['_cpu.ParseHTML']"), '0') AS INT64) ) AS loading_cpu_time FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/performance/07_24.sql b/sql/2019/performance/07_24.sql index b876bdb0c14..dc5fe800f53 100644 --- a/sql/2019/performance/07_24.sql +++ b/sql/2019/performance/07_24.sql @@ -8,7 +8,8 @@ FROM ( SELECT CAST(JSON_EXTRACT(report, '$.categories.performance.score') AS NUMERIC) AS score FROM - `httparchive.lighthouse.2019_07_01_mobile`), + `httparchive.lighthouse.2019_07_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile diff --git a/sql/2019/pwa/11_02.sql b/sql/2019/pwa/11_02.sql index f81343bdf92..5a81adb062d 100644 --- a/sql/2019/pwa/11_02.sql +++ b/sql/2019/pwa/11_02.sql @@ -8,4 +8,5 @@ FROM ( SELECT JSON_EXTRACT_SCALAR(report, '$.audits.installable-manifest.score') = '1' AS manifest FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/pwa/11_03.sql b/sql/2019/pwa/11_03.sql index 1e5a31d48e8..e8f85543b61 100644 --- a/sql/2019/pwa/11_03.sql +++ b/sql/2019/pwa/11_03.sql @@ -8,8 +8,7 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.service_workers` -JOIN - (SELECT client, COUNT(DISTINCT page) AS total FROM `httparchive.almanac.service_workers` GROUP BY client) +JOIN (SELECT client, COUNT(DISTINCT page) AS total FROM `httparchive.almanac.service_workers` GROUP BY client) USING (client), UNNEST(ARRAY_CONCAT( REGEXP_EXTRACT_ALL(body, r'\.on(install|activate|fetch|push|notificationclick|notificationclose|sync|canmakepayment|paymentrequest|message|messageerror)\s*='), diff --git a/sql/2019/pwa/11_04.sql b/sql/2019/pwa/11_04.sql index 846e46f683a..ddac9706365 100644 --- a/sql/2019/pwa/11_04.sql +++ b/sql/2019/pwa/11_04.sql @@ -24,7 +24,8 @@ FROM ( FROM `httparchive.almanac.manifests` WHERE - date = '2019-07-01'), + date = '2019-07-01' +), UNNEST(properties) AS property GROUP BY client, diff --git a/sql/2019/pwa/11_05.sql b/sql/2019/pwa/11_05.sql index 5ab4488ff88..cf8b24f0592 100644 --- a/sql/2019/pwa/11_05.sql +++ b/sql/2019/pwa/11_05.sql @@ -7,8 +7,7 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.service_workers` sw -JOIN - (SELECT date, client, COUNT(DISTINCT page) AS total FROM `httparchive.almanac.service_workers` WHERE date = '2019-07-01' GROUP BY client) +JOIN (SELECT date, client, COUNT(DISTINCT page) AS total FROM `httparchive.almanac.service_workers` WHERE date = '2019-07-01' GROUP BY client) USING (date, client), UNNEST(REGEXP_EXTRACT_ALL(body, r'new Workbox|new workbox|workbox\.precaching\.|workbox\.strategies\.')) AS occurrence WHERE diff --git a/sql/2019/pwa/11_06.sql b/sql/2019/pwa/11_06.sql index 53b939eb446..c23b96ac902 100644 --- a/sql/2019/pwa/11_06.sql +++ b/sql/2019/pwa/11_06.sql @@ -7,13 +7,11 @@ SELECT ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct FROM `httparchive.almanac.summary_response_bodies` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client), UNNEST(REGEXP_EXTRACT_ALL(body, 'beforeinstallprompt')) WHERE - date = '2019-07-01' AND - ( + date = '2019-07-01' AND ( firstHtml OR type = 'script' ) diff --git a/sql/2019/resource-hints/19_01.sql b/sql/2019/resource-hints/19_01.sql index 29f6020c782..f8cdefe83bb 100644 --- a/sql/2019/resource-hints/19_01.sql +++ b/sql/2019/resource-hints/19_01.sql @@ -36,6 +36,7 @@ FROM ( _TABLE_SUFFIX AS client, getResourceHints(payload) AS hints FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/resource-hints/19_02.sql b/sql/2019/resource-hints/19_02.sql index 43de97fec8c..1852ec58de3 100644 --- a/sql/2019/resource-hints/19_02.sql +++ b/sql/2019/resource-hints/19_02.sql @@ -38,6 +38,7 @@ FROM ( _TABLE_SUFFIX AS client, getResourceHints(payload) AS hints FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/resource-hints/19_04.sql b/sql/2019/resource-hints/19_04.sql index bee87b0bd60..5bc85db0766 100644 --- a/sql/2019/resource-hints/19_04.sql +++ b/sql/2019/resource-hints/19_04.sql @@ -34,13 +34,14 @@ SELECT ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, name), 2) AS pct FROM ( SELECT _TABLE_SUFFIX, url AS page, hint.name, hint.href AS url - FROM `httparchive.pages.2019_07_01_*`, UNNEST(getResourceHints(payload)) AS hint) + FROM `httparchive.pages.2019_07_01_*`, UNNEST(getResourceHints(payload)) AS hint +) LEFT JOIN ( SELECT client AS _TABLE_SUFFIX, page, url, type FROM `httparchive.almanac.summary_requests` - WHERE date = '2019-07-01') -USING - (_TABLE_SUFFIX, page, url) + WHERE date = '2019-07-01' +) +USING (_TABLE_SUFFIX, page, url) GROUP BY client, name, diff --git a/sql/2019/resource-hints/19_05.sql b/sql/2019/resource-hints/19_05.sql index bc5c157512a..05b7da543b2 100644 --- a/sql/2019/resource-hints/19_05.sql +++ b/sql/2019/resource-hints/19_05.sql @@ -35,13 +35,14 @@ SELECT ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX, name), 2) AS pct FROM ( SELECT _TABLE_SUFFIX, url AS page, hint.name, hint.href AS url - FROM `httparchive.pages.2019_07_01_*`, UNNEST(getResourceHints(payload)) AS hint) + FROM `httparchive.pages.2019_07_01_*`, UNNEST(getResourceHints(payload)) AS hint +) LEFT JOIN ( SELECT client AS _TABLE_SUFFIX, page, url, type FROM `httparchive.almanac.summary_requests` - WHERE date = '2019-07-01') -USING - (_TABLE_SUFFIX, page, url) + WHERE date = '2019-07-01' +) +USING (_TABLE_SUFFIX, page, url) GROUP BY client, name, diff --git a/sql/2019/resource-hints/19_07.sql b/sql/2019/resource-hints/19_07.sql index 1847765f377..a7573962eb7 100644 --- a/sql/2019/resource-hints/19_07.sql +++ b/sql/2019/resource-hints/19_07.sql @@ -21,6 +21,7 @@ FROM ( _TABLE_SUFFIX AS client, getPriorityHints(payload) AS has_hint FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/security/08_01.sql b/sql/2019/security/08_01.sql index a5f72e37de0..dd731241fa0 100644 --- a/sql/2019/security/08_01.sql +++ b/sql/2019/security/08_01.sql @@ -6,8 +6,7 @@ SELECT COUNT(0) AS freq, SUM(COUNT(0)) OVER (PARTITION BY client) AS total, ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct -FROM - (SELECT _TABLE_SUFFIX AS client, JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tls_version FROM `httparchive.requests.2019_07_01_*`) +FROM (SELECT _TABLE_SUFFIX AS client, JSON_EXTRACT_SCALAR(payload, '$._tls_version') AS tls_version FROM `httparchive.requests.2019_07_01_*`) WHERE tls_version IS NOT NULL GROUP BY diff --git a/sql/2019/security/08_02.sql b/sql/2019/security/08_02.sql index 8e82d8b2bbe..531459a6b09 100644 --- a/sql/2019/security/08_02.sql +++ b/sql/2019/security/08_02.sql @@ -6,8 +6,7 @@ SELECT COUNT(0) AS freq, SUM(COUNT(0)) OVER (PARTITION BY client) AS total, ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct -FROM - (SELECT _TABLE_SUFFIX AS client, JSON_EXTRACT_SCALAR(payload, '$._securityDetails.issuer') AS issuer FROM `httparchive.requests.2019_07_01_*`) +FROM (SELECT _TABLE_SUFFIX AS client, JSON_EXTRACT_SCALAR(payload, '$._securityDetails.issuer') AS issuer FROM `httparchive.requests.2019_07_01_*`) WHERE issuer IS NOT NULL GROUP BY diff --git a/sql/2019/security/08_03-04.sql b/sql/2019/security/08_03-04.sql index 25b2c690d6a..b14845202f1 100644 --- a/sql/2019/security/08_03-04.sql +++ b/sql/2019/security/08_03-04.sql @@ -14,11 +14,13 @@ SELECT FROM ( SELECT client, - COUNTIF(IF(tls13, + COUNTIF(IF( + tls13, getHexCert(cert) LIKE '%2a8648ce3d0201%', REGEXP_CONTAINS(key_exchange, r'ECDSA') )) AS is_ecdsa, - COUNTIF(IF(tls13, + COUNTIF(IF( + tls13, getHexCert(cert) LIKE '%2a864886f70d010101%', REGEXP_CONTAINS(key_exchange, r'RSA') )) AS is_rsa, @@ -30,8 +32,10 @@ FROM ( JSON_EXTRACT(payload, '$._securityDetails.keyExchange') AS key_exchange, JSON_EXTRACT_SCALAR(payload, '$._securityDetails.protocol') = 'TLS 1.3' AS tls13 FROM - `httparchive.requests.2019_07_01_*`) + `httparchive.requests.2019_07_01_*` + ) WHERE cert IS NOT NULL GROUP BY - client) + client +) diff --git a/sql/2019/security/08_06.sql b/sql/2019/security/08_06.sql index c2f06c07dc7..ba0db6dff84 100644 --- a/sql/2019/security/08_06.sql +++ b/sql/2019/security/08_06.sql @@ -11,7 +11,8 @@ FROM ( JSON_EXTRACT(payload, '$._securityDetails.keyExchange') AS key_exchange, JSON_EXTRACT_SCALAR(payload, '$._securityDetails.protocol') AS protocol FROM - `httparchive.requests.2019_07_01_*`) + `httparchive.requests.2019_07_01_*` +) WHERE protocol IS NOT NULL GROUP BY diff --git a/sql/2019/security/08_07.sql b/sql/2019/security/08_07.sql index 54b7a3dd6b4..408edcd1246 100644 --- a/sql/2019/security/08_07.sql +++ b/sql/2019/security/08_07.sql @@ -10,7 +10,8 @@ FROM ( _TABLE_SUFFIX, JSON_EXTRACT(payload, '$._securityDetails.cipher') AS cipher FROM - `httparchive.requests.2019_07_01_*`) + `httparchive.requests.2019_07_01_*` +) WHERE cipher IS NOT NULL GROUP BY diff --git a/sql/2019/security/08_08.sql b/sql/2019/security/08_08.sql index fd5d60cbcf6..db9ef06e296 100644 --- a/sql/2019/security/08_08.sql +++ b/sql/2019/security/08_08.sql @@ -20,9 +20,11 @@ # 0xD002 TLS_ECDHE_PSK_WITH_AES_256_GCM_SHA384 # 0xD005 TLS_ECDHE_PSK_WITH_AES_128_CCM_SHA256 CREATE TEMPORARY FUNCTION isModern(cipher STRING) RETURNS BOOLEAN AS ( - cipher IN ('1301', '1302', '1303', '1304', '1305', + cipher IN ( + '1301', '1302', '1303', '1304', '1305', 'C02B', 'C02C', 'C02F', 'C030', 'CCA8', 'CCA9', - 'CCAC', 'D001', 'D002', 'D005') + 'CCAC', 'D001', 'D002', 'D005' + ) ); SELECT @@ -40,4 +42,5 @@ FROM ( WHERE JSON_EXTRACT(payload, '$._securityDetails') IS NOT NULL GROUP BY - client) + client +) diff --git a/sql/2019/security/08_13.sql b/sql/2019/security/08_13.sql index 015e468fd16..b5a6983d186 100644 --- a/sql/2019/security/08_13.sql +++ b/sql/2019/security/08_13.sql @@ -20,6 +20,7 @@ FROM ( status = 200 GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2019/security/08_14.sql b/sql/2019/security/08_14.sql index 3681dbb016b..b327d99eeda 100644 --- a/sql/2019/security/08_14.sql +++ b/sql/2019/security/08_14.sql @@ -22,4 +22,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_15-16.sql b/sql/2019/security/08_15-16.sql index 615b777595f..e02799c8c0d 100644 --- a/sql/2019/security/08_15-16.sql +++ b/sql/2019/security/08_15-16.sql @@ -22,4 +22,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_17.sql b/sql/2019/security/08_17.sql index 5de689c6434..929e84d8c18 100644 --- a/sql/2019/security/08_17.sql +++ b/sql/2019/security/08_17.sql @@ -28,4 +28,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_18.sql b/sql/2019/security/08_18.sql index c0739750fec..32d9950563f 100644 --- a/sql/2019/security/08_18.sql +++ b/sql/2019/security/08_18.sql @@ -28,4 +28,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_19.sql b/sql/2019/security/08_19.sql index 5cf70a0c073..c761ddbe430 100644 --- a/sql/2019/security/08_19.sql +++ b/sql/2019/security/08_19.sql @@ -25,4 +25,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_20.sql b/sql/2019/security/08_20.sql index 408cc5f28fb..e5053c9d239 100644 --- a/sql/2019/security/08_20.sql +++ b/sql/2019/security/08_20.sql @@ -22,4 +22,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_21.sql b/sql/2019/security/08_21.sql index bd5fca28023..b641e8523e5 100644 --- a/sql/2019/security/08_21.sql +++ b/sql/2019/security/08_21.sql @@ -19,4 +19,5 @@ FROM ( date = '2019-07-01' AND firstHtml GROUP BY - client) + client +) diff --git a/sql/2019/security/08_22.sql b/sql/2019/security/08_22.sql index 331313b507e..f3785a7dd66 100644 --- a/sql/2019/security/08_22.sql +++ b/sql/2019/security/08_22.sql @@ -11,7 +11,8 @@ FROM ( FROM `httparchive.summary_requests.2019_07_01_*` WHERE - firstHtml), + firstHtml +), UNNEST([10, 25, 50, 75, 90]) AS percentile WHERE max_age IS NOT NULL diff --git a/sql/2019/security/08_23-24.sql b/sql/2019/security/08_23-24.sql index 14d879f54ed..3774160694a 100644 --- a/sql/2019/security/08_23-24.sql +++ b/sql/2019/security/08_23-24.sql @@ -9,8 +9,7 @@ SELECT FROM `httparchive.summary_requests.2019_07_01_*`, UNNEST(REGEXP_EXTRACT_ALL(REGEXP_EXTRACT(respOtherHeaders, r'(?i)\W?strict-transport-security =([^,]+)'), '(max-age|includeSubDomains|preload)')) AS directive -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) WHERE firstHtml diff --git a/sql/2019/security/08_23-24b.sql b/sql/2019/security/08_23-24b.sql index d4650ccf184..5fe94e3f5d2 100644 --- a/sql/2019/security/08_23-24b.sql +++ b/sql/2019/security/08_23-24b.sql @@ -5,10 +5,8 @@ SELECT COUNT(0) AS freq, total, ROUND(COUNT(0) * 100 / total, 2) AS pct -FROM - (SELECT _TABLE_SUFFIX, REGEXP_EXTRACT(respOtherHeaders, r'(?i)\W?strict-transport-security =([^,]+)') AS hsts FROM `httparchive.summary_requests.2019_07_01_*` WHERE firstHtml) -JOIN - (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +FROM (SELECT _TABLE_SUFFIX, REGEXP_EXTRACT(respOtherHeaders, r'(?i)\W?strict-transport-security =([^,]+)') AS hsts FROM `httparchive.summary_requests.2019_07_01_*` WHERE firstHtml) +JOIN (SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (_TABLE_SUFFIX) WHERE hsts IS NOT NULL AND diff --git a/sql/2019/security/08_25-34.sql b/sql/2019/security/08_25-34.sql index bd6bbcf16b7..f5deabc906e 100644 --- a/sql/2019/security/08_25-34.sql +++ b/sql/2019/security/08_25-34.sql @@ -8,18 +8,21 @@ SELECT ROUND(COUNTIF(REGEXP_CONTAINS(respOtherHeaders, CONCAT('(?i)', header, ' ='))) * 100 / total, 2) AS pct FROM `httparchive.summary_requests.2019_07_01_*`, - UNNEST(['nel', 'report-to', 'referrer-policy', + UNNEST([ + 'nel', 'report-to', 'referrer-policy', 'feature-policy', 'x-content-type-options', 'x-xss-protection', 'x-frame-options', 'cross-origin-resource-policy', 'cross-origin-opener-policy', 'sec-fetch-(dest|mode|site|user)', 'strict-transport-security', - 'content-security-policy']) + 'content-security-policy' + ]) JOIN ( SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` - GROUP BY _TABLE_SUFFIX) + GROUP BY _TABLE_SUFFIX +) USING (_TABLE_SUFFIX) WHERE firstHtml diff --git a/sql/2019/security/08_35-37.sql b/sql/2019/security/08_35-37.sql index 70ac5622049..8361f9d0687 100644 --- a/sql/2019/security/08_35-37.sql +++ b/sql/2019/security/08_35-37.sql @@ -1,14 +1,17 @@ #standardSQL - # 08_35-37: Groupings of availably parsed values by percentage by client - # Mostly dynamic, but then removed all of the disinct values at "=" unless - # samesite rule is involved +# 08_35-37: Groupings of availably parsed values by percentage by client +# Mostly dynamic, but then removed all of the disinct values at "=" unless +# samesite rule is involved SELECT client, - TRIM(SUBSTR(LOWER(policy), 1, + TRIM(SUBSTR( + LOWER(policy), 1, CASE - WHEN (STRPOS(LOWER(policy), 'samesite=strict') > 1 OR - STRPOS(LOWER(policy), 'samesite=lax') > 0 OR - STRPOS(LOWER(policy), 'samesite=none') > 1) + WHEN ( + STRPOS(LOWER(policy), 'samesite=strict') > 1 OR + STRPOS(LOWER(policy), 'samesite=lax') > 0 OR + STRPOS(LOWER(policy), 'samesite=none') > 1 + ) THEN LENGTH(policy) WHEN STRPOS(policy, '=') > 1 THEN STRPOS(LOWER(policy), '=') - 1 diff --git a/sql/2019/security/08_35.sql b/sql/2019/security/08_35.sql index 2947dd569df..28430ebe4c9 100644 --- a/sql/2019/security/08_35.sql +++ b/sql/2019/security/08_35.sql @@ -22,8 +22,7 @@ SELECT FROM `httparchive.almanac.requests`, UNNEST(SPLIT(extractHeader(payload, 'Set-Cookie'), ';')) AS directive -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/security/08_36.sql b/sql/2019/security/08_36.sql index 61779ec2158..890d19c362b 100644 --- a/sql/2019/security/08_36.sql +++ b/sql/2019/security/08_36.sql @@ -22,8 +22,7 @@ SELECT FROM `httparchive.almanac.requests`, UNNEST(SPLIT(extractHeader(payload, 'Set-Cookie'), ';')) AS directive -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/security/08_37.sql b/sql/2019/security/08_37.sql index ce2d229cd82..94311b1e189 100644 --- a/sql/2019/security/08_37.sql +++ b/sql/2019/security/08_37.sql @@ -22,8 +22,7 @@ SELECT FROM `httparchive.almanac.requests`, UNNEST(SPLIT(extractHeader(payload, 'Set-Cookie'), ';')) AS directive -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/security/08_38.sql b/sql/2019/security/08_38.sql index 9417f5ea254..2a2ba8fee47 100644 --- a/sql/2019/security/08_38.sql +++ b/sql/2019/security/08_38.sql @@ -24,8 +24,7 @@ FROM `httparchive.almanac.requests`, UNNEST(SPLIT(extractHeader(payload, 'Set-Cookie'), ';')) AS directive, UNNEST(REGEXP_EXTRACT_ALL(directive, '(__Host-|__Secure-)')) AS prefix -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX) USING (client) WHERE date = '2019-07-01' AND diff --git a/sql/2019/security/08_39b.sql b/sql/2019/security/08_39b.sql index b24aa67e9fe..510be49311c 100644 --- a/sql/2019/security/08_39b.sql +++ b/sql/2019/security/08_39b.sql @@ -27,6 +27,7 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2019-07-01' AND - firstHtml) + firstHtml +) GROUP BY client diff --git a/sql/2019/seo/10_01.sql b/sql/2019/seo/10_01.sql index 05c3f510578..4ec2c8966b9 100644 --- a/sql/2019/seo/10_01.sql +++ b/sql/2019/seo/10_01.sql @@ -29,6 +29,7 @@ FROM ( _TABLE_SUFFIX AS client, hasEligibleType(payload) AS has_eligible_type FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/seo/10_03.sql b/sql/2019/seo/10_03.sql index 9826e57727e..93269d404b7 100644 --- a/sql/2019/seo/10_03.sql +++ b/sql/2019/seo/10_03.sql @@ -21,6 +21,7 @@ FROM ( _TABLE_SUFFIX AS client, hasAmpLink(payload) AS has_amp_link FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/seo/10_04a.sql b/sql/2019/seo/10_04a.sql index 321dd7472bf..59221d3ed7a 100644 --- a/sql/2019/seo/10_04a.sql +++ b/sql/2019/seo/10_04a.sql @@ -13,6 +13,7 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml) + firstHtml +) GROUP BY client diff --git a/sql/2019/seo/10_06.sql b/sql/2019/seo/10_06.sql index f60e241db70..7fdb4f08b06 100644 --- a/sql/2019/seo/10_06.sql +++ b/sql/2019/seo/10_06.sql @@ -11,4 +11,5 @@ FROM ( JSON_EXTRACT_SCALAR(report, '$.audits.is-crawlable.score') = '1' AS is_crawlable, JSON_EXTRACT_SCALAR(report, '$.audits.canonical.score') = '1' AS is_canonical FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/seo/10_07a.sql b/sql/2019/seo/10_07a.sql index 0587945e691..a02a55e7b49 100644 --- a/sql/2019/seo/10_07a.sql +++ b/sql/2019/seo/10_07a.sql @@ -13,4 +13,5 @@ FROM ( JSON_EXTRACT_SCALAR(report, '$.audits.document-title.score') = '1' AS has_title, JSON_EXTRACT_SCALAR(report, '$.audits.meta-description.score') = '1' AS has_meta_description FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/seo/10_07b.sql b/sql/2019/seo/10_07b.sql index 35738faf769..98e231bc4b7 100644 --- a/sql/2019/seo/10_07b.sql +++ b/sql/2019/seo/10_07b.sql @@ -12,7 +12,8 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml), + firstHtml +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/seo/10_07c.sql b/sql/2019/seo/10_07c.sql index d95cc4718e2..2802d457789 100644 --- a/sql/2019/seo/10_07c.sql +++ b/sql/2019/seo/10_07c.sql @@ -21,7 +21,8 @@ FROM ( _TABLE_SUFFIX AS client, getMetaDescriptionLength(payload) AS description_length FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/seo/10_09a.sql b/sql/2019/seo/10_09a.sql index fd21a84e061..d45b19e039e 100644 --- a/sql/2019/seo/10_09a.sql +++ b/sql/2019/seo/10_09a.sql @@ -15,7 +15,8 @@ FROM ( CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), "$['seo-titles'].titleWords") AS INT64) AS header_words_count, CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), "$['seo-titles'].titleElements") AS INT64) AS header_elements FROM - `httparchive.pages.2019_07_01_*`), + `httparchive.pages.2019_07_01_*` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/seo/10_09b.sql b/sql/2019/seo/10_09b.sql index 00b50734d8b..cc1865d7b3b 100644 --- a/sql/2019/seo/10_09b.sql +++ b/sql/2019/seo/10_09b.sql @@ -8,4 +8,5 @@ FROM ( SELECT JSON_EXTRACT_SCALAR(report, '$.audits.image-alt.score') = '1' AS img_alt FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/seo/10_10.sql b/sql/2019/seo/10_10.sql index cfbf3de8aa3..b19aa5554d7 100644 --- a/sql/2019/seo/10_10.sql +++ b/sql/2019/seo/10_10.sql @@ -17,7 +17,9 @@ FROM ( _TABLE_SUFFIX AS client, JSON_EXTRACT_SCALAR(payload, '$._almanac') AS almanac FROM - `httparchive.pages.2019_07_01_*`)), + `httparchive.pages.2019_07_01_*` + ) +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/seo/10_11.sql b/sql/2019/seo/10_11.sql index 1b668ab4096..f626c366235 100644 --- a/sql/2019/seo/10_11.sql +++ b/sql/2019/seo/10_11.sql @@ -12,7 +12,8 @@ FROM ( url, CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), "$['seo-anchor-elements'].navigateHash") AS INT64) > 0 AS navigate_hash FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) JOIN `httparchive.technologies.2019_07_01_*` USING (_TABLE_SUFFIX, url) diff --git a/sql/2019/seo/10_13.sql b/sql/2019/seo/10_13.sql index bcc411b3028..f5aca249a8b 100644 --- a/sql/2019/seo/10_13.sql +++ b/sql/2019/seo/10_13.sql @@ -26,9 +26,7 @@ try { SELECT COUNT(DISTINCT page) AS pages, ROUND(COUNT(DISTINCT page) * 100 / total, 2) AS pct -FROM - (SELECT page, css FROM `httparchive.almanac.parsed_css` WHERE client = 'desktop'), - (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_desktop`) +FROM (SELECT page, css FROM `httparchive.almanac.parsed_css` WHERE client = 'desktop'), (SELECT COUNT(0) AS total FROM `httparchive.summary_pages.2019_07_01_desktop`) WHERE date = '2019-07-01' AND hasBreakpoint(css) diff --git a/sql/2019/seo/10_14.sql b/sql/2019/seo/10_14.sql index cf797237b63..9b0a1bc0e45 100644 --- a/sql/2019/seo/10_14.sql +++ b/sql/2019/seo/10_14.sql @@ -8,4 +8,5 @@ FROM ( SELECT JSON_EXTRACT_SCALAR(report, '$.audits.link-text.score') = '1' AS link_text FROM - `httparchive.lighthouse.2019_07_01_mobile`) + `httparchive.lighthouse.2019_07_01_mobile` +) diff --git a/sql/2019/seo/10_15a.sql b/sql/2019/seo/10_15a.sql index 1959a086e19..3fe860b4239 100644 --- a/sql/2019/seo/10_15a.sql +++ b/sql/2019/seo/10_15a.sql @@ -2,7 +2,7 @@ # 10_15a: % of websites classified as fast/avg/slow SELECT ROUND(COUNTIF(fast_fcp >= .9 AND fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast, - ROUND(COUNTIF(NOT(slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT(fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, + ROUND(COUNTIF(NOT (slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT (fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, ROUND(COUNTIF(slow_fcp >= .1 OR slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow FROM `chrome-ux-report.materialized.metrics_summary` diff --git a/sql/2019/seo/10_15b.sql b/sql/2019/seo/10_15b.sql index 6277a636277..c716a8acae4 100644 --- a/sql/2019/seo/10_15b.sql +++ b/sql/2019/seo/10_15b.sql @@ -3,7 +3,7 @@ SELECT device, ROUND(COUNTIF(fast_fcp >= .9 AND fast_fid >= .95) * 100 / COUNT(0), 2) AS pct_fast, - ROUND(COUNTIF(NOT(slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT(fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, + ROUND(COUNTIF(NOT (slow_fcp >= .1 OR slow_fid >= 0.05) AND NOT (fast_fcp >= .9 AND fast_fid >= .95)) * 100 / COUNT(0), 2) AS pct_avg, ROUND(COUNTIF(slow_fcp >= .1 OR slow_fid >= 0.05) * 100 / COUNT(0), 2) AS pct_slow FROM ( SELECT @@ -17,7 +17,8 @@ FROM ( FROM `chrome-ux-report.materialized.device_summary` WHERE - yyyymm = '201907') + yyyymm = '201907' +) WHERE fast_fid IS NOT NULL GROUP BY diff --git a/sql/2019/seo/10_16.sql b/sql/2019/seo/10_16.sql index bb883da2efd..e8c8cac8d07 100644 --- a/sql/2019/seo/10_16.sql +++ b/sql/2019/seo/10_16.sql @@ -12,7 +12,8 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2019-07-01' AND - firstHtml), + firstHtml +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2019/seo/10_18.sql b/sql/2019/seo/10_18.sql index f3a5788c5d1..3b41bf4c09c 100644 --- a/sql/2019/seo/10_18.sql +++ b/sql/2019/seo/10_18.sql @@ -10,6 +10,7 @@ FROM ( CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), "$['seo-words'].wordsCount") AS INT64) AS words_count, CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), "$['seo-titles'].titleElements") AS INT64) AS header_elements FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` +) GROUP BY client diff --git a/sql/2019/seo/10_19.sql b/sql/2019/seo/10_19.sql index 71d44ebce98..c7e0b92bfd6 100644 --- a/sql/2019/seo/10_19.sql +++ b/sql/2019/seo/10_19.sql @@ -16,7 +16,8 @@ FROM ( _TABLE_SUFFIX AS client, JSON_EXTRACT_SCALAR(payload, '$._almanac') AS almanac FROM - `httparchive.pages.2019_07_01_*`) + `httparchive.pages.2019_07_01_*` + ) ) GROUP BY client diff --git a/sql/2019/third-parties/05_03.sql b/sql/2019/third-parties/05_03.sql index 1f9b47b57c2..0bada7ed09c 100644 --- a/sql/2019/third-parties/05_03.sql +++ b/sql/2019/third-parties/05_03.sql @@ -10,7 +10,8 @@ FROM ( SELECT client, type AS contentType, - IFNULL(ThirdPartyTable.category, + IFNULL( + ThirdPartyTable.category, IF(DomainsOver50Table.requestDomain IS NULL, 'first-party', 'other') ) AS thirdPartyCategory FROM diff --git a/sql/2019/third-parties/05_04.sql b/sql/2019/third-parties/05_04.sql index 46ecd8198e1..7311a82056a 100644 --- a/sql/2019/third-parties/05_04.sql +++ b/sql/2019/third-parties/05_04.sql @@ -11,7 +11,8 @@ FROM ( client, type AS contentType, respBodySize AS requestBytes, - IFNULL(ThirdPartyTable.category, + IFNULL( + ThirdPartyTable.category, IF(DomainsOver50Table.requestDomain IS NULL, 'first-party', 'other') ) AS thirdPartyCategory FROM diff --git a/sql/2019/third-parties/05_05.sql b/sql/2019/third-parties/05_05.sql index 2ab9747cd4c..36ceed34ade 100644 --- a/sql/2019/third-parties/05_05.sql +++ b/sql/2019/third-parties/05_05.sql @@ -14,7 +14,8 @@ try { '''; SELECT - IFNULL(ThirdPartyTable.category, + IFNULL( + ThirdPartyTable.category, IF(DomainsOver50Table.requestDomain IS NULL, 'first-party', 'other') ) AS third_party_category, SUM(item.execution_time) AS total_execution_time, diff --git a/sql/2019/third-parties/05_06.sql b/sql/2019/third-parties/05_06.sql index 3260b9f91ac..f8df94ed4ad 100644 --- a/sql/2019/third-parties/05_06.sql +++ b/sql/2019/third-parties/05_06.sql @@ -17,8 +17,7 @@ FROM ( ON NET.HOST(url) = DomainsOver50Table.requestDomain WHERE date = '2019-07-01' -), -( +), ( SELECT COUNT(0) AS totalRequestCount FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01' ) GROUP BY diff --git a/sql/2019/third-parties/05_07.sql b/sql/2019/third-parties/05_07.sql index 994ce4f206a..534b4b77b8c 100644 --- a/sql/2019/third-parties/05_07.sql +++ b/sql/2019/third-parties/05_07.sql @@ -17,8 +17,7 @@ FROM ( ON NET.HOST(url) = DomainsOver50Table.requestDomain WHERE date = '2019-07-01' -), -( +), ( SELECT SUM(respSize) AS totalRequestBytes FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01' ) WHERE thirdPartyDomain IS NOT NULL diff --git a/sql/2019/third-parties/05_08.sql b/sql/2019/third-parties/05_08.sql index f90ebe8abef..f8c1bb2cfe2 100644 --- a/sql/2019/third-parties/05_08.sql +++ b/sql/2019/third-parties/05_08.sql @@ -30,13 +30,14 @@ FROM ( LEFT JOIN `lighthouse-infrastructure.third_party_web.2019_07_01_all_observed_domains` AS DomainsOver50Table ON - NET.HOST(item.url) = DomainsOver50Table.requestDomain) t1, - ( - SELECT - SUM(item.execution_time) AS totalExecutionTime - FROM - `httparchive.lighthouse.2019_07_01_mobile`, - UNNEST(getExecutionTimes(report)) AS item) t2 + NET.HOST(item.url) = DomainsOver50Table.requestDomain +) t1, ( + SELECT + SUM(item.execution_time) AS totalExecutionTime + FROM + `httparchive.lighthouse.2019_07_01_mobile`, + UNNEST(getExecutionTimes(report)) AS item +) t2 WHERE thirdPartyDomain IS NOT NULL GROUP BY diff --git a/sql/2019/third-parties/05_09.sql b/sql/2019/third-parties/05_09.sql index 1cd4b0aabea..86355acf587 100644 --- a/sql/2019/third-parties/05_09.sql +++ b/sql/2019/third-parties/05_09.sql @@ -13,8 +13,7 @@ FROM ( `httparchive.almanac.summary_requests` WHERE date = '2019-07-01' -), -( +), ( SELECT COUNT(0) AS totalRequestCount FROM `httparchive.almanac.summary_requests` WHERE date = '2019-07-01' ) GROUP BY diff --git a/sql/2019/third-parties/05_10.sql b/sql/2019/third-parties/05_10.sql index c0d44b6cb5f..4059b0f3e93 100644 --- a/sql/2019/third-parties/05_10.sql +++ b/sql/2019/third-parties/05_10.sql @@ -25,13 +25,14 @@ FROM ( item.execution_time AS executionTime FROM `httparchive.lighthouse.2019_07_01_mobile`, - UNNEST(getExecutionTimes(report)) AS item) t1, - ( - SELECT - SUM(item.execution_time) AS totalExecutionTime - FROM - `httparchive.lighthouse.2019_07_01_mobile`, - UNNEST(getExecutionTimes(report)) AS item) t2 + UNNEST(getExecutionTimes(report)) AS item +) t1, ( + SELECT + SUM(item.execution_time) AS totalExecutionTime + FROM + `httparchive.lighthouse.2019_07_01_mobile`, + UNNEST(getExecutionTimes(report)) AS item +) t2 WHERE requestUrl != 'Other' GROUP BY requestUrl diff --git a/sql/2020/accessibility/alt_text_length.sql b/sql/2020/accessibility/alt_text_length.sql index e96192034d2..0a2e779e396 100644 --- a/sql/2020/accessibility/alt_text_length.sql +++ b/sql/2020/accessibility/alt_text_length.sql @@ -5,17 +5,16 @@ SELECT percentile, APPROX_QUANTILES(alt_length, 1000)[OFFSET(percentile * 10)] AS alt_length -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - SAFE_CAST(alt_length_string AS INT64) AS alt_length - FROM - `httparchive.pages.2020_08_01_*`, - UNNEST( - JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.images.alt_lengths') - ) AS alt_length_string - ), +FROM ( + SELECT + _TABLE_SUFFIX AS client, + SAFE_CAST(alt_length_string AS INT64) AS alt_length + FROM + `httparchive.pages.2020_08_01_*`, + UNNEST( + JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.images.alt_lengths') + ) AS alt_length_string +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE alt_length > 0 diff --git a/sql/2020/accessibility/duplicate_id.sql b/sql/2020/accessibility/duplicate_id.sql index 2fbac3f70f5..fd70637371f 100644 --- a/sql/2020/accessibility/duplicate_id.sql +++ b/sql/2020/accessibility/duplicate_id.sql @@ -9,12 +9,12 @@ SELECT percentile, APPROX_QUANTILES(total_duplicate_ids, 1000)[OFFSET(percentile * 10)] AS total_duplicate_ids FROM ( - SELECT - _TABLE_SUFFIX AS client, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._markup'), '$.ids.duplicate_ids_total') AS INT64) AS total_duplicate_ids - FROM - `httparchive.pages.2020_08_01_*` - ), + SELECT + _TABLE_SUFFIX AS client, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._markup'), '$.ids.duplicate_ids_total') AS INT64) AS total_duplicate_ids + FROM + `httparchive.pages.2020_08_01_*` +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/accessibility/lighthouse_a11y_score.sql b/sql/2020/accessibility/lighthouse_a11y_score.sql index 9206cdcce38..7cccb1f2f69 100644 --- a/sql/2020/accessibility/lighthouse_a11y_score.sql +++ b/sql/2020/accessibility/lighthouse_a11y_score.sql @@ -10,7 +10,8 @@ FROM ( SELECT CAST(JSON_EXTRACT(report, '$.categories.accessibility.score') AS NUMERIC) AS score FROM - `httparchive.lighthouse.2019_07_01_mobile`), + `httparchive.lighthouse.2019_07_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY date, @@ -24,7 +25,8 @@ FROM ( SELECT CAST(JSON_EXTRACT(report, '$.categories.accessibility.score') AS NUMERIC) AS score FROM - `httparchive.lighthouse.2020_08_01_mobile`), + `httparchive.lighthouse.2020_08_01_mobile` +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY date, diff --git a/sql/2020/accessibility/sites_using_role.sql b/sql/2020/accessibility/sites_using_role.sql index d05888ca88f..b52d714cb7c 100644 --- a/sql/2020/accessibility/sites_using_role.sql +++ b/sql/2020/accessibility/sites_using_role.sql @@ -9,12 +9,12 @@ SELECT percentile, APPROX_QUANTILES(total_role_attributes, 1000)[OFFSET(percentile * 10)] AS total_role_usages FROM ( - SELECT - _TABLE_SUFFIX AS client, - CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.nodes_using_role.total') AS INT64) AS total_role_attributes - FROM - `httparchive.pages.2020_08_01_*` - ), + SELECT + _TABLE_SUFFIX AS client, + CAST(JSON_EXTRACT_SCALAR(JSON_EXTRACT_SCALAR(payload, '$._almanac'), '$.nodes_using_role.total') AS INT64) AS total_role_attributes + FROM + `httparchive.pages.2020_08_01_*` +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/accessibility/valid_html_lang.sql b/sql/2020/accessibility/valid_html_lang.sql index 1aaeaaed5c7..932a95248f3 100644 --- a/sql/2020/accessibility/valid_html_lang.sql +++ b/sql/2020/accessibility/valid_html_lang.sql @@ -11,4 +11,5 @@ FROM ( JSON_EXTRACT_SCALAR(report, "$.audits['html-has-lang'].score") = '1' AS has_lang, JSON_EXTRACT_SCALAR(report, "$.audits['html-lang-valid'].score") = '1' AS valid_lang FROM - `httparchive.lighthouse.2020_08_01_mobile`) + `httparchive.lighthouse.2020_08_01_mobile` +) diff --git a/sql/2020/caching/cache_ttl_and_content_age_diff.sql b/sql/2020/caching/cache_ttl_and_content_age_diff.sql index c903f7de34d..dbf69cce57f 100644 --- a/sql/2020/caching/cache_ttl_and_content_age_diff.sql +++ b/sql/2020/caching/cache_ttl_and_content_age_diff.sql @@ -14,17 +14,16 @@ SELECT client, percentile, APPROX_QUANTILES(diff_in_days, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS diff_in_days -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / (60 * 60 * 24), 2) AS diff_in_days - FROM - `httparchive.summary_requests.2020_08_01_*` - WHERE - resp_last_modified != '' AND - expAge > 0 - ), +FROM ( + SELECT + _TABLE_SUFFIX AS client, + ROUND((expAge - (startedDateTime - toTimestamp(resp_last_modified))) / (60 * 60 * 24), 2) AS diff_in_days + FROM + `httparchive.summary_requests.2020_08_01_*` + WHERE + resp_last_modified != '' AND + expAge > 0 +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY client, diff --git a/sql/2020/caching/content_age_older_than_ttl.sql b/sql/2020/caching/content_age_older_than_ttl.sql index de788d0a36f..4346a4cef5c 100644 --- a/sql/2020/caching/content_age_older_than_ttl.sql +++ b/sql/2020/caching/content_age_older_than_ttl.sql @@ -15,17 +15,16 @@ SELECT COUNT(0) AS total_req, COUNTIF(diff < 0) AS req_too_short_cache, COUNTIF(diff < 0) / COUNT(0) AS perc_req_too_short_cache -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff - FROM - `httparchive.summary_requests.2020_08_01_*` - WHERE - resp_last_modified != '' AND - expAge > 0 - ) +FROM ( + SELECT + _TABLE_SUFFIX AS client, + expAge - (startedDateTime - toTimestamp(resp_last_modified)) AS diff + FROM + `httparchive.summary_requests.2020_08_01_*` + WHERE + resp_last_modified != '' AND + expAge > 0 +) GROUP BY client ORDER BY diff --git a/sql/2020/caching/content_age_older_than_ttl_by_party.sql b/sql/2020/caching/content_age_older_than_ttl_by_party.sql index 9257b3de2f4..9d83e2e4d57 100644 --- a/sql/2020/caching/content_age_older_than_ttl_by_party.sql +++ b/sql/2020/caching/content_age_older_than_ttl_by_party.sql @@ -16,32 +16,31 @@ SELECT COUNT(0) AS total_req, COUNTIF(diff < 0) AS req_too_short_cache, COUNTIF(diff < 0) / COUNT(0) AS perc_req_too_short_cache -FROM - ( - SELECT - 'desktop' AS client, - IF(NET.HOST(url) IN ( - SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND category != 'hosting' - ), 'third party', 'first party') AS party, - requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff - FROM - `httparchive.summary_requests.2020_08_01_desktop` requests - WHERE - TRIM(requests.resp_last_modified) != '' AND - expAge > 0 - UNION ALL - SELECT - 'mobile' AS client, - IF(NET.HOST(url) IN ( - SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND category != 'hosting' - ), 'third party', 'first party') AS party, - requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff - FROM - `httparchive.summary_requests.2020_08_01_mobile` requests - WHERE - TRIM(requests.resp_last_modified) != '' AND - expAge > 0 - ) +FROM ( + SELECT + 'desktop' AS client, + IF(NET.HOST(url) IN ( + SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND category != 'hosting' + ), 'third party', 'first party') AS party, + requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff + FROM + `httparchive.summary_requests.2020_08_01_desktop` requests + WHERE + TRIM(requests.resp_last_modified) != '' AND + expAge > 0 + UNION ALL + SELECT + 'mobile' AS client, + IF(NET.HOST(url) IN ( + SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND category != 'hosting' + ), 'third party', 'first party') AS party, + requests.expAge - (requests.startedDateTime - toTimestamp(requests.resp_last_modified)) AS diff + FROM + `httparchive.summary_requests.2020_08_01_mobile` requests + WHERE + TRIM(requests.resp_last_modified) != '' AND + expAge > 0 +) GROUP BY client, party diff --git a/sql/2020/caching/invalid_cache_control_directives.sql b/sql/2020/caching/invalid_cache_control_directives.sql index 60715be8029..428d703c112 100644 --- a/sql/2020/caching/invalid_cache_control_directives.sql +++ b/sql/2020/caching/invalid_cache_control_directives.sql @@ -8,60 +8,56 @@ SELECT directive_occurrences, pct_of_cache_control, pct_of_total_requests -FROM - ( - ( - SELECT - 'desktop' AS client, - total_requests, - total_using_cache_control, - directive_name, - COUNT(0) AS directive_occurrences, - COUNT(0) / total_using_cache_control AS pct_of_cache_control, - COUNT(0) / total_requests AS pct_of_total_requests - FROM - `httparchive.summary_requests.2020_08_01_desktop`, - UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name - CROSS JOIN ( - SELECT - COUNT(0) AS total_requests, - COUNTIF(TRIM(resp_cache_control) != '') AS total_using_cache_control - FROM - `httparchive.summary_requests.2020_08_01_desktop` - ) - GROUP BY - client, - total_requests, - total_using_cache_control, - directive_name - ) - UNION ALL - ( - SELECT - 'mobile' AS client, - total_requests, - total_using_cache_control, - directive_name, - COUNT(0) AS directive_occurrences, - COUNT(0) / total_using_cache_control AS pct_of_cache_control, - COUNT(0) / total_requests AS pct_of_total_requests - FROM - `httparchive.summary_requests.2020_08_01_mobile`, - UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name - CROSS JOIN ( - SELECT - COUNT(0) AS total_requests, - COUNTIF(TRIM(resp_cache_control) != '') AS total_using_cache_control - FROM - `httparchive.summary_requests.2020_08_01_mobile` - ) - GROUP BY - client, - total_requests, - total_using_cache_control, - directive_name - ) +FROM ( + SELECT + 'desktop' AS client, + total_requests, + total_using_cache_control, + directive_name, + COUNT(0) AS directive_occurrences, + COUNT(0) / total_using_cache_control AS pct_of_cache_control, + COUNT(0) / total_requests AS pct_of_total_requests + FROM + `httparchive.summary_requests.2020_08_01_desktop`, + UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name + CROSS JOIN ( + SELECT + COUNT(0) AS total_requests, + COUNTIF(TRIM(resp_cache_control) != '') AS total_using_cache_control + FROM + `httparchive.summary_requests.2020_08_01_desktop` ) + GROUP BY + client, + total_requests, + total_using_cache_control, + directive_name + UNION ALL + SELECT + 'mobile' AS client, + total_requests, + total_using_cache_control, + directive_name, + COUNT(0) AS directive_occurrences, + COUNT(0) / total_using_cache_control AS pct_of_cache_control, + COUNT(0) / total_requests AS pct_of_total_requests + FROM + `httparchive.summary_requests.2020_08_01_mobile`, + UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_cache_control), r'([a-z][^,\s="\']*)')) AS directive_name + CROSS JOIN ( + SELECT + COUNT(0) AS total_requests, + COUNTIF(TRIM(resp_cache_control) != '') AS total_using_cache_control + FROM + `httparchive.summary_requests.2020_08_01_mobile` + ) + GROUP BY + client, + total_requests, + total_using_cache_control, + directive_name + +) WHERE directive_name NOT IN ('max-age', 'public', 'no-cache', 'must-revalidate', 'no-store', 'private', 'proxy-revalidate', 's-maxage', 'no-transform', 'immutable', 'stale-while-revalidate', 'stale-if-error', 'pre-check', 'post-check') ORDER BY diff --git a/sql/2020/caching/resource_age_party_and_type_wise_groups.sql b/sql/2020/caching/resource_age_party_and_type_wise_groups.sql index cdacb71cda9..1f1956d15cf 100644 --- a/sql/2020/caching/resource_age_party_and_type_wise_groups.sql +++ b/sql/2020/caching/resource_age_party_and_type_wise_groups.sql @@ -28,20 +28,19 @@ SELECT SAFE_DIVIDE(COUNTIF(age_weeks >= 8 AND age_weeks <= 52), COUNTIF(age_weeks IS NOT NULL)) AS age_8_to_52wk_pct, SAFE_DIVIDE(COUNTIF(age_weeks >= 53 AND age_weeks <= 104), COUNTIF(age_weeks IS NOT NULL)) AS age_gt_1y_pct, SAFE_DIVIDE(COUNTIF(age_weeks >= 105), COUNTIF(age_weeks IS NOT NULL)) AS age_gt_2y_pct -FROM - ( - SELECT - _TABLE_SUFFIX AS client, - IF(NET.HOST(url) IN ( - SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND category != 'hosting' - ), 'third party', 'first party') AS party, - type AS resource_type, - ROUND((startedDateTime - toTimestamp(resp_last_modified)) / (60 * 60 * 24 * 7)) AS age_weeks - FROM - `httparchive.summary_requests.2020_08_01_*` - WHERE - TRIM(resp_last_modified) != '' - ) +FROM ( + SELECT + _TABLE_SUFFIX AS client, + IF(NET.HOST(url) IN ( + SELECT domain FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND category != 'hosting' + ), 'third party', 'first party') AS party, + type AS resource_type, + ROUND((startedDateTime - toTimestamp(resp_last_modified)) / (60 * 60 * 24 * 7)) AS age_weeks + FROM + `httparchive.summary_requests.2020_08_01_*` + WHERE + TRIM(resp_last_modified) != '' +) GROUP BY client, party, diff --git a/sql/2020/caching/set_cookie.sql b/sql/2020/caching/set_cookie.sql index 665ae5651de..5833794b0a2 100644 --- a/sql/2020/caching/set_cookie.sql +++ b/sql/2020/caching/set_cookie.sql @@ -16,8 +16,7 @@ FROM ( SELECT _TABLE_SUFFIX AS client, REGEXP_CONTAINS(resp_cache_control, r'(?i)no-store') AS uses_no_store, - REGEXP_CONTAINS(resp_cache_control, r'(?i)private') AS uses_private, - (reqCookieLen > 0) AS uses_cookies + REGEXP_CONTAINS(resp_cache_control, r'(?i)private') AS uses_private, (reqCookieLen > 0) AS uses_cookies FROM `httparchive.summary_requests.2020_08_01_*` ) diff --git a/sql/2020/caching/vary_headers.sql b/sql/2020/caching/vary_headers.sql index a313815c5e7..c165dbda255 100644 --- a/sql/2020/caching/vary_headers.sql +++ b/sql/2020/caching/vary_headers.sql @@ -10,65 +10,62 @@ SELECT pct_of_total_requests, total_using_both / total_using_vary AS pct_of_vary_with_cache_control, total_using_vary / total_requests AS pct_using_vary -FROM - ( - ( - SELECT - 'desktop' AS client, - total_requests, - total_using_vary, - total_using_both, - vary_header, - COUNT(0) AS occurrences, - COUNT(0) / total_using_vary AS pct_of_vary, - COUNT(0) / total_requests AS pct_of_total_requests - FROM - `httparchive.summary_requests.2020_08_01_desktop`, - UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS vary_header - CROSS JOIN ( - SELECT - COUNT(0) AS total_requests, - COUNTIF(TRIM(resp_vary) != '') AS total_using_vary, - COUNTIF(TRIM(resp_vary) != '' AND TRIM(resp_cache_control) != '') AS total_using_both - FROM - `httparchive.summary_requests.2020_08_01_desktop` - ) - GROUP BY - client, - total_requests, - total_using_vary, - total_using_both, - vary_header - ) - UNION ALL - ( - SELECT - 'mobile' AS client, - total_requests, - total_using_vary, - total_using_both, - vary_header, - COUNT(0) AS occurrences, - COUNT(0) / total_using_vary AS pct_of_vary, - COUNT(0) / total_requests AS pct_of_total_requests - FROM - `httparchive.summary_requests.2020_08_01_mobile`, - UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS vary_header - CROSS JOIN ( - SELECT - COUNT(0) AS total_requests, - COUNTIF(TRIM(resp_vary) != '') AS total_using_vary, - COUNTIF(TRIM(resp_vary) != '' AND TRIM(resp_cache_control) != '') AS total_using_both - FROM - `httparchive.summary_requests.2020_08_01_mobile` - ) - GROUP BY - client, - total_requests, - total_using_vary, - total_using_both, - vary_header - ) +FROM ( + SELECT + 'desktop' AS client, + total_requests, + total_using_vary, + total_using_both, + vary_header, + COUNT(0) AS occurrences, + COUNT(0) / total_using_vary AS pct_of_vary, + COUNT(0) / total_requests AS pct_of_total_requests + FROM + `httparchive.summary_requests.2020_08_01_desktop`, + UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS vary_header + CROSS JOIN ( + SELECT + COUNT(0) AS total_requests, + COUNTIF(TRIM(resp_vary) != '') AS total_using_vary, + COUNTIF(TRIM(resp_vary) != '' AND TRIM(resp_cache_control) != '') AS total_using_both + FROM + `httparchive.summary_requests.2020_08_01_desktop` ) + GROUP BY + client, + total_requests, + total_using_vary, + total_using_both, + vary_header + + UNION ALL + SELECT + 'mobile' AS client, + total_requests, + total_using_vary, + total_using_both, + vary_header, + COUNT(0) AS occurrences, + COUNT(0) / total_using_vary AS pct_of_vary, + COUNT(0) / total_requests AS pct_of_total_requests + FROM + `httparchive.summary_requests.2020_08_01_mobile`, + UNNEST(REGEXP_EXTRACT_ALL(LOWER(resp_vary), r'([a-z][^,\s="\']*)')) AS vary_header + CROSS JOIN ( + SELECT + COUNT(0) AS total_requests, + COUNTIF(TRIM(resp_vary) != '') AS total_using_vary, + COUNTIF(TRIM(resp_vary) != '' AND TRIM(resp_cache_control) != '') AS total_using_both + FROM + `httparchive.summary_requests.2020_08_01_mobile` + ) + GROUP BY + client, + total_requests, + total_using_vary, + total_using_both, + vary_header + +) ORDER BY client, occurrences DESC diff --git a/sql/2020/cms/adoption_of_image_formats_in_cmss.sql b/sql/2020/cms/adoption_of_image_formats_in_cmss.sql index 547ad701ce7..eaf3fdbc5b6 100644 --- a/sql/2020/cms/adoption_of_image_formats_in_cmss.sql +++ b/sql/2020/cms/adoption_of_image_formats_in_cmss.sql @@ -15,7 +15,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - type = 'image') + type = 'image' +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -23,9 +24,9 @@ JOIN ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'CMS') -USING - (client, page) + category = 'CMS' +) +USING (client, page) GROUP BY client, format diff --git a/sql/2020/cms/cms_adoption_compared_to_2019.sql b/sql/2020/cms/cms_adoption_compared_to_2019.sql index ebb6a6761b7..01463d3d66d 100644 --- a/sql/2020/cms/cms_adoption_compared_to_2019.sql +++ b/sql/2020/cms/cms_adoption_compared_to_2019.sql @@ -1,5 +1,5 @@ #standardSQL - # CMS adoptions, compared to 2019 +# CMS adoptions, compared to 2019 SELECT _TABLE_SUFFIX AS client, 2020 AS year, @@ -15,9 +15,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE category = 'CMS' GROUP BY @@ -39,9 +39,9 @@ JOIN ( FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE category = 'CMS' GROUP BY diff --git a/sql/2020/cms/core_web_vitals_distribution.sql b/sql/2020/cms/core_web_vitals_distribution.sql index 9088404b8f5..c4fc2042e45 100644 --- a/sql/2020/cms/core_web_vitals_distribution.sql +++ b/sql/2020/cms/core_web_vitals_distribution.sql @@ -29,7 +29,8 @@ JOIN ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'CMS') + category = 'CMS' +) ON CONCAT(origin, '/') = url AND IF(device = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2020/cms/core_web_vitals_passing.sql b/sql/2020/cms/core_web_vitals_passing.sql index e616a39194b..adb8a014c4d 100644 --- a/sql/2020/cms/core_web_vitals_passing.sql +++ b/sql/2020/cms/core_web_vitals_passing.sql @@ -1,10 +1,10 @@ #standardSQL # Core Web Vitals performance by CMS -CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( +CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( good / (good + needs_improvement + poor) >= 0.75 ); -CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( +CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( good + needs_improvement + poor > 0 ); @@ -16,28 +16,34 @@ SELECT # Origins with good LCP divided by origins with any LCP. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp, + COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL)) + ) AS pct_good_lcp, # Origins with good FID divided by origins with any FID. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid, + COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL)) + ) AS pct_good_fid, # Origins with good CLS divided by origins with any CLS. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls, + COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL)) + ) AS pct_good_cls, # Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS. SAFE_DIVIDE( COUNT(DISTINCT IF( IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND IS_GOOD(fast_fid, avg_fid, slow_fid) AND - IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), + IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL + )), COUNT(DISTINCT IF( IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND - IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv + IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL + )) + ) AS pct_good_cwv FROM `chrome-ux-report.materialized.device_summary` JOIN ( @@ -48,7 +54,8 @@ JOIN ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'CMS') + category = 'CMS' +) ON CONCAT(origin, '/') = url AND IF(device = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2020/cms/distribution_of_cms_page_kilobytes_per_resource_type.sql b/sql/2020/cms/distribution_of_cms_page_kilobytes_per_resource_type.sql index b496c9e6d10..1452e7d3bb3 100644 --- a/sql/2020/cms/distribution_of_cms_page_kilobytes_per_resource_type.sql +++ b/sql/2020/cms/distribution_of_cms_page_kilobytes_per_resource_type.sql @@ -21,7 +21,8 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -29,13 +30,14 @@ FROM ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'CMS') - USING - (client, page) + category = 'CMS' + ) + USING (client, page) GROUP BY client, type, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/cms/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql b/sql/2020/cms/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql index eefe9c5f1b3..049a7be5090 100644 --- a/sql/2020/cms/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql +++ b/sql/2020/cms/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql @@ -35,9 +35,10 @@ FROM ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'CMS') - USING - (_TABLE_SUFFIX, url)), + category = 'CMS' + ) + USING (_TABLE_SUFFIX, url) +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/cms/image_bytes_on_cmss.sql b/sql/2020/cms/image_bytes_on_cmss.sql index cbe892d366c..1d576432281 100644 --- a/sql/2020/cms/image_bytes_on_cmss.sql +++ b/sql/2020/cms/image_bytes_on_cmss.sql @@ -9,8 +9,7 @@ FROM `httparchive.summary_pages.2020_08_01_*` JOIN `httparchive.technologies.2020_08_01_*` -USING - (_TABLE_SUFFIX, url), +USING (_TABLE_SUFFIX, url), UNNEST([10, 25, 50, 75, 90]) AS percentile WHERE category = 'CMS' diff --git a/sql/2020/cms/median_lighthouse_score.sql b/sql/2020/cms/median_lighthouse_score.sql index a002dc15659..7a9374c6a8f 100644 --- a/sql/2020/cms/median_lighthouse_score.sql +++ b/sql/2020/cms/median_lighthouse_score.sql @@ -13,8 +13,7 @@ FROM `httparchive.lighthouse.2020_08_01_mobile` JOIN `httparchive.technologies.2020_08_01_mobile` -USING - (url) +USING (url) WHERE category = 'CMS' GROUP BY diff --git a/sql/2020/cms/third_party_bytes_and_requests_on_cmss.sql b/sql/2020/cms/third_party_bytes_and_requests_on_cmss.sql index 167ffc8fbee..51eb1c0075a 100644 --- a/sql/2020/cms/third_party_bytes_and_requests_on_cmss.sql +++ b/sql/2020/cms/third_party_bytes_and_requests_on_cmss.sql @@ -19,7 +19,8 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -27,9 +28,9 @@ FROM ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'CMS') - USING - (client, page) + category = 'CMS' + ) + USING (client, page) WHERE NET.HOST(url) IN ( SELECT @@ -38,10 +39,12 @@ FROM ( `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND - category != 'hosting') + category != 'hosting' + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/cms/top_cms_platforms_compared_to_2019.sql b/sql/2020/cms/top_cms_platforms_compared_to_2019.sql index 7ca770c719d..64a67ea7c42 100644 --- a/sql/2020/cms/top_cms_platforms_compared_to_2019.sql +++ b/sql/2020/cms/top_cms_platforms_compared_to_2019.sql @@ -16,9 +16,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE category = 'CMS' GROUP BY @@ -42,9 +42,9 @@ JOIN ( FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE category = 'CMS' GROUP BY diff --git a/sql/2020/cms/top_cmss_yoy_all_clients.sql b/sql/2020/cms/top_cmss_yoy_all_clients.sql index 5edc802ffce..fe490bcc07c 100644 --- a/sql/2020/cms/top_cmss_yoy_all_clients.sql +++ b/sql/2020/cms/top_cmss_yoy_all_clients.sql @@ -13,7 +13,8 @@ CROSS JOIN ( SELECT COUNT(DISTINCT url) AS total FROM - `httparchive.summary_pages.2020_08_01_*`) + `httparchive.summary_pages.2020_08_01_*` +) WHERE category = 'CMS' GROUP BY @@ -32,7 +33,8 @@ CROSS JOIN ( SELECT COUNT(DISTINCT url) AS total FROM - `httparchive.summary_pages.2019_07_01_*`) + `httparchive.summary_pages.2019_07_01_*` +) WHERE category = 'CMS' GROUP BY diff --git a/sql/2020/cms/wp_resources.sql b/sql/2020/cms/wp_resources.sql index 40273f7adb9..98c2ebb1278 100644 --- a/sql/2020/cms/wp_resources.sql +++ b/sql/2020/cms/wp_resources.sql @@ -9,22 +9,18 @@ FROM ( page, REGEXP_EXTRACT(url, r'/(themes|plugins|wp-includes)/') AS path, COUNT(0) AS freq - FROM - (SELECT _TABLE_SUFFIX, url AS page FROM `httparchive.technologies.2020_09_01_*` WHERE app = 'WordPress') - JOIN - (SELECT _TABLE_SUFFIX, pageid, url AS page FROM `httparchive.summary_pages.2020_09_01_*`) - USING - (_TABLE_SUFFIX, page) - JOIN - (SELECT _TABLE_SUFFIX, pageid, url FROM `httparchive.summary_requests.2020_09_01_*`) - USING - (_TABLE_SUFFIX, pageid) + FROM (SELECT _TABLE_SUFFIX, url AS page FROM `httparchive.technologies.2020_09_01_*` WHERE app = 'WordPress') + JOIN (SELECT _TABLE_SUFFIX, pageid, url AS page FROM `httparchive.summary_pages.2020_09_01_*`) + USING (_TABLE_SUFFIX, page) + JOIN (SELECT _TABLE_SUFFIX, pageid, url FROM `httparchive.summary_requests.2020_09_01_*`) + USING (_TABLE_SUFFIX, pageid) GROUP BY client, page, path HAVING - path IS NOT NULL), + path IS NOT NULL +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/all_functions.sql b/sql/2020/css/all_functions.sql index d023b2bc227..df4c01869a5 100644 --- a/sql/2020/css/all_functions.sql +++ b/sql/2020/css/all_functions.sql @@ -50,7 +50,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - prop) + prop +) WHERE pages >= 1000 ORDER BY diff --git a/sql/2020/css/all_properties.sql b/sql/2020/css/all_properties.sql index a913a7a4c08..fb2fe6bf7ef 100644 --- a/sql/2020/css/all_properties.sql +++ b/sql/2020/css/all_properties.sql @@ -47,7 +47,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - prop) + prop +) WHERE pages >= 1000 ORDER BY diff --git a/sql/2020/css/box_sizing.sql b/sql/2020/css/box_sizing.sql index 978627c649c..61ddc32dbc6 100644 --- a/sql/2020/css/box_sizing.sql +++ b/sql/2020/css/box_sizing.sql @@ -29,7 +29,8 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01'), + date = '2020-08-01' +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/calc_complexity_operators.sql b/sql/2020/css/calc_complexity_operators.sql index f61042b309c..c3ddbb3d470 100644 --- a/sql/2020/css/calc_complexity_operators.sql +++ b/sql/2020/css/calc_complexity_operators.sql @@ -82,7 +82,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, num diff --git a/sql/2020/css/calc_complexity_parens.sql b/sql/2020/css/calc_complexity_parens.sql index dc297b74d92..aa4895b647c 100644 --- a/sql/2020/css/calc_complexity_parens.sql +++ b/sql/2020/css/calc_complexity_parens.sql @@ -82,7 +82,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, num diff --git a/sql/2020/css/calc_complexity_units.sql b/sql/2020/css/calc_complexity_units.sql index 37f2c38fe17..b9c44425ece 100644 --- a/sql/2020/css/calc_complexity_units.sql +++ b/sql/2020/css/calc_complexity_units.sql @@ -82,7 +82,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, num diff --git a/sql/2020/css/calc_constants.sql b/sql/2020/css/calc_constants.sql index 8b407c4c4c7..59a0d21fc61 100644 --- a/sql/2020/css/calc_constants.sql +++ b/sql/2020/css/calc_constants.sql @@ -82,7 +82,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, const diff --git a/sql/2020/css/calc_constants_pages.sql b/sql/2020/css/calc_constants_pages.sql index f7f2a35ba59..2fa1a739c71 100644 --- a/sql/2020/css/calc_constants_pages.sql +++ b/sql/2020/css/calc_constants_pages.sql @@ -81,6 +81,7 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client diff --git a/sql/2020/css/calc_operators.sql b/sql/2020/css/calc_operators.sql index f264cfa02da..0762896bb9c 100644 --- a/sql/2020/css/calc_operators.sql +++ b/sql/2020/css/calc_operators.sql @@ -83,7 +83,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, operator diff --git a/sql/2020/css/calc_properties.sql b/sql/2020/css/calc_properties.sql index 6ad13842e57..5cdcd478a9c 100644 --- a/sql/2020/css/calc_properties.sql +++ b/sql/2020/css/calc_properties.sql @@ -83,7 +83,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, name diff --git a/sql/2020/css/calc_units.sql b/sql/2020/css/calc_units.sql index 1f0c601a06a..37b0a05524f 100644 --- a/sql/2020/css/calc_units.sql +++ b/sql/2020/css/calc_units.sql @@ -83,7 +83,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, unit diff --git a/sql/2020/css/color_arg_comma.sql b/sql/2020/css/color_arg_comma.sql index f694dc33922..708e6e0a194 100644 --- a/sql/2020/css/color_arg_comma.sql +++ b/sql/2020/css/color_arg_comma.sql @@ -166,6 +166,7 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client diff --git a/sql/2020/css/color_formats.sql b/sql/2020/css/color_formats.sql index 74ab3974bf5..aa3dc633dc2 100644 --- a/sql/2020/css/color_formats.sql +++ b/sql/2020/css/color_formats.sql @@ -187,7 +187,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorFormats(css)) AS format WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, format diff --git a/sql/2020/css/color_functions.sql b/sql/2020/css/color_functions.sql index fb9bef957b0..4075b20c223 100644 --- a/sql/2020/css/color_functions.sql +++ b/sql/2020/css/color_functions.sql @@ -169,7 +169,8 @@ FROM ( UNNEST(getColorFunctions(css)) AS function WHERE date = '2020-08-01' AND - function IS NOT NULL) + function IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -177,9 +178,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/css/color_keywords.sql b/sql/2020/css/color_keywords.sql index a5da2a4f952..499c1834133 100644 --- a/sql/2020/css/color_keywords.sql +++ b/sql/2020/css/color_keywords.sql @@ -177,7 +177,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorKeywords(css)) AS keyword WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, keyword diff --git a/sql/2020/css/color_keywords_no_system_casefold.sql b/sql/2020/css/color_keywords_no_system_casefold.sql index 52b931439f3..47eb088d1bc 100644 --- a/sql/2020/css/color_keywords_no_system_casefold.sql +++ b/sql/2020/css/color_keywords_no_system_casefold.sql @@ -177,7 +177,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorKeywords(css)) AS keyword WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, keyword diff --git a/sql/2020/css/color_p3.sql b/sql/2020/css/color_p3.sql index a6b8d6e7654..12e377887b5 100644 --- a/sql/2020/css/color_p3.sql +++ b/sql/2020/css/color_p3.sql @@ -176,7 +176,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getP3Usage(css)) AS p3 WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, p3 diff --git a/sql/2020/css/color_spaces.sql b/sql/2020/css/color_spaces.sql index 1464ed7f7dc..19bdae713f2 100644 --- a/sql/2020/css/color_spaces.sql +++ b/sql/2020/css/color_spaces.sql @@ -175,7 +175,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getColorSpaces(css)) AS color_space WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, color_space diff --git a/sql/2020/css/css_in_js.sql b/sql/2020/css/css_in_js.sql index b0c4f3a3863..ab4a2bfa07c 100644 --- a/sql/2020/css/css_in_js.sql +++ b/sql/2020/css/css_in_js.sql @@ -45,7 +45,8 @@ FROM ( cssInJs FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCssInJS(payload)) AS cssInJs) + UNNEST(getCssInJS(payload)) AS cssInJs +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -53,7 +54,8 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) + client +) USING (client) GROUP BY client, diff --git a/sql/2020/css/custom_property_adoption.sql b/sql/2020/css/custom_property_adoption.sql index 6d132535710..ce1661bfd60 100644 --- a/sql/2020/css/custom_property_adoption.sql +++ b/sql/2020/css/custom_property_adoption.sql @@ -37,11 +37,10 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY client) -USING - (client) + page +) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY client) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/custom_property_cycles.sql b/sql/2020/css/custom_property_cycles.sql index 6ffb017311d..f241ec239b9 100644 --- a/sql/2020/css/custom_property_cycles.sql +++ b/sql/2020/css/custom_property_cycles.sql @@ -101,7 +101,8 @@ FROM ( `httparchive.pages.2020_08_01_*` GROUP BY client, - url) + url +) WHERE cycles IS NOT NULL GROUP BY diff --git a/sql/2020/css/custom_property_cycles_max.sql b/sql/2020/css/custom_property_cycles_max.sql index cd1eaf52247..deb32e8ee08 100644 --- a/sql/2020/css/custom_property_cycles_max.sql +++ b/sql/2020/css/custom_property_cycles_max.sql @@ -99,7 +99,8 @@ FROM ( `httparchive.pages.2020_08_01_*` GROUP BY client, - url), + url +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/custom_property_depth.sql b/sql/2020/css/custom_property_depth.sql index 16ee20b7fae..d65f2159397 100644 --- a/sql/2020/css/custom_property_depth.sql +++ b/sql/2020/css/custom_property_depth.sql @@ -96,7 +96,8 @@ FROM ( custom_properties.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCustomPropertyLengths(payload)) AS custom_properties) + UNNEST(getCustomPropertyLengths(payload)) AS custom_properties +) GROUP BY client, depth diff --git a/sql/2020/css/custom_property_functions.sql b/sql/2020/css/custom_property_functions.sql index d1b282ba110..7f7a4d53fee 100644 --- a/sql/2020/css/custom_property_functions.sql +++ b/sql/2020/css/custom_property_functions.sql @@ -104,7 +104,8 @@ FROM ( UNNEST(getCustomPropertyFunctions(css)) AS function WHERE date = '2020-08-01' AND - function IS NOT NULL) + function IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -112,9 +113,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/css/custom_property_get_set.sql b/sql/2020/css/custom_property_get_set.sql index 5a81f3ce685..28b7ff88818 100644 --- a/sql/2020/css/custom_property_get_set.sql +++ b/sql/2020/css/custom_property_get_set.sql @@ -52,7 +52,8 @@ FROM ( usage.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCustomPropertyUsage(payload)) AS usage) + UNNEST(getCustomPropertyUsage(payload)) AS usage +) GROUP BY client, usage diff --git a/sql/2020/css/custom_property_names.sql b/sql/2020/css/custom_property_names.sql index 2b22fcb0504..4b735bc4357 100644 --- a/sql/2020/css/custom_property_names.sql +++ b/sql/2020/css/custom_property_names.sql @@ -31,8 +31,10 @@ FROM ( FROM `httparchive.pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX) - USING (_TABLE_SUFFIX)), + _TABLE_SUFFIX + ) + USING (_TABLE_SUFFIX) +), UNNEST(names) AS name GROUP BY client, diff --git a/sql/2020/css/custom_property_properties.sql b/sql/2020/css/custom_property_properties.sql index a4ed8acc5f9..d2c16cd5c96 100644 --- a/sql/2020/css/custom_property_properties.sql +++ b/sql/2020/css/custom_property_properties.sql @@ -104,7 +104,8 @@ FROM ( UNNEST(getCustomPropertyProperties(css)) AS property WHERE date = '2020-08-01' AND - property IS NOT NULL) + property IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -112,9 +113,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/css/custom_property_root.sql b/sql/2020/css/custom_property_root.sql index 6a874e1d3c5..0f3fc966538 100644 --- a/sql/2020/css/custom_property_root.sql +++ b/sql/2020/css/custom_property_root.sql @@ -76,7 +76,8 @@ FROM ( root.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCustomPropertyRoots(payload)) AS root) + UNNEST(getCustomPropertyRoots(payload)) AS root +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -84,9 +85,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, root, diff --git a/sql/2020/css/custom_property_value_types.sql b/sql/2020/css/custom_property_value_types.sql index 56444f2ce4f..0436caf634f 100644 --- a/sql/2020/css/custom_property_value_types.sql +++ b/sql/2020/css/custom_property_value_types.sql @@ -156,6 +156,7 @@ FROM ( FROM `httparchive.pages.2020_08_01_*` LEFT JOIN - UNNEST(getCustomPropertyValueTypes(JSON_EXTRACT_SCALAR(payload, "$['_css-variables']"))) AS value) + UNNEST(getCustomPropertyValueTypes(JSON_EXTRACT_SCALAR(payload, "$['_css-variables']"))) AS value +) ORDER BY pct DESC diff --git a/sql/2020/css/custom_property_values.sql b/sql/2020/css/custom_property_values.sql index d57cdddae31..59cb544e088 100644 --- a/sql/2020/css/custom_property_values.sql +++ b/sql/2020/css/custom_property_values.sql @@ -31,8 +31,10 @@ FROM ( FROM `httparchive.pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX) - USING (_TABLE_SUFFIX)), + _TABLE_SUFFIX + ) + USING (_TABLE_SUFFIX) +), UNNEST(values) AS value GROUP BY client, diff --git a/sql/2020/css/flexbox_grid.sql b/sql/2020/css/flexbox_grid.sql index 0b115a7be59..3dd93834a81 100644 --- a/sql/2020/css/flexbox_grid.sql +++ b/sql/2020/css/flexbox_grid.sql @@ -9,10 +9,8 @@ SELECT COUNT(DISTINCT url) / total AS pct FROM `httparchive.blink_features.features` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY client) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY client) +USING (client) WHERE yyyymmdd IN ('20200801', '20190701') AND feature IN ('CSSFlexibleBox', 'CSSGridLayout') diff --git a/sql/2020/css/gradient_adoption.sql b/sql/2020/css/gradient_adoption.sql index 5aa24a2b371..1e7eb1a31b2 100644 --- a/sql/2020/css/gradient_adoption.sql +++ b/sql/2020/css/gradient_adoption.sql @@ -159,7 +159,8 @@ FROM ( UNNEST(getGradientAdoption(css)) AS property WHERE date = '2020-08-01' AND - property IS NOT NULL) + property IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -167,9 +168,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/gradient_bg_properties.sql b/sql/2020/css/gradient_bg_properties.sql index 7c4ebd8f4a8..bb1bfed42c4 100644 --- a/sql/2020/css/gradient_bg_properties.sql +++ b/sql/2020/css/gradient_bg_properties.sql @@ -160,7 +160,8 @@ FROM ( UNNEST(getGradientUsageBeyondBg(css)) AS property WHERE date = '2020-08-01' AND - property IS NOT NULL) + property IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -168,9 +169,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, property_type, diff --git a/sql/2020/css/gradient_functions.sql b/sql/2020/css/gradient_functions.sql index 3e32b55c80d..91f6e174536 100644 --- a/sql/2020/css/gradient_functions.sql +++ b/sql/2020/css/gradient_functions.sql @@ -160,7 +160,8 @@ FROM ( UNNEST(getGradientFunctions(css)) AS function WHERE date = '2020-08-01' AND - function IS NOT NULL) + function IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -168,9 +169,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, function, diff --git a/sql/2020/css/gradient_hard_stops.sql b/sql/2020/css/gradient_hard_stops.sql index 51ba1dacddc..59be75b3e80 100644 --- a/sql/2020/css/gradient_hard_stops.sql +++ b/sql/2020/css/gradient_hard_stops.sql @@ -174,7 +174,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -182,9 +183,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/gradient_hints.sql b/sql/2020/css/gradient_hints.sql index caf204c8556..83e21e3f1d6 100644 --- a/sql/2020/css/gradient_hints.sql +++ b/sql/2020/css/gradient_hints.sql @@ -174,7 +174,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -182,9 +183,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/gradient_max_stops.sql b/sql/2020/css/gradient_max_stops.sql index 1aef5533f4c..d07d55b1991 100644 --- a/sql/2020/css/gradient_max_stops.sql +++ b/sql/2020/css/gradient_max_stops.sql @@ -182,7 +182,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile WHERE getStops(max_color_stops) > 0 diff --git a/sql/2020/css/gradient_properties.sql b/sql/2020/css/gradient_properties.sql index 43501cf0cf0..acdbf617f42 100644 --- a/sql/2020/css/gradient_properties.sql +++ b/sql/2020/css/gradient_properties.sql @@ -160,7 +160,8 @@ FROM ( UNNEST(getGradientUsageBeyondBg(css)) AS property WHERE date = '2020-08-01' AND - property IS NOT NULL) + property IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -168,9 +169,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, property, diff --git a/sql/2020/css/gradient_stops.sql b/sql/2020/css/gradient_stops.sql index 51e3ddfa3a1..b2204058bbe 100644 --- a/sql/2020/css/gradient_stops.sql +++ b/sql/2020/css/gradient_stops.sql @@ -174,7 +174,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/grid_named_lines.sql b/sql/2020/css/grid_named_lines.sql index 758a6b2cdbf..e09fbc32e6e 100644 --- a/sql/2020/css/grid_named_lines.sql +++ b/sql/2020/css/grid_named_lines.sql @@ -29,7 +29,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -37,9 +38,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/grid_template_areas.sql b/sql/2020/css/grid_template_areas.sql index 7a6de3a7f17..ee770b823ca 100644 --- a/sql/2020/css/grid_template_areas.sql +++ b/sql/2020/css/grid_template_areas.sql @@ -46,7 +46,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -54,9 +55,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/gridlike_flexbox.sql b/sql/2020/css/gridlike_flexbox.sql index 9bb7d3c5256..3bd35d97442 100644 --- a/sql/2020/css/gridlike_flexbox.sql +++ b/sql/2020/css/gridlike_flexbox.sql @@ -36,7 +36,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -44,9 +45,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/houdini_paint_worklets.sql b/sql/2020/css/houdini_paint_worklets.sql index f114f8a880d..0dcbe2a6e92 100644 --- a/sql/2020/css/houdini_paint_worklets.sql +++ b/sql/2020/css/houdini_paint_worklets.sql @@ -43,7 +43,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getPaintWorklets(css)) AS paint WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, worklet diff --git a/sql/2020/css/i18n_dir_css_elements.sql b/sql/2020/css/i18n_dir_css_elements.sql index 2a86fcc8c5e..fc7ff332bce 100644 --- a/sql/2020/css/i18n_dir_css_elements.sql +++ b/sql/2020/css/i18n_dir_css_elements.sql @@ -74,11 +74,13 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, element, - value) + value +) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/i18n_dir_html_elements.sql b/sql/2020/css/i18n_dir_html_elements.sql index 4d391098a6c..4ef753f1f68 100644 --- a/sql/2020/css/i18n_dir_html_elements.sql +++ b/sql/2020/css/i18n_dir_html_elements.sql @@ -42,7 +42,8 @@ FROM ( GROUP BY client, element, - value) + value +) WHERE freq >= 100 ORDER BY diff --git a/sql/2020/css/i18n_logical_properties.sql b/sql/2020/css/i18n_logical_properties.sql index 58c0664191d..59d94dd6ccd 100644 --- a/sql/2020/css/i18n_logical_properties.sql +++ b/sql/2020/css/i18n_logical_properties.sql @@ -102,10 +102,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - property) + property +) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/i18n_physical_properties.sql b/sql/2020/css/i18n_physical_properties.sql index eeba7bf94d8..819a47001db 100644 --- a/sql/2020/css/i18n_physical_properties.sql +++ b/sql/2020/css/i18n_physical_properties.sql @@ -102,10 +102,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - property) + property +) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/keyword_properties.sql b/sql/2020/css/keyword_properties.sql index 37a2560d61a..2646627af48 100644 --- a/sql/2020/css/keyword_properties.sql +++ b/sql/2020/css/keyword_properties.sql @@ -59,7 +59,8 @@ FROM ( GROUP BY client, keyword, - property) + property +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -67,9 +68,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/keyword_totals.sql b/sql/2020/css/keyword_totals.sql index 91657804a36..61688c1a92c 100644 --- a/sql/2020/css/keyword_totals.sql +++ b/sql/2020/css/keyword_totals.sql @@ -57,7 +57,8 @@ FROM ( GROUP BY client, keyword, - property) + property +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -65,9 +66,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/layout_properties.sql b/sql/2020/css/layout_properties.sql index 0aa9b3f3ad1..b9f6c58c381 100644 --- a/sql/2020/css/layout_properties.sql +++ b/sql/2020/css/layout_properties.sql @@ -63,10 +63,12 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getLayoutUsage(css)) AS layout WHERE - date = '2020-08-01') + date = '2020-08-01' + ) GROUP BY client, - layout) + layout +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -74,9 +76,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) WHERE pages >= 100 ORDER BY diff --git a/sql/2020/css/media_query_features.sql b/sql/2020/css/media_query_features.sql index 25e3c679b9e..058d01889e7 100644 --- a/sql/2020/css/media_query_features.sql +++ b/sql/2020/css/media_query_features.sql @@ -50,7 +50,8 @@ FROM ( UNNEST(getMediaQueryFeatures(css)) AS feature WHERE date = '2020-08-01' AND - feature IS NOT NULL) + feature IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -58,9 +59,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/css/media_query_properties.sql b/sql/2020/css/media_query_properties.sql index 678c70801e8..48340505cf3 100644 --- a/sql/2020/css/media_query_properties.sql +++ b/sql/2020/css/media_query_properties.sql @@ -44,7 +44,8 @@ FROM ( UNNEST(getMediaQueryProperties(css)) AS property WHERE date = '2020-08-01' AND - property IS NOT NULL) + property IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -52,9 +53,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/css/media_query_responsive.sql b/sql/2020/css/media_query_responsive.sql index 530c0ef523a..915bfa227c4 100644 --- a/sql/2020/css/media_query_responsive.sql +++ b/sql/2020/css/media_query_responsive.sql @@ -49,7 +49,8 @@ FROM ( UNNEST(getMediaQueryFeatures(css)) AS feature WHERE date = '2020-08-01' AND - feature IS NOT NULL) + feature IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -57,9 +58,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) WHERE REGEXP_CONTAINS(feature, r'(-width|-height|-aspect-ratio)$') GROUP BY diff --git a/sql/2020/css/media_query_values.sql b/sql/2020/css/media_query_values.sql index dc4b304772d..f7043ebd033 100644 --- a/sql/2020/css/media_query_values.sql +++ b/sql/2020/css/media_query_values.sql @@ -48,7 +48,8 @@ FROM ( UNNEST(getMediaQueryValues(css)) AS value WHERE date = '2020-08-01' AND - value IS NOT NULL) + value IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -56,9 +57,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/css/meta_important_adoption.sql b/sql/2020/css/meta_important_adoption.sql index 0499152d50a..67e85377b03 100644 --- a/sql/2020/css/meta_important_adoption.sql +++ b/sql/2020/css/meta_important_adoption.sql @@ -40,17 +40,19 @@ FROM ( page, SAFE_DIVIDE(SUM(properties.important), SUM(properties.total)) AS pct_important FROM ( - SELECT - client, - page, - getImportantProperties(css) AS properties - FROM - `httparchive.almanac.parsed_css` - WHERE - date = '2020-08-01') + SELECT + client, + page, + getImportantProperties(css) AS properties + FROM + `httparchive.almanac.parsed_css` + WHERE + date = '2020-08-01' + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/meta_important_properties.sql b/sql/2020/css/meta_important_properties.sql index ae79fba02f9..83f47a3515a 100644 --- a/sql/2020/css/meta_important_properties.sql +++ b/sql/2020/css/meta_important_properties.sql @@ -48,7 +48,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getImportantProperties(css)) AS important WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, property diff --git a/sql/2020/css/meta_longhand_first_distribution.sql b/sql/2020/css/meta_longhand_first_distribution.sql index c6e97726713..34ffc1e2474 100644 --- a/sql/2020/css/meta_longhand_first_distribution.sql +++ b/sql/2020/css/meta_longhand_first_distribution.sql @@ -457,7 +457,8 @@ FROM ( UNNEST(getLonghandFirstProperties(css)) AS property WHERE date = '2020-08-01' AND - property.freq > 0), + property.freq > 0 +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/meta_longhand_first_pages.sql b/sql/2020/css/meta_longhand_first_pages.sql index ff5e97aa90e..0628cb7dca0 100644 --- a/sql/2020/css/meta_longhand_first_pages.sql +++ b/sql/2020/css/meta_longhand_first_pages.sql @@ -462,6 +462,7 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2020/css/meta_longhand_first_properties.sql b/sql/2020/css/meta_longhand_first_properties.sql index 8cdddadcbe8..f3e6e46884f 100644 --- a/sql/2020/css/meta_longhand_first_properties.sql +++ b/sql/2020/css/meta_longhand_first_properties.sql @@ -461,7 +461,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getLonghandFirstProperties(css)) AS property WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, property diff --git a/sql/2020/css/meta_longhand_properties.sql b/sql/2020/css/meta_longhand_properties.sql index f1f97f842ef..3190fe796eb 100644 --- a/sql/2020/css/meta_longhand_properties.sql +++ b/sql/2020/css/meta_longhand_properties.sql @@ -461,7 +461,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getLonghandProperties(css)) AS property WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, property diff --git a/sql/2020/css/meta_property_pairs.sql b/sql/2020/css/meta_property_pairs.sql index a520578b09b..f1118808a7c 100644 --- a/sql/2020/css/meta_property_pairs.sql +++ b/sql/2020/css/meta_property_pairs.sql @@ -104,7 +104,8 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client, pair diff --git a/sql/2020/css/meta_shorthand_first_pages.sql b/sql/2020/css/meta_shorthand_first_pages.sql index e75713c210d..c39f63b2048 100644 --- a/sql/2020/css/meta_shorthand_first_pages.sql +++ b/sql/2020/css/meta_shorthand_first_pages.sql @@ -462,6 +462,7 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2020/css/meta_shorthand_first_properties.sql b/sql/2020/css/meta_shorthand_first_properties.sql index 0fc0a853818..78815b54063 100644 --- a/sql/2020/css/meta_shorthand_first_properties.sql +++ b/sql/2020/css/meta_shorthand_first_properties.sql @@ -462,7 +462,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getShorthandFirstProperties(css)) AS property WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, property diff --git a/sql/2020/css/meta_shorthand_properties.sql b/sql/2020/css/meta_shorthand_properties.sql index bf5db62340e..d6e0dddaddf 100644 --- a/sql/2020/css/meta_shorthand_properties.sql +++ b/sql/2020/css/meta_shorthand_properties.sql @@ -461,7 +461,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getShorthandProperties(css)) AS property WHERE - date = '2020-08-01') + date = '2020-08-01' +) GROUP BY client, property diff --git a/sql/2020/css/meta_shorthand_values.sql b/sql/2020/css/meta_shorthand_values.sql index 8e4627c0c5d..6c7b452594c 100644 --- a/sql/2020/css/meta_shorthand_values.sql +++ b/sql/2020/css/meta_shorthand_values.sql @@ -459,7 +459,8 @@ FROM ( UNNEST(getShorthandValueCounts(css)) AS shorthand, UNNEST(shorthand.values) AS value WHERE - date = '2020-08-01'), + date = '2020-08-01' +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/meta_unknown_properties.sql b/sql/2020/css/meta_unknown_properties.sql index 23b22db8a58..78904064e7b 100644 --- a/sql/2020/css/meta_unknown_properties.sql +++ b/sql/2020/css/meta_unknown_properties.sql @@ -42,10 +42,10 @@ FROM ( client, property, COUNT(DISTINCT page) OVER (PARTITION BY client, property) AS pages, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages, COUNT(DISTINCT page) OVER (PARTITION BY client, property) / COUNT(DISTINCT page) OVER (PARTITION BY client) AS pct_pages, SUM(freq) OVER (PARTITION BY client, property) AS freq, - SUM(freq) OVER (PARTITION BY client) AS total, + SUM(freq) OVER (PARTITION BY client) AS total_freq, SUM(freq) OVER (PARTITION BY client, property) / SUM(freq) OVER (PARTITION BY client) AS pct FROM ( SELECT @@ -58,7 +58,9 @@ FROM ( UNNEST(getUnknownProperties(css)) AS property WHERE date = '2020-08-01' AND - LENGTH(property.property) > 1)) + LENGTH(property.property) > 1 + ) +) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/multicol.sql b/sql/2020/css/multicol.sql index 4e75efa70fc..4b14ba3f32f 100644 --- a/sql/2020/css/multicol.sql +++ b/sql/2020/css/multicol.sql @@ -29,7 +29,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -37,9 +38,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, total diff --git a/sql/2020/css/repetition.sql b/sql/2020/css/repetition.sql index 7eb0b757f3c..9127eed6042 100644 --- a/sql/2020/css/repetition.sql +++ b/sql/2020/css/repetition.sql @@ -52,10 +52,12 @@ FROM ( FROM `httparchive.almanac.parsed_css` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 95, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/rework_errors.sql b/sql/2020/css/rework_errors.sql index a00579838b0..bf3be7101c6 100644 --- a/sql/2020/css/rework_errors.sql +++ b/sql/2020/css/rework_errors.sql @@ -14,7 +14,8 @@ FROM ( date = '2020-08-01' AND type = 'css' GROUP BY - client) + client +) JOIN ( SELECT client, @@ -25,6 +26,6 @@ JOIN ( date = '2020-08-01' AND url != 'inline' GROUP BY - client) -USING - (client) + client +) +USING (client) diff --git a/sql/2020/css/sass_animated_custom_properties.sql b/sql/2020/css/sass_animated_custom_properties.sql index 6cf03c6f13f..e87e5cfef2b 100644 --- a/sql/2020/css/sass_animated_custom_properties.sql +++ b/sql/2020/css/sass_animated_custom_properties.sql @@ -77,7 +77,8 @@ FROM ( `httparchive.almanac.parsed_css`, UNNEST(getAnimatedCustomProperties(css)) AS prop WHERE - date = '2020-08-01') + date = '2020-08-01' +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -85,8 +86,8 @@ JOIN ( prop FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCustomPropertiesWithComputedStyle(payload)) AS prop) -USING - (client, page, prop) + UNNEST(getCustomPropertiesWithComputedStyle(payload)) AS prop +) +USING (client, page, prop) GROUP BY client diff --git a/sql/2020/css/sass_combined_variables_distribution.sql b/sql/2020/css/sass_combined_variables_distribution.sql index 5a0814e3690..6afbb9938df 100644 --- a/sql/2020/css/sass_combined_variables_distribution.sql +++ b/sql/2020/css/sass_combined_variables_distribution.sql @@ -26,7 +26,8 @@ FROM ( var.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(countCombinedVariables(payload)) AS var), + UNNEST(countCombinedVariables(payload)) AS var +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/sass_combined_variables_pages.sql b/sql/2020/css/sass_combined_variables_pages.sql index 2e52d9e185b..2fad88e8270 100644 --- a/sql/2020/css/sass_combined_variables_pages.sql +++ b/sql/2020/css/sass_combined_variables_pages.sql @@ -29,7 +29,8 @@ FROM ( var.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(countCombinedVariables(payload)) AS var) + UNNEST(countCombinedVariables(payload)) AS var +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -37,9 +38,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, usage, diff --git a/sql/2020/css/sass_control_flow_statements.sql b/sql/2020/css/sass_control_flow_statements.sql index 3677ce37f76..d3f84ae7543 100644 --- a/sql/2020/css/sass_control_flow_statements.sql +++ b/sql/2020/css/sass_control_flow_statements.sql @@ -46,7 +46,8 @@ FROM ( GROUP BY client, page, - statement) + statement +) GROUP BY client, statement diff --git a/sql/2020/css/sass_custom_function_calls.sql b/sql/2020/css/sass_custom_function_calls.sql index 4708f2a16a9..d5a202bb58c 100644 --- a/sql/2020/css/sass_custom_function_calls.sql +++ b/sql/2020/css/sass_custom_function_calls.sql @@ -32,10 +32,12 @@ FROM ( fn.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCustomFunctionCalls(payload)) AS fn) + UNNEST(getCustomFunctionCalls(payload)) AS fn + ) GROUP BY client, - fn) + fn +) WHERE freq >= 1000 ORDER BY diff --git a/sql/2020/css/sass_custom_function_names.sql b/sql/2020/css/sass_custom_function_names.sql index 4ee99f59a64..0cf5685fada 100644 --- a/sql/2020/css/sass_custom_function_names.sql +++ b/sql/2020/css/sass_custom_function_names.sql @@ -26,7 +26,8 @@ FROM ( sass_custom_function FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getCustomFunctionNames(payload)) AS sass_custom_function) + UNNEST(getCustomFunctionNames(payload)) AS sass_custom_function +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -34,9 +35,9 @@ JOIN ( FROM `httparchive.pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, sass_custom_function, diff --git a/sql/2020/css/sass_custom_functions.sql b/sql/2020/css/sass_custom_functions.sql index 3f2768e0c65..b455376b20f 100644 --- a/sql/2020/css/sass_custom_functions.sql +++ b/sql/2020/css/sass_custom_functions.sql @@ -26,7 +26,8 @@ FROM ( `httparchive.pages.2020_08_01_*` GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/sass_function_calls.sql b/sql/2020/css/sass_function_calls.sql index 383ce5db18d..cbbdd6bc02f 100644 --- a/sql/2020/css/sass_function_calls.sql +++ b/sql/2020/css/sass_function_calls.sql @@ -32,10 +32,12 @@ FROM ( fn.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getFunctionCalls(payload)) AS fn) + UNNEST(getFunctionCalls(payload)) AS fn + ) GROUP BY client, - fn) + fn +) WHERE freq >= 1000 ORDER BY diff --git a/sql/2020/css/sass_mixin_calls.sql b/sql/2020/css/sass_mixin_calls.sql index c9e42265a9b..c3b1ba33291 100644 --- a/sql/2020/css/sass_mixin_calls.sql +++ b/sql/2020/css/sass_mixin_calls.sql @@ -32,10 +32,12 @@ FROM ( mixin.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getMixinUsage(payload)) AS mixin) + UNNEST(getMixinUsage(payload)) AS mixin + ) GROUP BY client, - mixin) + mixin +) WHERE freq >= 1000 ORDER BY diff --git a/sql/2020/css/sass_mixin_names.sql b/sql/2020/css/sass_mixin_names.sql index e9016492c64..a404567bee8 100644 --- a/sql/2020/css/sass_mixin_names.sql +++ b/sql/2020/css/sass_mixin_names.sql @@ -26,7 +26,8 @@ FROM ( mixin FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getMixinNames(payload)) AS mixin) + UNNEST(getMixinNames(payload)) AS mixin +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -34,9 +35,9 @@ JOIN ( FROM `httparchive.pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, mixin, diff --git a/sql/2020/css/sass_nesting.sql b/sql/2020/css/sass_nesting.sql index b94ede73834..8845a8098ed 100644 --- a/sql/2020/css/sass_nesting.sql +++ b/sql/2020/css/sass_nesting.sql @@ -40,7 +40,8 @@ FROM ( GROUP BY client, page, - nested) + nested +) GROUP BY client, nested diff --git a/sql/2020/css/sass_variable_usage.sql b/sql/2020/css/sass_variable_usage.sql index 9be83c04fb0..b5675755d3f 100644 --- a/sql/2020/css/sass_variable_usage.sql +++ b/sql/2020/css/sass_variable_usage.sql @@ -31,7 +31,8 @@ FROM ( variable.freq FROM `httparchive.pages.2020_08_01_*`, - UNNEST(getVariableUsage(payload)) AS variable) + UNNEST(getVariableUsage(payload)) AS variable +) GROUP BY client, variable diff --git a/sql/2020/css/selector_parts_freq_per_page.sql b/sql/2020/css/selector_parts_freq_per_page.sql index 2a5ddf01c06..08b26a471ec 100644 --- a/sql/2020/css/selector_parts_freq_per_page.sql +++ b/sql/2020/css/selector_parts_freq_per_page.sql @@ -98,7 +98,8 @@ FROM ( SUM(class.value) OVER (PARTITION BY client, class.name) AS class_freq FROM selector_parts, - UNNEST(parts.class) AS class) + UNNEST(parts.class) AS class +) JOIN ( SELECT client, @@ -106,9 +107,9 @@ JOIN ( SUM(id.value) OVER (PARTITION BY client, id.name) AS id_freq FROM selector_parts, - UNNEST(parts.id) AS id) -USING - (client) + UNNEST(parts.id) AS id +) +USING (client) JOIN ( SELECT client, @@ -116,9 +117,9 @@ JOIN ( SUM(attribute.value) OVER (PARTITION BY client, attribute.name) AS attribute_freq FROM selector_parts, - UNNEST(parts.attribute) AS attribute) -USING - (client) + UNNEST(parts.attribute) AS attribute +) +USING (client) JOIN ( SELECT client, @@ -126,9 +127,9 @@ JOIN ( SUM(pseudo_class.value) OVER (PARTITION BY client, pseudo_class.name) AS pseudo_class_freq FROM selector_parts, - UNNEST(parts.pseudo_class) AS pseudo_class) -USING - (client) + UNNEST(parts.pseudo_class) AS pseudo_class +) +USING (client) JOIN ( SELECT client, @@ -136,8 +137,8 @@ JOIN ( SUM(pseudo_element.value) OVER (PARTITION BY client, pseudo_element.name) AS pseudo_element_freq FROM selector_parts, - UNNEST(parts.pseudo_element) AS pseudo_element) -USING - (client) + UNNEST(parts.pseudo_element) AS pseudo_element +) +USING (client) GROUP BY client diff --git a/sql/2020/css/selectors.sql b/sql/2020/css/selectors.sql index 1e9a7f82256..bee7408e348 100644 --- a/sql/2020/css/selectors.sql +++ b/sql/2020/css/selectors.sql @@ -100,10 +100,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. This loses ~20% of stylesheets. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/sourcemap_adoption.sql b/sql/2020/css/sourcemap_adoption.sql index 2e3b5e6abcd..52abe12a0bf 100644 --- a/sql/2020/css/sourcemap_adoption.sql +++ b/sql/2020/css/sourcemap_adoption.sql @@ -20,6 +20,7 @@ FROM ( _TABLE_SUFFIX AS client, countSourcemaps(payload) > 0 AS has_sourcemap FROM - `httparchive.pages.2020_08_01_*`) + `httparchive.pages.2020_08_01_*` +) GROUP BY client diff --git a/sql/2020/css/specificity.sql b/sql/2020/css/specificity.sql index 5025bc724ed..55b64d05959 100644 --- a/sql/2020/css/specificity.sql +++ b/sql/2020/css/specificity.sql @@ -109,13 +109,16 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024), + LENGTH(css) < 0.1 * 1024 * 1024 + ), UNNEST(info.distribution) AS bin WHERE - bin.specificity_cmp IS NOT NULL) + bin.specificity_cmp IS NOT NULL + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 95, 99, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/specificity_hacks.sql b/sql/2020/css/specificity_hacks.sql index d2e32b3aa09..d63e328f0ba 100644 --- a/sql/2020/css/specificity_hacks.sql +++ b/sql/2020/css/specificity_hacks.sql @@ -128,10 +128,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/stylesheet_count.sql b/sql/2020/css/stylesheet_count.sql index a4090822dfc..c42001871bb 100644 --- a/sql/2020/css/stylesheet_count.sql +++ b/sql/2020/css/stylesheet_count.sql @@ -22,7 +22,8 @@ FROM ( url, getStylesheets(payload) AS stylesheets FROM - `httparchive.pages.2020_08_01_*`), + `httparchive.pages.2020_08_01_*` +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/css/stylesheet_count_one_remote.sql b/sql/2020/css/stylesheet_count_one_remote.sql index d2a70eb7728..487390ca25e 100644 --- a/sql/2020/css/stylesheet_count_one_remote.sql +++ b/sql/2020/css/stylesheet_count_one_remote.sql @@ -21,7 +21,8 @@ FROM ( url, getStylesheets(payload) AS stylesheets FROM - `httparchive.pages.2020_08_01_*`) + `httparchive.pages.2020_08_01_*` +) GROUP BY client ORDER BY diff --git a/sql/2020/css/top_selector_attributes.sql b/sql/2020/css/top_selector_attributes.sql index 47aa9bb37c5..aa44cd7050b 100644 --- a/sql/2020/css/top_selector_attributes.sql +++ b/sql/2020/css/top_selector_attributes.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(attribute, 100) AS attributes FROM ( - SELECT DISTINCT - client, - page, - attribute - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).attribute) AS attribute - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + attribute + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).attribute) AS attribute + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(attributes) AS attribute WHERE attribute.value IS NOT NULL diff --git a/sql/2020/css/top_selector_class_prefixes.sql b/sql/2020/css/top_selector_class_prefixes.sql index e51fcd1c546..f2a75969bfa 100644 --- a/sql/2020/css/top_selector_class_prefixes.sql +++ b/sql/2020/css/top_selector_class_prefixes.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(class_prefix, 200) AS class_prefixes FROM ( - SELECT DISTINCT - client, - page, - IF(LENGTH(class) > LENGTH(REGEXP_EXTRACT(class, r'^([^-]+)')), REGEXP_REPLACE(class, r'^([^-]+).*', r'\1-*'), class) AS class_prefix - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).class) AS class - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + IF(LENGTH(class) > LENGTH(REGEXP_EXTRACT(class, r'^([^-]+)')), REGEXP_REPLACE(class, r'^([^-]+).*', r'\1-*'), class) AS class_prefix + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).class) AS class + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(class_prefixes) AS class_prefix WHERE class_prefix.value IS NOT NULL diff --git a/sql/2020/css/top_selector_classes.sql b/sql/2020/css/top_selector_classes.sql index dcb4c061e7b..da7b614c346 100644 --- a/sql/2020/css/top_selector_classes.sql +++ b/sql/2020/css/top_selector_classes.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(class, 100) AS classes FROM ( - SELECT DISTINCT - client, - page, - class - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).class) AS class - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + class + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).class) AS class + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(classes) AS class WHERE class.value IS NOT NULL diff --git a/sql/2020/css/top_selector_classes_wp_fa_prefixes.sql b/sql/2020/css/top_selector_classes_wp_fa_prefixes.sql index e94153529c9..a147f281b35 100644 --- a/sql/2020/css/top_selector_classes_wp_fa_prefixes.sql +++ b/sql/2020/css/top_selector_classes_wp_fa_prefixes.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(class_prefix, 200) AS class_prefixes FROM ( - SELECT DISTINCT - client, - page, - IF(REGEXP_CONTAINS(class, r'^(wp|fa)-.+'), REGEXP_REPLACE(class, r'^([^-]+).*', r'\1-*'), class) AS class_prefix - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).class) AS class - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + IF(REGEXP_CONTAINS(class, r'^(wp|fa)-.+'), REGEXP_REPLACE(class, r'^([^-]+).*', r'\1-*'), class) AS class_prefix + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).class) AS class + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(class_prefixes) AS class_prefix WHERE class_prefix.value IS NOT NULL diff --git a/sql/2020/css/top_selector_ids.sql b/sql/2020/css/top_selector_ids.sql index d06a4e629c1..ee211846392 100644 --- a/sql/2020/css/top_selector_ids.sql +++ b/sql/2020/css/top_selector_ids.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(id, 100) AS ids FROM ( - SELECT DISTINCT - client, - page, - id - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).id) AS id - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + id + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).id) AS id + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(ids) AS id WHERE id.value IS NOT NULL diff --git a/sql/2020/css/top_selector_pseudo_classes.sql b/sql/2020/css/top_selector_pseudo_classes.sql index 840dbb6758a..876c75b9d25 100644 --- a/sql/2020/css/top_selector_pseudo_classes.sql +++ b/sql/2020/css/top_selector_pseudo_classes.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(pseudo_class, 100) AS pseudo_classes FROM ( - SELECT DISTINCT - client, - page, - pseudo_class - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).pseudo_class) AS pseudo_class - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + pseudo_class + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).pseudo_class) AS pseudo_class + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(pseudo_classes) AS pseudo_class WHERE pseudo_class.value IS NOT NULL diff --git a/sql/2020/css/top_selector_pseudo_elements.sql b/sql/2020/css/top_selector_pseudo_elements.sql index 9a986959756..9060545cc65 100644 --- a/sql/2020/css/top_selector_pseudo_elements.sql +++ b/sql/2020/css/top_selector_pseudo_elements.sql @@ -69,20 +69,22 @@ FROM ( COUNT(DISTINCT page) AS pages, APPROX_TOP_COUNT(pseudo_element, 100) AS pseudo_elements FROM ( - SELECT DISTINCT - client, - page, - pseudo_element - FROM - `httparchive.almanac.parsed_css` - LEFT JOIN - UNNEST(getSelectorParts(css).pseudo_element) AS pseudo_element - WHERE - date = '2020-08-01' AND - # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + SELECT DISTINCT + client, + page, + pseudo_element + FROM + `httparchive.almanac.parsed_css` + LEFT JOIN + UNNEST(getSelectorParts(css).pseudo_element) AS pseudo_element + WHERE + date = '2020-08-01' AND + # Limit the size of the CSS to avoid OOM crashes. + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY - client), + client +), UNNEST(pseudo_elements) AS pseudo_element WHERE pseudo_element.value IS NOT NULL diff --git a/sql/2020/css/transition_animation_names.sql b/sql/2020/css/transition_animation_names.sql index 2f1f8f1c703..d7e0264e82a 100644 --- a/sql/2020/css/transition_animation_names.sql +++ b/sql/2020/css/transition_animation_names.sql @@ -116,10 +116,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - animation_name) + animation_name +) WHERE pct >= 0.001 ORDER BY diff --git a/sql/2020/css/transition_properties.sql b/sql/2020/css/transition_properties.sql index 45809fb402d..3ef7da0ba9d 100644 --- a/sql/2020/css/transition_properties.sql +++ b/sql/2020/css/transition_properties.sql @@ -113,7 +113,8 @@ FROM ( date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. LENGTH(css) < 0.1 * 1024 * 1024 AND - property IS NOT NULL) + property IS NOT NULL +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -121,9 +122,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, property, diff --git a/sql/2020/css/transition_timing_functions.sql b/sql/2020/css/transition_timing_functions.sql index 508eb9d980f..f6d4d9c56d5 100644 --- a/sql/2020/css/transition_timing_functions.sql +++ b/sql/2020/css/transition_timing_functions.sql @@ -119,10 +119,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - fn) + fn +) WHERE pct >= 0.01 ORDER BY diff --git a/sql/2020/css/units_frequency.sql b/sql/2020/css/units_frequency.sql index 2b97e699c42..511d27dc29a 100644 --- a/sql/2020/css/units_frequency.sql +++ b/sql/2020/css/units_frequency.sql @@ -117,10 +117,12 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, - unit) + unit +) WHERE freq >= 1000 ORDER BY diff --git a/sql/2020/css/units_properties.sql b/sql/2020/css/units_properties.sql index 6839453fd65..54578619eb3 100644 --- a/sql/2020/css/units_properties.sql +++ b/sql/2020/css/units_properties.sql @@ -118,11 +118,13 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 + ) GROUP BY client, unit, - property) + property +) WHERE total >= 1000 AND pct >= 0.01 diff --git a/sql/2020/css/units_unitless_zero.sql b/sql/2020/css/units_unitless_zero.sql index 28ae7766766..3858eee53a6 100644 --- a/sql/2020/css/units_unitless_zero.sql +++ b/sql/2020/css/units_unitless_zero.sql @@ -105,6 +105,7 @@ FROM ( WHERE date = '2020-08-01' AND # Limit the size of the CSS to avoid OOM crashes. - LENGTH(css) < 0.1 * 1024 * 1024) + LENGTH(css) < 0.1 * 1024 * 1024 +) GROUP BY client diff --git a/sql/2020/css/units_zero.sql b/sql/2020/css/units_zero.sql index 44123b346a4..300775ddf85 100644 --- a/sql/2020/css/units_zero.sql +++ b/sql/2020/css/units_zero.sql @@ -111,7 +111,8 @@ FROM ( LENGTH(css) < 0.1 * 1024 * 1024 GROUP BY client, - unit) + unit +) WHERE freq >= 1000 ORDER BY diff --git a/sql/2020/css/vendor_prefix_functions.sql b/sql/2020/css/vendor_prefix_functions.sql index 52296482da2..2b7846e6d9b 100644 --- a/sql/2020/css/vendor_prefix_functions.sql +++ b/sql/2020/css/vendor_prefix_functions.sql @@ -99,7 +99,8 @@ FROM ( LENGTH(css) < 0.1 * 1024 * 1024 GROUP BY client, - function) + function +) ORDER BY pct DESC LIMIT 500 diff --git a/sql/2020/css/vendor_prefix_keywords.sql b/sql/2020/css/vendor_prefix_keywords.sql index 938c4263b40..6de3c90a3d4 100644 --- a/sql/2020/css/vendor_prefix_keywords.sql +++ b/sql/2020/css/vendor_prefix_keywords.sql @@ -101,7 +101,8 @@ FROM ( LENGTH(css) < 0.1 * 1024 * 1024 GROUP BY client, - keyword) + keyword +) ORDER BY pct DESC LIMIT 500 diff --git a/sql/2020/css/vendor_prefix_media.sql b/sql/2020/css/vendor_prefix_media.sql index 33ed4ed914d..22ac5e1f938 100644 --- a/sql/2020/css/vendor_prefix_media.sql +++ b/sql/2020/css/vendor_prefix_media.sql @@ -99,7 +99,8 @@ FROM ( LENGTH(css) < 0.1 * 1024 * 1024 GROUP BY client, - media) + media +) ORDER BY pct DESC LIMIT 500 diff --git a/sql/2020/css/vendor_prefix_properties.sql b/sql/2020/css/vendor_prefix_properties.sql index 4b3c5fd8957..d7599582375 100644 --- a/sql/2020/css/vendor_prefix_properties.sql +++ b/sql/2020/css/vendor_prefix_properties.sql @@ -97,7 +97,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - prop) + prop +) WHERE pages >= 1000 ORDER BY diff --git a/sql/2020/css/vendor_prefix_pseudo_classes.sql b/sql/2020/css/vendor_prefix_pseudo_classes.sql index 52876d4d7d9..a55a15da3ca 100644 --- a/sql/2020/css/vendor_prefix_pseudo_classes.sql +++ b/sql/2020/css/vendor_prefix_pseudo_classes.sql @@ -99,7 +99,8 @@ FROM ( LENGTH(css) < 0.1 * 1024 * 1024 GROUP BY client, - pseudo_class) + pseudo_class +) ORDER BY pct DESC LIMIT 500 diff --git a/sql/2020/css/vendor_prefix_pseudo_elements.sql b/sql/2020/css/vendor_prefix_pseudo_elements.sql index e35f98d42ea..de7013063a3 100644 --- a/sql/2020/css/vendor_prefix_pseudo_elements.sql +++ b/sql/2020/css/vendor_prefix_pseudo_elements.sql @@ -99,7 +99,8 @@ FROM ( LENGTH(css) < 0.1 * 1024 * 1024 GROUP BY client, - pseudo_element) + pseudo_element +) ORDER BY pct DESC LIMIT 500 diff --git a/sql/2020/css/vendor_prefix_summary.sql b/sql/2020/css/vendor_prefix_summary.sql index 302660692bb..29331cfa5d4 100644 --- a/sql/2020/css/vendor_prefix_summary.sql +++ b/sql/2020/css/vendor_prefix_summary.sql @@ -113,7 +113,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - prop) + prop +) WHERE pages >= 1000 ORDER BY diff --git a/sql/2020/ecommerce/android_ios_app_links_ecomm_sites.sql b/sql/2020/ecommerce/android_ios_app_links_ecomm_sites.sql index 43c74979684..6e68009cd16 100644 --- a/sql/2020/ecommerce/android_ios_app_links_ecomm_sites.sql +++ b/sql/2020/ecommerce/android_ios_app_links_ecomm_sites.sql @@ -15,7 +15,8 @@ FROM ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'Ecommerce') + category = 'Ecommerce' +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -23,9 +24,9 @@ JOIN ( JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._ecommerce'), '$.AndroidAppLinks') = '1' AS android_app_links, JSON_EXTRACT(JSON_EXTRACT_SCALAR(payload, '$._ecommerce'), '$.iOSUniveralLinks') = '1' AS ios_universal_links FROM - `httparchive.pages.2020_08_01_*`) -USING - (client, url) + `httparchive.pages.2020_08_01_*` +) +USING (client, url) GROUP BY client ORDER BY diff --git a/sql/2020/ecommerce/core_web_vitals_distribution_byvendor_bydevice.sql b/sql/2020/ecommerce/core_web_vitals_distribution_byvendor_bydevice.sql index 6fa9dffca34..6dbabd3afb5 100644 --- a/sql/2020/ecommerce/core_web_vitals_distribution_byvendor_bydevice.sql +++ b/sql/2020/ecommerce/core_web_vitals_distribution_byvendor_bydevice.sql @@ -29,12 +29,11 @@ JOIN ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'Ecommerce' AND - ( + category = 'Ecommerce' AND ( app != 'Cart Functionality' AND app != 'Google Analytics Enhanced eCommerce' ) - ) +) ON CONCAT(origin, '/') = url AND IF(device = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2020/ecommerce/core_web_vitals_passingmetrics_byvendor_bydevice.sql b/sql/2020/ecommerce/core_web_vitals_passingmetrics_byvendor_bydevice.sql index a4e310dc3b3..bc636286424 100644 --- a/sql/2020/ecommerce/core_web_vitals_passingmetrics_byvendor_bydevice.sql +++ b/sql/2020/ecommerce/core_web_vitals_passingmetrics_byvendor_bydevice.sql @@ -1,10 +1,10 @@ #standardSQL # CrUX Core Web Vitals performance of Ecommerce vendors by device -CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( +CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( good / (good + needs_improvement + poor) >= 0.75 ); -CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( +CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( good + needs_improvement + poor > 0 ); @@ -16,28 +16,34 @@ SELECT # Origins with good LCP divided by origins with any LCP. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp, + COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL)) + ) AS pct_good_lcp, # Origins with good FID divided by origins with any FID. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid, + COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL)) + ) AS pct_good_fid, # Origins with good CLS divided by origins with any CLS. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls, + COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL)) + ) AS pct_good_cls, # Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS. SAFE_DIVIDE( COUNT(DISTINCT IF( IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND IS_GOOD(fast_fid, avg_fid, slow_fid) AND - IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), + IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL + )), COUNT(DISTINCT IF( IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND - IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv + IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL + )) + ) AS pct_good_cwv FROM `chrome-ux-report.materialized.device_summary` JOIN ( @@ -48,12 +54,11 @@ JOIN ( FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'Ecommerce' AND - ( + category = 'Ecommerce' AND ( app != 'Cart Functionality' AND app != 'Google Analytics Enhanced eCommerce' ) - ) +) ON CONCAT(origin, '/') = url AND IF(device = 'desktop', 'desktop', 'mobile') = client diff --git a/sql/2020/ecommerce/ecomm_covid_growth.sql b/sql/2020/ecommerce/ecomm_covid_growth.sql index 4baed50a3b3..32d2eff21ca 100644 --- a/sql/2020/ecommerce/ecomm_covid_growth.sql +++ b/sql/2020/ecommerce/ecomm_covid_growth.sql @@ -10,16 +10,15 @@ SELECT LEFT(_TABLE_SUFFIX, 2) AS month FROM `httparchive.technologies.2020_*` -JOIN - ( - SELECT - _TABLE_SUFFIX, - COUNT(DISTINCT url) AS total - FROM - `httparchive.summary_pages.2020_*` - GROUP BY - _TABLE_SUFFIX - ) +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(DISTINCT url) AS total + FROM + `httparchive.summary_pages.2020_*` + GROUP BY + _TABLE_SUFFIX +) USING (_TABLE_SUFFIX) WHERE category = 'Ecommerce' @@ -40,16 +39,15 @@ SELECT LEFT(_TABLE_SUFFIX, 2) AS month FROM `httparchive.technologies.2019_*` -JOIN - ( - SELECT - _TABLE_SUFFIX, - COUNT(DISTINCT url) AS total - FROM - `httparchive.summary_pages.2019_*` - GROUP BY - _TABLE_SUFFIX - ) +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(DISTINCT url) AS total + FROM + `httparchive.summary_pages.2019_*` + GROUP BY + _TABLE_SUFFIX +) USING (_TABLE_SUFFIX) WHERE category = 'Ecommerce' diff --git a/sql/2020/ecommerce/ecomm_vendors_covid_growth.sql b/sql/2020/ecommerce/ecomm_vendors_covid_growth.sql index a76a3bd7574..70cba407e2d 100644 --- a/sql/2020/ecommerce/ecomm_vendors_covid_growth.sql +++ b/sql/2020/ecommerce/ecomm_vendors_covid_growth.sql @@ -12,20 +12,18 @@ SELECT SUBSTR(_TABLE_SUFFIX, 6, 2) AS month FROM `httparchive.technologies.*` -JOIN - ( - SELECT - _TABLE_SUFFIX, - COUNT(DISTINCT url) AS total - FROM - `httparchive.summary_pages.*` - GROUP BY - _TABLE_SUFFIX - ) +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(DISTINCT url) AS total + FROM + `httparchive.summary_pages.*` + GROUP BY + _TABLE_SUFFIX +) USING (_TABLE_SUFFIX) WHERE - category = 'Ecommerce' AND - ( + category = 'Ecommerce' AND ( app != 'Cart Functionality' AND app != 'Google Analytics Enhanced eCommerce' ) diff --git a/sql/2020/ecommerce/median_lighthouse_score_ecommsites.sql b/sql/2020/ecommerce/median_lighthouse_score_ecommsites.sql index 2eab126257e..844af887c06 100644 --- a/sql/2020/ecommerce/median_lighthouse_score_ecommsites.sql +++ b/sql/2020/ecommerce/median_lighthouse_score_ecommsites.sql @@ -13,11 +13,9 @@ FROM `httparchive.lighthouse.2020_09_01_mobile` JOIN `httparchive.technologies.2020_09_01_mobile` -USING - (url) +USING (url) WHERE - category = 'Ecommerce' AND - ( + category = 'Ecommerce' AND ( app != 'Cart Functionality' AND app != 'Google Analytics Enhanced eCommerce' ) diff --git a/sql/2020/ecommerce/pagestats_html_bydevice.sql b/sql/2020/ecommerce/pagestats_html_bydevice.sql index c64950b58d7..135eba7dc4f 100644 --- a/sql/2020/ecommerce/pagestats_html_bydevice.sql +++ b/sql/2020/ecommerce/pagestats_html_bydevice.sql @@ -13,8 +13,7 @@ JOIN `httparchive.technologies.2020_08_01_*` USING (_TABLE_SUFFIX, url) WHERE - category = 'Ecommerce' AND - ( + category = 'Ecommerce' AND ( app != 'Cart Functionality' AND app != 'Google Analytics Enhanced eCommerce' ) diff --git a/sql/2020/ecommerce/pagestats_image_bydevice.sql b/sql/2020/ecommerce/pagestats_image_bydevice.sql index 2e2950f8131..6620db9faf4 100644 --- a/sql/2020/ecommerce/pagestats_image_bydevice.sql +++ b/sql/2020/ecommerce/pagestats_image_bydevice.sql @@ -12,7 +12,8 @@ JOIN ( _TABLE_SUFFIX, url FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') + WHERE category = 'Ecommerce' +) USING (_TABLE_SUFFIX, url), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY diff --git a/sql/2020/ecommerce/pagestats_image_bydevice_format.sql b/sql/2020/ecommerce/pagestats_image_bydevice_format.sql index 5e39450ade2..10e5bcbc97c 100644 --- a/sql/2020/ecommerce/pagestats_image_bydevice_format.sql +++ b/sql/2020/ecommerce/pagestats_image_bydevice_format.sql @@ -8,8 +8,7 @@ SELECT ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct FROM `httparchive.almanac.summary_requests` -JOIN - (SELECT DISTINCT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` WHERE category = 'Ecommerce') +JOIN (SELECT DISTINCT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` WHERE category = 'Ecommerce') USING (client, page) WHERE type = 'image' diff --git a/sql/2020/ecommerce/pagestats_percentile_bydevice_format.sql b/sql/2020/ecommerce/pagestats_percentile_bydevice_format.sql index 68cdc5a9d7d..7b6d895e8ce 100644 --- a/sql/2020/ecommerce/pagestats_percentile_bydevice_format.sql +++ b/sql/2020/ecommerce/pagestats_percentile_bydevice_format.sql @@ -19,15 +19,16 @@ FROM ( _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2020-08-01' GROUP BY client, type, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/ecommerce/pagestats_percentiles_bydevice.sql b/sql/2020/ecommerce/pagestats_percentiles_bydevice.sql index ca88f3137b4..e96e8fddfb3 100644 --- a/sql/2020/ecommerce/pagestats_percentiles_bydevice.sql +++ b/sql/2020/ecommerce/pagestats_percentiles_bydevice.sql @@ -17,14 +17,15 @@ FROM ( _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2020-08-01' GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/ecommerce/pct_3pusage_bydevice.sql b/sql/2020/ecommerce/pct_3pusage_bydevice.sql index c52f71fb0df..42d8f5fcfd3 100644 --- a/sql/2020/ecommerce/pct_3pusage_bydevice.sql +++ b/sql/2020/ecommerce/pct_3pusage_bydevice.sql @@ -15,18 +15,20 @@ FROM ( JOIN ( SELECT DISTINCT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2020-08-01' AND - NET.HOST(url) IN - (SELECT domain - FROM `httparchive.almanac.third_parties` - WHERE category != 'hosting') + NET.HOST(url) IN ( + SELECT domain + FROM `httparchive.almanac.third_parties` + WHERE category != 'hosting' + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/ecommerce/pct_3pusage_bydevice_vendor.sql b/sql/2020/ecommerce/pct_3pusage_bydevice_vendor.sql index 754a8953fb4..9a9be5f2568 100644 --- a/sql/2020/ecommerce/pct_3pusage_bydevice_vendor.sql +++ b/sql/2020/ecommerce/pct_3pusage_bydevice_vendor.sql @@ -18,20 +18,22 @@ FROM ( JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, app FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') - USING - (client, page) + WHERE category = 'Ecommerce' + ) + USING (client, page) WHERE date = '2020-08-01' AND - NET.HOST(url) IN - (SELECT domain - FROM `httparchive.almanac.third_parties` + NET.HOST(url) IN ( + SELECT domain + FROM `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND - category != 'hosting') + category != 'hosting' + ) GROUP BY client, app, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY client, diff --git a/sql/2020/ecommerce/pct_3pusage_bydevice_vendor_category.sql b/sql/2020/ecommerce/pct_3pusage_bydevice_vendor_category.sql index 74c749300f9..dd883e93655 100644 --- a/sql/2020/ecommerce/pct_3pusage_bydevice_vendor_category.sql +++ b/sql/2020/ecommerce/pct_3pusage_bydevice_vendor_category.sql @@ -20,9 +20,9 @@ FROM ( url AS page FROM `httparchive.technologies.2020_08_01_*` WHERE - category = 'Ecommerce') - USING - (client, page) + category = 'Ecommerce' + ) + USING (client, page) JOIN `httparchive.almanac.third_parties` ON @@ -32,7 +32,8 @@ FROM ( GROUP BY client, category, - page), + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY diff --git a/sql/2020/ecommerce/pct_ampusage_bydevice_vendor.sql b/sql/2020/ecommerce/pct_ampusage_bydevice_vendor.sql index 980a34f10fd..b456bbea4fe 100644 --- a/sql/2020/ecommerce/pct_ampusage_bydevice_vendor.sql +++ b/sql/2020/ecommerce/pct_ampusage_bydevice_vendor.sql @@ -16,8 +16,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -29,8 +28,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE app = 'AMP' GROUP BY diff --git a/sql/2020/ecommerce/pct_ecommsites_bydevice_compare20192020.sql b/sql/2020/ecommerce/pct_ecommsites_bydevice_compare20192020.sql index 427d57c6136..1118db1f00f 100644 --- a/sql/2020/ecommerce/pct_ecommsites_bydevice_compare20192020.sql +++ b/sql/2020/ecommerce/pct_ecommsites_bydevice_compare20192020.sql @@ -17,9 +17,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE category = 'Ecommerce' GROUP BY @@ -41,9 +41,9 @@ JOIN ( FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE category = 'Ecommerce' GROUP BY diff --git a/sql/2020/ecommerce/percent_of_ecommsites_using_a11y_solutions.sql b/sql/2020/ecommerce/percent_of_ecommsites_using_a11y_solutions.sql index 4d0ff59b612..eb2c251300c 100644 --- a/sql/2020/ecommerce/percent_of_ecommsites_using_a11y_solutions.sql +++ b/sql/2020/ecommerce/percent_of_ecommsites_using_a11y_solutions.sql @@ -16,8 +16,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -29,8 +28,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Accessibility' GROUP BY diff --git a/sql/2020/ecommerce/percent_of_ecommsites_using_cmp.sql b/sql/2020/ecommerce/percent_of_ecommsites_using_cmp.sql index c370fc4a8f2..d675dcb0f50 100644 --- a/sql/2020/ecommerce/percent_of_ecommsites_using_cmp.sql +++ b/sql/2020/ecommerce/percent_of_ecommsites_using_cmp.sql @@ -16,8 +16,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -29,8 +28,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Cookie compliance' GROUP BY diff --git a/sql/2020/ecommerce/percent_of_ecommsites_using_each_a11y_solutions.sql b/sql/2020/ecommerce/percent_of_ecommsites_using_each_a11y_solutions.sql index a0a1f415974..9065ad25f03 100644 --- a/sql/2020/ecommerce/percent_of_ecommsites_using_each_a11y_solutions.sql +++ b/sql/2020/ecommerce/percent_of_ecommsites_using_each_a11y_solutions.sql @@ -17,8 +17,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -30,8 +29,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Accessibility' GROUP BY diff --git a/sql/2020/ecommerce/percent_of_ecommsites_using_each_cmp.sql b/sql/2020/ecommerce/percent_of_ecommsites_using_each_cmp.sql index 86c594f2588..a93ff86e5fb 100644 --- a/sql/2020/ecommerce/percent_of_ecommsites_using_each_cmp.sql +++ b/sql/2020/ecommerce/percent_of_ecommsites_using_each_cmp.sql @@ -17,8 +17,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -30,8 +29,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Cookie compliance' GROUP BY diff --git a/sql/2020/ecommerce/percent_of_ecommsites_using_each_payment_processors.sql b/sql/2020/ecommerce/percent_of_ecommsites_using_each_payment_processors.sql index 5494592241a..728a8d1d9e1 100644 --- a/sql/2020/ecommerce/percent_of_ecommsites_using_each_payment_processors.sql +++ b/sql/2020/ecommerce/percent_of_ecommsites_using_each_payment_processors.sql @@ -17,8 +17,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -30,8 +29,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Payment processors' GROUP BY diff --git a/sql/2020/ecommerce/percent_of_ecommsites_using_each_tag_managers.sql b/sql/2020/ecommerce/percent_of_ecommsites_using_each_tag_managers.sql index f7e30637971..c0aabb644b3 100644 --- a/sql/2020/ecommerce/percent_of_ecommsites_using_each_tag_managers.sql +++ b/sql/2020/ecommerce/percent_of_ecommsites_using_each_tag_managers.sql @@ -17,8 +17,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -30,8 +29,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Tag managers' GROUP BY diff --git a/sql/2020/ecommerce/top_adplatform_bydevice_vendor.sql b/sql/2020/ecommerce/top_adplatform_bydevice_vendor.sql index 5b7a58df909..8ff400a78da 100644 --- a/sql/2020/ecommerce/top_adplatform_bydevice_vendor.sql +++ b/sql/2020/ecommerce/top_adplatform_bydevice_vendor.sql @@ -16,9 +16,9 @@ JOIN ( _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') -USING - (client, page) + WHERE category = 'Ecommerce' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` ON @@ -26,9 +26,9 @@ ON JOIN ( SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE `httparchive.almanac.summary_requests`.date = '2020-08-01' AND LOWER(category) = 'ad' diff --git a/sql/2020/ecommerce/top_adplatform_bydevice_vendor_wapp.sql b/sql/2020/ecommerce/top_adplatform_bydevice_vendor_wapp.sql index ef9cbb843ab..ad6de591e30 100644 --- a/sql/2020/ecommerce/top_adplatform_bydevice_vendor_wapp.sql +++ b/sql/2020/ecommerce/top_adplatform_bydevice_vendor_wapp.sql @@ -9,19 +9,17 @@ SELECT ROUND(COUNTIF(category = 'Advertising') * 100 / SUM(COUNT(0)) OVER (PARTITION BY vendor), 2) AS pct FROM `httparchive.technologies.2020_08_01_*` -JOIN - ( - SELECT - _TABLE_SUFFIX AS client, - url, - app AS vendor - FROM - `httparchive.technologies.2020_08_01_*` - WHERE - category = 'Ecommerce' - ) -USING - (url) +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url, + app AS vendor + FROM + `httparchive.technologies.2020_08_01_*` + WHERE + category = 'Ecommerce' +) +USING (url) GROUP BY client, vendor, app HAVING diff --git a/sql/2020/ecommerce/top_analytics_bydevice_vendor.sql b/sql/2020/ecommerce/top_analytics_bydevice_vendor.sql index 3ba101aed9a..d979e721c3e 100644 --- a/sql/2020/ecommerce/top_analytics_bydevice_vendor.sql +++ b/sql/2020/ecommerce/top_analytics_bydevice_vendor.sql @@ -14,9 +14,9 @@ FROM JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') -USING - (client, page) + WHERE category = 'Ecommerce' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` ON @@ -26,9 +26,9 @@ JOIN ( _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE `httparchive.almanac.summary_requests`.date = '2020-08-01' AND LOWER(category) = 'analytics' diff --git a/sql/2020/ecommerce/top_analytics_providers_bydevice_wapp.sql b/sql/2020/ecommerce/top_analytics_providers_bydevice_wapp.sql index 5ca3b7fb46a..747fd778f66 100644 --- a/sql/2020/ecommerce/top_analytics_providers_bydevice_wapp.sql +++ b/sql/2020/ecommerce/top_analytics_providers_bydevice_wapp.sql @@ -17,8 +17,7 @@ JOIN ( WHERE category = 'Ecommerce' ) -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) JOIN ( SELECT _TABLE_SUFFIX, @@ -30,8 +29,7 @@ JOIN ( GROUP BY _TABLE_SUFFIX ) -USING - (_TABLE_SUFFIX) +USING (_TABLE_SUFFIX) WHERE category = 'Analytics' GROUP BY diff --git a/sql/2020/ecommerce/top_cdn_bydevice.sql b/sql/2020/ecommerce/top_cdn_bydevice.sql index f46e8e0d3e6..7ef1f7f46d1 100644 --- a/sql/2020/ecommerce/top_cdn_bydevice.sql +++ b/sql/2020/ecommerce/top_cdn_bydevice.sql @@ -14,9 +14,9 @@ FROM JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page FROM `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') -USING - (client, page) + WHERE category = 'Ecommerce' +) +USING (client, page) JOIN `httparchive.almanac.third_parties` ON @@ -24,9 +24,9 @@ ON JOIN ( SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total FROM `httparchive.summary_pages.2020_08_01_*` - GROUP BY _TABLE_SUFFIX) -USING - (client) + GROUP BY _TABLE_SUFFIX +) +USING (client) WHERE `httparchive.almanac.summary_requests`.date = '2020-08-01' AND LOWER(category) = 'cdn' diff --git a/sql/2020/ecommerce/top_cdn_bydevice_vendor_cdn.sql b/sql/2020/ecommerce/top_cdn_bydevice_vendor_cdn.sql index 0e76a8ebe5a..c0076fbbe6c 100644 --- a/sql/2020/ecommerce/top_cdn_bydevice_vendor_cdn.sql +++ b/sql/2020/ecommerce/top_cdn_bydevice_vendor_cdn.sql @@ -9,19 +9,17 @@ SELECT ROUND(COUNTIF(category = 'CDN') * 100 / SUM(COUNT(0)) OVER (PARTITION BY vendor), 2) AS pct FROM `httparchive.technologies.2020_08_01_*` -JOIN - ( - SELECT - _TABLE_SUFFIX AS client, - url, - app AS vendor - FROM - `httparchive.technologies.2020_08_01_*` - WHERE - category = 'Ecommerce' - ) -USING - (url) +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url, + app AS vendor + FROM + `httparchive.technologies.2020_08_01_*` + WHERE + category = 'Ecommerce' +) +USING (url) GROUP BY client, vendor, diff --git a/sql/2020/ecommerce/top_cdn_bydevice_vendor_wapp.sql b/sql/2020/ecommerce/top_cdn_bydevice_vendor_wapp.sql index 0bcdbd10335..1cd5d247744 100644 --- a/sql/2020/ecommerce/top_cdn_bydevice_vendor_wapp.sql +++ b/sql/2020/ecommerce/top_cdn_bydevice_vendor_wapp.sql @@ -9,19 +9,17 @@ SELECT ROUND(COUNTIF(category = 'CDN') * 100 / SUM(COUNT(0)) OVER (PARTITION BY vendor), 2) AS pct FROM `httparchive.technologies.2020_08_01_*` -JOIN - ( - SELECT - _TABLE_SUFFIX AS client, - url, - app AS vendor - FROM - `httparchive.technologies.2020_08_01_*` - WHERE - category = 'Ecommerce' - ) -USING - (url) +JOIN ( + SELECT + _TABLE_SUFFIX AS client, + url, + app AS vendor + FROM + `httparchive.technologies.2020_08_01_*` + WHERE + category = 'Ecommerce' +) +USING (url) GROUP BY client, vendor, diff --git a/sql/2020/ecommerce/top_vendors.sql b/sql/2020/ecommerce/top_vendors.sql index f6a47ae2b56..f0b95e7e52b 100644 --- a/sql/2020/ecommerce/top_vendors.sql +++ b/sql/2020/ecommerce/top_vendors.sql @@ -9,20 +9,18 @@ SELECT COUNT(DISTINCT url) / total AS pct FROM `httparchive.technologies.2020_08_01_*` -JOIN - ( - SELECT - _TABLE_SUFFIX, - COUNT(DISTINCT url) AS total - FROM - `httparchive.summary_pages.2020_08_01_*` - GROUP BY - _TABLE_SUFFIX - ) +JOIN ( + SELECT + _TABLE_SUFFIX, + COUNT(DISTINCT url) AS total + FROM + `httparchive.summary_pages.2020_08_01_*` + GROUP BY + _TABLE_SUFFIX +) USING (_TABLE_SUFFIX) WHERE - category = 'Ecommerce' AND - ( + category = 'Ecommerce' AND ( app != 'Cart Functionality' AND app != 'Google Analytics Enhanced eCommerce' ) diff --git a/sql/2020/ecommerce/webpush_adoption_by_ecommsites.sql b/sql/2020/ecommerce/webpush_adoption_by_ecommsites.sql index 8bda053262a..f5a27ccaac3 100644 --- a/sql/2020/ecommerce/webpush_adoption_by_ecommsites.sql +++ b/sql/2020/ecommerce/webpush_adoption_by_ecommsites.sql @@ -10,14 +10,14 @@ SELECT FROM `chrome-ux-report.materialized.metrics_summary` JOIN ( - SELECT DISTINCT - _TABLE_SUFFIX AS client, - RTRIM(url, '/') AS origin - FROM - `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') -USING - (origin) + SELECT DISTINCT + _TABLE_SUFFIX AS client, + RTRIM(url, '/') AS origin + FROM + `httparchive.technologies.2020_08_01_*` + WHERE category = 'Ecommerce' +) +USING (origin) WHERE date IN ('2020-08-01') GROUP BY client diff --git a/sql/2020/ecommerce/webpushstats_ecommsites.sql b/sql/2020/ecommerce/webpushstats_ecommsites.sql index 64a4cddfb80..8a7c944d060 100644 --- a/sql/2020/ecommerce/webpushstats_ecommsites.sql +++ b/sql/2020/ecommerce/webpushstats_ecommsites.sql @@ -36,14 +36,14 @@ SELECT FROM `chrome-ux-report.materialized.metrics_summary` JOIN ( - SELECT DISTINCT - _TABLE_SUFFIX AS client, - RTRIM(url, '/') AS origin - FROM - `httparchive.technologies.2020_08_01_*` - WHERE category = 'Ecommerce') -USING - (origin) + SELECT DISTINCT + _TABLE_SUFFIX AS client, + RTRIM(url, '/') AS origin + FROM + `httparchive.technologies.2020_08_01_*` + WHERE category = 'Ecommerce' +) +USING (origin) WHERE date IN ('2020-08-01') AND notification_permission_accept IS NOT NULL GROUP BY diff --git a/sql/2020/fonts/04_02.web_fonts_usage_by_country.sql b/sql/2020/fonts/04_02.web_fonts_usage_by_country.sql index 5dc4ad861b7..2e3514bf127 100644 --- a/sql/2020/fonts/04_02.web_fonts_usage_by_country.sql +++ b/sql/2020/fonts/04_02.web_fonts_usage_by_country.sql @@ -13,7 +13,8 @@ FROM ( FROM `chrome-ux-report.materialized.country_summary` WHERE - yyyymm = 202008) + yyyymm = 202008 +) JOIN `httparchive.summary_pages.2020_08_01_*` ON diff --git a/sql/2020/fonts/04_03.popular_font_hosts_by_country.sql b/sql/2020/fonts/04_03.popular_font_hosts_by_country.sql index 5d586439f4f..cd8bac87fb7 100644 --- a/sql/2020/fonts/04_03.popular_font_hosts_by_country.sql +++ b/sql/2020/fonts/04_03.popular_font_hosts_by_country.sql @@ -7,38 +7,38 @@ SELECT pages, total, pct -FROM - ( - SELECT - client, - country, - NET.HOST(url) AS host, - COUNT(DISTINCT page) AS pages, - SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total, - COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct, - ROW_NUMBER() OVER (PARTITION BY client, country ORDER BY COUNT(DISTINCT page) DESC) AS sort_row +FROM ( + SELECT + client, + country, + NET.HOST(url) AS host, + COUNT(DISTINCT page) AS pages, + SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total, + COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct, + ROW_NUMBER() OVER (PARTITION BY client, country ORDER BY COUNT(DISTINCT page) DESC) AS sort_row + FROM + `httparchive.almanac.requests` + JOIN ( + SELECT DISTINCT + origin, device, + `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS country FROM - `httparchive.almanac.requests` - JOIN ( - SELECT DISTINCT - origin, device, - `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS country - FROM - `chrome-ux-report.materialized.country_summary` - WHERE - yyyymm = 202008) - ON - CONCAT(origin, '/') = page AND - IF(device = 'desktop', 'desktop', 'mobile') = client + `chrome-ux-report.materialized.country_summary` WHERE - type = 'font' AND - NET.HOST(url) != NET.HOST(page) AND - date = '2020-08-01' - GROUP BY - client, - country, - host - ORDER BY - pct DESC + yyyymm = 202008 ) + ON + CONCAT(origin, '/') = page AND + IF(device = 'desktop', 'desktop', 'mobile') = client + WHERE + type = 'font' AND + NET.HOST(url) != NET.HOST(page) AND + date = '2020-08-01' + GROUP BY + client, + country, + host + ORDER BY + pct DESC +) WHERE sort_row <= 1 diff --git a/sql/2020/fonts/04_04.self_hosted_vs_hosted_with_fcp.sql b/sql/2020/fonts/04_04.self_hosted_vs_hosted_with_fcp.sql index 8c9f6e14bc5..f81aeef2042 100644 --- a/sql/2020/fonts/04_04.self_hosted_vs_hosted_with_fcp.sql +++ b/sql/2020/fonts/04_04.self_hosted_vs_hosted_with_fcp.sql @@ -6,7 +6,7 @@ SELECT WHEN pct_self_hosted_hosted = 1 THEN 'self-hosted' WHEN pct_self_hosted_hosted = 0 THEN 'external' ELSE 'both' END - AS font_host, + AS font_host, COUNT(DISTINCT page) AS pages, SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total, COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct, @@ -24,7 +24,8 @@ FROM ( type = 'font' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -32,9 +33,9 @@ JOIN ( CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp, CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp FROM - `httparchive.pages.2020_08_01_*`) -USING - (client, page) + `httparchive.pages.2020_08_01_*` +) +USING (client, page) GROUP BY client, font_host diff --git a/sql/2020/fonts/04_05.web_font_usage_breakdown_with_fcp.sql b/sql/2020/fonts/04_05.web_font_usage_breakdown_with_fcp.sql index 12edf1ffccb..63b06b4ff6d 100644 --- a/sql/2020/fonts/04_05.web_font_usage_breakdown_with_fcp.sql +++ b/sql/2020/fonts/04_05.web_font_usage_breakdown_with_fcp.sql @@ -21,7 +21,8 @@ FROM ( NET.HOST(page) != NET.HOST(url) GROUP BY client, url, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -29,9 +30,9 @@ JOIN ( CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp, CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp FROM - `httparchive.pages.2020_08_01_*`) -USING - (client, page) + `httparchive.pages.2020_08_01_*` +) +USING (client, page) GROUP BY client, host diff --git a/sql/2020/fonts/04_06.font_display_with_fcp.sql b/sql/2020/fonts/04_06.font_display_with_fcp.sql index e8ca3ef0a84..fb59dcbf5ac 100644 --- a/sql/2020/fonts/04_06.font_display_with_fcp.sql +++ b/sql/2020/fonts/04_06.font_display_with_fcp.sql @@ -1,7 +1,7 @@ #standardSQL #font_display_with_fcp CREATE TEMPORARY FUNCTION getFontDisplay(css STRING) -RETURNS ARRAY < STRING > LANGUAGE js AS ''' +RETURNS ARRAY LANGUAGE js AS ''' try { var reduceValues = (values, rule) => { if ('rules' in rule) { @@ -45,24 +45,28 @@ FROM ( LEFT JOIN UNNEST(getFontDisplay(css)) AS font_display WHERE - date = '2020-08-01') + date = '2020-08-01' +) JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.firstContentfulPaint']" + ) AS INT64) AS fcp, + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.LargestContentfulPaint']" + ) AS INT64) AS lcp FROM `httparchive.pages.2020_08_01_*` GROUP BY _TABLE_SUFFIX, url, - payload) -USING - (client, - page) + payload +) +USING (client, page) GROUP BY client, font_display diff --git a/sql/2020/fonts/04_07.font_resource_hints_with_fcp.sql b/sql/2020/fonts/04_07.font_resource_hints_with_fcp.sql index 957aed69321..c7812ee940a 100644 --- a/sql/2020/fonts/04_07.font_resource_hints_with_fcp.sql +++ b/sql/2020/fonts/04_07.font_resource_hints_with_fcp.sql @@ -1,5 +1,5 @@ CREATE TEMPORARY FUNCTION getResourceHints(payload STRING) -RETURNS ARRAY < STRUCT < name STRING, href STRING >> +RETURNS ARRAY> LANGUAGE js AS ''' var hints = new Set(['preload', 'prefetch', 'preconnect', 'prerender', 'dns-prefetch']); try { @@ -33,14 +33,19 @@ FROM ( _TABLE_SUFFIX AS client, url AS page, hint.name, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.firstContentfulPaint']" + ) AS INT64) AS fcp, + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.LargestContentfulPaint']" + ) AS INT64) AS lcp FROM `httparchive.pages.2020_08_01_*` LEFT JOIN - UNNEST(getResourceHints(payload)) AS hint) + UNNEST(getResourceHints(payload)) AS hint +) LEFT JOIN ( SELECT client, @@ -49,9 +54,9 @@ LEFT JOIN ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') -USING - (client, page) + date = '2020-08-01' +) +USING (client, page) WHERE type = 'font' GROUP BY diff --git a/sql/2020/fonts/04_08.font_unicode_range_with_fcp.sql b/sql/2020/fonts/04_08.font_unicode_range_with_fcp.sql index 7ffe84ba0e2..491ccd1e5cb 100644 --- a/sql/2020/fonts/04_08.font_unicode_range_with_fcp.sql +++ b/sql/2020/fonts/04_08.font_unicode_range_with_fcp.sql @@ -51,21 +51,26 @@ FROM ( GROUP BY client, page, - unicode) + unicode +) JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.firstContentfulPaint']" + ) AS INT64) AS fcp, + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.LargestContentfulPaint']" + ) AS INT64) AS lcp FROM `httparchive.pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX, page, payload) -USING - (client, page) + _TABLE_SUFFIX, page, payload +) +USING (client, page) GROUP BY client, use_unicode diff --git a/sql/2020/fonts/04_09.font_subset_with_fcp.sql b/sql/2020/fonts/04_09.font_subset_with_fcp.sql index 52b346582ea..79f5cd1ec46 100644 --- a/sql/2020/fonts/04_09.font_subset_with_fcp.sql +++ b/sql/2020/fonts/04_09.font_subset_with_fcp.sql @@ -44,24 +44,28 @@ FROM ( LEFT JOIN UNNEST(getFont(css)) AS font_subset WHERE - date = '2020-08-01') + date = '2020-08-01' +) JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp, - CAST(JSON_EXTRACT_SCALAR(payload, - "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.firstContentfulPaint']" + ) AS INT64) AS fcp, + CAST(JSON_EXTRACT_SCALAR( + payload, + "$['_chromeUserTiming.LargestContentfulPaint']" + ) AS INT64) AS lcp FROM `httparchive.pages.2020_08_01_*` GROUP BY _TABLE_SUFFIX, url, - payload) -USING - (client, - page) + payload +) +USING (client, page) GROUP BY client, font_subset diff --git a/sql/2020/fonts/04_11.popular_typeface_by_country.sql b/sql/2020/fonts/04_11.popular_typeface_by_country.sql index ffbb96b8dfa..4d4b7798fd5 100644 --- a/sql/2020/fonts/04_11.popular_typeface_by_country.sql +++ b/sql/2020/fonts/04_11.popular_typeface_by_country.sql @@ -1,7 +1,7 @@ #standardSQL #popular typeface by country CREATE TEMPORARY FUNCTION getFontFamilies(css STRING) -RETURNS ARRAY < STRING > LANGUAGE js AS ''' +RETURNS ARRAY LANGUAGE js AS ''' try { var $ = JSON.parse(css); return $.stylesheet.rules.filter(rule => rule.type == 'font-face').map(rule => { @@ -39,7 +39,8 @@ FROM ( FROM `chrome-ux-report.materialized.country_summary` WHERE - yyyymm = 202008) + yyyymm = 202008 + ) ON CONCAT(origin, '/') = page AND IF(device = 'desktop', 'desktop', 'mobile') = client @@ -50,6 +51,7 @@ FROM ( country, font_family ORDER BY - client, country, freq DESC) + client, country, freq DESC +) WHERE sort_row <= 1 diff --git a/sql/2020/fonts/04_11a.popular_typeface.sql b/sql/2020/fonts/04_11a.popular_typeface.sql index 469b769df9e..982aa6fb60e 100644 --- a/sql/2020/fonts/04_11a.popular_typeface.sql +++ b/sql/2020/fonts/04_11a.popular_typeface.sql @@ -1,7 +1,7 @@ #standardSQL #popular_typeface CREATE TEMPORARY FUNCTION getFontFamilies(css STRING) -RETURNS ARRAY LANGUAGE js AS ''' +RETURNS ARRAY LANGUAGE js AS ''' try { var $ = JSON.parse(css); return $.stylesheet.rules.filter(rule => rule.type == 'font-face').map(rule => { @@ -31,7 +31,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - font_family) + font_family +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -39,9 +40,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) WHERE pages / total >= 0.004 ORDER BY diff --git a/sql/2020/fonts/04_12.font_weight_font_style.sql b/sql/2020/fonts/04_12.font_weight_font_style.sql index 62be6d93aaa..dc1749a8f6e 100644 --- a/sql/2020/fonts/04_12.font_weight_font_style.sql +++ b/sql/2020/fonts/04_12.font_weight_font_style.sql @@ -53,7 +53,8 @@ FROM ( `httparchive.almanac.parsed_css` LEFT JOIN UNNEST(getFonts(css)) AS font WHERE - date = '2020-08-01') + date = '2020-08-01' +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -61,9 +62,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, style, diff --git a/sql/2020/fonts/04_14.variable_font.sql b/sql/2020/fonts/04_14.variable_font.sql index 0e6f29cdaab..5ada9b93bbf 100644 --- a/sql/2020/fonts/04_14.variable_font.sql +++ b/sql/2020/fonts/04_14.variable_font.sql @@ -29,7 +29,8 @@ FROM ( WHERE date = '2020-09-01' AND type = 'font' AND - REGEXP_CONTAINS(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)gvar')) + REGEXP_CONTAINS(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)gvar') +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -38,9 +39,9 @@ JOIN ( FROM `httparchive.pages.2020_09_01_*` GROUP BY - _TABLE_SUFFIX, url, payload) -USING - (client, page) + _TABLE_SUFFIX, url, payload +) +USING (client, page) WHERE name IS NOT NULL GROUP BY diff --git a/sql/2020/fonts/04_14a.variable_font_comparison_fcp.sql b/sql/2020/fonts/04_14a.variable_font_comparison_fcp.sql index 791cd64ac8a..249885bad3b 100644 --- a/sql/2020/fonts/04_14a.variable_font_comparison_fcp.sql +++ b/sql/2020/fonts/04_14a.variable_font_comparison_fcp.sql @@ -19,7 +19,8 @@ FROM ( date = '2020-09-01' GROUP BY client, - page) + page +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -29,9 +30,9 @@ JOIN ( FROM `httparchive.pages.2020_09_01_*` GROUP BY - _TABLE_SUFFIX, url, payload) -USING - (client, page) + _TABLE_SUFFIX, url, payload +) +USING (client, page) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -39,9 +40,9 @@ JOIN ( FROM `httparchive.pages.2020_09_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (client) + _TABLE_SUFFIX +) +USING (client) GROUP BY client, total, diff --git a/sql/2020/fonts/04_18.VF_animation.sql b/sql/2020/fonts/04_18.VF_animation.sql index 6dd9992c273..9f4838999bc 100644 --- a/sql/2020/fonts/04_18.VF_animation.sql +++ b/sql/2020/fonts/04_18.VF_animation.sql @@ -3,7 +3,7 @@ CREATE TEMPORARY FUNCTION animatesVariableFonts(css STRING) RETURNS BOOLEAN LANGUAGE js -OPTIONS(library = "gs://httparchive/lib/css-utils.js") +OPTIONS (library = "gs://httparchive/lib/css-utils.js") AS ''' try { var ast = JSON.parse(css); @@ -27,6 +27,7 @@ FROM ( `httparchive.almanac.parsed_css` GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2020/fonts/04_19.color_fonts.sql b/sql/2020/fonts/04_19.color_fonts.sql index ae6edd6be45..d5b96a58734 100644 --- a/sql/2020/fonts/04_19.color_fonts.sql +++ b/sql/2020/fonts/04_19.color_fonts.sql @@ -16,7 +16,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2020-09-01' AND - type = 'font') + type = 'font' +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -24,9 +25,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_09_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (client), + _TABLE_SUFFIX +) +USING (client), # Color fonts have any of sbix, cbdt, svg or colr tables. UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)(sbix|CBDT|SVG|COLR)')) AS format GROUP BY diff --git a/sql/2020/fonts/04_20.icon_fonts.sql b/sql/2020/fonts/04_20.icon_fonts.sql index 5c70cc5b295..38766d32ae7 100644 --- a/sql/2020/fonts/04_20.icon_fonts.sql +++ b/sql/2020/fonts/04_20.icon_fonts.sql @@ -23,10 +23,8 @@ SELECT COUNT(DISTINCT page) / total_page AS pct_ficon FROM `httparchive.almanac.parsed_css` -JOIN - (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total_page FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY _TABLE_SUFFIX) -USING - (client) +JOIN (SELECT _TABLE_SUFFIX AS client, COUNT(0) AS total_page FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY _TABLE_SUFFIX) +USING (client) WHERE ARRAY_LENGTH(checksSupports(css)) > 0 AND date = '2020-08-01' OR url LIKE '%fontawesome%' OR url LIKE '%icomoon%' OR url LIKE '%fontello%' OR url LIKE '%iconic%' GROUP BY diff --git a/sql/2020/fonts/04_21.openType_axis.sql b/sql/2020/fonts/04_21.openType_axis.sql index 12c994b4e8f..f7e2080f4e9 100644 --- a/sql/2020/fonts/04_21.openType_axis.sql +++ b/sql/2020/fonts/04_21.openType_axis.sql @@ -23,7 +23,8 @@ FROM ( UNNEST(getAxes(JSON_EXTRACT(payload, '$._font_details'))) AS axis WHERE date = '2020-09-01' AND - type = 'font') + type = 'font' +) GROUP BY client, axis diff --git a/sql/2020/http/avg_percentage_of_resources_loaded_over_HTTP_by_version_per_site.sql b/sql/2020/http/avg_percentage_of_resources_loaded_over_HTTP_by_version_per_site.sql index e3b7a4eaf5a..3ac4c4bda56 100644 --- a/sql/2020/http/avg_percentage_of_resources_loaded_over_HTTP_by_version_per_site.sql +++ b/sql/2020/http/avg_percentage_of_resources_loaded_over_HTTP_by_version_per_site.sql @@ -28,7 +28,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page) + page +) GROUP BY client ORDER BY diff --git a/sql/2020/http/cdn_detail_by_cdn.sql b/sql/2020/http/cdn_detail_by_cdn.sql index 5f5e003858a..ac59cbee3a7 100644 --- a/sql/2020/http/cdn_detail_by_cdn.sql +++ b/sql/2020/http/cdn_detail_by_cdn.sql @@ -24,12 +24,14 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) GROUP BY client, page, firstHTML, - CDN), + CDN +), UNNEST([0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/cdn_summary.sql b/sql/2020/http/cdn_summary.sql index dbd2e4ad574..01173b1cc99 100644 --- a/sql/2020/http/cdn_summary.sql +++ b/sql/2020/http/cdn_summary.sql @@ -24,12 +24,14 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) GROUP BY client, page, firstHTML, - CDN), + CDN +), UNNEST(GENERATE_ARRAY(1, 100)) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/count_of_h2_and_h3_sites_grouped_by_server.sql b/sql/2020/http/count_of_h2_and_h3_sites_grouped_by_server.sql index 42529e7c2eb..4060f7fd6a2 100644 --- a/sql/2020/http/count_of_h2_and_h3_sites_grouped_by_server.sql +++ b/sql/2020/http/count_of_h2_and_h3_sites_grouped_by_server.sql @@ -12,8 +12,7 @@ FROM `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - firstHtml AND - ( + firstHtml AND ( LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR diff --git a/sql/2020/http/count_of_h2_and_h3_sites_using_push.sql b/sql/2020/http/count_of_h2_and_h3_sites_using_push.sql index 9920facc0ad..3bd505fb4d6 100644 --- a/sql/2020/http/count_of_h2_and_h3_sites_using_push.sql +++ b/sql/2020/http/count_of_h2_and_h3_sites_using_push.sql @@ -15,8 +15,7 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01' AND - ( + date = '2020-08-01' AND ( LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR diff --git a/sql/2020/http/count_of_preload_http_headers_with_nopush_attribute_set.sql b/sql/2020/http/count_of_preload_http_headers_with_nopush_attribute_set.sql index 1648f3bb9cd..a4d539ad86b 100644 --- a/sql/2020/http/count_of_preload_http_headers_with_nopush_attribute_set.sql +++ b/sql/2020/http/count_of_preload_http_headers_with_nopush_attribute_set.sql @@ -24,7 +24,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - firstHtml), + firstHtml +), UNNEST(link_headers) AS link_header WHERE link_header LIKE '%preload%' diff --git a/sql/2020/http/http2_1st_party_vs_3rd_party.sql b/sql/2020/http/http2_1st_party_vs_3rd_party.sql index 4e515f4ff2a..b104117bd73 100644 --- a/sql/2020/http/http2_1st_party_vs_3rd_party.sql +++ b/sql/2020/http/http2_1st_party_vs_3rd_party.sql @@ -23,13 +23,15 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) WHERE type = 'script' GROUP BY client, page, - is_third_party), + is_third_party +), UNNEST(GENERATE_ARRAY(1, 100)) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/http2_1st_party_vs_3rd_party_by_type.sql b/sql/2020/http/http2_1st_party_vs_3rd_party_by_type.sql index 5c80d94f63c..c390ea2df98 100644 --- a/sql/2020/http/http2_1st_party_vs_3rd_party_by_type.sql +++ b/sql/2020/http/http2_1st_party_vs_3rd_party_by_type.sql @@ -25,12 +25,14 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) GROUP BY client, page, is_third_party, - type), + type +), UNNEST([5, 10, 20, 30, 40, 50, 60, 70, 90, 95, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/http2_3rd_party_by_types.sql b/sql/2020/http/http2_3rd_party_by_types.sql index 0bd564c1347..bb77faf94cf 100644 --- a/sql/2020/http/http2_3rd_party_by_types.sql +++ b/sql/2020/http/http2_3rd_party_by_types.sql @@ -24,11 +24,13 @@ FROM ( WHERE r.date = '2020-08-01' AND tp.date = '2020-08-01' AND - NET.HOST(url) = domain) + NET.HOST(url) = domain + ) GROUP BY client, page, - category), + category +), UNNEST([5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/measure_number_of_tcp_connections_per_site.sql b/sql/2020/http/measure_number_of_tcp_connections_per_site.sql index 816a6892cf1..f0166b0b4a6 100644 --- a/sql/2020/http/measure_number_of_tcp_connections_per_site.sql +++ b/sql/2020/http/measure_number_of_tcp_connections_per_site.sql @@ -15,16 +15,17 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - firstHtml) + firstHtml +) JOIN ( SELECT _TABLE_SUFFIX AS client, url AS page, _connections FROM - `httparchive.summary_pages.2020_08_01_*`) -USING - (client, page), + `httparchive.summary_pages.2020_08_01_*` +) +USING (client, page), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_by_content_type.sql b/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_by_content_type.sql index 7804600c4db..163a72bca7c 100644 --- a/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_by_content_type.sql +++ b/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_by_content_type.sql @@ -20,8 +20,7 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - JSON_EXTRACT_SCALAR(payload, '$._was_pushed') = '1' AND - ( + JSON_EXTRACT_SCALAR(payload, '$._was_pushed') = '1' AND ( LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR @@ -31,7 +30,8 @@ FROM ( client, http_version, page, - type), + type +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_transferred.sql b/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_transferred.sql index 6d5fd4b6c62..ab19a8c8306 100644 --- a/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_transferred.sql +++ b/sql/2020/http/number_of_h2_and_h3_pushed_resources_and_bytes_transferred.sql @@ -8,27 +8,27 @@ SELECT APPROX_QUANTILES(num_requests, 1000)[OFFSET(percentile * 10)] AS pushed_requests, APPROX_QUANTILES(kb_transfered, 1000)[OFFSET(percentile * 10)] AS kb_transfered FROM ( - SELECT - client, - page, - JSON_EXTRACT_SCALAR(payload, '$._protocol') AS http_version, - SUM(CAST(JSON_EXTRACT_SCALAR(payload, '$._bytesIn') AS INT64) / 1024) AS kb_transfered, - COUNT(0) AS num_requests - FROM - `httparchive.almanac.requests` - WHERE - date = '2020-08-01' AND - JSON_EXTRACT_SCALAR(payload, '$._was_pushed') = '1' AND - ( - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/3%' - ) - GROUP BY - client, - http_version, - page), + SELECT + client, + page, + JSON_EXTRACT_SCALAR(payload, '$._protocol') AS http_version, + SUM(CAST(JSON_EXTRACT_SCALAR(payload, '$._bytesIn') AS INT64) / 1024) AS kb_transfered, + COUNT(0) AS num_requests + FROM + `httparchive.almanac.requests` + WHERE + date = '2020-08-01' AND + JSON_EXTRACT_SCALAR(payload, '$._was_pushed') = '1' AND ( + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/3%' + ) + GROUP BY + client, + http_version, + page +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/http/percentage_of_h2_and_h3_sites_affected_by_cdn_prioritization.sql b/sql/2020/http/percentage_of_h2_and_h3_sites_affected_by_cdn_prioritization.sql index 87b73b18d86..a6b0a5f9de7 100644 --- a/sql/2020/http/percentage_of_h2_and_h3_sites_affected_by_cdn_prioritization.sql +++ b/sql/2020/http/percentage_of_h2_and_h3_sites_affected_by_cdn_prioritization.sql @@ -8,23 +8,22 @@ SELECT COUNT(0) AS num_pages, ROUND(COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client), 4) AS pct FROM ( - SELECT - date, - client, - JSON_EXTRACT_SCALAR(payload, '$._protocol') AS http_version, - url, - _cdn_provider AS cdn - FROM - `httparchive.almanac.requests` - WHERE - date = '2020-08-01' AND - firstHtml AND - ( - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR - LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/3%' - ) + SELECT + date, + client, + JSON_EXTRACT_SCALAR(payload, '$._protocol') AS http_version, + url, + _cdn_provider AS cdn + FROM + `httparchive.almanac.requests` + WHERE + date = '2020-08-01' AND + firstHtml AND ( + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/2' OR + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE '%quic%' OR + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'h3%' OR + LOWER(JSON_EXTRACT_SCALAR(payload, '$._protocol')) LIKE 'http/3%' + ) ) AS pages LEFT JOIN `httparchive.almanac.h2_prioritization_cdns` diff --git a/sql/2020/http/percentiles_of_resources_loaded_over_HTTP2_or_better_per_site.sql b/sql/2020/http/percentiles_of_resources_loaded_over_HTTP2_or_better_per_site.sql index 4df03cb05e0..5e0874ae442 100644 --- a/sql/2020/http/percentiles_of_resources_loaded_over_HTTP2_or_better_per_site.sql +++ b/sql/2020/http/percentiles_of_resources_loaded_over_HTTP2_or_better_per_site.sql @@ -15,7 +15,8 @@ FROM ( date = '2020-08-01' GROUP BY client, - page), + page +), UNNEST([5, 6, 7, 8, 9, 10, 25, 50, 75, 90, 95, 100]) AS percentile GROUP BY client, diff --git a/sql/2020/http/tls_adoption_by_http_version.sql b/sql/2020/http/tls_adoption_by_http_version.sql index c22c96e05dc..89234219c50 100644 --- a/sql/2020/http/tls_adoption_by_http_version.sql +++ b/sql/2020/http/tls_adoption_by_http_version.sql @@ -18,7 +18,8 @@ FROM ( WHERE date = '2020-08-01' AND STARTS_WITH(url, 'https') AND - firstHtml) + firstHtml +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -28,9 +29,9 @@ JOIN ( WHERE STARTS_WITH(url, 'https') GROUP BY - client) -USING - (client) + client +) +USING (client) GROUP BY client, protocol, diff --git a/sql/2020/jamstack/adoption_of_image_formats_in_ssgs.sql b/sql/2020/jamstack/adoption_of_image_formats_in_ssgs.sql index c4d4ea849c9..5dba910a672 100644 --- a/sql/2020/jamstack/adoption_of_image_formats_in_ssgs.sql +++ b/sql/2020/jamstack/adoption_of_image_formats_in_ssgs.sql @@ -15,7 +15,8 @@ FROM ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - type = 'image') + type = 'image' +) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -26,9 +27,9 @@ JOIN ( LOWER(category) = 'static site generator' OR app = 'Next.js' OR app = 'Nuxt.js' OR - app = 'Docusaurus') -USING - (client, page) + app = 'Docusaurus' +) +USING (client, page) GROUP BY client, format diff --git a/sql/2020/jamstack/core_web_vitals_distribution.sql b/sql/2020/jamstack/core_web_vitals_distribution.sql index 3e76cb05626..9be15292ec8 100644 --- a/sql/2020/jamstack/core_web_vitals_distribution.sql +++ b/sql/2020/jamstack/core_web_vitals_distribution.sql @@ -27,7 +27,8 @@ FROM ( FROM `chrome-ux-report.materialized.device_summary` WHERE - date = '2020-08-01') + date = '2020-08-01' +) JOIN ( SELECT CASE @@ -51,9 +52,9 @@ JOIN ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - firstHtml) -USING - (client, url) + firstHtml +) +USING (client, url) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -66,7 +67,7 @@ JOIN ( app = 'Next.js' OR app = 'Nuxt.js' OR app = 'Docusaurus' - ) +) USING (client, url) WHERE CDN IS NOT NULL diff --git a/sql/2020/jamstack/core_web_vitals_passing.sql b/sql/2020/jamstack/core_web_vitals_passing.sql index e89f4f3205c..c6ae6e65116 100644 --- a/sql/2020/jamstack/core_web_vitals_passing.sql +++ b/sql/2020/jamstack/core_web_vitals_passing.sql @@ -1,10 +1,10 @@ #standardSQL # Core Web Vitals performance by CMS -CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( +CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( good / (good + needs_improvement + poor) >= 0.75 ); -CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( +CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( good + needs_improvement + poor > 0 ); @@ -17,28 +17,34 @@ SELECT # Origins with good LCP divided by origins with any LCP. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp, + COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL)) + ) AS pct_good_lcp, # Origins with good FID divided by origins with any FID. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid, + COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL)) + ) AS pct_good_fid, # Origins with good CLS divided by origins with any CLS. SAFE_DIVIDE( COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), - COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls, + COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL)) + ) AS pct_good_cls, # Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS. SAFE_DIVIDE( COUNT(DISTINCT IF( IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND IS_GOOD(fast_fid, avg_fid, slow_fid) AND - IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), + IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL + )), COUNT(DISTINCT IF( IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND - IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv + IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL + )) + ) AS pct_good_cwv FROM ( SELECT IF(device = 'desktop', 'desktop', 'mobile') AS client, @@ -47,7 +53,8 @@ FROM ( FROM `chrome-ux-report.materialized.device_summary` WHERE - date = '2020-08-01') + date = '2020-08-01' +) JOIN ( SELECT CASE @@ -71,9 +78,9 @@ JOIN ( `httparchive.almanac.requests` WHERE date = '2020-08-01' AND - firstHtml) -USING - (client, url) + firstHtml +) +USING (client, url) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -86,7 +93,7 @@ JOIN ( app = 'Next.js' OR app = 'Nuxt.js' OR app = 'Docusaurus' - ) +) USING (client, url) WHERE CDN IS NOT NULL diff --git a/sql/2020/jamstack/distribution_of_page_weight_requests_and_co2_grams_per_ssg_web_page.sql b/sql/2020/jamstack/distribution_of_page_weight_requests_and_co2_grams_per_ssg_web_page.sql index 986a8440bf2..1ea62e5dcf4 100644 --- a/sql/2020/jamstack/distribution_of_page_weight_requests_and_co2_grams_per_ssg_web_page.sql +++ b/sql/2020/jamstack/distribution_of_page_weight_requests_and_co2_grams_per_ssg_web_page.sql @@ -46,8 +46,8 @@ FROM ( app = 'Nuxt.js' OR app = 'Docusaurus' ) - USING - (_TABLE_SUFFIX, url)), + USING (_TABLE_SUFFIX, url) +), UNNEST([10, 25, 50, 75, 90]) AS percentile GROUP BY percentile, diff --git a/sql/2020/jamstack/median_lighthouse_score.sql b/sql/2020/jamstack/median_lighthouse_score.sql index db627dfa3d6..38193255c95 100644 --- a/sql/2020/jamstack/median_lighthouse_score.sql +++ b/sql/2020/jamstack/median_lighthouse_score.sql @@ -11,8 +11,7 @@ FROM `httparchive.lighthouse.2020_09_01_*` JOIN `httparchive.technologies.2020_09_01_*` -USING - (_TABLE_SUFFIX, url) +USING (_TABLE_SUFFIX, url) WHERE LOWER(category) = 'static site generator' OR app = 'Next.js' OR diff --git a/sql/2020/jamstack/ssg_compared_to_2019.sql b/sql/2020/jamstack/ssg_compared_to_2019.sql index 32645dca687..310142504d8 100644 --- a/sql/2020/jamstack/ssg_compared_to_2019.sql +++ b/sql/2020/jamstack/ssg_compared_to_2019.sql @@ -16,9 +16,9 @@ JOIN ( FROM `httparchive.summary_pages.2020_08_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE LOWER(category) = 'static site generator' OR app = 'Next.js' OR @@ -45,9 +45,9 @@ JOIN ( FROM `httparchive.summary_pages.2019_07_01_*` GROUP BY - _TABLE_SUFFIX) -USING - (_TABLE_SUFFIX) + _TABLE_SUFFIX +) +USING (_TABLE_SUFFIX) WHERE LOWER(category) = 'static site generator' OR app = 'Next.js' OR diff --git a/sql/2020/jamstack/third_party_bytes_and_requests_on_ssgs.sql b/sql/2020/jamstack/third_party_bytes_and_requests_on_ssgs.sql index f8f5419ba77..a9efb226aa0 100644 --- a/sql/2020/jamstack/third_party_bytes_and_requests_on_ssgs.sql +++ b/sql/2020/jamstack/third_party_bytes_and_requests_on_ssgs.sql @@ -19,7 +19,8 @@ FROM ( FROM `httparchive.almanac.requests` WHERE - date = '2020-08-01') + date = '2020-08-01' + ) JOIN ( SELECT _TABLE_SUFFIX AS client, @@ -30,9 +31,9 @@ FROM ( LOWER(category) = 'static site generator' OR app = 'Next.js' OR app = 'Nuxt.js' OR - app = 'Docusaurus') - USING - (client, page) + app = 'Docusaurus' + ) + USING (client, page) WHERE NET.HOST(url) IN ( SELECT @@ -41,10 +42,12 @@ FROM ( `httparchive.almanac.third_parties` WHERE date = '2020-08-01' AND - category != 'hosting') + category != 'hosting' + ) GROUP BY client, - page), + page +), UNNEST([10, 25, 50, 75, 90, 100]) AS percentile GROUP BY percentile, diff --git a/sql/2020/javascript/async_and_defer.sql b/sql/2020/javascript/async_and_defer.sql index fee33c37ca4..28ea1217364 100644 --- a/sql/2020/javascript/async_and_defer.sql +++ b/sql/2020/javascript/async_and_defer.sql @@ -20,6 +20,7 @@ FROM ( UNNEST(REGEXP_EXTRACT_ALL(body, r'(?i)(]*>)')) AS script WHERE date = '2020-08-01' AND - firstHtml) + firstHtml +) GROUP BY client diff --git a/sql/2020/javascript/avg_pct_per_page_scripts_using_async_defer_module_nomodule.sql b/sql/2020/javascript/avg_pct_per_page_scripts_using_async_defer_module_nomodule.sql index 80d6cae6e63..c25256c7aca 100644 --- a/sql/2020/javascript/avg_pct_per_page_scripts_using_async_defer_module_nomodule.sql +++ b/sql/2020/javascript/avg_pct_per_page_scripts_using_async_defer_module_nomodule.sql @@ -25,12 +25,14 @@ FROM ( `httparchive.almanac.summary_response_bodies` WHERE date = '2020-08-01' AND - firstHtml), + firstHtml + ), UNNEST(scripts) AS script WHERE REGEXP_CONTAINS(script, r'\bsrc\b') GROUP BY client, - page) + page +) GROUP BY client diff --git a/sql/2020/javascript/breakdown_of_scripts_using_async_defer_module_nomodule.sql b/sql/2020/javascript/breakdown_of_scripts_using_async_defer_module_nomodule.sql index 44300506ea6..0f564f4a7a6 100644 --- a/sql/2020/javascript/breakdown_of_scripts_using_async_defer_module_nomodule.sql +++ b/sql/2020/javascript/breakdown_of_scripts_using_async_defer_module_nomodule.sql @@ -15,19 +15,18 @@ SELECT SUM(IF(script LIKE '%defer%', 1, 0)) / SUM(IF(script LIKE '%src%', 1, 0)) AS pct_external_defer, SUM(IF(script LIKE '%module%', 1, 0)) / SUM(IF(script LIKE '%src%', 1, 0)) AS pct_external_module, SUM(IF(script LIKE '%nomodule%', 1, 0)) / SUM(IF(script LIKE '%src%', 1, 0)) AS pct_external_nomodule -FROM - ( - SELECT - client, - page, - url, - REGEXP_EXTRACT_ALL(LOWER(body), '(