Skip to content

Commit 4a19dc1

Browse files
committed
Merge branch 'main' into privacy-markdown-2024
2 parents 781d0af + 6375165 commit 4a19dc1

File tree

520 files changed

+17778
-491
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

520 files changed

+17778
-491
lines changed

.github/workflows/code-static-analysis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
uses: actions/checkout@v4
3636
- name: Set up Python 3.12
3737
if: ${{ matrix.language == 'python' }}
38-
uses: actions/setup-python@v5.2.0
38+
uses: actions/setup-python@v5.3.0
3939
with:
4040
python-version: '3.12'
4141
- name: Install dependencies

.github/workflows/lintsql.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
# Full git history is needed to get a proper list of changed files within `super-linter`
2020
fetch-depth: 0
2121
- name: Set up Python 3.12
22-
uses: actions/setup-python@v5.2.0
22+
uses: actions/setup-python@v5.3.0
2323
with:
2424
python-version: '3.12'
2525
- name: Lint SQL code

.github/workflows/predeploy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
with:
3838
node-version: '20'
3939
- name: Set up Python 3.12
40-
uses: actions/setup-python@v5.2.0
40+
uses: actions/setup-python@v5.3.0
4141
with:
4242
python-version: '3.12'
4343
- name: Install Asian Fonts

.github/workflows/test_website.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
with:
3131
node-version: '20'
3232
- name: Set up Python 3.12
33-
uses: actions/setup-python@v5.2.0
33+
uses: actions/setup-python@v5.3.0
3434
with:
3535
python-version: '3.12'
3636
- name: Run the website

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ We do almost all of our project planning here on GitHub. Browse the open [issues
4040

4141
We also have a [`#web-almanac`](https://join.slack.com/t/httparchive/shared_invite/zt-45sgwmnb-eDEatOhqssqNAKxxOSLAaA) channel on the HTTP Archive Slack where we chat about project updates.
4242

43-
For news and announcements, follow [@HTTPArchive](https://twitter.com/HTTPArchive) on Twitter.
43+
For news and announcements, follow [@HTTPArchive](https://x.com/HTTPArchive) on Twitter.
4444

4545
## [License](https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/LICENSE)
4646

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
# Average Lighthouse category scores per client and detected framework / library.
# Only root pages with a non-empty Lighthouse report from the 2024-06-01 crawl are considered.
WITH framework_pages AS (
  # One row per (page, technology) where the technology belongs to at least one framework/library category
  SELECT
    client,
    page,
    tech.technology AS framework,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.performance.score') AS FLOAT64) AS performance_score,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.accessibility.score') AS FLOAT64) AS accessibility_score,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.best-practices.score') AS FLOAT64) AS best_practices_score,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.seo.score') AS FLOAT64) AS seo_score
  FROM
    `httparchive.all.pages`
  CROSS JOIN
    UNNEST(technologies) AS tech
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    lighthouse IS NOT NULL AND
    lighthouse != '{}' AND
    tech.technology IS NOT NULL AND
    EXISTS (
      SELECT 1
      FROM UNNEST(tech.categories) AS category
      WHERE category IN ('Web frameworks', 'JavaScript libraries', 'Frontend frameworks', 'JavaScript frameworks')
    )
),

page_scores AS (
  # Collapse to a single row per client / page / framework.
  # The scores come from the page's Lighthouse report, so they are identical on every row of a page;
  # AVG is only used as a way to pick that value.
  SELECT
    client,
    page,
    framework,
    AVG(performance_score) AS performance_score,
    AVG(accessibility_score) AS accessibility_score,
    AVG(best_practices_score) AS best_practices_score,
    AVG(seo_score) AS seo_score
  FROM
    framework_pages
  GROUP BY
    client,
    page,
    framework
)

SELECT
  client,
  framework,
  AVG(performance_score) AS avg_performance_score,
  AVG(accessibility_score) AS avg_accessibility_score,
  AVG(best_practices_score) AS avg_best_practices_score,
  AVG(seo_score) AS avg_seo_score,
  COUNT(DISTINCT page) AS total_pages # Number of distinct pages on which the framework was detected
FROM
  page_scores
GROUP BY
  client,
  framework
ORDER BY
  total_pages DESC;
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#standardSQL
# Overall Accessibility (A11y) technology, ie. Overlays, usage by domain rank
#
# Output: one row per client / is_root_page / rank_grouping with
#   total_in_rank            - all pages in that client / is_root_page / rank grouping
#   sites_with_a11y_tech     - distinct pages with at least one technology in the 'Accessibility' category
#   pct_sites_with_a11y_tech - sites_with_a11y_tech / total_in_rank
#
# The denominator is computed per is_root_page as well as per client and rank grouping, so that it
# describes the same population as the numerator (which is grouped by is_root_page).

WITH pages_by_rank AS (
  # Repeat every page once per rank grouping it falls within, which makes the groupings cumulative
  # (a page with rank <= 1000 is also part of the 10000, 100000, ... groupings).
  SELECT
    client,
    is_root_page,
    page,
    technologies,
    rank_grouping
  FROM
    `httparchive.all.pages`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2024-06-01' AND
    rank <= rank_grouping # Include only pages within the specified rank grouping
),

rank_totals AS (
  # Denominator: number of page rows in each client / is_root_page / rank grouping
  SELECT
    client,
    is_root_page,
    rank_grouping,
    COUNT(0) AS total_in_rank
  FROM
    pages_by_rank
  GROUP BY
    client,
    is_root_page,
    rank_grouping
),

a11y_pages AS (
  # Numerator source: pages with at least one technology categorised as 'Accessibility'.
  # DISTINCT collapses pages that have several such technologies or categories.
  SELECT DISTINCT
    client,
    is_root_page,
    rank_grouping,
    page
  FROM
    pages_by_rank,
    UNNEST(technologies) AS tech,
    UNNEST(tech.categories) AS category
  WHERE
    category = 'Accessibility'
)

SELECT
  client,
  is_root_page,
  rank_grouping, # Grouping of domains by their rank (e.g., top 1000, top 10000, etc.)
  total_in_rank, # Total number of pages within the client / is_root_page / rank grouping
  COUNT(DISTINCT page) AS sites_with_a11y_tech, # Number of unique pages that use accessibility technology
  COUNT(DISTINCT page) / total_in_rank AS pct_sites_with_a11y_tech # Share of pages using accessibility technology within the grouping
FROM
  a11y_pages
JOIN
  rank_totals
USING (client, is_root_page, rank_grouping)
GROUP BY
  client,
  is_root_page,
  rank_grouping,
  total_in_rank
ORDER BY
  client,
  is_root_page,
  rank_grouping
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
#standardSQL
# Accessibility (A11y) technology, ie. Overlays, usage by client
#
# Output: one row per client / is_root_page with the total number of distinct pages, the number of
# distinct pages that have at least one technology in the 'Accessibility' category, and the ratio.
#
# The accessibility check is done with a per-page EXISTS over the technologies array rather than by
# cross joining UNNEST(technologies): a cross join with an empty array yields no rows, which would
# silently drop pages without any detected technology from total_sites and inflate the percentage.

WITH page_flags AS (
  SELECT
    client,
    is_root_page,
    page,
    EXISTS (
      SELECT 1
      FROM
        UNNEST(technologies) AS tech,
        UNNEST(tech.categories) AS category
      WHERE
        category = 'Accessibility'
    ) AS has_a11y_tech
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' # Specific date for data extraction
)

SELECT
  client, # Crawl client (desktop or mobile in the HTTP Archive dataset)
  is_root_page,
  COUNT(DISTINCT page) AS total_sites, # Total number of unique pages for the client / page type
  COUNT(DISTINCT IF(has_a11y_tech, page, NULL)) AS sites_with_a11y_tech, # Number of unique pages that use accessibility technology
  COUNT(DISTINCT IF(has_a11y_tech, page, NULL)) / COUNT(DISTINCT page) AS pct_sites_with_a11y_tech # Share of pages using accessibility technology
FROM
  page_flags
GROUP BY
  client,
  is_root_page
ORDER BY
  client,
  is_root_page;
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
#standardSQL
# A11Y technology usage by domain rank
WITH bucketed_pages AS (
  -- Tag every page from the analysis date with the smallest rank threshold its rank fits under.
  -- Pages whose rank matches no branch get a NULL rank_grouping.
  SELECT
    client,
    is_root_page,
    page,
    technologies, -- Kept so the final query can expand it per technology
    CASE
      WHEN rank <= 1000 THEN 1000
      WHEN rank <= 10000 THEN 10000
      WHEN rank <= 100000 THEN 100000
      WHEN rank <= 1000000 THEN 1000000
      WHEN rank <= 10000000 THEN 10000000
      WHEN rank <= 100000000 THEN 100000000
    END AS rank_grouping
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' -- Crawl date under analysis
),

bucket_sizes AS (
  -- Denominator: distinct pages per client / page type / rank grouping
  SELECT
    client,
    is_root_page,
    rank_grouping,
    COUNT(DISTINCT page) AS total_in_rank
  FROM
    bucketed_pages
  GROUP BY
    client,
    is_root_page,
    rank_grouping
)

SELECT
  bp.client,
  bp.is_root_page,
  bp.rank_grouping,
  bs.total_in_rank, -- Distinct pages within the rank grouping
  tech.technology AS app, -- Accessibility technology detected on the page
  COUNT(DISTINCT bp.page) AS sites_with_app, -- Distinct pages using this technology
  SAFE_DIVIDE(COUNT(DISTINCT bp.page), bs.total_in_rank) AS pct_sites_with_app -- Share of the grouping's pages using this technology
FROM
  bucketed_pages AS bp
INNER JOIN
  bucket_sizes AS bs
ON
  bs.client = bp.client AND
  bs.is_root_page = bp.is_root_page AND
  bs.rank_grouping = bp.rank_grouping
CROSS JOIN
  UNNEST(bp.technologies) AS tech -- One row per technology detected on the page
WHERE
  'Accessibility' IN UNNEST(tech.categories) -- Keep only technologies categorised as accessibility-related
GROUP BY
  bp.client,
  bp.is_root_page,
  bp.rank_grouping,
  bs.total_in_rank,
  tech.technology
ORDER BY
  app,
  bp.rank_grouping,
  bp.client,
  bp.is_root_page;
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
#standardSQL
# Alt text ending in an image extension
#
# Output: one row per client / is_root_page / extension, combining group-wide totals (from the joined
# subquery) with per-extension counts. Ratios use SAFE_DIVIDE so that a zero denominator yields NULL
# instead of failing the whole query.

# Turns the `file_extension_alts.file_extensions` object of the a11y custom metric into an array of
# {extension, total} structs. Returns an empty array when the payload cannot be parsed or the object
# is missing (any exception is caught).
CREATE TEMPORARY FUNCTION getUsedExtensions(payload STRING)
RETURNS ARRAY<STRUCT<extension STRING, total INT64>> LANGUAGE js AS '''
try {
  const a11y = JSON.parse(payload);

  return Object.entries(a11y.file_extension_alts.file_extensions).map(([extension, total]) => {
    return {extension, total};
  });
} catch (e) {
  return [];
}
''';
SELECT
  client,
  is_root_page,
  sites_with_non_empty_alt,
  sites_with_file_extension_alt,
  total_alts_with_file_extensions,

  # Of sites with a non-empty alt, what % have an alt with a file extension
  SAFE_DIVIDE(sites_with_file_extension_alt, sites_with_non_empty_alt) AS pct_sites_with_file_extension_alt,
  # Given a random alt, how often will it end in a file extension
  SAFE_DIVIDE(total_alts_with_file_extensions, total_non_empty_alts) AS pct_alts_with_file_extension,

  extension_stat.extension AS extension,
  # Number of pages whose extension map contains this file extension
  COUNT(0) AS total_sites_using,
  # Of sites with a non-empty alt, what % have an alt with this file extension
  SAFE_DIVIDE(COUNT(0), sites_with_non_empty_alt) AS pct_applicable_sites_using,

  # Sum of this extension's per-page totals, i.e. how many alts end in this file extension
  SUM(extension_stat.total) AS total_occurances,
  # Given a random alt ending in a file extension, how often will it end in this file extension
  SAFE_DIVIDE(SUM(extension_stat.total), total_alts_with_file_extensions) AS pct_total_occurances
FROM
  `httparchive.all.pages`,
  UNNEST(getUsedExtensions(JSON_EXTRACT(custom_metrics, '$.a11y'))) AS extension_stat
LEFT JOIN (
  # Group-wide totals per client / is_root_page, computed over every page of the crawl
  SELECT
    client,
    is_root_page,
    COUNTIF(total_non_empty_alt > 0) AS sites_with_non_empty_alt,
    COUNTIF(total_with_file_extension > 0) AS sites_with_file_extension_alt,

    SUM(total_non_empty_alt) AS total_non_empty_alts,
    SUM(total_with_file_extension) AS total_alts_with_file_extensions
  FROM (
    # Per-page counts read from the custom metrics
    SELECT
      client,
      is_root_page,
      CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.markup.images.img.alt.present') AS INT64) AS total_non_empty_alt,
      CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.a11y.file_extension_alts.total_with_file_extension') AS INT64) AS total_with_file_extension
    FROM
      `httparchive.all.pages`
    WHERE
      date = '2024-06-01'
  )
  GROUP BY
    client,
    is_root_page
) USING (client, is_root_page)
WHERE
  date = '2024-06-01'
GROUP BY
  client,
  is_root_page,
  sites_with_non_empty_alt,
  sites_with_file_extension_alt,
  total_non_empty_alts,
  total_alts_with_file_extensions,
  extension
ORDER BY
  client,
  is_root_page,
  total_occurances DESC

0 commit comments

Comments
 (0)