Skip to content

Commit a478d30

Browse files
authored
download external images (mdn#1188)
* download external images Part of mdn#1103 * feedbacked
1 parent 028b100 commit a478d30

File tree

11 files changed

+79
-42
lines changed

11 files changed

+79
-42
lines changed

build/check-images.js

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ function checkImageReferences(doc, $, options, { url, rawContent }) {
1919

2020
const checkImages = options.flawLevels.get("images") !== FLAW_LEVELS.IGNORE;
2121

22-
function addImageFlaw($img, src, { explanation, suggestion = null }) {
22+
function addImageFlaw(
23+
$img,
24+
src,
25+
{ explanation, externalImage = false, suggestion = null }
26+
) {
2327
for (const match of findMatchesInText(src, rawContent, {
2428
attribute: "src",
2529
})) {
@@ -39,6 +43,7 @@ function checkImageReferences(doc, $, options, { url, rawContent }) {
3943
fixable,
4044
suggestion,
4145
explanation,
46+
externalImage,
4247
...match,
4348
});
4449
}
@@ -94,18 +99,9 @@ function checkImageReferences(doc, $, options, { url, rawContent }) {
9499
} else {
95100
addImageFlaw(img, src, {
96101
explanation: "External image URL",
102+
externalImage: true,
97103
});
98104
}
99-
100-
// TODO: It might be prudent to cease allowing any remote images.
101-
// If you rely on an external domain that isn't our designated
102-
// default domain, we should probably download it and check it in.
103-
// It means less SSL and network overheads if we can download all
104-
// images on an existing HTTP2 connection, and if the domain is
105-
// something out of our control we're potentially at mercy of the
106-
// images suddenly disappearing.
107-
// Due to so much else going on, let's not make a huge stink about
108-
// it at the moment (peterbe, Aug 2020).
109105
}
110106
} else {
111107
// Remember, you can not have search parameters on local images.

build/flaws.js

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
const fs = require("fs");
2+
const path = require("path");
3+
14
const chalk = require("chalk");
5+
const got = require("got");
26

37
const { Document, Redirect } = require("../content");
48
const { FLAW_LEVELS } = require("./constants");
@@ -7,6 +11,7 @@ const {
711
findMatchesInText,
812
replaceMatchesInText,
913
} = require("./matches-in-text");
14+
const { fstat } = require("fs-extra");
1015

1116
function injectFlaws(doc, $, options, { rawContent }) {
1217
if (doc.isArchive) return;
@@ -169,7 +174,7 @@ function injectFlaws(doc, $, options, { rawContent }) {
169174
}
170175
}
171176

172-
function fixFixableFlaws(doc, options, document) {
177+
async function fixFixableFlaws(doc, options, document) {
173178
if (!options.fixFlaws || document.isArchive) return;
174179

175180
let newRawHTML = document.rawHTML;
@@ -229,27 +234,52 @@ function fixFixableFlaws(doc, options, document) {
229234
}
230235
}
231236

232-
// Any 'images' flaws with a suggestion...
237+
// Any 'images' flaws with a suggestion or external image...
233238
for (const flaw of doc.flaws.images || []) {
234-
if (!flaw.suggestion) {
239+
if (!(flaw.suggestion || flaw.externalImage)) {
235240
continue;
236241
}
237242
// The reason we're not using the parse HTML, as a cheerio object `$`
238243
// is because the raw HTML we're dealing with isn't actually proper
239244
// HTML. It's only proper HTML when the kumascript macros have been
240245
// expanded.
241246
const htmlBefore = newRawHTML;
242-
newRawHTML = replaceMatchesInText(flaw.src, newRawHTML, flaw.suggestion, {
247+
let newSrc;
248+
if (flaw.externalImage) {
249+
// Sanity check that it's an external image
250+
const url = new URL(flaw.src);
251+
if (url.protocol !== "https:") {
252+
throw new Error(`Insecure image URL ${flaw.src}`);
253+
}
254+
try {
255+
const imageBuffer = await got(flaw.src, {
256+
responseType: "buffer",
257+
resolveBodyOnly: true,
258+
timeout: 10000,
259+
retry: 3,
260+
});
261+
const destination = path.join(
262+
Document.getFolderPath(document.metadata),
263+
path.basename(url.pathname)
264+
);
265+
fs.writeFileSync(destination, imageBuffer);
266+
console.log(`Downloaded ${flaw.src} to ${destination}`);
267+
newSrc = path.basename(destination);
268+
} catch (error) {
269+
console.error(error);
270+
throw error;
271+
}
272+
} else {
273+
newSrc = flaw.suggestion;
274+
}
275+
newRawHTML = replaceMatchesInText(flaw.src, newRawHTML, newSrc, {
243276
inAttribute: "src",
244277
});
245-
if (htmlBefore !== newRawHTML) {
246-
// flaw.fixed = !options.fixFlawsDryRun;
247-
}
248278
if (loud) {
249279
console.log(
250280
chalk.grey(
251281
`Fixed image ${chalk.white.bold(flaw.src)} to ${chalk.white.bold(
252-
flaw.suggestion
282+
newSrc
253283
)}`
254284
)
255285
);

build/index.js

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
const childProcess = require("child_process");
2+
23
const chalk = require("chalk");
3-
const PACKAGE_REPOSITORY_URL = require("../package.json").repository;
44

5+
const PACKAGE_REPOSITORY_URL = require("../package.json").repository;
56
const { buildURL, Document } = require("../content");
67
const kumascript = require("../kumascript");
78

@@ -198,7 +199,12 @@ async function buildDocument(document, documentOptions = {}) {
198199

199200
// If fixFlaws is on and the doc has fixable flaws, this returned
200201
// raw HTML string will be different.
201-
fixFixableFlaws(doc, options, document);
202+
try {
203+
await fixFixableFlaws(doc, options, document);
204+
} catch (error) {
205+
console.error(error);
206+
throw error;
207+
}
202208

203209
// Apply syntax highlighting all <pre> tags.
204210
syntaxHighlight($, doc);

client/src/document/toolbar/flaws.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ function Flaws({ doc, flaws }: { doc: Doc; flaws: FlawCount[] }) {
174174
const fixableFlaws = Object.values(doc.flaws)
175175
.map((flaws) => {
176176
return flaws.filter(
177-
(flaw) => !flaw.fixed && (flaw.suggestion || flaw.fixable)
177+
(flaw) =>
178+
!flaw.fixed && (flaw.suggestion || flaw.fixable || flaw.externalImage)
178179
);
179180
})
180181
.flat();
@@ -611,7 +612,8 @@ function Images({
611612
>
612613
line {flaw.line}:{flaw.column}
613614
</a>{" "}
614-
{flaw.fixable && <FixableFlawBadge />} <br />
615+
{(flaw.fixable || flaw.externalImage) && <FixableFlawBadge />}{" "}
616+
<br />
615617
{flaw.suggestion && (
616618
<small>
617619
<b>Suggestion:</b>

client/src/document/types.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export interface GenericFlaw {
99
suggestion: string | null;
1010
fixable?: boolean;
1111
fixed?: true;
12+
externalImage?: boolean;
1213
}
1314

1415
export interface BrokenLink extends GenericFlaw {

content/document.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,17 @@ function urlToFolderPath(url) {
7979
}
8080

8181
function create(html, metadata) {
82-
const folderPath = buildPath(
83-
path.join(CONTENT_ROOT, metadata.locale),
84-
metadata.slug
85-
);
82+
const folderPath = getFolderPath(metadata);
8683

8784
fs.mkdirSync(folderPath, { recursive: true });
8885

8986
saveHTMLFile(getHTMLPath(folderPath), trimLineEndings(html), metadata);
9087
}
9188

89+
function getFolderPath(metadata) {
90+
return buildPath(path.join(CONTENT_ROOT, metadata.locale), metadata.slug);
91+
}
92+
9293
function archive(renderedHTML, rawHTML, metadata, wikiHistory) {
9394
if (!CONTENT_ARCHIVE_ROOT) {
9495
throw new Error("Can't archive when CONTENT_ARCHIVE_ROOT is not set");
@@ -284,6 +285,7 @@ module.exports = {
284285
update,
285286
del,
286287
urlToFolderPath,
288+
getFolderPath,
287289

288290
findByURL,
289291
findAll,

content/files/en-us/web/html/element/meta/name/theme-color/index.html

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
---
22
title: theme-color
33
slug: Web/HTML/Element/meta/name/theme-color
4-
summary: >-
5-
The theme-color value for the name attribute of the meta element indicates a
6-
suggested color that user agents should use to customize the display of the
7-
page or of the surrounding user interface. If specified, the content attribute
8-
must contain a valid CSS color.
94
tags:
105
- Attribute
116
- HTML
@@ -23,7 +18,7 @@ <h2 id="Example">Example</h2>
2318

2419
<p>The following image shows the effect that the {{htmlelement("meta")}} element above will have on a document displayed in Chrome running on an Android mobile device.</p>
2520

26-
<figure><img alt="Image showing the effect of using theme-color" src="https://mdn.mozillademos.org/files/17199/theme-color.png" style="height: 686px; width: 894px;">
21+
<figure><img alt="Image showing the effect of using theme-color" src="theme-color.png" style="height: 686px; width: 894px;">
2722
<figcaption><small>Image credit: from <a href="https://developers.google.com/web/fundamentals/design-and-ux/browser-customization">Icons &amp; Browser Colors</a>, created and <a href="https://developers.google.com/readme/policies">shared by Google</a> and used according to terms described in the <a href="https://creativecommons.org/licenses/by/4.0/">Creative Commons 4.0 Attribution License</a>.</small></figcaption>
2823
</figure>
2924

31.3 KB
Loading

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
"front-matter": "4.0.2",
4545
"fuzzysearch": "1.0.3",
4646
"glob": "^7.1.6",
47+
"got": "11.5.2",
4748
"ignore-loader": "0.1.2",
4849
"imagemin": "7.0.1",
4950
"imagemin-gifsicle": "7.0.0",
@@ -89,7 +90,6 @@
8990
"extend": "^3.0.2",
9091
"foreman": "3.0.1",
9192
"fs-extra": "9.0.1",
92-
"got": "^11.5.2",
9393
"husky": "4.2.5",
9494
"jest": "^24.0.0",
9595
"jest-environment-jsdom-sixteen": "^1.0.3",

server/document.js

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,15 @@ router.put("/fixfixableflaws", withDocument, async (req, res) => {
3232
return res.status(400).send("Can't fix archived documents");
3333
}
3434
// To get the 'doc' we have to find the built art
35-
await buildDocument(req.document, {
36-
fixFlaws: true,
37-
fixFlawsVerbose: true,
38-
});
35+
try {
36+
await buildDocument(req.document, {
37+
fixFlaws: true,
38+
fixFlawsVerbose: true,
39+
});
40+
} catch (error) {
41+
console.error(`Error in buildDocument(${req.document.url})`, error);
42+
return res.status(500).send(error.toString());
43+
}
3944

4045
// Just because you *build* document, doesn't mean it writes the new
4146
// built content to disk. So, if *was* already built, with all the flaws

0 commit comments

Comments
 (0)