Skip to content

Commit 5935560

Browse files
committed
Use CDP Page.setDownloadBehavior and headless for demo
1 parent 850be1f commit 5935560

File tree

2 files changed

+214
-83
lines changed

2 files changed

+214
-83
lines changed

verify_download.js

Lines changed: 46 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@
2828
* node verify_download.js
2929
*/
3030

31-
const puppeteer = require('puppeteer');
3231
const fs = require('fs');
33-
const path = require('path');
3432
const os = require('os');
33+
const path = require('path');
34+
const puppeteer = require('puppeteer');
3535

36-
const DOWNLOADS_FOLDER = `${os.homedir()}/Downloads`;
36+
const DOWNLOAD_PATH = path.resolve(__dirname, 'downloads');
3737

3838
/**
3939
* From @xprudhomme.
@@ -43,9 +43,9 @@ const DOWNLOADS_FOLDER = `${os.homedir()}/Downloads`;
4343
* @param {string} filePath
4444
* @param {integer} timeout
4545
* @returns {!Promise<undefined>} Resolves when file has been created. Rejects
46-
* if timout is reached.
46+
* if timeout is reached.
4747
*/
48-
function checkFileExists(filePath, timeout=15000) {
48+
function waitForFileExists(filePath, timeout=15000) {
4949
return new Promise((resolve, reject) => {
5050
const dir = path.dirname(filePath);
5151
const basename = path.basename(filePath);
@@ -73,96 +73,59 @@ function checkFileExists(filePath, timeout=15000) {
7373
});
7474
}
7575

76-
/**
77-
* @param {!Browser} browser
78-
* @param {string} url The URL of the download file to wait for.
79-
* @returns {!Promise<!Object>} Metadata about the latest file in Download Manager.
80-
*/
81-
async function waitForFileToDownload(browser, url) {
82-
const downloadPage = await browser.newPage();
83-
// Note: navigating to this page only works in headful chrome.
84-
await downloadPage.goto('chrome://downloads/');
85-
86-
// Wait for our download to show up in the list by matching on its url.
87-
const jsHandle = await downloadPage.waitForFunction(downloadUrl => {
88-
const manager = document.querySelector('downloads-manager');
89-
const downloads = manager.items_.length;
90-
const lastDownload = manager.items_[0];
91-
if (downloads && lastDownload.url === downloadUrl &&
92-
lastDownload.state === 'COMPLETE') {
93-
return manager.items_[0];
94-
}
95-
}, {polling: 100}, url);
96-
97-
const fileMeta = await jsHandle.jsonValue();
98-
99-
await downloadPage.close();
100-
101-
return fileMeta;
102-
}
103-
104-
/**
105-
* @param {!Browser} browser
106-
* @param {string} url The url of the page to navigate to.
107-
* @param {string} text The link with this text to find and click on the page.
108-
* @returns {!Promise<?string>} The download resource's url.
109-
*/
110-
async function clickDownloadLink(browser, url, text) {
111-
const page = await browser.newPage();
112-
await page.goto(url, {waitUntil: 'networkidle2'});
113-
114-
const downloadUrl = await page.evaluate((text) => {
115-
const link = document.evaluate(`//a[text()="${text}"]`, document,
116-
null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
117-
if (link) {
118-
link.click();
119-
return link.href;
120-
}
121-
return null;
122-
}, text);
123-
124-
await page.close();
125-
126-
return downloadUrl;
127-
}
128-
12976
(async() => {
13077

131-
const browser = await puppeteer.launch({
132-
headless: false,
133-
// dumpio: true,
134-
});
78+
const browser = await puppeteer.launch();
13579

136-
// TODO: setDownloadBehavior would be a good approach, as we could check
137-
// that the file shows up in the location specified by downloadPath. Howeverm
138-
// that arg doesn't currently work.
139-
// const client = await page.target().createCDPSession();
140-
// await client.send('Page.setDownloadBehavior', {
141-
// behavior: 'allow',
142-
// downloadPath: path.resolve(__dirname, 'downloads'),
143-
// });
80+
const page = await browser.newPage();
14481

145-
// await client.detach();
82+
// Change from the default ~/Downloads folder to our own.
83+
const client = await page.target().createCDPSession();
84+
await client.send('Page.setDownloadBehavior', {
85+
behavior: 'allow',
86+
downloadPath: DOWNLOAD_PATH,
87+
});
14688

147-
// 1. navigate to a page with a bunch links to download.
148-
// 2. click the "Short Selling (csv)" link on the page. The browser force downloads the file.
14989
const url = 'https://www.nseindia.com/products/content/equities/equities/homepage_eq.htm';
150-
const downloadUrl = await clickDownloadLink(browser, url, 'Short Selling (csv)');
90+
await page.goto(url);
91+
// Wait for main content area to have list of links.
92+
await page.waitForSelector('.main_content', {visible: true, timeout: 5000});
93+
94+
const downloadUrl = await page.evaluate(() => {
95+
const link = document.evaluate(`//a[text()="Short Selling (csv)"]`, document,
96+
null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
97+
if (link) {
98+
// Prevent link from opening up in a new tab. Puppeteer won't respect
99+
// the Page.setDownloadBehavior on the new tab and the file ends up in the
100+
// default download folder.
101+
link.target = '';
102+
link.click();
103+
return link.href;
104+
}
105+
return null;
106+
});
151107

152108
if (!downloadUrl) {
153-
console.error('Did not find download link!');
109+
console.warn('Did not find link to download!');
110+
await browser.close();
154111
return;
155112
}
156113

157-
// 3. Open chrome:downloads and wait for the file to be downloaded.
158-
const fileMeta = await waitForFileToDownload(browser, downloadUrl);
159-
console.log(`"${fileMeta.file_name}" was downloaded`);
114+
// Wait for file response to complete.
115+
await new Promise(resolve => {
116+
page.on('response', async resp => {
117+
if (resp.url() === downloadUrl) {
118+
resolve();
119+
}
120+
});
121+
});
122+
123+
console.log('Downloaded.');
160124

161-
// 4. Optionally check that the file really ends up in the expected location
162-
// on the filesystem.
163-
const exists = await checkFileExists(`${DOWNLOADS_FOLDER}/${fileMeta.file_name}`);
164-
console.assert(exists, `${fileMeta.file_name} was not downloaded to correct location.`);
125+
// Verify it's on the file system.
126+
await waitForFileExists(`${DOWNLOAD_PATH}/ShortSelling.csv`);
127+
console.log('Exists!');
165128

166129
await browser.close();
167130

168-
})();
131+
})();

verify_download2.js

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/**
2+
* Copyright 2018 Google Inc. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
* @author ebidel@ (Eric Bidelman)
17+
*/
18+
19+
/**
20+
* Note: this approach only works in headful Chrome.
21+
* Another approach to verifying a file gets downloaded. Shows how to click a
22+
* file download link and verify that the file gets downloaded in the
23+
* chrome:downloads page.
24+
*
25+
* Install:
26+
* npm i puppeteer
27+
* Run:
28+
* node verify_download2.js
29+
*/
30+
31+
const puppeteer = require('puppeteer');
32+
const fs = require('fs');
33+
const path = require('path');
34+
const os = require('os');
35+
36+
const DOWNLOADS_FOLDER = `${os.homedir()}/Downloads`;
37+
38+
/**
 * From @xprudhomme.
 * Check if file exists, watching containing directory meanwhile.
 * Resolve if the file exists, or if the file is created before the timeout
 * occurs.
 * @param {string} filePath Path of the file to wait for.
 * @param {number} timeout Maximum time to wait, in milliseconds.
 * @returns {!Promise<boolean>} Resolves with `true` once the file is readable.
 *     Rejects if the timeout is reached or the directory watcher fails.
 */
function checkFileExists(filePath, timeout=15000) {
  return new Promise((resolve, reject) => {
    const dir = path.dirname(filePath);
    const basename = path.basename(filePath);
    let settled = false;

    // Single exit point: releases the timer and watcher exactly once, then
    // settles the promise. Resolving with `true` lets callers use the result
    // as a boolean (e.g. in console.assert).
    const finish = err => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      watcher.close();
      if (err) {
        reject(err);
      } else {
        resolve(true);
      }
    };

    const resolveIfReadable = () => {
      fs.access(filePath, fs.constants.R_OK, err => {
        if (!err) {
          finish();
        }
      });
    };

    const watcher = fs.watch(dir, (eventType, filename) => {
      // 'rename' fires when an entry appears AND when it disappears, so
      // confirm that the file is really there before resolving.
      if (eventType === 'rename' && filename === basename) {
        resolveIfReadable();
      }
    });
    // Without a listener, a watcher 'error' event would crash the process.
    watcher.on('error', finish);

    const timer = setTimeout(() => {
      finish(new Error(' [checkFileExists] File does not exist, and was not created during the timeout delay.'));
    }, timeout);

    // The file may already be present before any watch event is delivered.
    resolveIfReadable();
  });
}
75+
76+
/**
 * Opens chrome://downloads in a new tab and polls it until the most recent
 * download matches `url` and is complete. The tab is always closed again,
 * even if navigation or polling fails.
 * @param {!Browser} browser
 * @param {string} url The URL of the download file to wait for.
 * @returns {!Promise<!Object>} Metadata about the latest file in Download Manager.
 */
async function waitForFileToDownload(browser, url) {
  const downloadPage = await browser.newPage();
  try {
    // Note: navigating to this page only works in headful chrome.
    await downloadPage.goto('chrome://downloads/');

    // Wait for our download to show up in the list by matching on its url.
    const jsHandle = await downloadPage.waitForFunction(downloadUrl => {
      const manager = document.querySelector('downloads-manager');
      // The element (or its item list) may not exist yet on an early poll;
      // a falsy return keeps polling instead of throwing.
      const items = manager && manager.items_;
      if (!items || !items.length) {
        return false;
      }
      const lastDownload = items[0];
      if (lastDownload.url === downloadUrl &&
          lastDownload.state === 'COMPLETE') {
        return lastDownload;
      }
      return false;
    }, {polling: 100}, url);

    return await jsHandle.jsonValue();
  } finally {
    await downloadPage.close();
  }
}
103+
104+
/**
 * Opens `url` in a new tab, clicks the first anchor whose text node equals
 * `text`, and reports where that anchor points. The tab is always closed
 * again, even if navigation or evaluation fails.
 * @param {!Browser} browser
 * @param {string} url The url of the page to navigate to.
 * @param {string} text The link with this text to find and click on the page.
 * @returns {!Promise<?string>} The download resource's url, or null if no
 *     matching link was found.
 */
async function clickDownloadLink(browser, url, text) {
  const page = await browser.newPage();
  try {
    await page.goto(url, {waitUntil: 'networkidle2'});

    return await page.evaluate(text => {
      // XPath 1.0 has no escape sequences inside string literals, so pick a
      // quote style the text does not contain, or stitch the pieces together
      // with concat() when it contains both kinds of quote.
      const toXPathLiteral = value => {
        if (!value.includes('"')) {
          return `"${value}"`;
        }
        if (!value.includes("'")) {
          return `'${value}'`;
        }
        const pieces = value.split('"').map(piece => `"${piece}"`);
        return `concat(${pieces.join(`, '"', `)})`;
      };

      const link = document.evaluate(`//a[text()=${toXPathLiteral(text)}]`,
          document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null)
          .singleNodeValue;
      if (!link) {
        return null;
      }
      link.click();
      return link.href;
    }, text);
  } finally {
    await page.close();
  }
}
128+
129+
// Demo entry point: click a download link, wait for Chrome's download manager
// to report the file as complete, then confirm the file is on disk.
(async() => {

  // chrome://downloads (used by waitForFileToDownload) needs headful Chrome.
  const browser = await puppeteer.launch({
    headless: false,
    // dumpio: true,
  });

  // try/finally so the browser window is closed on every exit path,
  // including the early return below and any thrown error.
  try {
    // TODO: setDownloadBehavior would be a good approach, as we could check
    // that the file shows up in the location specified by downloadPath. However,
    // that arg doesn't currently work.
    // const client = await page.target().createCDPSession();
    // await client.send('Page.setDownloadBehavior', {
    //   behavior: 'allow',
    //   downloadPath: path.resolve(__dirname, 'downloads'),
    // });
    // await client.detach();

    // 1. Navigate to a page with a bunch of links to download.
    // 2. Click the "Short Selling (csv)" link on the page. The browser force
    //    downloads the file.
    const url = 'https://www.nseindia.com/products/content/equities/equities/homepage_eq.htm';
    const downloadUrl = await clickDownloadLink(browser, url, 'Short Selling (csv)');

    if (!downloadUrl) {
      console.error('Did not find download link!');
      return;
    }

    // 3. Open chrome:downloads and wait for the file to be downloaded.
    const fileMeta = await waitForFileToDownload(browser, downloadUrl);
    console.log(`"${fileMeta.file_name}" was downloaded`);

    // 4. Optionally check that the file really ends up in the expected location
    //    on the filesystem. checkFileExists signals failure by rejecting, so
    //    report based on that rather than on its resolved value.
    try {
      await checkFileExists(path.join(DOWNLOADS_FOLDER, fileMeta.file_name));
    } catch (err) {
      console.error(`${fileMeta.file_name} was not downloaded to correct location.`);
      process.exitCode = 1;
    }
  } finally {
    await browser.close();
  }

})().catch(err => {
  // Surface unexpected failures instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)