-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
74 lines (63 loc) · 2.24 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// Debug aid: if a `v8debug` object is exposed on the global scope, ask it to
// break on uncaught exceptions (presumably only present under the legacy V8
// debugger — verify for the Node version in use).
var legacyDebugger = global.v8debug
if (legacyDebugger) {
  legacyDebugger.Debug.setBreakOnUncaughtException()
}

// A limit of 0 removes the listener cap on `process`, so no max-listeners
// warning is emitted however many listeners get attached.
process.setMaxListeners(0)
// Dependencies: HTTP client, URL helpers, CartoDB client, and the local
// geocoding / scraping helpers.
var request = require('request-promise')
var url = require('url')
var CartoDB = require('cartodb')
var askGoogleForAddress = require('./lib/google')
var scrape = require('./lib/newyorker')
var secrets = require('./secrets.js')

// CartoDB client configured with the user and API key read from secrets.js.
var cartodbClient = new CartoDB({
  user: secrets.USER,
  api_key: secrets.CARTODB_KEY
})

// Address of the first listing page; later pages append '/page/<n>' to it.
var baseUrl = 'http://www.newyorker.com/magazine/tables-for-two'

// Holds the URL of the listing page currently being scheduled.
var requestUrl

// open the connection once
cartodbClient.connect()
/**
 * Fetch one listing page and push everything found on it to CartoDB.
 *
 * The response is handed to `scrape`, the scraped result to
 * `askGoogleForAddress`, and every entry of the geocoded result to
 * `sendToCarto`.
 *
 * @param {string} url - address of the listing page to request
 * @returns {Promise} settles once the page has been processed. Failures in
 *   any step are logged together with the page URL instead of being left as
 *   an unhandled rejection, so one bad page does not take the others down.
 */
function doScrape(url) {
  console.log('requesting', url)
  return request(url)
    .then(scrape)
    .then(askGoogleForAddress)
    .then(function (data) {
      data.forEach(sendToCarto)
    })
    .catch(function (err) {
      console.log('scrape failed for', url, err)
    })
}
// How long to wait before re-asking Google after an OVER_QUERY_LIMIT reply (ms).
var GEOCODE_RETRY_DELAY_MS = 2000
// Cap on re-geocoding attempts per item, so an exhausted quota cannot make the
// script retry forever.
var GEOCODE_MAX_RETRIES = 5

/**
 * Prepare a value for use between single quotes in a SQL statement by
 * doubling every single quote. `undefined` and `null` become an empty string.
 *
 * @param {*} value - raw value (typically scraped text)
 * @returns {string} text that cannot terminate the surrounding SQL literal
 */
function escapeSqlLiteral(value) {
  if (value === undefined || value === null) {
    return ''
  }
  return String(value).replace(/'/g, "''")
}

/**
 * Geocode one scraped item again after a delay and store whatever comes back.
 *
 * @param {Object} scrapedData - the scraped item whose lookup was rate limited
 * @param {number} retriesLeft - attempts still allowed; at 0 the item is
 *   logged and dropped
 */
function retryGeocode(scrapedData, retriesLeft) {
  if (retriesLeft <= 0) {
    console.log('giving up geocoding after repeated OVER_QUERY_LIMIT:', scrapedData.title)
    return
  }

  setTimeout(function () {
    var storeAll = function (pairs) {
      pairs.forEach(function (pair) {
        storeGeocodedPlace(pair, retriesLeft - 1)
      })
    }
    // NOTE(review): assumes askGoogleForAddress takes an array of scraped
    // items and yields (directly or via a promise) an array of
    // [scrapedData, googleResponse] pairs, which is how doScrape uses it —
    // confirm against ./lib/google.
    var pending = askGoogleForAddress([scrapedData])
    if (pending && typeof pending.then === 'function') {
      pending.then(storeAll).then(null, function (err) {
        console.log(err)
      })
    } else {
      storeAll(pending)
    }
  }, GEOCODE_RETRY_DELAY_MS)
}

/**
 * Insert one geocoded item into the CartoDB table, or schedule a retry when
 * Google answered OVER_QUERY_LIMIT. Any other non-OK response is logged and
 * the item is skipped.
 *
 * @param {Array} incoming - pair of [scrapedData, googleResponse]
 * @param {number} retriesLeft - geocoding retries still allowed for this item
 */
function storeGeocodedPlace(incoming, retriesLeft) {
  var scrapedData = incoming[0]
  var googleResponse = incoming[1]

  if (googleResponse.status === 'OVER_QUERY_LIMIT') {
    // wait a couple seconds and try again with the scraped data
    retryGeocode(scrapedData, retriesLeft)
    return
  }
  if (googleResponse.status !== 'OK') {
    console.log(googleResponse)
    return
  }

  var bestMatch = googleResponse.results[0]
  var location = bestMatch.geometry.location
  // The scraped address may carry a phone number in parentheses; keep the
  // text between the first '(' and the last ')'.
  var phoneMatch = scrapedData.address.match(/\((.*)\)/)

  // Every text value ends up inside a single-quoted literal of the statement
  // below, so quotes in scraped text must be doubled.
  // NOTE(review): assumes the client substitutes {placeholders} verbatim —
  // if it escapes on its own, drop escapeSqlLiteral to avoid double escaping.
  var params = {
    phone: escapeSqlLiteral(phoneMatch ? phoneMatch[1] : ''),
    title: escapeSqlLiteral(scrapedData.title),
    url: escapeSqlLiteral(scrapedData.href),
    // address from google is more complete
    address: escapeSqlLiteral(bestMatch.formatted_address),
    // WKT point order is "x y", i.e. longitude first; Number() keeps
    // anything non-numeric out of the geometry expression.
    coords: Number(location.lng) + ' ' + Number(location.lat),
    blurb: escapeSqlLiteral(scrapedData.blurb),
    author: escapeSqlLiteral(scrapedData.author),
    publishDate: escapeSqlLiteral(scrapedData.publishDate)
  }

  cartodbClient.query("insert into t42 (the_geom, name, address, state, url, blurb, author, publish_date, phone) values(ST_GeomFromText('POINT({coords})', 4326), '{title}', '{address}', 'NY', '{url}', '{blurb}', '{author}', '{publishDate}', '{phone}')", params, function (err, result) {
    if (err) {
      console.log(err)
    } else {
      console.log(result)
    }
  })
}

/**
 * Store one geocoded restaurant in CartoDB.
 *
 * Takes a single argument on purpose: it is used as an Array#forEach
 * callback, so any further parameters would receive the index and the array.
 *
 * @param {Array} incoming - pair of [scrapedData, googleResponse], where
 *   scrapedData provides title, href, address, blurb, author and publishDate,
 *   and googleResponse provides status and results
 */
function sendToCarto(incoming) {
  storeGeocodedPlace(incoming, GEOCODE_MAX_RETRIES)
}
// Schedule one scrape per listing page (pages 1 through 65). Page 1 lives at
// the base URL itself; every later page is at '<base>/page/<n>'. Page n is
// started n seconds from now, so the requests go out one second apart.
for (var page = 1; page <= 65; page += 1) {
  requestUrl = page === 1 ? baseUrl : baseUrl + '/page/' + page
  setTimeout(doScrape.bind(this, requestUrl), page * 1000)
}