-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscraper.js
107 lines (94 loc) · 2.98 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
// Generated by CoffeeScript 1.8.0
(function() {
var Promise, Scraper, fs, request, scraper;
request = require("request-promise");
Promise = require("bluebird");
fs = require("fs");
/**
 * Scraper for the JustWatch API: downloads the popular-titles listing for a
 * locale and, for every title that is not a movie, looks up its season count.
 *
 * @constructor
 */
function Scraper() {}

/** Base URL every request URI is built from. */
Scraper.prototype._endpoint = "https://api.justwatch.com";

/**
 * Maximum number of show-detail requests `scrape` keeps in flight at once.
 * Override it on an instance (or the prototype) to tune the request rate.
 */
Scraper.prototype._concurrency = 10;

/**
 * Downloads the popular-titles listing page by page, one request at a time.
 * Pages are numbered from 1; 1000 titles are requested in pages of 200.
 *
 * @param {string} locale - Locale segment inserted into the request URI.
 * @returns {Promise<Array>} Resolves with the items of all pages, in page order.
 */
Scraper.prototype._popular = function(locale) {
  var self = this;
  var page_size = 200;
  var item_count = 1000;
  var page_count = Math.ceil(item_count / page_size);
  var items_list = [];
  var pages = [];
  var n;
  for (n = 1; n <= page_count; n++) {
    pages.push(n);
  }
  // Promise.each (bluebird) runs the callback for one page after the other.
  return Promise.each(pages, function(page) {
    console.log("* Downloading titles " + (page_size * page) + "/" + item_count);
    return request({
      uri: "" + self._endpoint + "/titles/" + locale + "/popular",
      method: 'POST',
      gzip: true,
      json: true,
      body: {
        content_types: ["show", "movie"],
        page: page,
        page_size: page_size
      }
    }).then(function(data) {
      // A response without an `items` array contributes nothing; appending it
      // blindly would put `undefined` into the list and break `scrape` later.
      if (data && Array.isArray(data.items)) {
        items_list = items_list.concat(data.items);
      }
    });
  }).then(function() {
    return items_list;
  });
};

/**
 * Requests the detail record of a single show.
 *
 * @param {*} id - Show identifier inserted into the request URI.
 * @param {string} locale - Locale segment inserted into the request URI.
 * @returns {Promise<Object>} Resolves with the parsed JSON response.
 */
Scraper.prototype._show = function(id, locale) {
  console.log("* Downloading show data for ID: #" + id);
  return request.get({
    uri: "" + this._endpoint + "/titles/show/" + id + "/locale/" + locale,
    json: true
  });
};

/**
 * Scrapes the popular titles for a locale.
 *
 * Movies are reported with title, year and type only. Every other title
 * additionally gets a `season_count` taken from its show detail record
 * (0 when that record carries no `seasons` array).
 *
 * @param {string} [locale="en_US"] - Locale to scrape; null/undefined selects the default.
 * @returns {Promise<{titles: Array<Object>, title_count: number, locale: string}>}
 */
Scraper.prototype.scrape = function(locale) {
  var self = this;
  var target_locale = locale == null ? "en_US" : locale;
  return this._popular(target_locale).then(function(titles) {
    // Promise.map keeps the results in input order while capping how many
    // show lookups run simultaneously, instead of firing all of them at once.
    return Promise.map(titles, function(title) {
      var data = {
        title: title.title,
        year: title.original_release_year,
        type: title.object_type
      };
      if (data.type === "movie") {
        return data;
      }
      return self._show(title.id, target_locale).then(function(show) {
        var has_seasons = Boolean(show) && Array.isArray(show.seasons);
        data.season_count = has_seasons ? show.seasons.length : 0;
        return data;
      });
    }, {
      concurrency: self._concurrency
    });
  }).then(function(titles) {
    return {
      titles: titles,
      title_count: titles.length,
      locale: target_locale
    };
  });
};
module.exports = Scraper;
// Command-line entry point: when this file is run directly (not required as a
// module), scrape the locale given as the first CLI argument and write the
// JSON-encoded result to output.txt next to this file.
if (require.main === module) {
  scraper = new Scraper();
  scraper.scrape(process.argv[2]).then(function(response) {
    var filePath = "" + __dirname + "/output.txt";
    var data = JSON.stringify(response);
    fs.writeFile(filePath, data, function(err) {
      // Only report success when the write actually succeeded.
      if (err) {
        console.error("* Could not save file to " + filePath + ": " + err.message);
        process.exitCode = 1;
        return;
      }
      console.log("* File saved to " + filePath);
    });
  }).catch(function(err) {
    // A failed download or serialization would otherwise surface only as an
    // unhandled rejection; report it and signal failure to the shell.
    console.error("* Scrape failed: " + (err && err.message ? err.message : err));
    process.exitCode = 1;
  });
}
}).call(this);