-
Notifications
You must be signed in to change notification settings - Fork 2
/
Reddit Comments Scrape.user.js
52 lines (46 loc) · 1.87 KB
/
Reddit Comments Scrape.user.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/**
 * Looks up the first node matching an XPath expression.
 *
 * The expression is evaluated through `this.evaluate`, with the object the
 * function is called on (here `document`) also serving as the context node.
 *
 * @param {string} sValue - XPath expression handed to `evaluate`.
 * @returns {Node|undefined} The first item of the ordered snapshot, or
 *     `undefined` when the snapshot is empty.
 */
document.getElementByXPath = function(sValue) {
  var snapshot = this.evaluate(
    sValue,
    this,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  // An empty snapshot deliberately yields `undefined` rather than `null`.
  return snapshot.snapshotLength > 0 ? snapshot.snapshotItem(0) : undefined;
};
/**
 * Collects every node matching an XPath expression into a plain array.
 *
 * The expression is evaluated through `this.evaluate`, with the object the
 * function is called on (here `document`) also serving as the context node.
 *
 * @param {string} sValue - XPath expression handed to `evaluate`.
 * @returns {Array} The snapshot items in snapshot order; empty when nothing
 *     matches.
 */
document.getElementsByXPath = function(sValue) {
  var snapshot = this.evaluate(
    sValue,
    this,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  var nodes = [];
  var index = 0;
  // Copy the snapshot into a real array so callers get `length` and indexing.
  while (index < snapshot.snapshotLength) {
    nodes.push(snapshot.snapshotItem(index));
    index += 1;
  }
  return nodes;
};
// Four node lists that the scrape loop further down reads by the same index,
// so all of them must be derived from the same set of comment-body nodes:
// a div whose class contains 'usertext-body', inside a form, inside a div
// whose class contains 'entry'.
// NOTE(review): the class names presumably target old-Reddit comment markup —
// confirm against the live page.

// Comment bodies. Anchored on 'usertext-body' like the three queries below;
// matching the bare substring 'usertext' would also pick up any sibling
// 'usertext-*' div and shift this list relative to the others.
var comments = document.getElementsByXPath(
  "//div[contains(@class, 'entry')]//form//div[contains(@class, 'usertext-body')]"
);
// <p class="tagline"> elements reached by stepping up from the comment bodies.
var taglines = document.getElementsByXPath(
  "(//div[contains(@class, 'entry')]//form//div[contains(@class, 'usertext-body')])//..//../p[@class='tagline']"
);
// Anchors whose text is exactly 'permalink' that follow a comment body.
var permalinks = document.getElementsByXPath(
  "(//div[contains(@class, 'entry')]//form//div[contains(@class, 'usertext-body')])//following::a[text() = 'permalink']"
);
// Spans inside those taglines whose class contains 'score unvoted'.
var scores = document.getElementsByXPath(
  "(//div[contains(@class, 'entry')]//form//div[contains(@class, 'usertext-body')])//..//../p[@class='tagline']/span[contains(@class,'score unvoted')]"
);
/**
 * Offers `text` as a plain-text download by clicking a throwaway anchor.
 *
 * The content is embedded in a `data:` URI, so no file or server round trip
 * is involved. A confirmation line is written to the console afterwards.
 *
 * @param {string} filename - Value for the anchor's `download` attribute.
 * @param {string} text - Content to embed; URI-encoded before use.
 * @returns {undefined}
 */
function saveText(filename, text) {
  var link = document.createElement('a');
  var dataUri = 'data:text/plain;charset=utf-8,' + encodeURIComponent(text);
  link.setAttribute('href', dataUri);
  link.setAttribute('download', filename);
  // The anchor is never attached to the page; the click happens in code only.
  link.click();
  console.log(filename + ' File downloaded');
}
// Build one record per permalink in the form
//   <tagline second-child href> | <permalink href> | <score> | <comment text> |
// followed by a blank line, then offer the result as a download.
var textData = "";

// The four lists come from independent XPath queries and are only assumed to
// line up by index; flag a mismatch instead of failing silently.
if (taglines.length !== permalinks.length ||
    scores.length !== permalinks.length ||
    comments.length !== permalinks.length) {
  console.warn("Reddit scrape: node lists differ in length (permalinks=" +
    permalinks.length + ", taglines=" + taglines.length + ", scores=" +
    scores.length + ", comments=" + comments.length +
    "); some fields may be empty or misaligned");
}

for (var i = 0; i < permalinks.length; i++) {
  // A missing node leaves its field empty rather than throwing, so one
  // incomplete comment no longer aborts the whole scrape.
  textData +=
    // Second child of the tagline; its href is taken as the author link.
    (taglines[i] && taglines[i].children[1] ? taglines[i].children[1].href : "") +
    " | " + permalinks[i].href +
    // First space-separated token of the score text.
    " | " + (scores[i] ? scores[i].textContent.split(" ")[0] : "") +
    " | " + (comments[i] ? comments[i].textContent.trim() : "") +
    " | " + "\n\n";
}

// Log the assembled text once; logging inside the loop reprinted the whole
// accumulated string on every iteration.
if (permalinks.length > 0) {
  console.log(textData);
}

// Skip the download when next to nothing was collected.
if (textData.length > 10) {
  saveText("Reddit.txt", textData);
}