From 080b3274abbeaa5869f8d0481fc68088182171cf Mon Sep 17 00:00:00 2001 From: wigy Date: Fri, 10 Apr 2020 16:17:23 +0200 Subject: [PATCH] Initialize crawling with a set of seed nodes - A downloaded peers.json can be used to initialize traversal - All collected information is stored in an output file --- CHANGELOG.md | 4 +- README.md | 25 ++++-- app.js | 45 +++++++---- ark-devnet.json | 16 ++++ ark-mainnet.json | 193 ++++++++++++++++++++++++++++++++++++++++++++++ download.sh | 6 ++ hyd-devnet.json | 30 +++++++ hyd-mainnet.json | 89 +++++++++++++++++++++ package-lock.json | 5 ++ package.json | 1 + src/crawler.js | 108 +++++++++++++------------- 11 files changed, 446 insertions(+), 76 deletions(-) create mode 100644 ark-devnet.json create mode 100644 ark-mainnet.json create mode 100755 download.sh create mode 100644 hyd-devnet.json create mode 100644 hyd-mainnet.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 18357b5..bfe9108 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,9 @@ ## [2.0.0] - 2020-04-07 -- Support for Hydra peers +- Support for Hydra peers and network definition files. +- Storing some extra information about network nodes in a + timestamped output file. ## [1.3.0] - 2020-02-04 diff --git a/README.md b/README.md index 0828d65..d0922b2 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,25 @@ you sponsor them for their hard work. ## Usage -`node . http://:` +- [Hydra mainnet](http://hydra.iop.global/) + + `node . hyd-mainnet.json` -For port use the p2p port, which is 4701 for [Hydra mainnet](http://hydra.iop.global/) or -4702 for [Hydra devnet](http://dev.hydra.iop.global/). +- [Hydra devnet](http://dev.hydra.iop.global/) + + `node . hyd-devnet.json` + +- [ARK mainnet](https://explorer.ark.io/) + + `node . ark-mainnet.json` + +- [ARK devnet](https://dev.explorer.ark.io/) + + `node . 
ark-devnet.json` + +- If you think the seed nodes for the networks have changed since we downloaded them, just run the download script using `curl` on your system to refresh them from GitHub + + `./download.sh` ## Credits @@ -24,5 +39,5 @@ For port use the p2p port, which is 4701 for [Hydra mainnet](http://hydra.iop.gl ## License -ARK Delegate: [MIT](LICENSE) © roks0n -Hydra patches: [MIT](LICENSE) © Decentralized Society Foundation, Panama +- ARK Delegate: [MIT](LICENSE) © roks0n +- Hydra patches: [MIT](LICENSE) © Decentralized Society Foundation, Panama diff --git a/app.js b/app.js index adae908..e9864c0 100644 --- a/app.js +++ b/app.js @@ -1,9 +1,9 @@ -const Crawler = require('./src/crawler') -const { URL } = require('url') -const { orderBy } = require('lodash/collection') +const { orderBy } = require('lodash') +const fs = require('fs') +const path = require('path') +const moment = require('moment') -const crawler = new Crawler() -const args = process.argv.slice(2) +const Crawler = require('./src/crawler') const report = (crawler) => { const blockStats = {} @@ -16,8 +16,6 @@ const report = (crawler) => { continue } - console.log(JSON.stringify(node, undefined, 2)) - if (blockStats[node.height]) { blockStats[node.height].count += 1 blockStats[node.height].ids[node.id] += 1 @@ -77,14 +75,33 @@ const report = (crawler) => { console.log('------------------------------------------') console.log(`Finished scanning in ${new Date() - crawler.startTime}ms`) - process.exit(0) + return crawler } -const node = { ip: undefined, port: undefined } -if (args.length === 1) { - const url = new URL(args[0]) - node.ip = url.hostname - node.port = url.port +const main = async () => { + try { + const crawler = new Crawler() + const args = process.argv.slice(2) + const outputFilename = `${path.basename(args[0], '.json')}-${moment().format('YYYYMMDD-HHmmss')}.json` + + const inputStr = fs.readFileSync(args[0], { encoding: 'utf-8' }) + let input = JSON.parse(inputStr) + if ('list' in
input) { + input = input.list + } + + for (const node of input) { + crawler.add(node) + } + + await crawler.run() + await report(crawler) + + const outputStr = JSON.stringify(Object.values(crawler.nodes), undefined, 2) + fs.writeFileSync(outputFilename, outputStr, { encoding: 'utf-8' }) + } catch (err) { + console.error(err) + } } -crawler.run(node).then(report).catch(err => console.error(err)) +main().then(() => {}) diff --git a/ark-devnet.json b/ark-devnet.json new file mode 100644 index 0000000..c58d7e8 --- /dev/null +++ b/ark-devnet.json @@ -0,0 +1,16 @@ +[{ + "ip": "167.114.29.51", + "port": 4002 +}, { + "ip": "167.114.29.52", + "port": 4002 +}, { + "ip": "167.114.29.53", + "port": 4002 +}, { + "ip": "167.114.29.54", + "port": 4002 +}, { + "ip": "167.114.29.55", + "port": 4002 +}] diff --git a/ark-mainnet.json b/ark-mainnet.json new file mode 100644 index 0000000..c448a3f --- /dev/null +++ b/ark-mainnet.json @@ -0,0 +1,193 @@ +[{ + "ip": "5.196.105.32", + "port": 4001 +}, { + "ip": "5.196.105.33", + "port": 4001 +}, { + "ip": "5.196.105.34", + "port": 4001 +}, { + "ip": "5.196.105.35", + "port": 4001 +}, { + "ip": "5.196.105.36", + "port": 4001 +}, { + "ip": "5.196.105.37", + "port": 4001 +}, { + "ip": "5.196.105.38", + "port": 4001 +}, { + "ip": "5.196.105.39", + "port": 4001 +}, { + "ip": "178.32.65.136", + "port": 4001 +}, { + "ip": "178.32.65.137", + "port": 4001 +}, { + "ip": "178.32.65.138", + "port": 4001 +}, { + "ip": "178.32.65.139", + "port": 4001 +}, { + "ip": "178.32.65.140", + "port": 4001 +}, { + "ip": "178.32.65.141", + "port": 4001 +}, { + "ip": "178.32.65.142", + "port": 4001 +}, { + "ip": "178.32.65.143", + "port": 4001 +}, { + "ip": "5.196.105.40", + "port": 4001 +}, { + "ip": "5.196.105.41", + "port": 4001 +}, { + "ip": "5.196.105.42", + "port": 4001 +}, { + "ip": "5.196.105.43", + "port": 4001 +}, { + "ip": "5.196.105.44", + "port": 4001 +}, { + "ip": "5.196.105.45", + "port": 4001 +}, { + "ip": "5.196.105.46", + "port": 4001 +}, { + "ip": 
"5.196.105.47", + "port": 4001 +}, { + "ip": "54.38.120.32", + "port": 4001 +}, { + "ip": "54.38.120.33", + "port": 4001 +}, { + "ip": "54.38.120.34", + "port": 4001 +}, { + "ip": "54.38.120.35", + "port": 4001 +}, { + "ip": "54.38.120.36", + "port": 4001 +}, { + "ip": "54.38.120.37", + "port": 4001 +}, { + "ip": "54.38.120.38", + "port": 4001 +}, { + "ip": "54.38.120.39", + "port": 4001 +}, { + "ip": "151.80.125.32", + "port": 4001 +}, { + "ip": "151.80.125.33", + "port": 4001 +}, { + "ip": "151.80.125.34", + "port": 4001 +}, { + "ip": "151.80.125.35", + "port": 4001 +}, { + "ip": "151.80.125.36", + "port": 4001 +}, { + "ip": "151.80.125.37", + "port": 4001 +}, { + "ip": "151.80.125.38", + "port": 4001 +}, { + "ip": "151.80.125.39", + "port": 4001 +}, { + "ip": "213.32.41.104", + "port": 4001 +}, { + "ip": "213.32.41.105", + "port": 4001 +}, { + "ip": "213.32.41.106", + "port": 4001 +}, { + "ip": "213.32.41.107", + "port": 4001 +}, { + "ip": "213.32.41.108", + "port": 4001 +}, { + "ip": "213.32.41.109", + "port": 4001 +}, { + "ip": "213.32.41.110", + "port": 4001 +}, { + "ip": "213.32.41.111", + "port": 4001 +}, { + "ip": "5.135.22.92", + "port": 4001 +}, { + "ip": "5.135.22.93", + "port": 4001 +}, { + "ip": "5.135.22.94", + "port": 4001 +}, { + "ip": "5.135.22.95", + "port": 4001 +}, { + "ip": "5.135.52.96", + "port": 4001 +}, { + "ip": "5.135.52.97", + "port": 4001 +}, { + "ip": "5.135.52.98", + "port": 4001 +}, { + "ip": "5.135.52.99", + "port": 4001 +}, { + "ip": "51.255.105.52", + "port": 4001 +}, { + "ip": "51.255.105.53", + "port": 4001 +}, { + "ip": "51.255.105.54", + "port": 4001 +}, { + "ip": "51.255.105.55", + "port": 4001 +}, { + "ip": "46.105.160.104", + "port": 4001 +}, { + "ip": "46.105.160.105", + "port": 4001 +}, { + "ip": "46.105.160.106", + "port": 4001 +}, { + "ip": "46.105.160.107", + "port": 4001 +}] diff --git a/download.sh b/download.sh new file mode 100755 index 0000000..d9528d3 --- /dev/null +++ b/download.sh @@ -0,0 +1,6 @@ +#!/bin/bash 
+ +curl -L https://github.com/ArkEcosystem/peers/raw/master/mainnet.json -o ark-mainnet.json +curl -L https://github.com/ArkEcosystem/peers/raw/master/devnet.json -o ark-devnet.json +curl -L https://github.com/Internet-of-People/hydra-peers/raw/master/mainnet.json -o hyd-mainnet.json +curl -L https://github.com/Internet-of-People/hydra-peers/raw/master/devnet.json -o hyd-devnet.json diff --git a/hyd-devnet.json b/hyd-devnet.json new file mode 100644 index 0000000..9bef982 --- /dev/null +++ b/hyd-devnet.json @@ -0,0 +1,30 @@ +{ + "list": [ + { + "ip" : "35.204.124.143", + "port": 4702 + }, + { + "ip" : "34.68.118.161", + "port": 4702 + }, + { + "ip" : "35.228.196.114", + "port": 4702 + }, + { + "ip" : "34.87.3.205", + "port": 4702 + }, + { + "ip" : "35.185.32.241", + "port": 4702 + }, + { + "ip" : "35.240.62.119", + "port": 4702 + } + ], + "sources": [] + } + \ No newline at end of file diff --git a/hyd-mainnet.json b/hyd-mainnet.json new file mode 100644 index 0000000..455bf12 --- /dev/null +++ b/hyd-mainnet.json @@ -0,0 +1,89 @@ +{ + "list": [ + { + "ip" : "35.195.150.223", + "port": 4701 + }, + { + "ip" : "34.76.165.50", + "port": 4701 + }, + { + "ip" : "104.155.17.122", + "port": 4701 + }, + { + "ip" : "35.228.202.124", + "port": 4701 + }, + { + "ip" : "35.228.73.165", + "port": 4701 + }, + { + "ip" : "35.198.174.42", + "port": 4701 + }, + { + "ip" : "35.246.135.62", + "port": 4701 + }, + { + "ip" : "34.90.0.113", + "port": 4701 + }, + { + "ip" : "34.90.43.152", + "port": 4701 + }, + { + "ip" : "34.66.138.96", + "port": 4701 + }, + { + "ip" : "104.154.142.55", + "port": 4701 + }, + { + "ip" : "34.74.102.192", + "port": 4701 + }, + { + "ip" : "35.231.24.181", + "port": 4701 + }, + { + "ip" : "35.233.159.123", + "port": 4701 + }, + { + "ip" : "35.230.119.77", + "port": 4701 + }, + { + "ip" : "35.235.109.207", + "port": 4701 + }, + { + "ip" : "34.94.151.12", + "port": 4701 + }, + { + "ip" : "35.203.123.223", + "port": 4701 + }, + { + "ip" : "35.240.251.207", + 
"port": 4701 + }, + { + "ip" : "35.190.233.247", + "port": 4701 + }, + { + "ip" : "34.93.248.166", + "port": 4701 + } + ], + "sources": [] +} diff --git a/package-lock.json b/package-lock.json index 6ae3261..d046a02 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1033,6 +1033,11 @@ "minimist": "^1.2.5" } }, + "moment": { + "version": "2.24.0", + "resolved": "https://registry.npmjs.org/moment/-/moment-2.24.0.tgz", + "integrity": "sha512-bV7f+6l2QigeBBZSM/6yTNq4P2fNpSWj/0e7jQcy87A8e7o2nAfP/34/2ky5Vw4B9S446EtIhodAzkFCcR4dQg==" + }, "ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", diff --git a/package.json b/package.json index 9676281..27f3ad8 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ }, "dependencies": { "lodash": "^4.17.15", + "moment": "2.24.0", "socketcluster-client": "^14.2.2" } } diff --git a/src/crawler.js b/src/crawler.js index 9c9deeb..946cdc8 100644 --- a/src/crawler.js +++ b/src/crawler.js @@ -1,8 +1,10 @@ -const { map } = require('lodash') +const { map, sample } = require('lodash') const Connections = require('./peer') +const GET_PEERS_FAILED = -2 +const CONNECTION_FAILED = -1 const NOT_VISITED = 0 -const VISITED = 1 +const GET_PEERS_SUCCESS = 1 let NETWORK_P2P_PORT = null class Crawler { @@ -10,7 +12,7 @@ class Crawler { * Initializes the internal request reactor. 
* @method constructor */ - constructor (timeout = 2500, disconnect = true, sampleSize = 10) { + constructor (timeout = 2500, disconnect = true) { this.disconnect = disconnect this.request = { data: {}, @@ -18,8 +20,34 @@ class Crawler { 'Content-Type': 'application/json' } } - this.sampleSize = sampleSize this.connections = new Connections(timeout) + this.nodes = {} + this.heights = [] + this.traversalState = {} + } + + add (peer) { + if (!NETWORK_P2P_PORT) { + NETWORK_P2P_PORT = peer.port + } else { + if (NETWORK_P2P_PORT !== peer.port) { + console.error(`${peer.ip} has p2p port at ${peer.port} instead of ${NETWORK_P2P_PORT}`) + } + } + + if (!(peer.ip in this.nodes)) { + this.nodes[peer.ip] = peer + } else { + Object.assign(this.nodes[peer.ip], peer) + } + + if (!(peer.ip in this.traversalState)) { + this.traversalState[peer.ip] = NOT_VISITED + } + + if (!this.connections.get(peer.ip)) { + this.connections.add(peer.ip, peer.port) + } } /** @@ -28,21 +56,16 @@ class Crawler { * @param {object} peer {ip: [address], port: [4001]} * @return {Promise} */ - async run (peer) { - this.nodes = {} - this.heights = [] - this.traversalState = {} + async run () { this.startTime = new Date() - NETWORK_P2P_PORT = peer.port - - if (!this.connections.get(peer.ip)) { - this.connections.add(peer.ip, NETWORK_P2P_PORT) - } - try { console.log('... discovering network peers') - await this.discoverPeers(peer) + while (true) { + const unvisitedIp = sample(Object.keys(this.traversalState).filter(ip => this.traversalState[ip] === NOT_VISITED)) + if (!unvisitedIp) break + await this.discoverPeers(unvisitedIp) + } console.log('... 
scanning network') await this.scanNetwork() if (this.disconnect) { @@ -51,58 +74,36 @@ class Crawler { } } catch (err) { console.error(err) + } finally { + this.endTime = new Date() } - - return this } - async discoverPeers (currentNode) { + async discoverPeers (ip) { return new Promise((resolve, reject) => { - const connection = this.connections.get(currentNode.ip) + const connection = this.connections.get(ip) if (!connection) { - reject(new Error(`No connection exists for ${currentNode.ip}:${currentNode.port}`)) + console.error(`No connection exists for ${ip}`) + this.traversalState[ip] = CONNECTION_FAILED + return resolve() } connection.emit( 'p2p.peer.getPeers', this.request, (err, response) => { if (err) { - console.error(`Error when calling p2p.peer.getPeers on ${currentNode.ip}: ${err}`) + console.error(`Error when calling p2p.peer.getPeers on ${ip}: ${err}`) + this.traversalState[ip] = GET_PEERS_FAILED return resolve() } - if (currentNode.ip in this.traversalState) { - this.traversalState[currentNode.ip] = VISITED - } + this.traversalState[ip] = GET_PEERS_SUCCESS response.data.map((peer) => { - if (!(peer.ip in this.nodes)) { - this.nodes[peer.ip] = peer - } - - if (!this.connections.get(peer.ip)) { - this.connections.add(peer.ip, NETWORK_P2P_PORT) - } + this.add({ port: NETWORK_P2P_PORT, ...peer }) }) - if (this.traversalState[currentNode.ip] === VISITED) { - return resolve() - } - - // note: this is not very efficient on large arrays - const samplePeers = response.data - .filter(p => this.traversalState[p.ip] !== VISITED) - .filter(a => a.ip !== currentNode.ip) - .map(x => ({ x, r: Math.random() })) - .sort((a, b) => a.r - b.r) - .map(a => a.x) - .slice(0, this.sampleSize) - const discoverPeers = samplePeers - .map((peer) => { - this.traversalState[peer.ip] = NOT_VISITED - return this.discoverPeers(peer) - }) - Promise.all(discoverPeers).then(resolve) + return resolve() } ) }) @@ -128,13 +129,8 @@ class Crawler { id: response.data.state.header.id } 
this.heights.push(block) - if (peer.height !== block.height) { - console.log(peer.ip + ' heights: ' + peer.height + '<>' + block.height) - } - Object.assign(peer, response.data.config); - Object.assign(peer, block); - // peer.height = block.height - // peer.id = block.id + Object.assign(peer, response.data) + Object.assign(peer, block) return resolve() } )