-
Notifications
You must be signed in to change notification settings - Fork 1
/
CrawlerRun.php
76 lines (63 loc) · 1.71 KB
/
CrawlerRun.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
<?php
namespace App\Console\Commands;
use App\Observers\Crawler\ConsoleObserver;
use App\Queues\CrawlerCacheQueue;
use Spatie\Crawler\Crawler;
use Illuminate\Console\Command;
use Spatie\Crawler\CrawlProfiles\CrawlInternalUrls;
/**
 * Artisan command that crawls one site with spatie/crawler and reports the outcome.
 *
 * The command builds a fresh CrawlerCacheQueue, runs the crawler restricted to
 * the given site's internal URLs, then prints how many items were crawled and
 * whether the queue still holds pending URLs.
 */
class CrawlerRun extends Command
{
    /**
     * Value passed to the CrawlerCacheQueue constructor.
     *
     * 86400 was labelled "one day" in the original code, so this is presumably
     * a lifetime in seconds — TODO confirm against CrawlerCacheQueue.
     */
    private const QUEUE_LIFETIME = 86400;

    /**
     * Number of URLs the crawler is allowed to process concurrently.
     */
    private const CONCURRENCY = 20;

    /**
     * Running count of crawled items, printed at the end of handle().
     *
     * Nothing in this class increments it; it is public, and the command
     * instance is handed to ConsoleObserver, so presumably the observer
     * updates it — verify against ConsoleObserver.
     *
     * @var int
     */
    public int $total_crawled = 0;

    /**
     * The name and signature of the console command.
     *
     * NOTE(review): "craw" looks like a typo for "crawl"; it is left unchanged
     * because existing callers and schedules invoke the command by this name.
     *
     * @var string
     */
    protected $signature = 'craw {site}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Prepares and runs the crawler';

    /**
     * Execute the console command.
     *
     * Reads the required "site" argument, crawls it, and prints a summary.
     *
     * @return int Always 0 (success exit code).
     */
    public function handle(): int
    {
        $site = $this->argument('site');

        // A new queue is created on every run; there is no queue to resume from.
        $this->info('Preparing a new crawler queue');
        $queue = new CrawlerCacheQueue(self::QUEUE_LIFETIME);

        $this->info('Start crawling');
        Crawler::create()
            ->setParseableMimeTypes(['text/html', 'text/plain'])
            ->addCrawlObserver(new ConsoleObserver($this))
            ->setConcurrency(self::CONCURRENCY)
            ->setCrawlQueue($queue)
            // Restrict the crawl to URLs that belong to the requested site.
            ->setCrawlProfile(new CrawlInternalUrls($site))
            ->startCrawling($site);

        $this->alert("Crawled {$this->total_crawled} items");

        // Pending URLs after the crawl mean it stopped before the queue was drained.
        if ($queue->hasPendingUrls()) {
            $this->alert('Has URLs left');
        } else {
            $this->info('Has no URLs left');
        }

        return 0;
    }
}