-
Notifications
You must be signed in to change notification settings - Fork 1
/
CrawlerQueue.php
82 lines (69 loc) · 2.1 KB
/
CrawlerQueue.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
<?php
namespace App\Models;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\SoftDeletes;
use Mvdnbrk\EloquentExpirable\Expirable;
use Psr\Http\Message\UriInterface;
use Spatie\Crawler\CrawlUrl;
/**
 * Eloquent model for one entry of the crawler queue.
 *
 * Each row stores the URL as a string (`url`), a hash of that string
 * (`url_hash`, used for lookups via the `url` query scope), the serialized
 * URL object (`url_class`) and a JSON-encoded copy of the page HTML (`html`).
 */
class CrawlerQueue extends Model
{
    // @OBS deleted_at = soft delete = processed
    use Expirable, SoftDeletes;

    /**
     * Algorithm name passed to hash() when deriving `url_hash` from a URL string.
     */
    protected string $HASH_ALGO = 'sha3-512';

    /**
     * The "booted" method of the model.
     *
     * Registers a global scope named "withoutExpired" that calls
     * withoutExpired() on the query builder.
     *
     * @return void
     */
    protected static function booted()
    {
        static::addGlobalScope('withoutExpired', function (Builder $builder) {
            $builder->withoutExpired();
        });
    }

    /**
     * Mutator for `html`: collapses every run of line breaks into a single
     * space and stores the result JSON-encoded.
     *
     * @param string $html Raw HTML.
     * @return void
     */
    public function setHtmlAttribute(string $html): void
    {
        // The `u` modifier is required: in 8-bit mode \R also matches the lone
        // byte 0x85, which is a continuation byte of many UTF-8 characters
        // (e.g. "Å" = C3 85) and would be corrupted by the replacement.
        $flattened = preg_replace('/\R+/u', ' ', $html);

        if ($flattened === null) {
            // preg_replace() returns null on error, e.g. when $html is not
            // valid UTF-8. Fall back to a byte-safe ASCII-only pattern and,
            // failing that, to the untouched input.
            $flattened = preg_replace('/[\r\n\x0b\f]+/', ' ', $html) ?? $html;
        }

        // Substitute invalid UTF-8 sequences instead of letting json_encode()
        // return false, which would silently discard the whole document.
        $this->attributes['html'] = json_encode($flattened, JSON_INVALID_UTF8_SUBSTITUTE);
    }

    /**
     * Accessor for `html`: JSON-decodes the stored value.
     *
     * @param mixed $html Raw stored value; may be null when the column is empty.
     * @return mixed The decoded value, or null when nothing is stored or decoding fails.
     */
    public function getHtmlAttribute($html)
    {
        if ($html === null) {
            // Passing null to json_decode() is deprecated since PHP 8.1.
            return null;
        }

        return json_decode($html);
    }

    /**
     * Mutator for `url_class`: stores the URL string, its hash and the
     * serialized URL object in one step.
     *
     * @param UriInterface|CrawlUrl $crawlUrl
     * @return void
     */
    public function setUrlClassAttribute(UriInterface|CrawlUrl $crawlUrl): void
    {
        // Normalise through the shared helper: a plain UriInterface has no
        // `url` property, only CrawlUrl does.
        $url = self::urlToString($crawlUrl);

        $this->attributes['url'] = $url;
        $this->attributes['url_hash'] = $this->hashUrl($url);
        $this->attributes['url_class'] = serialize($crawlUrl);
    }

    /**
     * Accessor for `url_class`: unserializes the stored URL object.
     *
     * @param mixed $crawlUrl Raw stored value; may be null when the column is empty.
     * @return mixed The unserialized value, or false when nothing is stored or
     *               the data cannot be unserialized.
     */
    public function getUrlClassAttribute($crawlUrl)
    {
        if ($crawlUrl === null) {
            // Passing null to unserialize() is deprecated since PHP 8.1;
            // false is what unserialize() yields for unusable input.
            return false;
        }

        // NOTE(review): 'allowed_classes' => TRUE lets unserialize() instantiate
        // any class. That is only safe while this column is written exclusively
        // by setUrlClassAttribute(); consider an explicit class list — confirm
        // which URI implementation is stored before restricting.
        return unserialize($crawlUrl, [ 'allowed_classes' => TRUE, ]);
    }

    /**
     * Search by url hash.
     *
     * @param Builder $query
     * @param UriInterface|CrawlUrl|string $crawlUrl
     * @return Builder
     */
    public function scopeUrl(Builder $query, UriInterface|CrawlUrl|string $crawlUrl): Builder
    {
        return $query->where('url_hash', $this->hashUrl(self::urlToString($crawlUrl)));
    }

    /**
     * Converts any supported URL representation to its string form.
     *
     * @param UriInterface|CrawlUrl|string $crawlUrl
     * @return string
     */
    private static function urlToString(UriInterface|CrawlUrl|string $crawlUrl): string
    {
        if ($crawlUrl instanceof CrawlUrl) {
            return (string) $crawlUrl->url;
        }

        // UriInterface is cast to string; a plain string passes through unchanged.
        return (string) $crawlUrl;
    }

    /**
     * Hashes a URL string with the configured algorithm, so the value stored
     * by the mutator and the value searched by the scope always agree.
     *
     * @param string $url
     * @return string
     */
    private function hashUrl(string $url): string
    {
        return hash($this->HASH_ALGO, $url);
    }
}