Skip to content

Commit

Permalink
建立索引时,添加过滤停止词功能
Browse files Browse the repository at this point in the history
  • Loading branch information
vanry committed Apr 26, 2018
1 parent e77b715 commit 8007160
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 2 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ php artisan vendor:publish --provider="Laravel\Scout\ScoutServiceProvider"
'duality' => false,
],
],

'stopwords' => [
'的',
'了',
'而是',
],
],

```
Expand Down
6 changes: 6 additions & 0 deletions config/scout.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,11 @@
'duality' => false,
],
],

'stopwords' => [
'的',
'了',
'而是',
],
],
];
4 changes: 4 additions & 0 deletions src/Engines/TNTSearchEngine.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ public function update($models)
$index->setTokenizer($this->tnt->tokenizer);
$index->setPrimaryKey($models->first()->getKeyName());

if (isset($this->tnt->config['stopwords'])) {
$index->setStopWords((array) $this->tnt->config['stopwords']);
}

$index->indexBeginTransaction();
$models->each(function ($model) use ($index) {
$array = $model->toSearchableArray();
Expand Down
6 changes: 4 additions & 2 deletions src/Tokenizers/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

abstract class Tokenizer implements TokenizerInterface
{
public function tokenize($text)
public function tokenize($text, $stopwords = [])
{
return is_numeric($text) ? [] : $this->getTokens($text);
$tokens = is_numeric($text) ? [] : $this->getTokens($text);

return array_diff($tokens, $stopwords);
}

abstract protected function getTokens($text);
Expand Down

0 comments on commit 8007160

Please sign in to comment.