Skip to content

Commit

Permalink
#13 Add command to remove manuals from index by given constraints
Browse files Browse the repository at this point in the history
This commit adds a new command `docsearch:index:delete`, which
allows for the deletion of all or selected manuals from the
Elasticsearch index.

It accepts the following options:

- --manual-slug: The slug of the manual to be removed from the index
- --manual-version: The version of the manual to be removed from the index
- --manual-type: The type of the manual to be removed from the index
- --manual-language: The language of the manual to be removed from the index

To execute the command, type:

`bin/console docsearch:index:delete`

To remove, for example, a specific manual version, type:

`bin/console docsearch:index:delete --manual-version=12.4`

Multiple options can be combined for more targeted deletions.

Resolves issue: #13
  • Loading branch information
Marcin Sągol committed Dec 18, 2023
1 parent 91b5951 commit 19c0bf2
Show file tree
Hide file tree
Showing 8 changed files with 527 additions and 2 deletions.
22 changes: 22 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,28 @@ Removing index to start fresh
If you want to start with a fresh Elasticsearch index locally, you can use Chrome extensions
like `Elasticvue` to clear/drop the Elasticsearch index if necessary.

Removing selected manuals from index
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If you want to remove selected manuals from the index, you can use Chrome extensions or the command ``docsearch:index:delete``.

.. code-block:: bash
--manual-slug - slug of the manual to remove from index
--manual-version - version of the manual to remove from index
--manual-type - type of the manual to remove from index
--manual-language - language of the manual to remove from index
Execute it with:

.. code-block:: bash
ddev exec ./bin/console docsearch:index:delete --manual-slug= --manual-version=9.5 --manual-type=Extension --manual-language=en-us
.. note::
If you set the ``--manual-version`` option, matching manuals are updated by removing the selected
version from their list of versions. A manual is removed entirely only if that version was its last one.

Indexing Core changelog
^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
73 changes: 73 additions & 0 deletions src/Command/IndexCleaner.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php

namespace App\Command;

use App\Dto\Constraints;
use App\Repository\ElasticRepository;
use LogicException;
use RuntimeException;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Exception\InvalidArgumentException;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\Stopwatch\Stopwatch;

#[AsCommand(name: 'docsearch:index:delete', description: 'Removes from index manuals by given constraints')]
class IndexCleaner extends Command
{
    public function __construct(private readonly ElasticRepository $elasticRepository)
    {
        parent::__construct();
    }

    /**
     * Registers the filter options understood by the command.
     *
     * Every option defaults to an empty string, which the command passes on unchanged
     * as "no constraint for this field".
     *
     * @throws InvalidArgumentException
     */
    protected function configure(): void
    {
        // Options take InputOption::VALUE_* modes, not InputArgument::* modes. The previously used
        // InputArgument::OPTIONAL has the same integer value as InputOption::VALUE_REQUIRED, so the
        // parsing behaviour is unchanged; only the intent is now spelled out correctly.
        // The fully-qualified name avoids touching the file's import list.
        $valueRequired = \Symfony\Component\Console\Input\InputOption::VALUE_REQUIRED;

        $this->addOption('manual-slug', 'ms', $valueRequired, 'Manual slug', '');
        $this->addOption('manual-version', 'mv', $valueRequired, 'Manual version', '');
        $this->addOption('manual-type', 'mt', $valueRequired, 'Manual type', '');
        $this->addOption('manual-language', 'ml', $valueRequired, 'Manual language', '');
    }

    /**
     * Builds constraints from the command line options, asks the repository to remove the
     * matching documents and reports the elapsed time together with the returned count.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     * @return int Always Command::SUCCESS; failures surface as exceptions
     * @throws LogicException
     * @throws RuntimeException
     * @throws \InvalidArgumentException
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $timer = new Stopwatch();
        $timer->start('importer');

        $io = new SymfonyStyle($input, $output);
        $io->title('Removing from index documents by provided criteria');

        $constraints = new Constraints(
            $input->getOption('manual-slug'),
            $input->getOption('manual-version'),
            $input->getOption('manual-type'),
            $input->getOption('manual-language')
        );

        $deletedManualsCount = $this->elasticRepository->deleteByConstraints($constraints);

        $totalTime = $timer->stop('importer');
        // The stopwatch may report the duration as a float; cast explicitly instead of relying
        // on implicit float-to-int coercion at the call boundary.
        $elapsed = $this->formatMilliseconds((int) $totalTime->getDuration());

        $io->info(sprintf('Finished after %s. Total of %d manuals were removed.', $elapsed, $deletedManualsCount));

        return Command::SUCCESS;
    }

    /**
     * Formats a duration given in milliseconds as HH:MM:SS (sub-second precision is dropped).
     */
    private function formatMilliseconds(int $milliseconds): string
    {
        $seconds = intdiv($milliseconds, 1000);

        return sprintf('%02d:%02d:%02d', intdiv($seconds, 3600), intdiv($seconds, 60) % 60, $seconds % 60);
    }
}
36 changes: 36 additions & 0 deletions src/Dto/Constraints.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php

declare(strict_types=1);

namespace App\Dto;

/**
 * Immutable set of filter values describing which manuals an index operation applies to.
 *
 * Each value defaults to an empty string.
 */
readonly class Constraints
{
    /**
     * @param string $slug     Manual slug to filter by
     * @param string $version  Manual version to filter by
     * @param string $type     Manual type to filter by
     * @param string $language Manual language to filter by
     */
    public function __construct(
        private string $slug = '',
        private string $version = '',
        private string $type = '',
        private string $language = '',
    ) {
    }

    /** Returns the manual slug given at construction ('' by default). */
    public function getSlug(): string
    {
        return $this->slug;
    }

    /** Returns the manual version given at construction ('' by default). */
    public function getVersion(): string
    {
        return $this->version;
    }

    /** Returns the manual type given at construction ('' by default). */
    public function getType(): string
    {
        return $this->type;
    }

    /** Returns the manual language given at construction ('' by default). */
    public function getLanguage(): string
    {
        return $this->language;
    }
}
34 changes: 34 additions & 0 deletions src/QueryBuilder/ElasticQueryBuilder.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?php

declare(strict_types=1);

namespace App\QueryBuilder;

use App\Dto\Constraints;
use Elastica\Query;

/**
 * Translates a Constraints object into an Elastica query.
 */
class ElasticQueryBuilder
{
    /**
     * Builds a bool query containing one "match" clause under "must" for every
     * constraint that is not an empty string.
     *
     * When all constraints are empty the "must" list stays empty.
     */
    public function buildQuery(Constraints $constraints): Query
    {
        // Index field => requested value, in the order the clauses are emitted.
        $valuesByField = [
            'manual_slug' => $constraints->getSlug(),
            'manual_version' => $constraints->getVersion(),
            'manual_type' => $constraints->getType(),
            'manual_language' => $constraints->getLanguage(),
        ];

        $mustClauses = [];
        foreach ($valuesByField as $field => $value) {
            if ($value === '') {
                continue;
            }

            $mustClauses[] = ['match' => [$field => $value]];
        }

        return new Query(['query' => ['bool' => ['must' => $mustClauses]]]);
    }
}
32 changes: 30 additions & 2 deletions src/Repository/ElasticRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

namespace App\Repository;

use App\Dto\Constraints;
use App\Dto\Manual;
use App\Dto\SearchDemand;
use App\QueryBuilder\ElasticQueryBuilder;
use Elastica\Aggregation\Terms;
use Elastica\Client;
use Elastica\Exception\InvalidException;
Expand Down Expand Up @@ -33,7 +35,7 @@ class ElasticRepository

private readonly Client $elasticClient;

public function __construct()
public function __construct(private readonly ElasticQueryBuilder $elasticQueryBuilder)
{
$elasticConfig = $this->getElasticSearchConfig();

Expand Down Expand Up @@ -116,6 +118,28 @@ public function deleteByManual(Manual $manual): void
$this->elasticIndex->updateByQuery($deleteQuery, $script);
}

/**
 * Removes data matching the given constraints from the index.
 *
 * Without a version constraint, every matching document is deleted. With a version
 * constraint, an update-by-query is run instead, using the script returned by
 * getDeleteQueryScript() with the version passed as the "manual_version" parameter.
 *
 * @param Constraints $constraints Filter values; empty strings mean "not constrained"
 * @return int The "total" value reported in the response of the executed query (0 if absent).
 *             NOTE(review): for the version-scoped branch this presumably also counts
 *             documents that were only updated, not deleted - confirm against the
 *             Elasticsearch update-by-query response format.
 */
public function deleteByConstraints(Constraints $constraints): int
{
    $query = $this->elasticQueryBuilder->buildQuery($constraints);
    $requestOptions = ['wait_for_completion' => true];

    // If a specific manual version is provided, the goal is to remove only this version from
    // all associated snippets. In such cases, an update query is used instead of delete.
    // This approach ensures that if a snippet has no other versions remaining after the
    // removal of the specified one, the entire snippet is deleted. This deletion is
    // accomplished by setting ctx.op to "delete" in the provided script.
    //
    // The comparison is strict on purpose: a truthiness check would treat the version "0"
    // as "no version given" and delete whole documents, while the query builder (which
    // tests !== '') would still have filtered by that version.
    if ($constraints->getVersion() !== '') {
        $script = new Script(
            $this->getDeleteQueryScript(),
            ['manual_version' => $constraints->getVersion()],
            AbstractScript::LANG_PAINLESS
        );
        $response = $this->elasticIndex->updateByQuery($query, $script, $requestOptions);
    } else {
        $response = $this->elasticIndex->deleteByQuery($query, $requestOptions);
    }

    return (int) ($response->getData()['total'] ?? 0);
}

/**
* Provide elasticsearch script which removes version (provided in params) from a snippet
* and if this is the last version assigned to snippet, it deletes the snippet from index (by setting ctx.op).
Expand All @@ -126,7 +150,11 @@ protected function getDeleteQueryScript(): string
{
$script = <<<EOD
if (ctx._source.manual_version.contains(params.manual_version)) {
ctx._source.manual_version.remove(ctx._source.manual_version.indexOf(params.manual_version));
for (int i=ctx._source.manual_version.length-1; i>=0; i--) {
if (ctx._source.manual_version[i] == params.manual_version) {
ctx._source.manual_version.remove(i);
}
}
}
if (ctx._source.manual_version.size() == 0) {
ctx.op = "delete";
Expand Down
74 changes: 74 additions & 0 deletions tests/Unit/Command/IndexCleanerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

namespace App\Tests\Unit\Command;

use App\Command\IndexCleaner;
use App\Repository\ElasticRepository;
use PHPUnit\Framework\TestCase;
use Prophecy\Argument;
use Prophecy\PhpUnit\ProphecyTrait;
use Symfony\Component\Console\Tester\CommandTester;

/**
 * Verifies that the docsearch:index:delete command forwards its options to the
 * repository as Constraints and reports the returned count.
 */
class IndexCleanerTest extends TestCase
{
    use ProphecyTrait;

    /**
     * Datasets of command options and the deletion count the mocked repository returns.
     *
     * Option names must match those registered by IndexCleaner::configure()
     * (--manual-slug, --manual-version, --manual-type, --manual-language); an unknown
     * option makes the command fail before the repository is reached.
     *
     * Declared static so it works with both current and older PHPUnit releases.
     *
     * @return array<string, array{0: array<string, string>, 1: int}>
     */
    public static function usesConstraintsToPerformDeleteQueryDataProvider(): array
    {
        return [
            'All options' => [
                [
                    '--manual-slug' => 'm/typo3/reference-coreapi/12.4/en-us',
                    '--manual-version' => '12.4',
                    '--manual-type' => 'TYPO3 Manual',
                    '--manual-language' => 'en-us',
                ],
                10,
            ],
            'Some options' => [
                [
                    '--manual-slug' => 'm/typo3/reference-coreapi/12.4/en-us',
                    '--manual-type' => 'TYPO3 Manual',
                ],
                8,
            ],
            'Only slug' => [
                ['--manual-slug' => 'm/typo3/reference-coreapi/12.4/en-us'],
                5,
            ],
            'Only version' => [
                ['--manual-version' => '12.4'],
                3,
            ],
            'Only type' => [
                ['--manual-type' => 'TYPO3 Manual'],
                2,
            ],
            'Only language' => [
                ['--manual-language' => 'en-us'],
                1,
            ],
        ];
    }

    /**
     * @test
     * @dataProvider usesConstraintsToPerformDeleteQueryDataProvider
     */
    public function usesConstraintsToPerformDeleteQuery(array $options, int $expectedDeletions): void
    {
        // Options that are not passed fall back to the command's default of ''.
        $expectedSlug = $options['--manual-slug'] ?? '';
        $expectedVersion = $options['--manual-version'] ?? '';
        $expectedType = $options['--manual-type'] ?? '';
        $expectedLanguage = $options['--manual-language'] ?? '';

        // Checks the content of the Constraints, not only its type, so that a wrongly
        // wired option is detected.
        $matchesOptions = static fn (mixed $constraints): bool =>
            $constraints instanceof \App\Dto\Constraints
            && $constraints->getSlug() === $expectedSlug
            && $constraints->getVersion() === $expectedVersion
            && $constraints->getType() === $expectedType
            && $constraints->getLanguage() === $expectedLanguage;

        $elasticRepositoryProphecy = $this->prophesize(ElasticRepository::class);
        $elasticRepositoryProphecy
            ->deleteByConstraints(Argument::that($matchesOptions))
            ->shouldBeCalledTimes(1)
            ->willReturn($expectedDeletions);

        $command = new IndexCleaner($elasticRepositoryProphecy->reveal());
        $commandTester = new CommandTester($command);
        $commandTester->execute($options);
        $output = $commandTester->getDisplay(true);

        $this->assertSame(0, $commandTester->getStatusCode());
        $this->assertStringContainsString("Total of $expectedDeletions manuals were removed", $output);
    }
}
73 changes: 73 additions & 0 deletions tests/Unit/Dto/ConstraintsTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php

namespace App\Tests\Unit\Dto;

use App\Dto\Constraints;
use PHPUnit\Framework\TestCase;

/**
 * Covers construction of the Constraints DTO and each of its getters.
 */
class ConstraintsTest extends TestCase
{
    /**
     * Without arguments every getter yields an empty string.
     *
     * @test
     */
    public function canBeInstantiatedWithDefaultValues(): void
    {
        $subject = new Constraints();

        self::assertInstanceOf(Constraints::class, $subject);
        self::assertSame('', $subject->getSlug());
        self::assertSame('', $subject->getVersion());
        self::assertSame('', $subject->getType());
        self::assertSame('', $subject->getLanguage());
    }

    /**
     * Positional arguments map to slug, version, type and language, in that order.
     *
     * @test
     */
    public function canBeInstantiatedWithCustomValues(): void
    {
        $slug = 'm/typo3/reference-coreapi/12.4/en-us';
        $version = '12.4';
        $type = 'TYPO3 Manual';
        $language = 'en-us';

        $subject = new Constraints($slug, $version, $type, $language);

        self::assertInstanceOf(Constraints::class, $subject);
        self::assertSame($slug, $subject->getSlug());
        self::assertSame($version, $subject->getVersion());
        self::assertSame($type, $subject->getType());
        self::assertSame($language, $subject->getLanguage());
    }

    /**
     * @test
     */
    public function getSlugReturnsCorrectValue(): void
    {
        $slug = 'm/typo3/reference-coreapi/12.4/en-us';

        self::assertSame($slug, (new Constraints($slug))->getSlug());
    }

    /**
     * @test
     */
    public function getVersionReturnsCorrectValue(): void
    {
        $version = '12.4';

        self::assertSame($version, (new Constraints('', $version))->getVersion());
    }

    /**
     * @test
     */
    public function getTypeReturnsCorrectValue(): void
    {
        $type = 'TYPO3 Manual';

        self::assertSame($type, (new Constraints('', '', $type))->getType());
    }

    /**
     * @test
     */
    public function getLanguageReturnsCorrectValue(): void
    {
        $language = 'en-us';

        self::assertSame($language, (new Constraints('', '', '', $language))->getLanguage());
    }
}
Loading

0 comments on commit 19c0bf2

Please sign in to comment.