From d8b8d23f7a807868216c29dc2f3bfa14801b1fb4 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Mon, 2 Sep 2024 16:57:31 +0200 Subject: [PATCH 01/14] add console script to sync duplicate places --- app/Console/Command/ImportDuplicatePlaces.php | 124 ++++++++++++++++++ app/Console/ConsoleServiceProvider.php | 12 ++ 2 files changed, 136 insertions(+) create mode 100644 app/Console/Command/ImportDuplicatePlaces.php diff --git a/app/Console/Command/ImportDuplicatePlaces.php b/app/Console/Command/ImportDuplicatePlaces.php new file mode 100644 index 0000000000..cfbc728553 --- /dev/null +++ b/app/Console/Command/ImportDuplicatePlaces.php @@ -0,0 +1,124 @@ +importDuplicatePlacesProcessor = $importDuplicatePlacesProcessor; + $this->dbalDuplicatePlaceRepository = $dbalDuplicatePlaceRepository; + $this->logger = $logger; + } + + public function configure(): void + { + $this + ->setName('place:duplicate-places:import') + ->setDescription('Import duplicate places from the import tables, set clusters ready for processing') + ->addOption( + self::FORCE, + null, + InputOption::VALUE_NONE, + 'Skip confirmation.' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output): ?int + { + $placesToImport = $this->dbalDuplicatePlaceRepository->howManyPlacesAreToBeImported(); + $clusterChangeResult = $this->dbalDuplicatePlaceRepository->calculateHowManyClustersHaveChanged(); + + if ($placesToImport === 0) { + $msg = 'Import duplicate places failed. Duplicate_places_import table is empty.'; + $this->logger->error($msg); + $output->writeln(sprintf('%s', $msg)); + return self::FAILURE; + } + + if ($clusterChangeResult->getPercentageClustersToRemove() === 0 && $clusterChangeResult->getPercentageNewClusters() === 0) { + $output->writeln('duplicate_places is already synced'); + return self::SUCCESS; + } + + //@todo This Confirmation message has very clunky language, feel free to improve! + if (!$this->askConfirmation( + $input, + $output, + sprintf( + 'This action will change a total of %d%% new cluster lines, and remove %d%% clusters, continue? [y/N] ', + $clusterChangeResult->getPercentageClustersToRemove(), + $clusterChangeResult->getPercentageNewClusters() + ) + )) { + return self::SUCCESS; + } + + if (($clusterChangeResult->getPercentageNewClusters() > self::MAX_SAFE_CHANGE_LIMIT) + && !$this->askConfirmation( + $input, + $output, + sprintf('%d%% of all clusters are to be changed. Are you sure you want to continue? [y/N] ', $clusterChangeResult->getPercentageNewClusters()) + ) + ) { + return self::SUCCESS; + } + + if (($clusterChangeResult->getPercentageClustersToRemove() > self::MAX_SAFE_CHANGE_LIMIT) + && !$this->askConfirmation( + $input, + $output, + sprintf('%d%% of all clusters will be removed. Are you sure you want to continue? [y/N] ', $clusterChangeResult->getPercentageClustersToRemove()) + ) + ) { + return self::SUCCESS; + } + + // Everything before this was just safety checks, below is the actual code that syncs duplicate places + $this->importDuplicatePlacesProcessor->sync(); + + $output->writeln('Duplicate places are synced. You probably want to run place:process-duplicates to process the clusters now.'); + + return self::SUCCESS; + } + + private function askConfirmation(InputInterface $input, OutputInterface $output, string $message): bool + { + if ($input->getOption(self::FORCE)) { + return true; + } + + return $this + ->getHelper('question') + ->ask( + $input, + $output, + new ConfirmationQuestion( + $message, + true + ) + ); + } +} diff --git a/app/Console/ConsoleServiceProvider.php b/app/Console/ConsoleServiceProvider.php index 8c94dcadb5..1c9d583d95 100644 --- a/app/Console/ConsoleServiceProvider.php +++ b/app/Console/ConsoleServiceProvider.php @@ -25,6 +25,7 @@ use CultuurNet\UDB3\Console\Command\GeocodeEventCommand; use CultuurNet\UDB3\Console\Command\GeocodeOrganizerCommand; use CultuurNet\UDB3\Console\Command\GeocodePlaceCommand; +use CultuurNet\UDB3\Console\Command\ImportDuplicatePlaces; use CultuurNet\UDB3\Console\Command\ImportMovieIdsFromCsv; use CultuurNet\UDB3\Console\Command\ImportOfferAutoClassificationLabels; use CultuurNet\UDB3\Console\Command\IncludeLabel; @@ -59,6 +60,7 @@ use CultuurNet\UDB3\Kinepolis\Trailer\YoutubeTrailerRepository; use CultuurNet\UDB3\Offer\OfferType; use CultuurNet\UDB3\Organizer\WebsiteNormalizer; +use CultuurNet\UDB3\Place\DuplicatePlace\ImportDuplicatePlacesProcessor; use CultuurNet\UDB3\Search\EventsSapi3SearchService; use CultuurNet\UDB3\Search\OrganizersSapi3SearchService; use CultuurNet\UDB3\Search\PlacesSapi3SearchService; @@ -83,6 +85,7 @@ final class ConsoleServiceProvider extends AbstractServiceProvider 'console.fire-projected-to-jsonld-for-relations', 'console.fire-projected-to-jsonld', 'console.place:process-duplicates', + 'console.place:duplicate-places:import', 'console.event:bulk-remove-from-production', 'console.event:reindex-offers-with-popularity', 'console.place:reindex-offers-with-popularity', @@ -258,6 +261,15 @@ function () use ($container) { ) ); + $container->addShared( + 'console.place:duplicate-places:import', + fn () => new ImportDuplicatePlaces( + $container->get('duplicate_place_repository'), + new ImportDuplicatePlacesProcessor($container->get('duplicate_place_repository')), + LoggerFactory::create($container, LoggerName::forCli()) + ) + ); + $container->addShared( 'console.event:bulk-remove-from-production', fn () => new BulkRemoveFromProduction($container->get('event_command_bus')) From 5e19166af7b661713d3fa40ef11a731d8fee2157 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Mon, 2 Sep 2024 16:59:40 +0200 Subject: [PATCH 02/14] Add database query support --- .../DBALDuplicatePlaceRepository.php | 33 +++++++++++++++++ .../Canonical/DuplicatePlaceRepository.php | 4 +++ .../Dto/ClusterChangeResult.php | 35 +++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 src/Place/DuplicatePlace/Dto/ClusterChangeResult.php diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index 0a2c838eb6..f74f86ccc5 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -4,6 +4,7 @@ namespace CultuurNet\UDB3\Place\Canonical; +use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterRecord; use Doctrine\DBAL\Connection; @@ -132,4 +133,36 @@ public function addToDuplicatePlaces(string $clusterId, string $placeUuid, strin ['cluster_id' => $clusterId, ':place_uuid' => $placeUuid, 'canonical' => $canonical] ); } + + public function calculateHowManyClustersHaveChanged(): ClusterChangeResult + { + $statement = $this->connection->executeQuery('SELECT + (not_in_duplicate / total_import * 100) AS percentage_not_in_duplicate, + (not_in_import / total_duplicate * 100) AS percentage_not_in_import + FROM + (SELECT COUNT(*) AS total_import FROM duplicate_places_import) AS import_total, + (SELECT COUNT(*) AS total_duplicate FROM duplicate_places) AS duplicate_total, + (SELECT COUNT(*) AS not_in_duplicate + FROM duplicate_places_import dpi + LEFT JOIN duplicate_places dp + ON dpi.cluster_id = dp.cluster_id AND dpi.place_uuid = dp.place_uuid + WHERE dp.cluster_id IS NULL) AS diff_import_to_duplicate, + (SELECT COUNT(*) AS not_in_import + FROM duplicate_places dp + LEFT JOIN duplicate_places_import dpi + ON dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid + WHERE dpi.cluster_id IS NULL) AS diff_duplicate_to_import;Ê + '); + + return ClusterChangeResult::fromArray($statement->fetchAssociative()); + } + + public function howManyPlacesAreToBeImported(): int + { + $statement = $this->connection->executeQuery('SELECT count(*) as total FROM duplicate_places_import'); + + $count = $statement->fetchAssociative(); + + return (int)$count['total']; + } } diff --git a/src/Place/Canonical/DuplicatePlaceRepository.php b/src/Place/Canonical/DuplicatePlaceRepository.php index efc67518cb..eebac3165d 100644 --- a/src/Place/Canonical/DuplicatePlaceRepository.php +++ b/src/Place/Canonical/DuplicatePlaceRepository.php @@ -4,6 +4,7 @@ namespace CultuurNet\UDB3\Place\Canonical; +use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterRecord; interface DuplicatePlaceRepository @@ -33,4 +34,7 @@ public function calculateNoLongerInCluster(): array; public function calculateNotYetInCluster(): array; public function addToDuplicatePlaces(string $clusterId, string $placeUuid, string $canonical=null): void; + + public function calculateHowManyClustersHaveChanged(): ClusterChangeResult; + public function howManyPlacesAreToBeImported(): int; } diff --git a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php new file mode 100644 index 0000000000..37d775add0 --- /dev/null +++ b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php @@ -0,0 +1,35 @@ +percentageNewClusters = $percentageNewClusters; + $this->percentageClustersToRemove = $percentageClustersToRemove; + } + + public function getPercentageNewClusters(): int + { + return $this->percentageNewClusters; + } + + public function getPercentageClustersToRemove(): int + { + return $this->percentageClustersToRemove; + } + + public static function fromArray(array $array): self + { + return new self( + (int)round($array['percentage_not_in_duplicate']), + (int)round($array['percentage_not_in_import']) + ); + } +} From 51b9c7a1cc86572abd38c1cba8112d84724835c5 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Mon, 2 Sep 2024 17:03:41 +0200 Subject: [PATCH 03/14] Add tests --- .../Command/ImportDuplicatePlacesTest.php | 141 ++++++++++++++++++ .../DBALDuplicatePlaceRepositoryTest.php | 84 +++++++++++ 2 files changed, 225 insertions(+) create mode 100644 tests/Console/Command/ImportDuplicatePlacesTest.php diff --git a/tests/Console/Command/ImportDuplicatePlacesTest.php b/tests/Console/Command/ImportDuplicatePlacesTest.php new file mode 100644 index 0000000000..869c921621 --- /dev/null +++ b/tests/Console/Command/ImportDuplicatePlacesTest.php @@ -0,0 +1,141 @@ +dbalDuplicatePlaceRepository = $this->createMock(DBALDuplicatePlaceRepository::class); + $this->importDuplicatePlacesProcessor = $this->createMock(ImportDuplicatePlacesProcessor::class); + $this->logger = $this->createMock(LoggerInterface::class); + $this->input = $this->createMock(InputInterface::class); + $this->output = $this->createMock(OutputInterface::class); + + $this->command = new ImportDuplicatePlaces( + $this->dbalDuplicatePlaceRepository, + $this->importDuplicatePlacesProcessor, + $this->logger + ); + } + + public function testExecuteFailsWhenImportTableIsEmpty(): void + { + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('howManyPlacesAreToBeImported') + ->willReturn(0); + + $this->logger + ->expects($this->once()) + ->method('error') + ->with('Import duplicate places failed. Duplicate_places_import table is empty.'); + + $this->output + ->expects($this->once()) + ->method('writeln') + ->with('Import duplicate places failed. Duplicate_places_import table is empty.'); + + $this->assertEquals(1, $this->command->run($this->input, $this->output)); + } + + public function testExecuteSucceedsWhenTablesAreAlreadySynced(): void + { + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('howManyPlacesAreToBeImported') + ->willReturn(10); + + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('calculateHowManyClustersHaveChanged') + ->willReturn(new ClusterChangeResult(0, 0)); + + $this->output + ->expects($this->once()) + ->method('writeln') + ->with('duplicate_places is already synced'); + + $this->assertEquals(0, $this->command->run($this->input, $this->output)); + } + + public function testExecuteConfirmsAndSyncsWhenChangesAreWithinLimits(): void + { + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('howManyPlacesAreToBeImported') + ->willReturn(10); + + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('calculateHowManyClustersHaveChanged') + ->willReturn(new ClusterChangeResult(50, 30)); + + $helper = $this->createMock(QuestionHelper::class); + $helper->expects($this->once()) + ->method('ask') + ->willReturn(true); + $this->command->setHelperSet(new HelperSet(['question' => $helper])); + + $this->importDuplicatePlacesProcessor + ->expects($this->once()) + ->method('sync'); + + $this->output + ->expects($this->once()) + ->method('writeln') + ->with('Duplicate places are synced. You probably want to run place:process-duplicates to process the clusters now.'); + + $this->assertEquals(0, $this->command->run($this->input, $this->output)); + } + + public function testExecuteFailsWhenChangesExceedLimitAndConfirmationIsDenied(): void + { + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('howManyPlacesAreToBeImported') + ->willReturn(10); + + $this->dbalDuplicatePlaceRepository + ->expects($this->once()) + ->method('calculateHowManyClustersHaveChanged') + ->willReturn(new ClusterChangeResult(80, 20)); + + $helper = $this->createMock(QuestionHelper::class); + $helper->expects($this->exactly(2)) + ->method('ask') + ->willReturnOnConsecutiveCalls(true, false); + $this->command->setHelperSet(new HelperSet(['question' => $helper])); + + $this->importDuplicatePlacesProcessor + ->expects($this->never()) + ->method('sync'); + + $this->assertEquals(0, $this->command->run($this->input, $this->output)); + } +} diff --git a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php index 8ee2d733b4..3dfa5b946d 100644 --- a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php +++ b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php @@ -5,6 +5,7 @@ namespace CultuurNet\UDB3\Place\Canonical; use CultuurNet\UDB3\DBALTestConnectionTrait; +use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterRecord; use PHPUnit\Framework\TestCase; use Ramsey\Uuid\Uuid; @@ -200,4 +201,87 @@ public function test_add_to_duplicate_places(): void $raw = $this->connection->fetchAllAssociative('select * from duplicate_places where cluster_id = 5'); $this->assertEquals([['cluster_id' => 5, 'place_uuid' => '0671f7a3-8301-43da-bf56-03c5c1e33332', 'canonical' => null]], $raw); } + + /** @dataProvider clusterChangesDataProvider */ + public function test_calculate_how_many_clusters_have_changed(array $clusters, int $percentageNotInDuplicate, int $percentageNotInImport): void + { + foreach ($clusters as [$clusterId, $placeUuid]) { + $this->getConnection()->insert( + 'duplicate_places_import', + [ + 'cluster_id' => $clusterId, + 'place_uuid' => $placeUuid, + ] + ); + } + + $result = $this->duplicatePlaceRepository->calculateHowManyClustersHaveChanged(); + + $this->assertEquals(new ClusterChangeResult($percentageNotInDuplicate, $percentageNotInImport), $result); + } + + public static function clusterChangesDataProvider(): array + { + return [ + 'everything is new' => [ + [ + + ], + 0, + 100, + ], + 'Some new, some removed' => [ + [ + ['cluster_1', '19ce6565-76be-425d-94d6-894f84dd2947'], + ['cluster_1', '1accbcfb-3b22-4762-bc13-be0f67fd3116'], + ['new', '04a549ba-6e5e-433b-9601-07b7a809758e'], + ], + 33, + 60, + ], + 'Nothing has changed' => [ + [ + ['cluster_1', '19ce6565-76be-425d-94d6-894f84dd2947'], + ['cluster_1', '1accbcfb-3b22-4762-bc13-be0f67fd3116'], + ['cluster_1', '526605d3-7cc4-4607-97a4-065896253f42'], + ['cluster_2', '4a355db3-c3f9-4acc-8093-61b333a3aefb'], + ['cluster_2', '64901efc-6bd7-4e9d-8916-fcdeb5b1c8ad'], + ], + 0, + 0, + ], + 'Everything single place has been moved' => [ + [ + ['5', '19ce6565-76be-425d-94d6-894f84dd2947'], + ['5', '1accbcfb-3b22-4762-bc13-be0f67fd3116'], + ['5', '526605d3-7cc4-4607-97a4-065896253f42'], + ['5', '4a355db3-c3f9-4acc-8093-61b333a3aefb'], + ['5', '64901efc-6bd7-4e9d-8916-fcdeb5b1c8ad'], + ], + 100, + 100, + ], + ]; + } + + public function test_how_many_places_are_to_be_imported(): void + { + $this->getConnection()->insert( + 'duplicate_places_import', + [ + 'cluster_id' => 'cluster_1', + 'place_uuid' => '19ce6565-76be-425d-94d6-894f84dd2947', + ] + ); + $this->getConnection()->insert( + 'duplicate_places_import', + [ + 'cluster_id' => 'cluster_1', + 'place_uuid' => '1accbcfb-3b22-4762-bc13-be0f67fd3116', + ] + ); + + $count = $this->duplicatePlaceRepository->howManyPlacesAreToBeImported(); + $this->assertEquals(2, $count); + } } From 24bdfe83a57a7c866c050c316b6a8bfaeef0ebd1 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Mon, 2 Sep 2024 17:10:23 +0200 Subject: [PATCH 04/14] Make sure round cna handle NULL --- src/Place/DuplicatePlace/Dto/ClusterChangeResult.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php index 37d775add0..c4e18b8bab 100644 --- a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php +++ b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php @@ -28,8 +28,8 @@ public function getPercentageClustersToRemove(): int public static function fromArray(array $array): self { return new self( - (int)round($array['percentage_not_in_duplicate']), - (int)round($array['percentage_not_in_import']) + (int)round($array['percentage_not_in_duplicate'] ?? 0), + (int)round($array['percentage_not_in_import'] ?? 0) ); } } From b011482af9e98207307808c4e03ec5097a9e2237 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Mon, 2 Sep 2024 17:15:15 +0200 Subject: [PATCH 05/14] Make sure round cna handle NULL --- src/Place/DuplicatePlace/Dto/ClusterChangeResult.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php index c4e18b8bab..559d469ca9 100644 --- a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php +++ b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php @@ -28,8 +28,8 @@ public function getPercentageClustersToRemove(): int public static function fromArray(array $array): self { return new self( - (int)round($array['percentage_not_in_duplicate'] ?? 0), - (int)round($array['percentage_not_in_import'] ?? 0) + (int)round((float)$array['percentage_not_in_duplicate']), + (int)round((float)$array['percentage_not_in_import']) ); } } From 95fe43533b492c79655a3880948d35e21e073d40 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Tue, 3 Sep 2024 13:00:55 +0200 Subject: [PATCH 06/14] Give back absolute numbers instead of percentages in script --- app/Console/Command/ImportDuplicatePlaces.php | 33 ++++--------------- .../DBALDuplicatePlaceRepository.php | 8 ++--- .../Dto/ClusterChangeResult.php | 20 +++++------ .../Command/ImportDuplicatePlacesTest.php | 30 ----------------- .../DBALDuplicatePlaceRepositoryTest.php | 10 +++--- 5 files changed, 24 insertions(+), 77 deletions(-) diff --git a/app/Console/Command/ImportDuplicatePlaces.php b/app/Console/Command/ImportDuplicatePlaces.php index cfbc728553..035b58e486 100644 --- a/app/Console/Command/ImportDuplicatePlaces.php +++ b/app/Console/Command/ImportDuplicatePlaces.php @@ -16,7 +16,7 @@ class ImportDuplicatePlaces extends BaseCommand { private const FORCE = 'force'; - private const MAX_SAFE_CHANGE_LIMIT = 70; + private ImportDuplicatePlacesProcessor $importDuplicatePlacesProcessor; private DBALDuplicatePlaceRepository $dbalDuplicatePlaceRepository; private LoggerInterface $logger; @@ -58,48 +58,27 @@ protected function execute(InputInterface $input, OutputInterface $output): ?int return self::FAILURE; } - if ($clusterChangeResult->getPercentageClustersToRemove() === 0 && $clusterChangeResult->getPercentageNewClusters() === 0) { + if ($clusterChangeResult->getAmountRemovedClusters() === 0 && $clusterChangeResult->getAmountNewClusters() === 0) { $output->writeln('duplicate_places is already synced'); return self::SUCCESS; } - //@todo This Confirmation message has very clunky language, feel free to improve! if (!$this->askConfirmation( $input, $output, sprintf( - 'This action will change a total of %d%% new cluster lines, and remove %d%% clusters, continue? [y/N] ', - $clusterChangeResult->getPercentageClustersToRemove(), - $clusterChangeResult->getPercentageNewClusters() + 'This action will change a total of %d new places, and remove %d places from the duplicate places table. Do you want to continue? [y/N] ', + $clusterChangeResult->getAmountNewClusters(), + $clusterChangeResult->getAmountRemovedClusters(), ) )) { return self::SUCCESS; } - if (($clusterChangeResult->getPercentageNewClusters() > self::MAX_SAFE_CHANGE_LIMIT) - && !$this->askConfirmation( - $input, - $output, - sprintf('%d%% of all clusters are to be changed. Are you sure you want to continue? [y/N] ', $clusterChangeResult->getPercentageNewClusters()) - ) - ) { - return self::SUCCESS; - } - - if (($clusterChangeResult->getPercentageClustersToRemove() > self::MAX_SAFE_CHANGE_LIMIT) - && !$this->askConfirmation( - $input, - $output, - sprintf('%d%% of all clusters will be removed. Are you sure you want to continue? [y/N] ', $clusterChangeResult->getPercentageClustersToRemove()) - ) - ) { - return self::SUCCESS; - } - // Everything before this was just safety checks, below is the actual code that syncs duplicate places $this->importDuplicatePlacesProcessor->sync(); - $output->writeln('Duplicate places are synced. You probably want to run place:process-duplicates to process the clusters now.'); + $output->writeln('Duplicate places are synced and old clusters where removed. You probably want to run place:process-duplicates to give canonicals to the new clusters now.'); return self::SUCCESS; } diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index f74f86ccc5..ae0e93ebb2 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -137,11 +137,9 @@ public function addToDuplicatePlaces(string $clusterId, string $placeUuid, strin public function calculateHowManyClustersHaveChanged(): ClusterChangeResult { $statement = $this->connection->executeQuery('SELECT - (not_in_duplicate / total_import * 100) AS percentage_not_in_duplicate, - (not_in_import / total_duplicate * 100) AS percentage_not_in_import + not_in_duplicate, + not_in_import FROM - (SELECT COUNT(*) AS total_import FROM duplicate_places_import) AS import_total, - (SELECT COUNT(*) AS total_duplicate FROM duplicate_places) AS duplicate_total, (SELECT COUNT(*) AS not_in_duplicate FROM duplicate_places_import dpi LEFT JOIN duplicate_places dp @@ -151,7 +149,7 @@ public function calculateHowManyClustersHaveChanged(): ClusterChangeResult FROM duplicate_places dp LEFT JOIN duplicate_places_import dpi ON dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid - WHERE dpi.cluster_id IS NULL) AS diff_duplicate_to_import;Ê + WHERE dpi.cluster_id IS NULL) AS diff_duplicate_to_import; '); return ClusterChangeResult::fromArray($statement->fetchAssociative()); diff --git a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php index 559d469ca9..c8903ca3f4 100644 --- a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php +++ b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php @@ -6,30 +6,30 @@ class ClusterChangeResult { - private int $percentageNewClusters; - private int $percentageClustersToRemove; + private int $amountNewClusters; + private int $amountRemovedClusters; public function __construct(int $percentageNewClusters, int $percentageClustersToRemove) { - $this->percentageNewClusters = $percentageNewClusters; - $this->percentageClustersToRemove = $percentageClustersToRemove; + $this->amountNewClusters = $percentageNewClusters; + $this->amountRemovedClusters = $percentageClustersToRemove; } - public function getPercentageNewClusters(): int + public function getAmountNewClusters(): int { - return $this->percentageNewClusters; + return $this->amountNewClusters; } - public function getPercentageClustersToRemove(): int + public function getAmountRemovedClusters(): int { - return $this->percentageClustersToRemove; + return $this->amountRemovedClusters; } public static function fromArray(array $array): self { return new self( - (int)round((float)$array['percentage_not_in_duplicate']), - (int)round((float)$array['percentage_not_in_import']) + (int)round((float)$array['not_in_duplicate']), + (int)round((float)$array['not_in_import']) ); } } diff --git a/tests/Console/Command/ImportDuplicatePlacesTest.php b/tests/Console/Command/ImportDuplicatePlacesTest.php index 869c921621..e65c3616ad 100644 --- a/tests/Console/Command/ImportDuplicatePlacesTest.php +++ b/tests/Console/Command/ImportDuplicatePlacesTest.php @@ -106,36 +106,6 @@ public function testExecuteConfirmsAndSyncsWhenChangesAreWithinLimits(): void ->expects($this->once()) ->method('sync'); - $this->output - ->expects($this->once()) - ->method('writeln') - ->with('Duplicate places are synced. You probably want to run place:process-duplicates to process the clusters now.'); - - $this->assertEquals(0, $this->command->run($this->input, $this->output)); - } - - public function testExecuteFailsWhenChangesExceedLimitAndConfirmationIsDenied(): void - { - $this->dbalDuplicatePlaceRepository - ->expects($this->once()) - ->method('howManyPlacesAreToBeImported') - ->willReturn(10); - - $this->dbalDuplicatePlaceRepository - ->expects($this->once()) - ->method('calculateHowManyClustersHaveChanged') - ->willReturn(new ClusterChangeResult(80, 20)); - - $helper = $this->createMock(QuestionHelper::class); - $helper->expects($this->exactly(2)) - ->method('ask') - ->willReturnOnConsecutiveCalls(true, false); - $this->command->setHelperSet(new HelperSet(['question' => $helper])); - - $this->importDuplicatePlacesProcessor - ->expects($this->never()) - ->method('sync'); - $this->assertEquals(0, $this->command->run($this->input, $this->output)); } } diff --git a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php index 3dfa5b946d..b27b05ae59 100644 --- a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php +++ b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php @@ -228,7 +228,7 @@ public static function clusterChangesDataProvider(): array ], 0, - 100, + 5, ], 'Some new, some removed' => [ [ @@ -236,8 +236,8 @@ public static function clusterChangesDataProvider(): array ['cluster_1', '1accbcfb-3b22-4762-bc13-be0f67fd3116'], ['new', '04a549ba-6e5e-433b-9601-07b7a809758e'], ], - 33, - 60, + 1, + 3, ], 'Nothing has changed' => [ [ @@ -258,8 +258,8 @@ public static function clusterChangesDataProvider(): array ['5', '4a355db3-c3f9-4acc-8093-61b333a3aefb'], ['5', '64901efc-6bd7-4e9d-8916-fcdeb5b1c8ad'], ], - 100, - 100, + 5, + 5, ], ]; } From 380772f3484dc7b72dabc044224a2ec3016b8c50 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Wed, 4 Sep 2024 15:19:55 +0200 Subject: [PATCH 07/14] use QueryBuilder / cleanup --- .../DBALDuplicatePlaceRepository.php | 35 ++++++++++--------- .../Dto/ClusterChangeResult.php | 14 ++------ 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index ae0e93ebb2..f8e881c74c 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -136,23 +136,24 @@ public function addToDuplicatePlaces(string $clusterId, string $placeUuid, strin public function calculateHowManyClustersHaveChanged(): ClusterChangeResult { - $statement = $this->connection->executeQuery('SELECT - not_in_duplicate, - not_in_import - FROM - (SELECT COUNT(*) AS not_in_duplicate - FROM duplicate_places_import dpi - LEFT JOIN duplicate_places dp - ON dpi.cluster_id = dp.cluster_id AND dpi.place_uuid = dp.place_uuid - WHERE dp.cluster_id IS NULL) AS diff_import_to_duplicate, - (SELECT COUNT(*) AS not_in_import - FROM duplicate_places dp - LEFT JOIN duplicate_places_import dpi - ON dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid - WHERE dpi.cluster_id IS NULL) AS diff_duplicate_to_import; - '); - - return ClusterChangeResult::fromArray($statement->fetchAssociative()); + // Subquery 1: COUNT from `duplicate_places_import` not present in `duplicate_places` + $qb1 = $this->connection->createQueryBuilder(); + $qb1->select('COUNT(*) AS not_in_duplicate') + ->from('duplicate_places_import', 'dpi') + ->leftJoin('dpi', 'duplicate_places', 'dp', 'dpi.cluster_id = dp.cluster_id AND dpi.place_uuid = dp.place_uuid') + ->where('dp.cluster_id IS NULL'); + + // Subquery 2: COUNT from `duplicate_places` not present in `duplicate_places_import` + $qb2 = $this->connection->createQueryBuilder(); + $qb2->select('COUNT(*) AS not_in_import') + ->from('duplicate_places', 'dp') + ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid') + ->where('dpi.cluster_id IS NULL'); + + return new ClusterChangeResult( + $qb1->execute()->fetchOne(), + $qb2->execute()->fetchOne() + ); } public function howManyPlacesAreToBeImported(): int diff --git a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php index c8903ca3f4..4381c4ef6f 100644 --- a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php +++ b/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php @@ -9,10 +9,10 @@ class ClusterChangeResult private int $amountNewClusters; private int $amountRemovedClusters; - public function __construct(int $percentageNewClusters, int $percentageClustersToRemove) + public function __construct(int $amountNewClusters, int $amountRemovedClusters) { - $this->amountNewClusters = $percentageNewClusters; - $this->amountRemovedClusters = $percentageClustersToRemove; + $this->amountNewClusters = $amountNewClusters; + $this->amountRemovedClusters = $amountRemovedClusters; } public function getAmountNewClusters(): int @@ -24,12 +24,4 @@ public function getAmountRemovedClusters(): int { return $this->amountRemovedClusters; } - - public static function fromArray(array $array): self - { - return new self( - (int)round((float)$array['not_in_duplicate']), - (int)round((float)$array['not_in_import']) - ); - } } From aaec824f8e2ad5c79c43da3aa3153e69d384ebd9 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Wed, 4 Sep 2024 15:22:44 +0200 Subject: [PATCH 08/14] linter --- app/Console/ConsoleServiceProvider.php | 1 - src/Place/Canonical/DuplicatePlaceRepository.php | 1 - 2 files changed, 2 deletions(-) diff --git a/app/Console/ConsoleServiceProvider.php b/app/Console/ConsoleServiceProvider.php index b819123fcb..7f3a03157f 100644 --- a/app/Console/ConsoleServiceProvider.php +++ b/app/Console/ConsoleServiceProvider.php @@ -27,7 +27,6 @@ use CultuurNet\UDB3\Console\Command\GeocodeOrganizerCommand; use CultuurNet\UDB3\Console\Command\GeocodePlaceCommand; use CultuurNet\UDB3\Console\Command\ImportDuplicatePlaces; -use CultuurNet\UDB3\Console\Command\ImportMovieIdsFromCsv; use CultuurNet\UDB3\Console\Command\ImportOfferAutoClassificationLabels; use CultuurNet\UDB3\Console\Command\IncludeLabel; use CultuurNet\UDB3\Console\Command\KeycloakCommand; diff --git a/src/Place/Canonical/DuplicatePlaceRepository.php b/src/Place/Canonical/DuplicatePlaceRepository.php index 7cbc722d9b..5e34059d04 100644 --- a/src/Place/Canonical/DuplicatePlaceRepository.php +++ b/src/Place/Canonical/DuplicatePlaceRepository.php @@ -5,7 +5,6 @@ namespace CultuurNet\UDB3\Place\Canonical; use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; -use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterRecord; interface DuplicatePlaceRepository { From 154ac298de11439dd359be8cb9c042604e993e47 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Wed, 4 Sep 2024 15:29:56 +0200 Subject: [PATCH 09/14] Make calculateHowManyClustersHaveChanged work with QueryBuilder --- .../ClustersDiffResult.php} | 4 ++-- src/Place/Canonical/DBALDuplicatePlaceRepository.php | 9 ++++----- src/Place/Canonical/DuplicatePlaceRepository.php | 5 ++--- .../DuplicatePlace/ImportDuplicatePlacesProcessor.php | 0 tests/Console/Command/ImportDuplicatePlacesTest.php | 8 ++++---- .../Place/Canonical/DBALDuplicatePlaceRepositoryTest.php | 3 +-- 6 files changed, 13 insertions(+), 16 deletions(-) rename src/Place/{DuplicatePlace/Dto/ClusterChangeResult.php => Canonical/ClustersDiffResult.php} (87%) delete mode 100644 src/Place/DuplicatePlace/ImportDuplicatePlacesProcessor.php diff --git a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php b/src/Place/Canonical/ClustersDiffResult.php similarity index 87% rename from src/Place/DuplicatePlace/Dto/ClusterChangeResult.php rename to src/Place/Canonical/ClustersDiffResult.php index 4381c4ef6f..713c629641 100644 --- a/src/Place/DuplicatePlace/Dto/ClusterChangeResult.php +++ b/src/Place/Canonical/ClustersDiffResult.php @@ -2,9 +2,9 @@ declare(strict_types=1); -namespace CultuurNet\UDB3\Place\DuplicatePlace\Dto; +namespace CultuurNet\UDB3\Place\Canonical; -class ClusterChangeResult +class ClustersDiffResult { private int $amountNewClusters; private int $amountRemovedClusters; diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index cf6c4b361d..f6b43a1abe 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -4,7 +4,6 @@ namespace CultuurNet\UDB3\Place\Canonical; -use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; use Doctrine\DBAL\Connection; class DBALDuplicatePlaceRepository implements DuplicatePlaceRepository @@ -147,7 +146,7 @@ public function addToDuplicatePlaces(ClusterRecordRow $clusterRecordRow): void ->execute(); } - public function calculateHowManyClustersHaveChanged(): ClusterChangeResult + public function calculateHowManyClustersHaveChanged(): ClustersDiffResult { // Subquery 1: COUNT from `duplicate_places_import` not present in `duplicate_places` $qb1 = $this->connection->createQueryBuilder(); @@ -163,9 +162,9 @@ public function calculateHowManyClustersHaveChanged(): ClusterChangeResult ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid') ->where('dpi.cluster_id IS NULL'); - return new ClusterChangeResult( - $qb1->execute()->fetchOne(), - $qb2->execute()->fetchOne() + return new ClustersDiffResult( + (int)round((float)$qb1->execute()->fetchOne()), + (int)round((float)$qb2->execute()->fetchOne()), ); } diff --git a/src/Place/Canonical/DuplicatePlaceRepository.php b/src/Place/Canonical/DuplicatePlaceRepository.php index 5e34059d04..298fd1836f 100644 --- a/src/Place/Canonical/DuplicatePlaceRepository.php +++ b/src/Place/Canonical/DuplicatePlaceRepository.php @@ -4,8 +4,6 @@ namespace CultuurNet\UDB3\Place\Canonical; -use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; - interface DuplicatePlaceRepository { /** @@ -28,10 +26,11 @@ public function getPlacesNoLongerInCluster(): array; /** @return ClusterRecordRow[] */ public function getClustersToImport(): array; + public function getClustersToBeRemoved(): array; public function addToDuplicatePlaces(ClusterRecordRow $clusterRecordRow): void; public function deleteCluster(string $clusterId): void; - public function calculateHowManyClustersHaveChanged(): ClusterChangeResult; + public function calculateHowManyClustersHaveChanged(): ClustersDiffResult; public function howManyPlacesAreToBeImported(): int; } diff --git a/src/Place/DuplicatePlace/ImportDuplicatePlacesProcessor.php b/src/Place/DuplicatePlace/ImportDuplicatePlacesProcessor.php deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/Console/Command/ImportDuplicatePlacesTest.php b/tests/Console/Command/ImportDuplicatePlacesTest.php index e65c3616ad..6d7657be91 100644 --- a/tests/Console/Command/ImportDuplicatePlacesTest.php +++ b/tests/Console/Command/ImportDuplicatePlacesTest.php @@ -4,9 +4,9 @@ namespace CultuurNet\UDB3\Console\Command; +use CultuurNet\UDB3\Place\Canonical\ClustersDiffResult; use CultuurNet\UDB3\Place\Canonical\DBALDuplicatePlaceRepository; -use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; -use CultuurNet\UDB3\Place\DuplicatePlace\ImportDuplicatePlacesProcessor; +use CultuurNet\UDB3\Place\Canonical\ImportDuplicatePlacesProcessor; use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\TestCase; use Psr\Log\LoggerInterface; @@ -74,7 +74,7 @@ public function testExecuteSucceedsWhenTablesAreAlreadySynced(): void $this->dbalDuplicatePlaceRepository ->expects($this->once()) ->method('calculateHowManyClustersHaveChanged') - ->willReturn(new ClusterChangeResult(0, 0)); + ->willReturn(new ClustersDiffResult(0, 0)); $this->output ->expects($this->once()) @@ -94,7 +94,7 @@ public function testExecuteConfirmsAndSyncsWhenChangesAreWithinLimits(): void $this->dbalDuplicatePlaceRepository ->expects($this->once()) ->method('calculateHowManyClustersHaveChanged') - ->willReturn(new ClusterChangeResult(50, 30)); + ->willReturn(new ClustersDiffResult(50, 30)); $helper = $this->createMock(QuestionHelper::class); $helper->expects($this->once()) diff --git a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php index fcb6835b1b..e6523184bb 100644 --- a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php +++ b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php @@ -5,7 +5,6 @@ namespace CultuurNet\UDB3\Place\Canonical; use CultuurNet\UDB3\DBALTestConnectionTrait; -use CultuurNet\UDB3\Place\DuplicatePlace\Dto\ClusterChangeResult; use PHPUnit\Framework\TestCase; class DBALDuplicatePlaceRepositoryTest extends TestCase @@ -268,7 +267,7 @@ public function test_calculate_how_many_clusters_have_changed(array $clusters, i $result = $this->duplicatePlaceRepository->calculateHowManyClustersHaveChanged(); - $this->assertEquals(new ClusterChangeResult($percentageNotInDuplicate, $percentageNotInImport), $result); + $this->assertEquals(new ClustersDiffResult($percentageNotInDuplicate, $percentageNotInImport), $result); } public static function clusterChangesDataProvider(): array From 36dd62ad6f84e1e16fe675ebf85557d5d1c44209 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Wed, 4 Sep 2024 15:58:03 +0200 Subject: [PATCH 10/14] removed ClustersDiffResult object, and split up calculate method in 2 different methods --- app/Console/Command/ImportDuplicatePlaces.php | 26 ++++--------- app/Console/ConsoleServiceProvider.php | 8 ++-- src/Place/Canonical/ClustersDiffResult.php | 27 ------------- .../DBALDuplicatePlaceRepository.php | 37 +++++++++--------- .../Canonical/DuplicatePlaceRepository.php | 5 ++- .../Command/ImportDuplicatePlacesTest.php | 38 ++++--------------- .../DBALDuplicatePlaceRepositoryTest.php | 32 +++++++++++++--- 7 files changed, 67 insertions(+), 106 deletions(-) delete mode 100644 src/Place/Canonical/ClustersDiffResult.php diff --git a/app/Console/Command/ImportDuplicatePlaces.php b/app/Console/Command/ImportDuplicatePlaces.php index 035b58e486..2f7c901304 100644 --- a/app/Console/Command/ImportDuplicatePlaces.php +++ b/app/Console/Command/ImportDuplicatePlaces.php @@ -5,7 +5,7 @@ namespace CultuurNet\UDB3\Console\Command; use CultuurNet\UDB3\Place\Canonical\DBALDuplicatePlaceRepository; -use CultuurNet\UDB3\Place\DuplicatePlace\ImportDuplicatePlacesProcessor; +use CultuurNet\UDB3\Place\Canonical\ImportDuplicatePlacesProcessor; use Psr\Log\LoggerInterface; use Symfony\Component\Console\Command\Command as BaseCommand; use Symfony\Component\Console\Input\InputInterface; @@ -19,18 +19,15 @@ class ImportDuplicatePlaces extends BaseCommand private ImportDuplicatePlacesProcessor $importDuplicatePlacesProcessor; private DBALDuplicatePlaceRepository $dbalDuplicatePlaceRepository; - private LoggerInterface $logger; public function __construct( DBALDuplicatePlaceRepository $dbalDuplicatePlaceRepository, - ImportDuplicatePlacesProcessor $importDuplicatePlacesProcessor, - LoggerInterface $logger + ImportDuplicatePlacesProcessor $importDuplicatePlacesProcessor ) { parent::__construct(); $this->importDuplicatePlacesProcessor = $importDuplicatePlacesProcessor; $this->dbalDuplicatePlaceRepository = $dbalDuplicatePlaceRepository; - $this->logger = $logger; } public function configure(): void @@ -48,17 +45,10 @@ public function configure(): void protected function execute(InputInterface $input, OutputInterface $output): ?int { - $placesToImport = $this->dbalDuplicatePlaceRepository->howManyPlacesAreToBeImported(); - $clusterChangeResult = $this->dbalDuplicatePlaceRepository->calculateHowManyClustersHaveChanged(); - - if ($placesToImport === 0) { - $msg = 'Import duplicate places failed. Duplicate_places_import table is empty.'; - $this->logger->error($msg); - $output->writeln(sprintf('%s', $msg)); - return self::FAILURE; - } + $howManyPlacesAreToBeImported = $this->dbalDuplicatePlaceRepository->howManyPlacesAreToBeImported(); + $howManyPlacesAreToBeDeleted = $this->dbalDuplicatePlaceRepository->howManyPlacesAreToBeDeleted(); - if ($clusterChangeResult->getAmountRemovedClusters() === 0 && $clusterChangeResult->getAmountNewClusters() === 0) { + if ($howManyPlacesAreToBeImported === 0 && $howManyPlacesAreToBeDeleted === 0) { $output->writeln('duplicate_places is already synced'); return self::SUCCESS; } @@ -67,9 +57,9 @@ protected function execute(InputInterface $input, OutputInterface $output): ?int $input, $output, sprintf( - 'This action will change a total of %d new places, and remove %d places from the duplicate places table. Do you want to continue? [y/N] ', - $clusterChangeResult->getAmountNewClusters(), - $clusterChangeResult->getAmountRemovedClusters(), + 'This action will sync a total of %d new places, and remove %d places from the duplicate places table. Do you want to continue? [y/N] ', + $howManyPlacesAreToBeImported, + $howManyPlacesAreToBeDeleted, ) )) { return self::SUCCESS; diff --git a/app/Console/ConsoleServiceProvider.php b/app/Console/ConsoleServiceProvider.php index 7f3a03157f..8f1b7fa31e 100644 --- a/app/Console/ConsoleServiceProvider.php +++ b/app/Console/ConsoleServiceProvider.php @@ -60,7 +60,7 @@ use CultuurNet\UDB3\Kinepolis\Trailer\YoutubeTrailerRepository; use CultuurNet\UDB3\Offer\OfferType; use CultuurNet\UDB3\Organizer\WebsiteNormalizer; -use CultuurNet\UDB3\Place\DuplicatePlace\ImportDuplicatePlacesProcessor; +use CultuurNet\UDB3\Place\Canonical\ImportDuplicatePlacesProcessor; use CultuurNet\UDB3\Place\Canonical\DuplicatePlaceRemovedFromClusterRepository; use CultuurNet\UDB3\Search\EventsSapi3SearchService; use CultuurNet\UDB3\Search\OrganizersSapi3SearchService; @@ -268,8 +268,10 @@ function () use ($container) { 'console.place:duplicate-places:import', fn () => new ImportDuplicatePlaces( $container->get('duplicate_place_repository'), - new ImportDuplicatePlacesProcessor($container->get('duplicate_place_repository')), - LoggerFactory::create($container, LoggerName::forCli()) + new ImportDuplicatePlacesProcessor( + $container->get('duplicate_place_repository'), + $container->get(DuplicatePlaceRemovedFromClusterRepository::class) + ) ) ); diff --git a/src/Place/Canonical/ClustersDiffResult.php b/src/Place/Canonical/ClustersDiffResult.php deleted file mode 100644 index 713c629641..0000000000 --- a/src/Place/Canonical/ClustersDiffResult.php +++ /dev/null @@ -1,27 +0,0 @@ -amountNewClusters = $amountNewClusters; - $this->amountRemovedClusters = $amountRemovedClusters; - } - - public function getAmountNewClusters(): int - { - return $this->amountNewClusters; - } - - public function getAmountRemovedClusters(): int - { - return $this->amountRemovedClusters; - } -} diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index f6b43a1abe..69135bc6fa 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -146,34 +146,31 @@ public function addToDuplicatePlaces(ClusterRecordRow $clusterRecordRow): void ->execute(); } - public function calculateHowManyClustersHaveChanged(): ClustersDiffResult + public function howManyPlacesAreToBeImported(): int { - // Subquery 1: COUNT from `duplicate_places_import` not present in `duplicate_places` - $qb1 = $this->connection->createQueryBuilder(); - $qb1->select('COUNT(*) AS not_in_duplicate') + // COUNT from `duplicate_places_import` not present in `duplicate_places` + $result = $this->connection->createQueryBuilder() + ->select('COUNT(*) AS not_in_duplicate') ->from('duplicate_places_import', 'dpi') ->leftJoin('dpi', 'duplicate_places', 'dp', 'dpi.cluster_id = dp.cluster_id AND dpi.place_uuid = dp.place_uuid') - ->where('dp.cluster_id IS NULL'); + ->where('dp.cluster_id IS NULL') + ->execute(); - // Subquery 2: COUNT from `duplicate_places` not present in `duplicate_places_import` - $qb2 = $this->connection->createQueryBuilder(); - $qb2->select('COUNT(*) AS not_in_import') - ->from('duplicate_places', 'dp') - ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid') - ->where('dpi.cluster_id IS NULL'); + $a = $result->fetchOne(); - return new ClustersDiffResult( - (int)round((float)$qb1->execute()->fetchOne()), - (int)round((float)$qb2->execute()->fetchOne()), - ); + return (int)($a ?? 0); } - public function howManyPlacesAreToBeImported(): int + public function howManyPlacesAreToBeDeleted(): int { - $statement = $this->connection->executeQuery('SELECT count(*) as total FROM duplicate_places_import'); - - $count = $statement->fetchAssociative(); + // COUNT from `duplicate_places` not present in `duplicate_places_import` + $result = $this->connection->createQueryBuilder() + ->select('COUNT(*) AS not_in_import') + ->from('duplicate_places', 'dp') + ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid') + ->where('dpi.cluster_id IS NULL') + ->execute(); - return (int)$count['total']; + return (int)($result->fetchOne() ?? 0); } } diff --git a/src/Place/Canonical/DuplicatePlaceRepository.php b/src/Place/Canonical/DuplicatePlaceRepository.php index 298fd1836f..c24171d29f 100644 --- a/src/Place/Canonical/DuplicatePlaceRepository.php +++ b/src/Place/Canonical/DuplicatePlaceRepository.php @@ -26,11 +26,14 @@ public function getPlacesNoLongerInCluster(): array; /** @return ClusterRecordRow[] */ public function getClustersToImport(): array; + public function getClustersToBeRemoved(): array; public function addToDuplicatePlaces(ClusterRecordRow $clusterRecordRow): void; + public function deleteCluster(string $clusterId): void; - public function calculateHowManyClustersHaveChanged(): ClustersDiffResult; public function howManyPlacesAreToBeImported(): int; + + public function howManyPlacesAreToBeDeleted(): int; } diff --git a/tests/Console/Command/ImportDuplicatePlacesTest.php b/tests/Console/Command/ImportDuplicatePlacesTest.php index 6d7657be91..371f4458ff 100644 --- a/tests/Console/Command/ImportDuplicatePlacesTest.php +++ b/tests/Console/Command/ImportDuplicatePlacesTest.php @@ -21,8 +21,6 @@ class ImportDuplicatePlacesTest extends TestCase private $dbalDuplicatePlaceRepository; /** @var ImportDuplicatePlacesProcessor|MockObject */ private $importDuplicatePlacesProcessor; - /** @var LoggerInterface|MockObject */ - private $logger; /** @var InputInterface|MockObject */ private $input; /** @var OutputInterface|MockObject */ @@ -33,48 +31,26 @@ protected function setUp(): void { $this->dbalDuplicatePlaceRepository = $this->createMock(DBALDuplicatePlaceRepository::class); $this->importDuplicatePlacesProcessor = $this->createMock(ImportDuplicatePlacesProcessor::class); - $this->logger = $this->createMock(LoggerInterface::class); $this->input = $this->createMock(InputInterface::class); $this->output = $this->createMock(OutputInterface::class); $this->command = new ImportDuplicatePlaces( $this->dbalDuplicatePlaceRepository, - $this->importDuplicatePlacesProcessor, - $this->logger + $this->importDuplicatePlacesProcessor ); } - public function testExecuteFailsWhenImportTableIsEmpty(): void - { - $this->dbalDuplicatePlaceRepository - ->expects($this->once()) - ->method('howManyPlacesAreToBeImported') - ->willReturn(0); - - $this->logger - ->expects($this->once()) - ->method('error') - ->with('Import duplicate places failed. Duplicate_places_import table is empty.'); - - $this->output - ->expects($this->once()) - ->method('writeln') - ->with('Import duplicate places failed. Duplicate_places_import table is empty.'); - - $this->assertEquals(1, $this->command->run($this->input, $this->output)); - } - public function testExecuteSucceedsWhenTablesAreAlreadySynced(): void { $this->dbalDuplicatePlaceRepository ->expects($this->once()) ->method('howManyPlacesAreToBeImported') - ->willReturn(10); + ->willReturn(0); $this->dbalDuplicatePlaceRepository ->expects($this->once()) - ->method('calculateHowManyClustersHaveChanged') - ->willReturn(new ClustersDiffResult(0, 0)); + ->method('howManyPlacesAreToBeDeleted') + ->willReturn(0); $this->output ->expects($this->once()) @@ -89,12 +65,12 @@ public function testExecuteConfirmsAndSyncsWhenChangesAreWithinLimits(): void $this->dbalDuplicatePlaceRepository ->expects($this->once()) ->method('howManyPlacesAreToBeImported') - ->willReturn(10); + ->willReturn(50); $this->dbalDuplicatePlaceRepository ->expects($this->once()) - ->method('calculateHowManyClustersHaveChanged') - ->willReturn(new ClustersDiffResult(50, 30)); + ->method('howManyPlacesAreToBeDeleted') + ->willReturn(30); $helper = $this->createMock(QuestionHelper::class); $helper->expects($this->once()) diff --git a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php index e6523184bb..459346fbd4 100644 --- a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php +++ b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php @@ -253,7 +253,7 @@ public function test_add_to_duplicate_places(): void } /** @dataProvider clusterChangesDataProvider */ - public function test_calculate_how_many_clusters_have_changed(array $clusters, int $percentageNotInDuplicate, int $percentageNotInImport): void + public function test_calculate_how_many_clusters_have_changed(array $clusters, int $expectedPlacesTobeImported, int $expectedPlacesTobeDeleted): void { foreach ($clusters as [$clusterId, $placeUuid]) { $this->getConnection()->insert( @@ -265,9 +265,8 @@ public function test_calculate_how_many_clusters_have_changed(array $clusters, i ); } - $result = $this->duplicatePlaceRepository->calculateHowManyClustersHaveChanged(); - - $this->assertEquals(new ClustersDiffResult($percentageNotInDuplicate, $percentageNotInImport), $result); + $this->assertEquals($expectedPlacesTobeImported, $this->duplicatePlaceRepository->howManyPlacesAreToBeImported()); + $this->assertEquals($expectedPlacesTobeDeleted, $this->duplicatePlaceRepository->howManyPlacesAreToBeDeleted()); } public static function clusterChangesDataProvider(): array @@ -319,14 +318,14 @@ public function test_how_many_places_are_to_be_imported(): void $this->getConnection()->insert( 'duplicate_places_import', [ - 'cluster_id' => 'cluster_1', + 'cluster_id' => 'my_brand_new_cluster', 'place_uuid' => '19ce6565-76be-425d-94d6-894f84dd2947', ] ); $this->getConnection()->insert( 'duplicate_places_import', [ - 'cluster_id' => 'cluster_1', + 'cluster_id' => 'my_brand_new_cluster', 'place_uuid' => '1accbcfb-3b22-4762-bc13-be0f67fd3116', ] ); @@ -334,4 +333,25 @@ public function test_how_many_places_are_to_be_imported(): void $count = $this->duplicatePlaceRepository->howManyPlacesAreToBeImported(); $this->assertEquals(2, $count); } + + public function test_how_many_places_are_to_be_deleted(): void + { + $this->getConnection()->insert( + 'duplicate_places_import', + [ + 'cluster_id' => 'cluster_2', + 'place_uuid' => '4a355db3-c3f9-4acc-8093-61b333a3aefb', + ] + ); + $this->getConnection()->insert( + 'duplicate_places_import', + [ + 'cluster_id' => 'cluster_2', + 'place_uuid' => '64901efc-6bd7-4e9d-8916-fcdeb5b1c8ad', + ] + ); + + $count = $this->duplicatePlaceRepository->howManyPlacesAreToBeDeleted(); + $this->assertEquals(3, $count); + } } From e0d937789083d51bc966d4fd92ac27fdac10d3d2 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Thu, 5 Sep 2024 14:04:34 +0200 Subject: [PATCH 11/14] Update app/Console/Command/ImportDuplicatePlaces.php Co-authored-by: Luc Wollants --- app/Console/Command/ImportDuplicatePlaces.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Console/Command/ImportDuplicatePlaces.php b/app/Console/Command/ImportDuplicatePlaces.php index a3f71eb6a5..ee3d1c41c7 100644 --- a/app/Console/Command/ImportDuplicatePlaces.php +++ b/app/Console/Command/ImportDuplicatePlaces.php @@ -67,7 +67,7 @@ protected function execute(InputInterface $input, OutputInterface $output): ?int // Everything before this was just safety checks, below is the actual code that syncs duplicate places $this->importDuplicatePlacesProcessor->sync(); - $output->writeln('Duplicate places are synced and old clusters where removed. You probably want to run place:process-duplicates to give canonicals to the new clusters now.'); + $output->writeln('Duplicate places were synced and old clusters were removed. You probably want to run place:process-duplicates to give canonicals to the new clusters now.'); return self::SUCCESS; } From b4c9c66c2266010623e463f28a53bfe6853bf214 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Thu, 5 Sep 2024 14:04:52 +0200 Subject: [PATCH 12/14] Update app/Console/Command/ImportDuplicatePlaces.php Co-authored-by: Simon de Bruijn <66943525+simon-debruijn@users.noreply.github.com> --- app/Console/Command/ImportDuplicatePlaces.php | 1 - 1 file changed, 1 deletion(-) diff --git a/app/Console/Command/ImportDuplicatePlaces.php b/app/Console/Command/ImportDuplicatePlaces.php index ee3d1c41c7..418362952f 100644 --- a/app/Console/Command/ImportDuplicatePlaces.php +++ b/app/Console/Command/ImportDuplicatePlaces.php @@ -64,7 +64,6 @@ protected function execute(InputInterface $input, OutputInterface $output): ?int return self::SUCCESS; } - // Everything before this was just safety checks, below is the actual code that syncs duplicate places $this->importDuplicatePlacesProcessor->sync(); $output->writeln('Duplicate places were synced and old clusters were removed. You probably want to run place:process-duplicates to give canonicals to the new clusters now.'); From 6d3746e4f1cb49a0fad1a7a9159e1ea23658c259 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Thu, 5 Sep 2024 16:30:41 +0200 Subject: [PATCH 13/14] Reace howManyPlacesAreToBeDeleted with getPlacesNoLongerInCluster --- .../DBALDuplicatePlaceRepository.php | 41 ++++++------------- .../Canonical/DuplicatePlaceRepository.php | 2 - .../Command/ImportDuplicatePlacesTest.php | 9 ++-- .../DBALDuplicatePlaceRepositoryTest.php | 25 +---------- 4 files changed, 20 insertions(+), 57 deletions(-) diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index 446eab0328..df35bbf02b 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -124,6 +124,19 @@ public function getPlacesWithCluster(): array }, $statement->fetchAllAssociative()); } + public function howManyPlacesAreToBeImported(): int + { + // COUNT from `duplicate_places_import` not present in `duplicate_places` + $result = $this->connection->createQueryBuilder() + ->select('COUNT(*) AS not_in_duplicate') + ->from('duplicate_places_import', 'dpi') + ->leftJoin('dpi', 'duplicate_places', 'dp', 'dpi.cluster_id = dp.cluster_id AND dpi.place_uuid = dp.place_uuid') + ->where('dp.cluster_id IS NULL') + ->execute(); + + return (int)($result->fetchOne() ?? 0); + } + public function deleteCluster(string $clusterId): void { $this->connection->createQueryBuilder() @@ -147,32 +160,4 @@ public function addToDuplicatePlaces(PlaceWithCluster $clusterRecordRow): void ]) ->execute(); } - - public function howManyPlacesAreToBeImported(): int - { - // COUNT from `duplicate_places_import` not present in `duplicate_places` - $result = $this->connection->createQueryBuilder() - ->select('COUNT(*) AS not_in_duplicate') - ->from('duplicate_places_import', 'dpi') - ->leftJoin('dpi', 'duplicate_places', 'dp', 'dpi.cluster_id = dp.cluster_id AND dpi.place_uuid = dp.place_uuid') - ->where('dp.cluster_id IS NULL') - ->execute(); - - $a = $result->fetchOne(); - - return (int)($a ?? 0); - } - - public function howManyPlacesAreToBeDeleted(): int - { - // COUNT from `duplicate_places` not present in `duplicate_places_import` - $result = $this->connection->createQueryBuilder() - ->select('COUNT(*) AS not_in_import') - ->from('duplicate_places', 'dp') - ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.cluster_id = dpi.cluster_id AND dp.place_uuid = dpi.place_uuid') - ->where('dpi.cluster_id IS NULL') - ->execute(); - - return (int)($result->fetchOne() ?? 0); - } } diff --git a/src/Place/Canonical/DuplicatePlaceRepository.php b/src/Place/Canonical/DuplicatePlaceRepository.php index 324125f025..c03427464b 100644 --- a/src/Place/Canonical/DuplicatePlaceRepository.php +++ b/src/Place/Canonical/DuplicatePlaceRepository.php @@ -33,6 +33,4 @@ public function addToDuplicatePlaces(PlaceWithCluster $clusterRecordRow): void; public function deleteCluster(string $clusterId): void; public function howManyPlacesAreToBeImported(): int; - - public function howManyPlacesAreToBeDeleted(): int; } diff --git a/tests/Console/Command/ImportDuplicatePlacesTest.php b/tests/Console/Command/ImportDuplicatePlacesTest.php index 47e0a4b751..05bb6ec701 100644 --- a/tests/Console/Command/ImportDuplicatePlacesTest.php +++ b/tests/Console/Command/ImportDuplicatePlacesTest.php @@ -8,6 +8,7 @@ use CultuurNet\UDB3\Place\Canonical\ImportDuplicatePlacesProcessor; use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\TestCase; +use Ramsey\Uuid\Uuid; use Symfony\Component\Console\Helper\HelperSet; use Symfony\Component\Console\Helper\QuestionHelper; use Symfony\Component\Console\Input\InputInterface; @@ -47,8 +48,8 @@ public function testExecuteSucceedsWhenTablesAreAlreadySynced(): void $this->dbalDuplicatePlaceRepository ->expects($this->once()) - ->method('howManyPlacesAreToBeDeleted') - ->willReturn(0); + ->method('getPlacesNoLongerInCluster') + ->willReturn([]); $this->output ->expects($this->once()) @@ -67,8 +68,8 @@ public function testExecuteConfirmsAndSyncsWhenChangesAreWithinLimits(): void $this->dbalDuplicatePlaceRepository ->expects($this->once()) - ->method('howManyPlacesAreToBeDeleted') - ->willReturn(30); + ->method('getPlacesNoLongerInCluster') + ->willReturn([Uuid::uuid4()]); $helper = $this->createMock(QuestionHelper::class); $helper->expects($this->once()) diff --git a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php index 5b7ff392d3..af0f1c8b8e 100644 --- a/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php +++ b/tests/Place/Canonical/DBALDuplicatePlaceRepositoryTest.php @@ -266,7 +266,7 @@ public function test_calculate_how_many_clusters_have_changed(array $clusters, i } $this->assertEquals($expectedPlacesTobeImported, $this->duplicatePlaceRepository->howManyPlacesAreToBeImported()); - $this->assertEquals($expectedPlacesTobeDeleted, $this->duplicatePlaceRepository->howManyPlacesAreToBeDeleted()); + $this->assertCount($expectedPlacesTobeDeleted, $this->duplicatePlaceRepository->getPlacesNoLongerInCluster()); } public static function clusterChangesDataProvider(): array @@ -308,7 +308,7 @@ public static function clusterChangesDataProvider(): array ['5', '64901efc-6bd7-4e9d-8916-fcdeb5b1c8ad'], ], 5, - 5, + 0, ], ]; } @@ -333,25 +333,4 @@ public function test_how_many_places_are_to_be_imported(): void $count = $this->duplicatePlaceRepository->howManyPlacesAreToBeImported(); $this->assertEquals(2, $count); } - - public function test_how_many_places_are_to_be_deleted(): void - { - $this->getConnection()->insert( - 'duplicate_places_import', - [ - 'cluster_id' => 'cluster_2', - 'place_uuid' => '4a355db3-c3f9-4acc-8093-61b333a3aefb', - ] - ); - $this->getConnection()->insert( - 'duplicate_places_import', - [ - 'cluster_id' => 'cluster_2', - 'place_uuid' => '64901efc-6bd7-4e9d-8916-fcdeb5b1c8ad', - ] - ); - - $count = $this->duplicatePlaceRepository->howManyPlacesAreToBeDeleted(); - $this->assertEquals(3, $count); - } } From 0ea4de6bb768bc3ce9f55a6ba13464fc15236882 Mon Sep 17 00:00:00 2001 From: Koen Eelen Date: Thu, 5 Sep 2024 16:37:06 +0200 Subject: [PATCH 14/14] remove howManyPlacesAreToBeDeleted --- app/Console/Command/ImportDuplicatePlaces.php | 2 +- .../DBALDuplicatePlaceRepository.php | 24 ++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/app/Console/Command/ImportDuplicatePlaces.php b/app/Console/Command/ImportDuplicatePlaces.php index 418362952f..82cc30439c 100644 --- a/app/Console/Command/ImportDuplicatePlaces.php +++ b/app/Console/Command/ImportDuplicatePlaces.php @@ -45,7 +45,7 @@ public function configure(): void protected function execute(InputInterface $input, OutputInterface $output): ?int { $howManyPlacesAreToBeImported = $this->dbalDuplicatePlaceRepository->howManyPlacesAreToBeImported(); - $howManyPlacesAreToBeDeleted = $this->dbalDuplicatePlaceRepository->howManyPlacesAreToBeDeleted(); + $howManyPlacesAreToBeDeleted = count($this->dbalDuplicatePlaceRepository->getPlacesNoLongerInCluster()); if ($howManyPlacesAreToBeImported === 0 && $howManyPlacesAreToBeDeleted === 0) { $output->writeln('duplicate_places is already synced'); diff --git a/src/Place/Canonical/DBALDuplicatePlaceRepository.php b/src/Place/Canonical/DBALDuplicatePlaceRepository.php index df35bbf02b..a9180538c4 100644 --- a/src/Place/Canonical/DBALDuplicatePlaceRepository.php +++ b/src/Place/Canonical/DBALDuplicatePlaceRepository.php @@ -80,18 +80,7 @@ public function getDuplicatesOfPlace(string $placeId): ?array return count($duplicates) > 0 ? $duplicates : null; } - public function getPlacesNoLongerInCluster(): array - { - // All places that do not exist in duplicate_places_import - $statement = $this->connection->createQueryBuilder() - ->select('DISTINCT dp.place_uuid') - ->from('duplicate_places', 'dp') - ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.place_uuid = dpi.place_uuid') - ->where('dpi.place_uuid IS NULL') - ->execute(); - return $statement->fetchFirstColumn(); - } public function getClustersToBeRemoved(): array { @@ -137,6 +126,19 @@ public function howManyPlacesAreToBeImported(): int return (int)($result->fetchOne() ?? 0); } + public function getPlacesNoLongerInCluster(): array + { + // All places that do not exist in duplicate_places_import + $statement = $this->connection->createQueryBuilder() + ->select('DISTINCT dp.place_uuid') + ->from('duplicate_places', 'dp') + ->leftJoin('dp', 'duplicate_places_import', 'dpi', 'dp.place_uuid = dpi.place_uuid') + ->where('dpi.place_uuid IS NULL') + ->execute(); + + return $statement->fetchFirstColumn(); + } + public function deleteCluster(string $clusterId): void { $this->connection->createQueryBuilder()