Skip to content

Commit

Permalink
Merge pull request #548 from matomo-org/PG-3704-continuous-import
Browse files Browse the repository at this point in the history
Fix to keep resume import from import_start_time and continue import pending dates, #PG-3704
  • Loading branch information
AltamashShaikh authored Sep 2, 2024
2 parents c721717 + a43cb15 commit de5d1ef
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 8 deletions.
21 changes: 18 additions & 3 deletions Commands/ImportGA4Reports.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,21 @@ protected function executeImpl() : int
$output->writeln(LogToSingleFileProcessor::$cliOutputPrefix . "Importing the following date ranges in order: " . $dateRangesText);
// NOTE: date ranges to reimport are handled first, then we go back to the main import (which could be
// continuous)

// If there are no date ranges to re-import and the import has reached the start date, check whether future dates still need to be imported and, if so, update the start and end date
if (count($dateRangesToImport) == 1 && !empty($status['last_day_archived']) && $status['last_day_archived'] === $status['import_range_start'] && $dates[1]->isLater(Date::factory('yesterday')) && !empty($status['import_start_time'])) {
$dateRangesToImport[0][0] = Date::factory($status['import_start_time'])->subDay(1);
$status['do_not_import_latest_dates_first'] = true;
$status['main_import_progress'] = $dateRangesToImport[0][0]->toString();
$importStatus->saveStatus($status);
}

foreach (array_values($dateRangesToImport) as $index => $datesToImport) {
$status = $importStatus->getImportStatus($idSite);
$isDoNotImportLatestDatesFirst = !empty($status['do_not_import_latest_dates_first']);
if ($isDoNotImportLatestDatesFirst) {
$status['last_date_imported'] = null;
}
// can change in the meantime, so we refetch
if (!is_array($datesToImport) || count($datesToImport) != 2) {
$output->writeln(LogToSingleFileProcessor::$cliOutputPrefix . "Found broken entry in date ranges to import (entry #{$index}) with improper type, skipping.");
Expand All @@ -218,7 +231,9 @@ protected function executeImpl() : int
if (!empty($lastDateImported) && $isFutureDateImport) {
$startDate = Date::factory($status['future_resume_date']);
} else {
if (!empty($lastDateImported) && Date::factory($lastDateImported)->subDay(1)->isEarlier($endDate)) {
if ($isDoNotImportLatestDatesFirst) {
$startDate = Date::factory($lastDateImported)->addDay(1);
} elseif (!empty($lastDateImported) && Date::factory($lastDateImported)->subDay(1)->isEarlier($endDate)) {
$endDate = Date::factory($lastDateImported)->subDay(1);
}
}
Expand All @@ -227,15 +242,15 @@ protected function executeImpl() : int
$importStatus->removeReImportEntry($idSite, $datesToImport);
continue;
}
if ($endDate->isEarlier($startDate)) {
if ($endDate->isEarlier($startDate) && !$isDoNotImportLatestDatesFirst) {
$output->writeln(LogToSingleFileProcessor::$cliOutputPrefix . "(Entry #{$index}) is finished, moving on.");
$importStatus->removeReImportEntry($idSite, $datesToImport);
continue;
}
$output->writeln(LogToSingleFileProcessor::$cliOutputPrefix . "Importing reports for date range {$startDate} - {$endDate} from GA property {$property}.");
try {
$importer->setIsMainImport($isMainImport);
$aborted = $importer->import($idSite, $property, $startDate, $endDate, $lock, '', $streamIds);
$aborted = $importer->import($idSite, $property, $startDate, $endDate, $lock, '', $streamIds, $isDoNotImportLatestDatesFirst);
if ($aborted == -1) {
$shouldFinishImportIfNothingLeft = \false;
}
Expand Down
4 changes: 2 additions & 2 deletions ImportStatus.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ public function getImportedDateRange($idSite)
}
return $dates;
}
public function dayImportFinished($idSite, Date $date, $isMainImport = \true)
public function dayImportFinished($idSite, Date $date, $isMainImport = \true, $skipRecentDateImportFirst = false)
{
$status = $this->getImportStatus($idSite);
$status['status'] = self::STATUS_ONGOING;
if (empty($status['last_date_imported']) || !Date::factory($status['last_date_imported'])->isEarlier($date) || !empty($status['future_resume_date']) && Date::factory($status['last_date_imported'])->isEarlier($date)) {
if (empty($status['last_date_imported']) || $skipRecentDateImportFirst || !Date::factory($status['last_date_imported'])->isEarlier($date) || !empty($status['future_resume_date']) && Date::factory($status['last_date_imported'])->isEarlier($date)) {
$status['last_date_imported'] = $date->toString();
$this->setImportedDateRange($idSite, $startDate = null, $date);
if ($isMainImport) {
Expand Down
24 changes: 21 additions & 3 deletions ImporterGA4.php
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ private function importCustomVariableSlots()
$command .= ' customvariables:set-max-custom-variables ' . $numCustomVarSlots;
passthru($command);
}
public function import($idSite, $propertyId, Date $start, Date $end, Lock $lock, $segment = '', $streamIds = [])
public function import($idSite, $propertyId, Date $start, Date $end, Lock $lock, $segment = '', $streamIds = [], $skipRecentDateImportFirst = false)
{
$date = null;
try {
Expand All @@ -314,7 +314,11 @@ public function import($idSite, $propertyId, Date $start, Date $end, Lock $lock,
}
$recordImporters = $this->getRecordImporters($idSite, $propertyId, $streamIds);
$site = new Site($idSite);
$dates = $this->getRecentDatesToImport($start, $endPlusOne, Date::today()->getTimestamp());
if (!$skipRecentDateImportFirst) {
$dates = $this->getRecentDatesToImport($start, $endPlusOne, Date::today()->getTimestamp());
} else {
$dates = $this->getDatesToImport($start, $end);
}
foreach ($dates as $date) {
if ($date->isToday() || $date->isLater(Date::yesterday())) {
$this->logger->info("Encountered Future Date while Importing data for GA4 Property {propertyID} for date {date}, the import would be stopped", ['viewId' => $propertyId, 'date' => $date->toString()]);
Expand All @@ -328,7 +332,7 @@ public function import($idSite, $propertyId, Date $start, Date $end, Lock $lock,
// force delete all tables in case they aren't all freed
\Piwik\DataTable\Manager::getInstance()->deleteAll();
}
$this->importStatus->dayImportFinished($idSite, $date, $this->isMainImport);
$this->importStatus->dayImportFinished($idSite, $date, $this->isMainImport, $skipRecentDateImportFirst);
}
$this->importStatus->finishImportIfNothingLeft($idSite);
unset($recordImporters);
Expand Down Expand Up @@ -591,4 +595,18 @@ public function getRecentDatesToImport(Date $startDate, Date $endPlusOne, $thres
}
return $dates;
}

/**
 * Builds the ascending list of days from $startDate through $endDate, both ends included.
 *
 * @param Date $startDate first day of the range
 * @param Date $endDate last day of the range (inclusive)
 * @return Date[] one entry per day, oldest first; empty when $startDate lies after $endDate
 */
public function getDatesToImport(Date $startDate, Date $endDate)
{
    $days = [];
    $current = $startDate;
    // Step forward one day at a time until the cursor moves past the end date.
    while ($current->getTimestamp() <= $endDate->getTimestamp()) {
        $days[] = $current;
        $current = $current->addDay(1);
    }
    return $days;
}
}
11 changes: 11 additions & 0 deletions tests/Integration/ImporterTestGA4.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ public function test_getRecentDatesToImport_Past()
}
$this->assertEquals(['2019-07-13', '2019-07-12', '2019-07-11', '2019-07-10', '2019-07-09', '2019-07-08', '2019-07-07'], $processed);
}
/**
 * getDatesToImport() must return every day of the range in ascending order,
 * including both the start and the end date.
 */
public function test_getDatesToImport()
{
    $rangeStart = Date::factory('2022-07-07');
    $rangeEnd = Date::factory('2022-07-13');

    // Reduce the returned Date objects to plain strings so they can be compared directly.
    $actual = array_map(static function ($day) {
        return $day->toString();
    }, $this->importer->getDatesToImport($rangeStart, $rangeEnd));

    $expected = ['2022-07-07', '2022-07-08', '2022-07-09', '2022-07-10', '2022-07-11', '2022-07-12', '2022-07-13'];
    $this->assertEquals($expected, $actual);
}
public function makeMockService()
{
return new \Piwik\Plugins\GoogleAnalyticsImporter\tests\Integration\MockGoogleServiceAnalytics($this);
Expand Down

0 comments on commit de5d1ef

Please sign in to comment.