/*
Description:
This script ensures an organized, metadata-enriched photo collection by automating backup processes, securing digital memories both locally and in the cloud, and maintaining an up-to-date database of photo backups.
- Automate photo URL scraping from Google Photos (scrapeAllGooglePhotosUrls)
- Scan all local images (scanDirectory)
- Utilize playwright-extra for browser automation and sqlite3 for robust data management (chromium, sqlite3)
- Read and write photo metadata directly to files or to sidecar files, based on user preferences (writeMetadataToFile, readMetadataFromFile)
- Support maximum photo limits, enable full disk rescans, facilitate incremental Google Photos backups, and allow for drop-and-create database table options (main, setupDatabase)
- Offer customizable settings for headless browsing, download paths, database initialization, and metadata writing options (main)
- Implement notification system for backup completion and errors (logSkippedEntry)
- For a given google photos image, search the db to see if there's a file on-disk whose metadata has the google photos URL in it, indicating that it's already been backed up (checkIfAllUrlsAreBackedUp)
- Include options for data deduplication to avoid backing up duplicates (handleDuplicateFiles)
High level functional overview:
- main() is the entry point, orchestrating the entire backup process. From here, the execution branches out to several key functions, each performing a distinct part of the workflow. The auxiliary functions are called by these key functions as needed.
- setupDatabase() prepares the database for operation. This function doesn't directly call any auxiliary functions but sets up the foundation for data storage and retrieval.
- scrapeAllGooglePhotosUrls(maxPics, db) is called if new Google Photos URLs need to be scraped. It does not directly call auxiliary functions within its description but interacts with the database and the browser session to scrape URLs.
- scanDirectory(startPath, db) runs if scrapeFiles is true and scans the local filesystem for images. It calls:
- addFileToDb(absolutePath, db) for each image found, to add their paths and metadata to the database. addFileToDb then calls:
- readMetadataFromFile(filePath) to extract metadata from each image or its sidecar file, which is used to populate database entries.
- checkIfAllUrlsAreBackedUp(db, thoroughness) is executed to update the backup status of URLs in the database. It uses:
- updateUrlBackupStatus(url, db) to check if a URL is backed up and update its status accordingly.
- incrementalBackup(db) is performed to download new photos from Google Photos and update their backup status. It involves several steps and auxiliary functions:
- navigateToUrl(url) to navigate to a photo's URL (the page object is module-level).
- downloadPhoto(db) to manage the photo download process, which then calls:
- openInfoPanelIfClosed() to ensure the photo's information panel is open for metadata extraction.
- dispatchMouseEvents(buttonSelector) to simulate mouse events for clicking the download button.
- downloadToTempLocation() to handle the actual download operation.
- writeMetadataToFile(finalPath, people, url, dateTimeOriginal, description, writeSidecarMetadata, writeFileMetadata) to write or update the photo's metadata in the file or a sidecar file.
- Finally, browser.close() is called to close the browser session once all operations are completed.*/
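// Illustrative only: an uncalled sketch of the orchestration described above,
// wired from the flags defined below. The real main() appears later in this
// file and is what actually runs; the startPath for scanDirectory is assumed
// here to be the download folder.
async function mainFlowSketch() {
  const db = await setupDatabase();
  if (scrapeGooglePhotos) await scrapeAllGooglePhotosUrls(maxPics, db);
  if (scrapeFiles) await scanDirectory(downloadPath, db); // startPath assumed
  await checkIfAllUrlsAreBackedUp(db, checkBackupThoroughness);
  if (performIncrementalBackup) await incrementalBackup(db);
  await waitForQueueToDrain();
  await browser.close();
}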
import { chromium } from 'playwright-extra';
import path from 'path';
import sqlite3 from 'sqlite3';
import { moveFile } from 'move-file';
import { open } from 'sqlite';
import { exiftool } from 'exiftool-vendored';
import { existsSync } from 'fs';
import fs from 'fs';
import stealth from 'puppeteer-extra-plugin-stealth';
import fsP from 'node:fs/promises';
import { DateTime } from 'luxon';
import StreamZip from 'node-stream-zip';
const userDataDir = './session';
const downloadPath = './download';
const logErrorName = 'runtime-errors.log';
const writeSidecarMetadata = true; //write the metadata to an XMP file
const writeFileMetadata = true; //modify the file to add metadata via EXIF
const maxPics = 0; //For testing. End after grabbing this many URLs (0 for no max)
const dropAndCreateFileTable = false; //will fully rescan the disk for backups. Takes ~1h.
const dropAndCreatePhotosTable = false; //setting to true means you will need to re-scrape the entire google photos collection, which is very slow (~1 day). Only way to pick up insertions where the date is not recent, though.
const scrapeGooglePhotos = true; // if true, looks for new images on google photos. Checks the latest images and gives up once it finds several photos in a row that are already backed up, so it's quick. Use with dropAndCreatePhotosTable if you want to rebuild entirely.
const scrapeFiles = false; // if true, scans the disk for which images are already there. Not incremental: it fully rescans every time, but only adds entries, never changes or removes them. Use with dropAndCreateFileTable=true to fully refresh.
const performIncrementalBackup = true; //Back up all the new URLs to disk. Usually do this.
const debug = true; //halts on errors, writes out page html on each page to disk for debugging, etc
const checkBackupThoroughness = 'unchecked' // 'unchecked', 'unchecked-and-no', 'all'
const validFileExtensions = ['.avif', '.bmp', '.dng', '.gif', '.heic', '.ico', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.webp', '.raw', '.3gp', '.3g2', '.asf', '.avi', '.divx', '.m2t', '.m2ts', '.m4v', '.mkv', '.mmv', '.mod', '.mov', '.mp4', '.mpg', '.mts', '.nef', '.tod', '.wmv', '.zip'];
import winston from 'winston';
import DailyRotateFile from 'winston-daily-rotate-file';
function setupLogging() {
const logger = winston.createLogger({
level: 'debug',
format: winston.format.combine(
winston.format.timestamp(),
winston.format.printf(info => `${info.timestamp} - ${info.level} - ${info.message}`)
),
transports: [
new DailyRotateFile({
filename: 'logs/google-photos-backup-%DATE%.log',
datePattern: 'YYYY-MM-DD',
zippedArchive: false,
maxSize: '20m',
maxFiles: '10',
prepend: true, // Deprecated in newer versions, included for reference
level: 'debug'
}),
new winston.transports.File({
filename: 'logs/' + logErrorName,
level: 'error'
}),
new winston.transports.Console({
level: 'info',
format: winston.format.simple()
}),
],
});
return logger;
}
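// With the printf format above, a log line looks like (illustrative):
//   2024-05-01T12:00:00.000Z - info - Navigated to Google Photos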
// Let's log
const logger = setupLogging();
logger.error('Logger started.\n\n\n'); //"error" level so that it shows up in both logs
export { setupLogging }; // in case other modules want to use this later
//record total runtime
const startTime = Date.now();
chromium.use(stealth());
let browser = null;
let page = null;
async function launchBrowser() {
browser = await chromium.launchPersistentContext(path.resolve(userDataDir), {
headless: false,
acceptDownloads: true,
channel: 'chrome', // possible values: chrome, msedge, and chromium
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
}
// Navigate to a URL without resetting the browser or page
async function navigateToUrl(url) {
let navigationSuccessful = true; // Assume navigation is successful unless proven otherwise
try {
await page.goto(url, { waitUntil: 'domcontentloaded' }); // Navigate to the URL and wait for content to load
} catch (error) {
logger.error(`Failed to navigate to URL: ${url}. Error: ${error}`);
navigationSuccessful = false; // Update status if navigation fails
}
return navigationSuccessful; // Return the status of the navigation attempt
};
// Function to reset the browser and page, made global for accessibility
async function resetBrowser(url = null) {
await browser.close();
await launchBrowser();
page = await browser.newPage();
if (url) {
const navigationSuccessful = await navigateToUrl(url); // Use navigateToUrl function to navigate to the URL
if (!navigationSuccessful) {
throw new Error(`Failed to navigate to URL in resetBrowser(): ${url}`);
}
}
}
// Function to reset the current tab and optionally navigate to a new URL
async function resetPage(url = null) {
logger.info(`Resetting page. Requested URL: ${url}`);
const originalUrl = page.url();
// Close the current tab & create a new one
await page.close();
page = await browser.newPage();
// If a URL is provided, navigate to it. Otherwise, navigate to the original URL.
if (url) {
const navigationSuccessful = await navigateToUrl(url); // Use navigateToUrl function to navigate to the URL
if (!navigationSuccessful) {
throw new Error(`Failed to navigate to new URL in resetPage(): ${url}`);
}
} else {
const navigationSuccessful = await navigateToUrl(originalUrl);
if (!navigationSuccessful) {
throw new Error(`Failed to refresh the current page in resetPage(): ${originalUrl}`);
}
}
}
// Add all photos from google photos to the db.
// Go to the google photos homepage, keep clicking right arrow to select the next image, and stash each URL in the db.
async function scrapeAllGooglePhotosUrls(maxPics, db) {
await resetBrowser('https://photos.google.com');
// After resetBrowser, we are guaranteed to have navigated successfully or thrown an error
logger.info('Successfully navigated to https://photos.google.com.');
// Check if redirected to google.com/photos/about
if (page.url() === 'https://www.google.com/photos/about/') {
throw new Error("You don't appear to be logged in.\nFrom the command line, run 'node setup.js' in this directory. Log in, then close the browser. Then run this again.");
return;
}
logger.info('Navigated to Google Photos');
let currentUrl = '';
let previousUrl = '';
let imageUrlsScraped = 0;
let sameUrlCount = 0;
const sameUrlCountMax = 5; // abort if you see the same URL this many times in a row
let alreadyBackedUpCount = 0;
const alreadyBackedUpCountMax = 10; //abort when you see this many photos in a row that are already backed up
do {
await page.keyboard.press('ArrowRight');
await page.waitForTimeout(100); // wait for the new image to load
let imageUrl = await page.evaluate(() => document.activeElement.toString());
let findUrlRetries = 3;
while (!imageUrl.startsWith('https://') && findUrlRetries > 0) {
logger.warn(`Expected imageUrl to start with 'https://', but got ${imageUrl}. Retrying in 3 seconds...`);
await page.waitForTimeout(3000);
await page.keyboard.press('ArrowRight');
imageUrl = await page.evaluate(() => document.activeElement.toString());
findUrlRetries--;
}
if (!imageUrl.startsWith('https://')) {
logger.error(`Failed to obtain a valid imageUrl on the homepage after retrying.`);
debugger;
continue; // Skip to the next iteration of the loop if still not a valid URL
}
logger.info(`Pic: ${imageUrl}`);
// Check if imageUrl starts with 'https://photos.google.com/photo/'
const url = new URL(imageUrl);
if (!(url.host === 'photos.google.com' && url.pathname.startsWith('/photo/'))) {
logger.error(`Invalid URL: ${imageUrl}`);
debugger;
continue; // Skip to the next iteration of the loop
}
// Add to the database
const timestamp = new Date().toISOString();
try {
await db.run(`INSERT INTO googlePhotosUrls(url, isBackedUp, retrieved_on) VALUES(?, 'unchecked', ?)`, [imageUrl, timestamp]);
alreadyBackedUpCount = 0; // a new URL breaks the streak of already-seen photos
} catch (err) {
if (err.code === 'SQLITE_CONSTRAINT') {
logger.debug(`URL is already in the database.`);
alreadyBackedUpCount++;
} else {
logger.error(`SQL Error: ${err}`);
debugger;
}
}
// Update the previous URL and increment the image index
previousUrl = currentUrl;
currentUrl = imageUrl;
imageUrlsScraped++;
// If the previous URL is the same as the current one, increment sameUrlCount
if (previousUrl === currentUrl) {
sameUrlCount++;
} else {
sameUrlCount = 0; // reset sameUrlCount if the URLs are different
}
// If a maximum number of images is specified and we've reached it, break the loop
if (maxPics && imageUrlsScraped >= maxPics) {
logger.info(`Reached maximum number of images: ${maxPics}`);
break;
}
} while (sameUrlCount < sameUrlCountMax && alreadyBackedUpCount < alreadyBackedUpCountMax);
};
// Add a DB operation to the queue
const queue = [];
let isProcessing = false;
const batchSize = 1000;
async function addToDbQueue(sql, params, db) {
if (queue.length % 100 === 0 && queue.length !== 0) {
logger.info(`Added. Current queue size: ${queue.length}`);
}
return new Promise((resolve) => {
queue.push({ sql, params, resolve });
if (!isProcessing) {
processDbQueue(db);
}
// No early resolve here: the promise settles in processDbQueue() once this
// operation has actually run, so awaiting addToDbQueue() means the write landed.
});
};
// Send a queued operation to the db
async function processDbQueue(db) {
if (queue.length % 100 === 0 && queue.length !== 0) {
logger.info(`Removing. Current queue size: ${queue.length}`);
}
if (queue.length === 0) {
isProcessing = false;
return;
}
isProcessing = true;
const tasks = [];
const batch = [];
for (let i = 0; i < batchSize && queue.length > 0; i++) {
const { sql, params, resolve } = queue.shift();
batch.push({ sql, params });
tasks.push(resolve);
}
for (const { sql, params } of batch) {
try {
await db.run(sql, params);
} catch (err) {
if (err.code === 'SQLITE_CONSTRAINT') {
logger.debug(`Duplicate entry ignored: ${params[0]}`);
} else {
logger.error(`SQL Error: ${err}`);
debugger;
}
}
}
tasks.forEach(resolve => resolve());
processDbQueue(db);
};
// Wait for all db operations to finish before returning
function waitForQueueToDrain() {
return new Promise(resolve => {
const checkQueue = () => {
if (queue.length === 0 && !isProcessing) {
logger.info('Queue is drained to empty.');
resolve();
} else {
setTimeout(checkQueue, 100);
}
};
checkQueue();
});
};
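// Typical usage (illustrative): writes are fire-and-forget through the queue,
// and code that needs every pending write flushed awaits the drain, e.g.:
//   addToDbQueue(`UPDATE googlePhotosUrls SET isBackedUp = 'yes' WHERE url = ?`, [url], db);
//   await waitForQueueToDrain();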
function convertExifDateTimeToISO(dateToConvert) {
if (!dateToConvert) {
return ''
}
let DateISO = `${dateToConvert.year}-${dateToConvert.month.toString().padStart(2, '0')}-${dateToConvert.day.toString().padStart(2, '0')}T${dateToConvert.hour.toString().padStart(2, '0')}:${dateToConvert.minute.toString().padStart(2, '0')}:${dateToConvert.second.toString().padStart(2, '0')}`;
if (dateToConvert.tzoffsetMinutes != null) { // exiftool-vendored's ExifDateTime stores the offset in tzoffsetMinutes
// Convert timezone offset from minutes to HH:mm format
let offsetHours = Math.floor(Math.abs(dateToConvert.tzoffsetMinutes) / 60);
let offsetMinutes = Math.abs(dateToConvert.tzoffsetMinutes) % 60;
let sign = dateToConvert.tzoffsetMinutes < 0 ? '-' : '+';
DateISO += `${sign}${offsetHours.toString().padStart(2, '0')}:${offsetMinutes.toString().padStart(2, '0')}`;
} else {
// Assume UTC if no timezone offset is provided
DateISO += 'Z';
}
return DateISO;
}
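// Example (illustrative): { year: 2021, month: 3, day: 5, hour: 14, minute: 2,
// second: 9, tzoffsetMinutes: -480 } -> '2021-03-05T14:02:09-08:00'.
// With no offset present, a trailing 'Z' (UTC) is assumed.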
//return the metadata found in the image file or in an xmp sidecar file
async function readMetadataFromFile(filePath) {
const xmpFilePath = `${filePath}.xmp`;
let metadata;
if (fs.existsSync(xmpFilePath)) {
metadata = await exiftool.read(xmpFilePath);
} else {
logger.info(`XMP file not found for ${filePath}, reading EXIF data instead.`);
metadata = await exiftool.read(filePath);
}
let url = metadata['Source'];
let createDate = convertExifDateTimeToISO(metadata['CreateDate']);
let dateTimeOriginal = convertExifDateTimeToISO(metadata['DateTimeOriginal']);
let description = metadata['Description'] || metadata['XMP:Description'];
let personInImage = '';
if (metadata['PersonInImage']) {
personInImage = metadata['PersonInImage'][0];
} else if (metadata['XMP:PersonInImage']) {
personInImage = metadata['XMP:PersonInImage'][0];
}
if (!url) {
logger.error(`URL not found in metadata for ${filePath}`);
return;
}
return { url, createDate, dateTimeOriginal, description, personInImage };
};
// Add an on-disk file to the database along with its metadata
async function addFileToDb(absolutePath, db) {
// Check if the file extension is in the list of allowed extensions
const ext = path.extname(absolutePath).toLowerCase();
if (!validFileExtensions.includes(ext)) {
logger.error(`Found an invalid file type: ${absolutePath}`);
debugger;
return;
}
const metadata = await readMetadataFromFile(absolutePath);
if (metadata) {
addToDbQueue(`INSERT INTO files(path, url, createDate, dateTimeOriginal, description, personInImage, last_updated_date) VALUES(?, ?, ?, ?, ?, ?, ?)`,
[absolutePath, metadata.url, metadata.createDate, metadata.dateTimeOriginal, metadata.description, metadata.personInImage, new Date().toISOString()], db);
} else {
logger.error(`No metadata found for file: ${absolutePath}`);
// debugger; //TODO: What do we do when we find an image that was put there manually, with no metadata to tie it back to a google photos url?
}
}
// Scan recursively for files and add them plus their XMP metadata to the db
async function scanDirectory(startPath, db) {
logger.info(`Processing directory: ${startPath}`);
const list = await fs.promises.readdir(startPath);
const filePromises = [];
const directoryPromises = [];
for (const file of list) {
const absolutePath = path.join(startPath, file);
const stat = await fs.promises.stat(absolutePath);
if (stat.isDirectory()) {
// Store the promise for processing the subdirectory later
directoryPromises.push(() => scanDirectory(absolutePath, db));
} else if (!absolutePath.endsWith('.xmp')) {
// Process files concurrently within the same directory
filePromises.push(addFileToDb(absolutePath, db));
}
}
// Wait for all file processing in the current directory to complete
await Promise.all(filePromises);
// Now process each subdirectory one at a time, waiting for each to complete before moving to the next
for (const directoryPromise of directoryPromises) {
await directoryPromise();
}
};
// Check if there is a file in the database whose URL field matches the specified url
async function updateUrlBackupStatus(url, db, downloadFailed) {
if (downloadFailed) {
logger.debug(`Image URL ${url} failed to download. Setting db status to error.`);
addToDbQueue(`UPDATE googlePhotosUrls SET isBackedUp = 'error' WHERE url = ?`, [url], db);
return
}
const fileRow = await db.get(`SELECT * FROM files WHERE url = ?`, [url]);
if (fileRow) {
logger.debug(`Image URL ${url} is backed up in file ${fileRow.path}`);
addToDbQueue(`UPDATE googlePhotosUrls SET isBackedUp = 'yes' WHERE url = ?`, [url], db);
} else {
addToDbQueue(`UPDATE googlePhotosUrls SET isBackedUp = 'no' WHERE url = ?`, [url], db);
}
};
// Go through all photo URLs from google photos and see if there's a file in the db that has its URL in the source field, (indicating the photo is already backed up to disk)
async function checkIfAllUrlsAreBackedUp(db, thoroughness = 'unchecked') {
try {
let query;
switch (thoroughness) {
case 'all':
query = `SELECT url FROM googlePhotosUrls`;
break;
case 'unchecked-and-no':
query = `SELECT url FROM googlePhotosUrls WHERE isBackedUp IN ('unchecked', 'no')`;
break;
case 'unchecked':
default:
query = `SELECT url FROM googlePhotosUrls WHERE isBackedUp = 'unchecked'`;
break;
}
const rows = await db.all(query);
logger.debug(`About to check backup status for ${rows.length} items.`);
let completedCount = 0;
let lastLoggedPercent = -1;
for (const row of rows) {
const photosUrl = row.url;
await updateUrlBackupStatus(photosUrl, db, false);
completedCount++;
const percentComplete = Math.floor((completedCount / rows.length) * 100);
if (percentComplete % 2 === 0 && percentComplete !== lastLoggedPercent) { // log each even percentage only once
logger.info(`${percentComplete}% complete.`);
lastLoggedPercent = percentComplete;
}
}
} catch (err) {
logger.error('checkBackup SQL error:', err);
debugger;
}
};
// Find a selector on the page and return its visibility and enabled status
async function findActiveSelector(selector) {
const element = await page.$(selector);
if (element) {
const isVisible = await page.evaluate(el => el.offsetParent !== null, element);
const isEnabled = !(await element.isDisabled());
if (isVisible && isEnabled) {
return true;
} else {
logger.error(`Element found but status is visible: ${isVisible}, enabled: ${isEnabled}, for selector: ${selector}`);
return false;
}
}
// logger.info(`Element not found: ${selector}`);
return false;
}
// Wait for a selector on the page to become visible and enabled, up to a specified timeout
async function waitForActiveSelector(selector, timeout = 5000) {
try {
await page.waitForSelector(selector, { state: 'attached', timeout });
const element = await page.$(selector);
if (element) {
const isVisible = await page.evaluate(el => el.offsetParent !== null, element);
const isEnabled = !(await element.isDisabled());
if (isVisible && isEnabled) {
return true;
} else {
logger.error(`Element is not active. Visible: ${isVisible}, Enabled: ${isEnabled}, Selector: ${selector}`);
return false;
}
}
} catch (error) {
logger.error(`waitForActiveSelector error: Selector not found or timeout exceeded for ${selector}. Error: ${error}`);
return false;
}
return false; // waitForSelector attached, but the element handle could not be retrieved
}
// When the info panel is closed, open it
async function openOptionsPanel() {
const selectorToLookFor = '[aria-label="Download - Shift+D"]';
let isOptionsPanelOpen = await findActiveSelector(selectorToLookFor);
if (!isOptionsPanelOpen) {
try {
await dispatchMouseEvents('div[data-tooltip="More options"]');
await page.waitForTimeout(2000); // It seems to need a hard wait, perhaps because of the animation? The next line doesn't do it.
isOptionsPanelOpen = await waitForActiveSelector(selectorToLookFor, 5000);
if (isOptionsPanelOpen) {
return true;
} else {
logger.error(`Options panel failed to open.`);
return false;
}
} catch (error) {
logger.error(`Failed to open 'More options': ${error}`);
return false;
}
}
return true; // the options panel was already open
}
// Global function to check if the info panel is open
async function isInfoPanelOpen() {
const closeButtonSelector = 'div.IMbeAf'; // This is the best selector to look for to see if it's open
try {
await page.waitForSelector(closeButtonSelector, { state: 'attached', timeout: 5000 });
return true;
} catch (error) {
logger.debug(`Info panel is closed (did not find selector): ${error}`);
return false;
}
}
// When the info panel is closed, open it
async function openInfoPanelIfClosed() {
let isOpen = await isInfoPanelOpen();
if (!isOpen) {
logger.debug("Couldn't open it with the selector. Try pressing 'i'.");
try {
// Set a timeout for the entire operation
const timeoutPromise = new Promise((_, reject) => {
setTimeout(() => reject(new Error('Operation timed out')), 20000); // 20 seconds
});
await Promise.race([
(async () => {
await page.keyboard.press('i');
logger.debug('Pressed i to open info panel');
isOpen = await isInfoPanelOpen(); // Re-check if the info panel is open after pressing 'i'
})(),
timeoutPromise
]);
} catch (error) {
logger.error(`Error opening the info panel: ${error}`);
return false;
}
}
return isOpen;
}
// Download the photo to a temp location, get its metadata, check if it's a duplicate, write the file and metadata to disk, then add the file on disk to the db.
async function downloadPhoto(db) {
const timeout = 120000; // 2 minutes in milliseconds
let timeoutHandle;
const currentPageUrl = page.url();
// Set up a promise that rejects after a timeout
const timeoutPromise = new Promise((resolve) => {
timeoutHandle = setTimeout(() => {
logger.error(`Download photo operation timed out. URL: ${currentPageUrl}`);
resolve({timeoutOccurred: true});
}, timeout);
});
// Use Promise.race to handle the timeout
const operationPromise = (async () => {
let infoPanelOpen = await openInfoPanelIfClosed();
if (!infoPanelOpen) {
logger.error(`The info panel couldn't be opened; something's awry. Move on to the next photo. URL: ${currentPageUrl}`);
return {processedSuccessfully: false, timeoutOccurred: false};
}
let downloadInfos;
try {
downloadInfos = await downloadToTempLocation();
} catch (error) {
logger.error(`Error downloading photo: ${error}. URL: ${currentPageUrl}`);
return {processedSuccessfully: false, timeoutOccurred: false};
}
let processedSuccessfully = true;
for (const { download, tempDownloadPath } of downloadInfos) {
const fileNamePhoto = await download.suggestedFilename();
const fileExtensionPhoto = path.extname(fileNamePhoto).toLowerCase();
if (!validFileExtensions.includes(fileExtensionPhoto)) {
logger.error(`Invalid file extension for ${fileNamePhoto}. Skipping. URL: ${currentPageUrl}`);
debugger;
processedSuccessfully = false; // Mark as unsuccessful due to invalid file extension
continue;
}
if (fileExtensionPhoto === '.zip') {
const zipProcessingResult = await processZipFile(db, tempDownloadPath);
processedSuccessfully = processedSuccessfully && zipProcessingResult;
} else {
const singleFileProcessingResult = await processSingleFile(db, tempDownloadPath, fileNamePhoto);
processedSuccessfully = processedSuccessfully && singleFileProcessingResult;
}
}
return {processedSuccessfully, timeoutOccurred: false};
})();
const result = await Promise.race([operationPromise, timeoutPromise]).finally(() => {
clearTimeout(timeoutHandle); // Ensure the timeout is cleared in any case
});
return result;
}
async function processZipFile(db, zipFilePath) {
const zip = new StreamZip.async({ file: zipFilePath });
const entries = await zip.entries();
let processedSuccessfully = false;
for (const entry of Object.values(entries)) {
if (entry.isDirectory) continue;
const ext = path.extname(entry.name).toLowerCase();
if (!validFileExtensions.includes(ext)) continue;
const tempDownloadPath = path.join(path.dirname(zipFilePath), entry.name);
await zip.extract(entry.name, tempDownloadPath);
const processResult = await processSingleFile(db, tempDownloadPath, entry.name);
processedSuccessfully = processedSuccessfully || processResult;
}
await zip.close();
await fs.promises.unlink(zipFilePath); // Clean up the ZIP file after processing
return processedSuccessfully;
}
async function processSingleFile(db, tempDownloadPath, filename) {
const metadata = await extractMetadataFromPage(tempDownloadPath, filename);
if (!metadata) {
logger.error(`Failed to extract metadata for ${filename}.`);
return false;
}
const { year, month, dateTimeOriginal, people, description } = metadata;
let finalPath = path.join(downloadPath, year.toString(), month.toString(), filename);
finalPath = await handleDuplicateFiles(finalPath, tempDownloadPath, filename, year, month);
await writeMetadataToFile(finalPath, people, page.url(), dateTimeOriginal, description, writeSidecarMetadata, writeFileMetadata);
await addFileToDb(finalPath, db);
return true;
}
// Dispatch mouse events to the first clickable and visible selector found (in case of multiples).
async function dispatchMouseEvents(buttonSelector) {
try {
// Find all buttons matching the selector
const buttons_matching_selector = await page.$$(buttonSelector);
// Find the first visible and enabled button
let actionableButton;
for (let button_match of buttons_matching_selector) {
let visible = await page.evaluate(el => el.offsetParent !== null, button_match);
let enabled = !(await button_match.isDisabled());
if (visible && enabled) {
actionableButton = button_match;
break;
}
}
// If a visible and enabled button is found, dispatch mouse events
if (actionableButton) {
await page.evaluate((button_on_page) => {
button_on_page.dispatchEvent(new MouseEvent('mousedown', {
'view': window,
'bubbles': true,
'cancelable': true
}));
button_on_page.dispatchEvent(new MouseEvent('mouseup', {
'view': window,
'bubbles': true,
'cancelable': true
}));
}, actionableButton);
} else {
logger.error(`No visible and enabled button found for selector: ${buttonSelector}`);
debugger; //TODO: consider removing
}
} catch (error) {
logger.error('Error simulating mousedown and mouseup events: ' + error);
}
}
// Open More Options, try to download all versions available, return the path to the temp download location
async function downloadToTempLocation() {
let downloadInfos = [];
const urlToDownloadFrom = page.url();
// Define the various download buttons
const downloadSelectors = [
'[aria-label^="Download all "]',
'[aria-label="Download original"]',
'[aria-label="Download video"]',
'[aria-label="Download - Shift+D"]',
];
// Iterate over each selector to handle multiple downloads
for (const selector of downloadSelectors) {
await openOptionsPanel();
const dlButtons = await page.$$(selector);
if (dlButtons.length > 0) {
logger.info(`Attempting download from: ${selector}`);
// Click the button to trigger the download
await dispatchMouseEvents(selector);
// Wait for the download to start with a timeout of 5 seconds for the download to initiate
try {
const download = await page.waitForEvent('download', { timeout: 5000 });
// Once the download has started, download.path() resolves when it completes
const tempPath = await download.path(); // local name avoids shadowing the global downloadPath
downloadInfos.push({ download, tempDownloadPath: tempPath });
logger.info("Download started and path received!");
} catch (error) {
// Reset and move to the next selector
logger.error(`Download did not start or complete in time for selector: ${selector} from URL: ${urlToDownloadFrom}. Skipping to next selector; this will not be backed up. Error: ${error}`);
await resetPage(urlToDownloadFrom);
}
}
}
if (downloadInfos.length === 0) {
logger.error('No downloads were initiated or completed successfully.');
// Consider adding additional error handling or recovery logic here
}
// Filter out any undefined or null results
return downloadInfos.filter(info => info && info.tempDownloadPath);
}
// Examine the page hosting the image and return the metadata that's shown onscreen
async function extractMetadataFromPage(tempDownloadPath, filename) {
const exifData = await exiftool.read(tempDownloadPath);
let year = exifData.DateTimeOriginal?.year || 1;
let month = exifData.DateTimeOriginal?.month || 1;
let dateTimeOriginal = exifData.DateTimeOriginal;
const metadata = await page.evaluate(() => {
const data = {
people: null,
detailsHtml: null,
description: null
};
// Find all "People" sections and extract names if visible
const peopleSections = Array.from(document.querySelectorAll('div.wiOkb')).filter(element => element.textContent === 'People');
const visiblePeopleSection = peopleSections.find(section => section.offsetParent !== null);
if (visiblePeopleSection) {
const parentElement = visiblePeopleSection.parentElement;
data.people = Array.from(parentElement.querySelectorAll('ul.cdLvR li a[aria-label]')).map(element => element.getAttribute('aria-label'));
}
// Get the HTML of the whole details panel to use for the date
const detailsSection = Array.from(document.querySelectorAll('div.wiOkb')).find(element => element.textContent === 'Details');
if (detailsSection) {
const parentElement = detailsSection.parentElement;
data.detailsHtml = parentElement.innerHTML;
}
// Grab the first 'Description' element that is visible
const descriptionElement = document.querySelector('textarea[aria-label="Description"]');
if (descriptionElement && descriptionElement.offsetParent !== null) {
data.description = descriptionElement.value;
} else {
data.description = "";
}
return data;
});
// Only search for a date if the file doesn't have its own date in EXIF
if (year === 1 && month === 1 && metadata.detailsHtml != null) {
const regex1 = new RegExp([
'aria-label="',
'[^"]*',
'(Photo|Video|Animation|Highlight Video)',
'[^"]*-',
' ([^"]+)',
'"'
].join(''), 'g');
const regex2 = new RegExp([
'aria-label="Date:',
'\\s*([A-Za-z0-9 ,]+)',
'"'
].join(''), 'g');
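// (Illustrative) the patterns target aria-labels of roughly this shape:
//   aria-label="Photo - Mar 5, 2021"  -> regex1, group 2 captures "Mar 5, 2021"
//   aria-label="Date: Mar 5, 2021"    -> regex2, group 1 captures "Mar 5, 2021"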
const match1 = regex1.exec(metadata.detailsHtml);
const match2 = regex2.exec(metadata.detailsHtml);
let dateString = match1 ? match1[2] : match2 ? match2[1] : null; // regex1's group 1 is the media type; group 2 holds the date
if (dateString && !/\d{4}/.test(dateString)) {
const currentYear = new Date().getFullYear();
dateString += `, ${currentYear}`;
}
if (dateString) {
const date = new Date(dateString);
year = date.getFullYear();
month = date.getMonth() + 1;
dateTimeOriginal = date;
logger.debug(`Extracted date from details: ${dateString}`);
} else {
logger.error('Could not find the date in the HTML details');
debugger;
}
}
if (isNaN(year) || isNaN(month) || year === 1) {
logger.error(`Year or Month is not a valid number for URL: ${page.url()} and filename: ${filename}. Year: ${year}, Month: ${month}.`);
debugger;
return null;
}
logger.info(`Filename: ${filename}, dated ${month}/${year}`);
logger.info(`Date and Time: ${dateTimeOriginal ? dateTimeOriginal.toISOString() : 'Not Available'}`);
if (metadata.people?.length > 0) logger.info(`People: ${metadata.people.join(', ')}`);
if (metadata.description?.length > 0) logger.info(`Description: ${metadata.description}`);
return { fileName: filename, year, month, dateTimeOriginal, people: metadata.people, description: metadata.description };
}
// If a file is downloaded with the same name as an existing file, check to see if the existing file has the same source URL. If it does, overwrite the existing file with the new one. If it doesn't, write it out with a new filename by appending a number.
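// Renaming example (illustrative): a second, distinct "IMG_001.jpg" becomes
// "IMG_001-1.jpg", then "IMG_001-2.jpg", and so on until an unused name is found.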
async function handleDuplicateFiles(finalPath, tempDownloadPath, fileName, year, month) {
let counter = 1;
let fileExists = true;
let newFileName; // Declare newFileName here
while (fileExists) {
try {
await fsP.access(finalPath);
let imageMetadata;
let sourcePath = "";
const sidecarFilePath = `${finalPath}.xmp`;
if (existsSync(sidecarFilePath)) {
imageMetadata = await exiftool.read(sidecarFilePath);
sourcePath = imageMetadata['Source'];
} else {
imageMetadata = await exiftool.read(finalPath);
sourcePath = imageMetadata['XMP:Source'];
}
if (sourcePath === page.url()) {
logger.debug('Duplicate Detected with the same source as the URL of this image; Overwriting: '+finalPath+' from URL: '+page.url());
await moveFile(tempDownloadPath, finalPath, { overwrite: true });
fileExists = false;
} else {
logger.info(finalPath + " is a duplicate, but its gphotos source is missing or differs from this page's URL, so saving this as a separate file by appending a number.");
newFileName = fileName.replace(/(\.[\w\d_-]+)$/i, `-${counter}$1`);
finalPath = path.join(downloadPath, year.toString(), month.toString(), newFileName);
counter++;
}
} catch (error) {
if (error.code === 'ENOENT') {
await moveFile(tempDownloadPath, finalPath);
logger.info('Download complete: ' + finalPath + ' URL: ' + page.url());
fileExists = false;
} else {
logger.error('Unexpected error while moving from temp to final location:', error);
debugger;
fileExists = false; // exit the loop rather than retrying the same failure forever
}
}
}
return finalPath;
}
async function deleteFileIfExists(filePath) {
if (existsSync(filePath)) {
await fsP.unlink(filePath);
}
}
// Write the file's metadata to disk, either as a sidecar or to the file itself
async function writeMetadataToFile(finalPath, people, url, dateTimeOriginal, description, writeSidecarMetadata, writeFileMetadata) {
logger.debug("Trying to write metadata to file: "+finalPath);
const fileTypesWithoutExif = ['.avi', '.bmp', '.wmv', '.mts', '.dng', '.zip'];
if (writeFileMetadata || writeSidecarMetadata) {
try {
// Convert to a Luxon DateTime. dateTimeOriginal is usually an ExifDateTime,
// but extractMetadataFromPage() can hand back a plain JS Date (which has no
// .year/.month fields), so handle both.
const luxonDateTime = dateTimeOriginal instanceof Date
? DateTime.fromJSDate(dateTimeOriginal)
: DateTime.fromObject({
year: dateTimeOriginal.year,
month: dateTimeOriginal.month,
day: dateTimeOriginal.day,
hour: dateTimeOriginal.hour,
minute: dateTimeOriginal.minute,
second: dateTimeOriginal.second
});
// Format the date to the correct format
const formattedDate = luxonDateTime.toFormat("yyyy:LL:dd HH:mm:ss");
const metadata = {
'XMP:CreateDate': DateTime.now().toISO(),
'XMP:Source': url,
'DateTimeOriginal': formattedDate,
'XMP:Description': description,
'XMP:PersonInImage': people && people.length > 0 ? people.join(', ') : null
};
for (const [key, value] of Object.entries(metadata)) {
if (value !== '' && value !== null) {
//logger.info(`Writing metadata: ${key} = ${value}`);
if (writeSidecarMetadata) {
try {
const sidecarFilePath = `${finalPath}.xmp`;
await exiftool.write(sidecarFilePath, { [key]: value });
await deleteFileIfExists(`${sidecarFilePath}_original`);
} catch (sidecarError) {
logger.error(`Error writing sidecar metadata for ${key} = ${value}: ${sidecarError}`);
debugger;
}
}
const lowercaseFilePath = finalPath.toLowerCase();
if (writeFileMetadata && !fileTypesWithoutExif.some(ext => lowercaseFilePath.endsWith(ext))) {
try {
await exiftool.write(finalPath, { [key]: value });
await deleteFileIfExists(`${finalPath}_original`);
} catch (error) {
logger.debug(`Error while writing file metadata ${key} = ${value}: ${error}. Attempting to rewrite all tags.`);
const tempFilePath = `${finalPath}_temp.jpg`; // declared before the try so the catch below can reference it
try {
await deleteFileIfExists(tempFilePath);
await fsP.copyFile(finalPath, tempFilePath);
await deleteFileIfExists(finalPath);
await exiftool.rewriteAllTags(tempFilePath, finalPath);
logger.debug(`Rewrote all tags successfully for ${finalPath}`);
await deleteFileIfExists(tempFilePath);
} catch (rewriteError) {
logger.error(`Failed to rewrite all tags for ${finalPath}: ${rewriteError}`);
// Check if tempFilePath exists and move it to finalPath if so
try {
await fsP.access(tempFilePath, fs.constants.F_OK);
await fsP.rename(tempFilePath, finalPath);
logger.debug(`Moved ${tempFilePath} to ${finalPath} after failure to rewrite tags.`);
} catch (accessError) {
logger.error(`Could not move ${tempFilePath} to ${finalPath}: ${accessError}`);
}
debugger;
}
}
}
}
}
} catch (error) {
logger.error('Error modifying EXIF metadata for file. finalPath: ' + finalPath + ', URL: ' + page.url() + '. Error: ' + error);
debugger;
}
}
logger.debug('Wrote metadata to file: ' + finalPath);
}
// Set up the database
async function setupDatabase() {
const db = await open({
filename: './google-photos-backup.db',
driver: sqlite3.Database
});
// Drop the tables if they exist and instructed to
if (dropAndCreateFileTable) {
await db.run(`DROP TABLE IF EXISTS files`);
}
if (dropAndCreatePhotosTable) {
await db.run(`DROP TABLE IF EXISTS googlePhotosUrls`);
}
// Create the tables
await db.run(`CREATE TABLE IF NOT EXISTS googlePhotosUrls(url TEXT UNIQUE, isBackedUp TEXT DEFAULT 'unchecked', retrieved_on TEXT)`);
await db.run(`CREATE TABLE IF NOT EXISTS files(path TEXT UNIQUE, url TEXT, createDate TEXT, dateTimeOriginal TEXT, description TEXT, personInImage TEXT, last_updated_date TEXT)`);
return db;
};
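// Illustrative queries against this schema (not executed here):
//   photos still needing backup:   SELECT url FROM googlePhotosUrls WHERE isBackedUp = 'no';
//   where a URL was saved on disk: SELECT path FROM files WHERE url = ?;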
// Once you've scraped disk + google photos, you can run this to incrementally back up new photos from google photos to disk.
async function incrementalBackup(db) {
const maxRetries = 3;
let photo = true;
let downloadFailed;
await resetBrowser();
while (photo) {
// Retrieve the most recently scraped URL that hasn't been backed up yet
downloadFailed = true; // Assume failure until proven otherwise
photo = await db.get(`SELECT url FROM googlePhotosUrls WHERE isBackedUp = 'no' ORDER BY retrieved_on DESC LIMIT 1`);
if (!photo) {
break; // Exit the loop if there are no more photos to back up
}
logger.info(`Downloading: ${photo.url}`);
const navigationSuccessful = await navigateToUrl(photo.url);
if (navigationSuccessful) {