diff --git a/.github/workflows/code_test_and_deploy.yml b/.github/workflows/code_test_and_deploy.yml index e6e8fcb14..4e6103eb6 100644 --- a/.github/workflows/code_test_and_deploy.yml +++ b/.github/workflows/code_test_and_deploy.yml @@ -66,6 +66,26 @@ jobs: python -m pip install --upgrade pip pip install .[dev] + - name: Install pass on Linux + # this is required for Rclone config encryption + if: runner.os == 'Linux' + run: | + set -euo pipefail + sudo apt-get update + sudo apt-get install -y pass gnupg git + + # Create a dedicated GPG home for this job + export GNUPGHOME="$(mktemp -d)" + echo "GNUPGHOME=$GNUPGHOME" >> "$GITHUB_ENV" # <-- make it available to later steps + + # Generate a non-interactive key (no passphrase), no expiry + gpg --batch --yes --pinentry-mode loopback --passphrase '' \ + --quick-gen-key "CI Key " default default 0 + + # Initialize pass with the key fingerprint (more robust than UID) + FPR="$(gpg --list-secret-keys --with-colons | awk -F: '/^fpr:/ {print $10; exit}')" + pass init "$FPR" + # run SSH tests only on Linux because Windows and macOS # are already run within a virtual container and so cannot # run Linux containers because nested containerisation is disabled. 
@@ -97,7 +117,6 @@ jobs: run: | pytest --ignore=tests/tests_transfers/ssh --ignore=tests/tests_transfers/gdrive --ignore=tests/tests_transfers/aws - build_sdist_wheels: name: Build source distribution needs: [test] diff --git a/datashuttle/configs/canonical_folders.py b/datashuttle/configs/canonical_folders.py index da4c92a65..36d357365 100644 --- a/datashuttle/configs/canonical_folders.py +++ b/datashuttle/configs/canonical_folders.py @@ -6,6 +6,8 @@ if TYPE_CHECKING: from datashuttle.utils.custom_types import TopLevelFolder +import platform + from datashuttle.configs import canonical_configs from datashuttle.utils.folder_class import Folder @@ -80,6 +82,11 @@ def get_datashuttle_path() -> Path: return Path.home() / ".datashuttle" +def get_internal_datashuttle_from_path() -> Path: + """Get a placeholder path for `validate_project_from_path()`.""" + return get_datashuttle_path() / "_datashuttle_from_path" + + def get_project_datashuttle_path(project_name: str) -> Tuple[Path, Path]: """Return the datashuttle config path for the project. @@ -91,3 +98,26 @@ def get_project_datashuttle_path(project_name: str) -> Tuple[Path, Path]: temp_logs_path = base_path / "temp_logs" return base_path, temp_logs_path + + +def get_rclone_config_base_path() -> Path: + """Return the path to the Rclone config file. + + This is used for RClone config files for transfer targets (ssh, aws, gdrive). + This should match where RClone itself stores the config by default, + as described here: https://rclone.org/docs/#config-string + + Because RClone's resolution process for where it stores its config files + is a little complex, in some rare cases the path returned below may not match + where RClone actually stores its configs. In such cases, local filesystem configs, + which are stored in the default `rclone.conf` file for backwards compatibility + reasons, and transfer configs, which are stored in their own file at the path + returned from this function, are stored in separate places. 
This is generally + not a significant issue. + """ + if platform.system() == "Windows": + appdata_path = Path().home() / "AppData" / "Roaming" + if appdata_path.is_dir(): + return appdata_path / "rclone" + + return Path().home() / ".config" / "rclone" diff --git a/datashuttle/configs/config_class.py b/datashuttle/configs/config_class.py index 67a8299c7..6ece95673 100644 --- a/datashuttle/configs/config_class.py +++ b/datashuttle/configs/config_class.py @@ -1,12 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Optional, Union, cast +from typing import TYPE_CHECKING, Dict, Union, cast if TYPE_CHECKING: from collections.abc import ItemsView, KeysView, ValuesView from datashuttle.utils.custom_types import ( - OverwriteExistingFiles, TopLevelFolder, ) @@ -20,6 +19,7 @@ canonical_configs, canonical_folders, load_configs, + rclone_configs, ) from datashuttle.utils import folders, utils @@ -37,6 +37,9 @@ def __init__( ) -> None: """Initialize the Configs class with project name, file path, and config dictionary. + This class also holds `RCloneConfigs` that manage the Rclone config files + used for transfer. + Parameters ---------- project_name @@ -64,6 +67,8 @@ def __init__( self.hostkeys_path: Path self.project_metadata_path: Path + self.rclone = rclone_configs.RCloneConfigs(self, self.file_path.parent) + def setup_after_load(self) -> None: """Set up the config after loading it.""" load_configs.convert_str_and_pathlib_paths(self, "str_to_path") @@ -249,48 +254,6 @@ def get_base_folder( return base_folder - def get_rclone_config_name( - self, connection_method: Optional[str] = None - ) -> str: - """Generate the rclone configuration name for the central project. - - These configs are created by datashuttle but managed and stored by rclone. - """ - if connection_method is None: - connection_method = self["connection_method"] - - assert connection_method != "local_only", ( - "This state assumes a central connection." 
- ) - - return f"central_{self.project_name}_{connection_method}" - - def make_rclone_transfer_options( - self, overwrite_existing_files: OverwriteExistingFiles, dry_run: bool - ) -> Dict: - """Create a dictionary of rclone transfer options. - - Originally these arguments were collected from configs, but now - they are passed via function arguments. The `show_transfer_progress` - and `dry_run` options are fixed here. - """ - allowed_overwrite = ["never", "always", "if_source_newer"] - - if overwrite_existing_files not in allowed_overwrite: - utils.log_and_raise_error( - f"`overwrite_existing_files` not " - f"recognised, must be one of: " - f"{allowed_overwrite}", - ValueError, - ) - - return { - "overwrite_existing_files": overwrite_existing_files, - "show_transfer_progress": True, - "transfer_verbosity": "vv", - "dry_run": dry_run, - } - def init_paths(self) -> None: """Initialize paths used by datashuttle.""" self.project_metadata_path = self["local_path"] / ".datashuttle" diff --git a/datashuttle/configs/rclone_configs.py b/datashuttle/configs/rclone_configs.py new file mode 100644 index 000000000..c940774c0 --- /dev/null +++ b/datashuttle/configs/rclone_configs.py @@ -0,0 +1,133 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from pathlib import Path + + from datashuttle.configs.config_class import Configs + +import yaml + +from datashuttle.configs import canonical_folders +from datashuttle.utils import rclone_encryption + + +class RCloneConfigs: + """Class to manage the RClone configuration file. + + This is a file that RClone creates to hold all information about local and + central transfer targets. For example, the SSH RClone config holds the private key, + the GDrive rclone config holds the access token, etc. + + In datashuttle, local filesystem configs use the Rclone default configuration file, + that RClone manages, for backwards compatibility reasons.
However, SSH, AWS and GDrive + configs are stored in separate config files (set using RClone's --config argument). + Them being separate means these files can be separately encrypted. + + This class tracks the state on whether a RClone config is encrypted, as well + as provides the default names for the rclone conf (e.g. central_{project_name}_{connection_method}). + + Parameters + ---------- + datashuttle_configs + Parent Configs class. + + config_base_path + Path to the datashuttle configs folder where all configs for the project are stored. + + """ + + def __init__(self, datashuttle_configs: Configs, config_base_path: Path): + """Construct the class.""" + self.datashuttle_configs = datashuttle_configs + self.rclone_encryption_state_file_path = ( + config_base_path / "rclone_ps_state.yaml" + ) + + def load_rclone_config_is_encrypted(self) -> dict: + """Track whether the Rclone config file is encrypted. + + This could be read directly from the RClone config file, but requires + a subprocess call which can be slow on Windows. As this function is + called a lot, we track this explicitly when a rclone config is + encrypted / unencrypted and store to disk between sessions. + """ + assert rclone_encryption.connection_method_requires_encryption( + self.datashuttle_configs["connection_method"] + ) + + if self.rclone_encryption_state_file_path.is_file(): + with open(self.rclone_encryption_state_file_path, "r") as file: + rclone_config_is_encrypted = yaml.full_load(file) + else: + rclone_config_is_encrypted = { + "ssh": False, + "gdrive": False, + "aws": False, + } + + with open(self.rclone_encryption_state_file_path, "w") as file: + yaml.dump(rclone_config_is_encrypted, file) + + return rclone_config_is_encrypted + + def set_rclone_config_encryption_state(self, value: bool) -> None: + """Store the current state of the rclone config encryption for the `connection_method`.
+ + Note that this is stored to disk each call (rather than tracked in memory) + to ensure it is updated properly if changed through the Python API + while the TUI is also running. + """ + assert rclone_encryption.connection_method_requires_encryption( + self.datashuttle_configs["connection_method"] + ) + + rclone_config_is_encrypted = self.load_rclone_config_is_encrypted() + + rclone_config_is_encrypted[ + self.datashuttle_configs["connection_method"] + ] = value + + with open(self.rclone_encryption_state_file_path, "w") as file: + yaml.dump(rclone_config_is_encrypted, file) + + def rclone_file_is_encrypted( + self, + ) -> bool: + """Return whether the config file associated with the current `connection_method` is encrypted.""" + assert rclone_encryption.connection_method_requires_encryption( + self.datashuttle_configs["connection_method"] + ) + + rclone_config_is_encrypted = self.load_rclone_config_is_encrypted() + + return rclone_config_is_encrypted[ + self.datashuttle_configs["connection_method"] + ] + + def get_rclone_config_name( + self, connection_method: Optional[str] = None + ) -> str: + """Generate the rclone configuration name for the central project.""" + if connection_method is None: + connection_method = self.datashuttle_configs["connection_method"] + + return f"central_{self.datashuttle_configs.project_name}_{connection_method}" + + def get_rclone_central_connection_config_filepath(self) -> Path: + """Return the full filepath to the rclone `.conf` config file.""" + return ( + canonical_folders.get_rclone_config_base_path() + / f"{self.get_rclone_config_name()}.conf" + ) + + def delete_existing_rclone_config_file(self) -> None: + """Delete the Rclone config file if it exists.""" + rclone_config_filepath = ( + self.get_rclone_central_connection_config_filepath() + ) + + if rclone_config_filepath.exists(): + rclone_config_filepath.unlink() + self.set_rclone_config_encryption_state(False) diff --git a/datashuttle/datashuttle_class.py 
b/datashuttle/datashuttle_class.py index 267990c82..cf689b125 100644 --- a/datashuttle/datashuttle_class.py +++ b/datashuttle/datashuttle_class.py @@ -45,6 +45,7 @@ gdrive, getters, rclone, + rclone_encryption, ssh, utils, validation, @@ -789,7 +790,7 @@ def _transfer_specific_file_or_folder( upload_or_download, top_level_folder, include_list, - self.cfg.make_rclone_transfer_options( + rclone.make_rclone_transfer_options( overwrite_existing_files, dry_run ), ) @@ -815,6 +816,11 @@ def setup_ssh_connection(self) -> None: cluster. Once input, SSH private / public key pair will be setup. """ + if self.cfg["connection_method"] != "ssh": + raise RuntimeError( + "configs `connection_method` must be 'ssh' to set up SSH connection." + ) + self._start_log( "setup-ssh-connection-to-central-server", local_vars=locals() ) @@ -830,6 +836,15 @@ def setup_ssh_connection(self) -> None: self._setup_rclone_central_ssh_config(private_key_str, log=True) + utils.log_and_message( + f"Your SSH key will be stored in the rclone config at:\n " + f"{self.cfg.rclone.get_rclone_central_connection_config_filepath()}.\n" + ) + + if not self.cfg.rclone.rclone_file_is_encrypted(): + if self._ask_user_rclone_encryption(): + self._try_encrypt_rclone_config() + rclone.check_successful_connection_and_raise_error_on_fail( self.cfg ) @@ -860,6 +875,11 @@ def setup_gdrive_connection(self) -> None: Next, with the provided credentials, the final setup will be done. This opens up a browser if the user confirmed access to a browser. """ + if self.cfg["connection_method"] != "gdrive": + raise RuntimeError( + "configs `connection_method` must be 'gdrive' to set up Google Drive connection." 
+ ) + self._start_log( "setup-google-drive-connection-to-central-server", local_vars=locals(), @@ -876,7 +896,7 @@ def setup_gdrive_connection(self) -> None: config_token = gdrive.prompt_and_get_config_token( self.cfg, gdrive_client_secret, - self.cfg.get_rclone_config_name("gdrive"), + self.cfg.rclone.get_rclone_config_name("gdrive"), log=True, ) else: @@ -886,7 +906,13 @@ def setup_gdrive_connection(self) -> None: gdrive_client_secret, config_token ) - rclone.await_call_rclone_with_popen_raise_on_fail(process, log=True) + rclone.await_call_rclone_with_popen_for_central_connection_raise_on_fail( + self.cfg, process, log=True + ) + + if not self.cfg.rclone.rclone_file_is_encrypted(): + if self._ask_user_rclone_encryption(): + self._try_encrypt_rclone_config() rclone.check_successful_connection_and_raise_error_on_fail(self.cfg) @@ -909,6 +935,12 @@ def setup_aws_connection(self) -> None: Next, with the provided credentials, the final connection setup will be done. """ + if self.cfg["connection_method"] != "aws": + raise RuntimeError( + "configs `connection_method` must be 'aws' to " + "set up Amazon Web Services S3 Bucket connection." 
+ ) + self._start_log( "setup-aws-connection-to-central-server", local_vars=locals(), @@ -918,6 +950,10 @@ def setup_aws_connection(self) -> None: self._setup_rclone_aws_config(aws_secret_access_key, log=True) + if not self.cfg.rclone.rclone_file_is_encrypted(): + if self._ask_user_rclone_encryption(): + self._try_encrypt_rclone_config() + rclone.check_successful_connection_and_raise_error_on_fail(self.cfg) aws.raise_if_bucket_absent(self.cfg) @@ -925,6 +961,72 @@ def setup_aws_connection(self) -> None: ds_logger.close_log_filehandler() + # ------------------------------------------------------------------------- + # Rclone config encryption + # ------------------------------------------------------------------------- + + def _ask_user_rclone_encryption(self) -> bool: + """Get user input to determine if they want to encrypt the rclone config.""" + input_ = utils.get_user_input( + f"{rclone_encryption.get_explanation_message(self.cfg)}\n" + f"Press 'y' to encrypt the Rclone config or leave blank to skip." + ) + + return input_ == "y" + + def _try_encrypt_rclone_config( + self, + ) -> None: + """Try to encrypt the rclone config file. + + If it fails, error and let the user know the config file is unencrypted. + """ + try: + self.encrypt_rclone_config() + except Exception as e: + config_path = ( + self.cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + utils.log_and_raise_error( + f"{str(e)}\n" + f"Config encryption failed.\n" + f"Use `encrypt_rclone_config()` to attempt to encrypt the file again " + f"(see full error message above).\n" + f"IMPORTANT: The config at {config_path} is not currently encrypted.\n", + RuntimeError, + ) + + utils.log_and_message( + f"Rclone config file for the central connection " + f"{self.cfg['connection_method']} was successfully encrypted." 
+ ) + + def encrypt_rclone_config(self) -> None: + """Encrypt the rclone config file for the central connection.""" + if self.cfg.rclone.rclone_file_is_encrypted(): + raise RuntimeError( + "This config file is already encrypted. " + "First, use `remove_rclone_encryption` to remove it." + ) + + rclone_encryption.run_rclone_config_encrypt(self.cfg) + + self.cfg.rclone.set_rclone_config_encryption_state(True) + + def remove_rclone_encryption(self) -> None: + """Unencrypt the rclone config file for the central connection.""" + if not self.cfg.rclone.rclone_file_is_encrypted(): + raise RuntimeError( + f"The config for the current connection method: " + f"{self.cfg['connection_method']} " + f"is not encrypted. Cannot unencrypt." + ) + + rclone_encryption.remove_rclone_encryption(self.cfg) + + self.cfg.rclone.set_rclone_config_encryption_state(False) + # ------------------------------------------------------------------------- # Configs # ------------------------------------------------------------------------- @@ -943,7 +1045,7 @@ def make_config_file( ) -> None: """Initialize the configurations for datashuttle on the local machine. - Once initialised, these settings will be used each + Once initialized, these settings will be used each time the datashuttle is opened. 
These settings are stored in a config file on the @@ -1120,6 +1222,11 @@ def get_config_path(self) -> Path: """Return the full path to the DataShuttle config file.""" return self._config_path + @check_configs_set + def get_rclone_central_config_path(self) -> Path: + """Get the path to the Rclone config for the current `connection_method`.""" + return rclone.get_rclone_config_filepath(self.cfg) + @check_configs_set def get_configs(self) -> Configs: """Return the datashuttle configs.""" @@ -1572,14 +1679,15 @@ def _setup_rclone_central_ssh_config( ) -> None: rclone.setup_rclone_config_for_ssh( self.cfg, - self.cfg.get_rclone_config_name("ssh"), + self.cfg.rclone.get_rclone_config_name("ssh"), private_key_str, log=log, ) def _setup_rclone_central_local_filesystem_config(self) -> None: rclone.setup_rclone_config_for_local_filesystem( - self.cfg.get_rclone_config_name("local_filesystem"), + self.cfg, + self.cfg.rclone.get_rclone_config_name("local_filesystem"), ) def _setup_rclone_gdrive_config( @@ -1589,7 +1697,7 @@ def _setup_rclone_gdrive_config( ) -> subprocess.Popen: return rclone.setup_rclone_config_for_gdrive( self.cfg, - self.cfg.get_rclone_config_name("gdrive"), + self.cfg.rclone.get_rclone_config_name("gdrive"), gdrive_client_secret, config_token, ) @@ -1599,7 +1707,7 @@ def _setup_rclone_aws_config( ) -> None: rclone.setup_rclone_config_for_aws( self.cfg, - self.cfg.get_rclone_config_name("aws"), + self.cfg.rclone.get_rclone_config_name("aws"), aws_secret_access_key, log=log, ) diff --git a/datashuttle/datashuttle_functions.py b/datashuttle/datashuttle_functions.py index 2d618bb30..e4b57e015 100644 --- a/datashuttle/datashuttle_functions.py +++ b/datashuttle/datashuttle_functions.py @@ -13,9 +13,7 @@ Optional, ) -from datashuttle.configs import ( - canonical_configs, -) +from datashuttle.configs import canonical_configs, canonical_folders from datashuttle.configs.config_class import Configs from datashuttle.utils import ( validation, @@ -85,6 +83,8 @@ def 
validate_project_from_path( # Create some mock configs for the validation call, # then for each top-level folder, run the validation + # Note `get_internal_datashuttle_from_path` generates a placeholder + # folder path but this is not actually created. placeholder_configs = { key: None for key in canonical_configs.get_canonical_configs().keys() } @@ -92,7 +92,7 @@ def validate_project_from_path( cfg = Configs( project_name=project_path.name, - file_path=None, # type: ignore + file_path=canonical_folders.get_internal_datashuttle_from_path(), input_dict=placeholder_configs, ) diff --git a/datashuttle/tui/interface.py b/datashuttle/tui/interface.py index 00f6e0697..bff4d1b42 100644 --- a/datashuttle/tui/interface.py +++ b/datashuttle/tui/interface.py @@ -13,7 +13,7 @@ from datashuttle import DataShuttle from datashuttle.configs import load_configs -from datashuttle.utils import aws, gdrive, rclone, ssh, utils +from datashuttle.utils import aws, rclone, ssh, utils class Interface: @@ -568,10 +568,10 @@ def get_rclone_message_for_gdrive_without_browser( ) -> InterfaceOutput: """Get the rclone message for Google Drive setup without a browser.""" try: - output = gdrive.preliminary_for_setup_without_browser( + output = rclone.preliminary_setup_gdrive_config_without_browser( self.project.cfg, gdrive_client_secret, - self.project.cfg.get_rclone_config_name("gdrive"), + self.project.cfg.rclone.get_rclone_config_name("gdrive"), log=False, ) return True, output @@ -597,7 +597,11 @@ def await_successful_gdrive_connection_setup_raise_on_fail( The `self.gdrive_setup_process_killed` flag helps prevent raising errors in case the process was killed manually. 
""" - stdout, stderr = process.communicate() + stdout, stderr = ( + rclone.await_call_rclone_with_popen_for_central_connection_raise_on_fail( + self.project.cfg, process, log=False + ) + ) if not self.gdrive_setup_process_killed: if process.returncode != 0: @@ -625,3 +629,14 @@ def setup_aws_connection( return True, None except BaseException as e: return False, str(e) + + # Set RClone Encryption + # ------------------------------------------------------------------------------------ + + def try_setup_rclone_encryption(self): + """Try and encrypt the RClone config file for the current `connection_method`.""" + try: + self.project._try_encrypt_rclone_config() + return True, None + except BaseException as e: + return False, str(e) diff --git a/datashuttle/tui/screens/setup_aws.py b/datashuttle/tui/screens/setup_aws.py index 8527e1dd9..fd705aa7d 100644 --- a/datashuttle/tui/screens/setup_aws.py +++ b/datashuttle/tui/screens/setup_aws.py @@ -11,6 +11,8 @@ from textual.screen import ModalScreen from textual.widgets import Button, Input, Static +from datashuttle.utils import rclone_encryption + class SetupAwsScreen(ModalScreen): """Dialog window that sets up connection to an Amazon Web Service S3 bucket. @@ -26,7 +28,7 @@ def __init__(self, interface: Interface) -> None: super(SetupAwsScreen, self).__init__() self.interface = interface - self.stage = 0 + self.stage = "init" def compose(self) -> ComposeResult: """Set widgets on the SetupAwsScreen.""" @@ -52,18 +54,35 @@ def on_mount(self) -> None: self.query_one("#setup_aws_secret_access_key_input").visible = False def on_button_pressed(self, event: Button.Pressed) -> None: - """Handle button press on the screen.""" + """Handle button press on the screen. + + The `setup_aws_ok_button` is used for all 'positive' events ('Yes, Ok') + and 'setup_aws_cancel_button' is used for 'negative' events ('No', 'Cancel'). + The appropriate action to take on the button press is determined by the + current stage. 
+ + """ + if event.button.id == "setup_aws_cancel_button": - self.dismiss() + if self.stage == "ask_rclone_encryption": + message = "AWS Connection Successful!" + self.query_one("#setup_aws_messagebox_message").update(message) + self.query_one("#setup_aws_ok_button").label = "Finish" + self.query_one("#setup_aws_cancel_button").remove() + self.stage = "finished" + else: + self.dismiss() - if event.button.id == "setup_aws_ok_button": - if self.stage == 0: + elif event.button.id == "setup_aws_ok_button": + if self.stage == "init": self.prompt_user_for_aws_secret_access_key() - elif self.stage == 1: + elif self.stage == "use_secret_access_key": self.use_secret_access_key_to_setup_aws_connection() - elif self.stage == 2: + elif self.stage == "ask_rclone_encryption": + self.set_rclone_encryption() + + elif self.stage == "finished": self.dismiss() def prompt_user_for_aws_secret_access_key(self) -> None: @@ -73,10 +92,15 @@ def prompt_user_for_aws_secret_access_key(self) -> None: self.query_one("#setup_aws_messagebox_message").update(message) self.query_one("#setup_aws_secret_access_key_input").visible = True - self.stage += 1 + self.query_one("#setup_aws_ok_button") + + self.stage = "use_secret_access_key" def use_secret_access_key_to_setup_aws_connection(self) -> None: - """Set up the AWS connection and inform user of success or failure.""" + """Set up the AWS connection and inform the user on failure. + + On success, move on to the Rclone encryption prompt. + """ secret_access_key = self.query_one( "#setup_aws_secret_access_key_input" ).value @@ -86,11 +110,13 @@ def use_secret_access_key_to_setup_aws_connection(self) -> None: ) if success: - message = "AWS Connection Successful!"
- self.query_one( - "#setup_aws_secret_access_key_input" - ).visible = False + message = f"{rclone_encryption.get_explanation_message(self.interface.project.cfg)}" + self.query_one("#setup_aws_messagebox_message").update(message) + self.query_one("#setup_aws_secret_access_key_input").remove() + self.query_one("#setup_aws_ok_button").label = "Yes" + self.query_one("#setup_aws_cancel_button").label = "No" + self.stage = "ask_rclone_encryption" else: message = ( f"AWS setup failed. Please check your configs and secret access key" @@ -100,7 +126,21 @@ def use_secret_access_key_to_setup_aws_connection(self) -> None: "#setup_aws_secret_access_key_input" ).disabled = True - self.query_one("#setup_aws_ok_button").label = "Finish" - self.query_one("#setup_aws_messagebox_message").update(message) - self.query_one("#setup_aws_cancel_button").disabled = True - self.stage += 1 + self.query_one("#setup_aws_ok_button").label = "Retry" + self.query_one("#setup_aws_messagebox_message").update(message) + + def set_rclone_encryption(self): + """Try and encrypt the Rclone config file and inform the user of success / failure.""" + success, output = self.interface.try_setup_rclone_encryption() + + if success: + message = "The Rclone config file was successfully encrypted. Setup complete!" + self.query_one("#setup_aws_messagebox_message").update(message) + self.query_one("#setup_aws_ok_button").label = "Finish" + self.query_one("#setup_aws_cancel_button").remove() + self.stage = "finished" + else: + message = ( + f"The rclone_encryption set up failed. 
Exception: {output}" + ) + self.query_one("#setup_aws_messagebox_message").update(message) diff --git a/datashuttle/tui/screens/setup_gdrive.py b/datashuttle/tui/screens/setup_gdrive.py index 509ac1a52..a70e816b2 100644 --- a/datashuttle/tui/screens/setup_gdrive.py +++ b/datashuttle/tui/screens/setup_gdrive.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import traceback from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: @@ -12,6 +13,7 @@ from textual import work from textual.containers import Container, Horizontal, Vertical +from textual.css.query import NoMatches from textual.screen import ModalScreen from textual.widgets import ( Button, @@ -19,13 +21,15 @@ Static, ) +from datashuttle.utils import rclone_encryption + class SetupGdriveScreen(ModalScreen): """Dialog window that sets up a Google Drive connection. If the config contains a "gdrive_client_id", the user is prompted to enter a client secret. If the user has access to a browser, a Google Drive - authentication page will open. Otherwise, the user is asked to run an rclone command + authentication page will open. Otherwise, the user is asked to run an Rclone command and input a config token. 
""" @@ -34,7 +38,7 @@ def __init__(self, interface: Interface) -> None: super(SetupGdriveScreen, self).__init__() self.interface = interface - self.stage: int = 0 + self.no_browser_stage: None | str = "show_command_to_generate_code" self.setup_worker: Worker | None = None self.is_browser_available: bool = True self.gdrive_client_secret: Optional[str] = None @@ -44,7 +48,9 @@ def __init__(self, interface: Interface) -> None: id="setup_gdrive_generic_input_box", placeholder="Enter value here", ) - self.enter_button = Button("Enter", id="setup_gdrive_enter_button") + self.enter_button = Button( + "Enter", id="setup_gdrive_no_browser_enter_button" + ) def compose(self) -> ComposeResult: """Add widgets to the SetupGdriveScreen.""" @@ -69,20 +75,24 @@ def on_button_pressed(self, event: Button.Pressed) -> None: This dialog window operates using 6 buttons: - 1) "ok" button : Starts the connection setup process. + 1) `setup_gdrive_ok_button` : Starts the connection setup process. - 2) "yes" button : A "yes" answer to the availability of browser question. On click, - if "gdrive_client_id" is present in configs, the user is asked for client secret + 2) `setup_gdrive_has_browser_yes_button` : A "yes" answer to the availability of browser question. + On click, if "gdrive_client_id" is present in configs, the user is asked for client secret and proceeds to a browser authentication. - 3) "no" button : A "no" answer to the availability of browser question. On click, + 3) `setup_gdrive_no_button` : A "no" answer to the availability of browser question. On click, prompts the user to enter a config token by running an rclone command. - 4) "enter" button : To enter the client secret or config token. + 4) `setup_gdrive_no_browser_enter_button` : To enter the client secret or config token. + + 5) `setup_gdrive_set_encryption_yes_button` : To set a password on the RClone config file - 5) "finish" button : To finish the setup. 
+ + 6) `setup_gdrive_set_encryption_no_button` : To skip setting a password on the RClone config file - 6) "cancel" button : To cancel the setup at any step before completion. + 7) `setup_gdrive_finish_button` : To finish the setup. + + 8) `setup_gdrive_cancel_button` : To cancel the setup at any step before completion. """ if ( event.button.id == "setup_gdrive_cancel_button" @@ -90,7 +100,7 @@ def on_button_pressed(self, event: Button.Pressed) -> None: ): # see setup_gdrive_connection_and_update_ui() if self.setup_worker and self.setup_worker.is_running: - self.setup_worker.cancel() # fix + self.setup_worker.cancel() self.interface.terminate_gdrive_setup() self.dismiss() @@ -102,28 +112,30 @@ def on_button_pressed(self, event: Button.Pressed) -> None: else: self.ask_user_for_browser() - elif event.button.id == "setup_gdrive_yes_button": - self.remove_yes_no_buttons() + elif event.button.id == "setup_gdrive_has_browser_yes_button": + self.query_one("#setup_gdrive_has_browser_yes_button").remove() + self.query_one("#setup_gdrive_no_button").remove() self.open_browser_and_setup_gdrive_connection( self.gdrive_client_secret ) elif event.button.id == "setup_gdrive_no_button": self.is_browser_available = False - self.remove_yes_no_buttons() + self.query_one("#setup_gdrive_has_browser_yes_button").remove() + self.query_one("#setup_gdrive_no_button").remove() self.prompt_user_for_config_token() - elif event.button.id == "setup_gdrive_enter_button": + elif event.button.id == "setup_gdrive_no_browser_enter_button": if ( self.interface.project.cfg["gdrive_client_id"] - and self.stage == 0 + and self.no_browser_stage == "show_command_to_generate_code" ): self.gdrive_client_secret = ( self.input_box.value.strip() if self.input_box.value.strip() else None ) - self.stage += 1 + self.no_browser_stage = "setup_with_code" self.ask_user_for_browser() else: config_token = ( @@ -135,6 +147,15 @@ def on_button_pressed(self, event: Button.Pressed) -> None:
self.gdrive_client_secret, config_token ) + elif event.button.id == "setup_gdrive_set_encryption_yes_button": + self.set_rclone_encryption() + + elif event.button.id == "setup_gdrive_set_encryption_no_button": + self.set_finish_page("Setup complete!") + + # Setup the connection (with or without browser) + # ---------------------------------------------------------------------------------- + def ask_user_for_browser(self) -> None: """Ask the user if their machine has access to a browser.""" message = ( @@ -150,7 +171,7 @@ def ask_user_for_browser(self) -> None: self.input_box.visible = False # Mount the Yes and No buttons - yes_button = Button("Yes", id="setup_gdrive_yes_button") + yes_button = Button("Yes", id="setup_gdrive_has_browser_yes_button") no_button = Button("No", id="setup_gdrive_no_button") self.query_one("#setup_gdrive_buttons_horizontal").mount( @@ -185,10 +206,14 @@ def open_browser_and_setup_gdrive_connection( The connection setup is asynchronous so that the user is able to cancel the setup if anything goes wrong without quitting datashuttle altogether. """ - message = "Please authenticate through browser (it should open automatically)." + message = ( + "Please authenticate through your browser (it should open automatically).\n\n" + "It may take a moment for the connection to register after you confirm in the browser.\n\n" + ) + self.update_message_box_message(message) - asyncio.create_task( + self._task = asyncio.create_task( self.setup_gdrive_connection_and_update_ui( gdrive_client_secret=gdrive_client_secret ), @@ -211,7 +236,9 @@ def prompt_user_for_config_token(self) -> None: message + "\nPress shift+click to copy." 
) - self.enter_button = Button("Enter", id="setup_gdrive_enter_button") + self.enter_button = Button( + "Enter", id="setup_gdrive_no_browser_enter_button" + ) self.query_one("#setup_gdrive_buttons_horizontal").mount( self.enter_button, before="#setup_gdrive_cancel_button" ) @@ -221,10 +248,10 @@ def setup_gdrive_connection_using_config_token( self, gdrive_client_secret: str | None, config_token: str | None ) -> None: """Set up the Google Drive connection using rclone config token.""" - message = "Setting up connection." + message = "Setting up connection..." self.update_message_box_message(message) - asyncio.create_task( + self._task = asyncio.create_task( self.setup_gdrive_connection_and_update_ui( gdrive_client_secret=gdrive_client_secret, config_token=config_token, @@ -249,26 +276,46 @@ async def setup_gdrive_connection_and_update_ui( The rclone process object is stored in the `Interface` class to handle closing the process as the thread does not kill the process itself upon cancellation and the process is awaited ensure that the process finishes and any raised errors are caught. - Therefore, the worker thread thread and the rclone process are separately cancelled + Therefore, the worker thread and the rclone process are separately cancelled when the user presses the cancel button. (see `on_button_pressed`) """ self.input_box.disabled = True self.enter_button.disabled = True - worker = self.setup_gdrive_connection( - gdrive_client_secret, config_token - ) - self.setup_worker = worker - if worker.is_running: - await worker.wait() + try: + worker = self.setup_gdrive_connection( + gdrive_client_secret, config_token + ) + self.setup_worker = worker + if worker.is_running: + await worker.wait() + + success, output = worker.result + if success: + # This function is called from different screens that + # contain different widgets. Therefore, remove all possible + # widgets that may / may not be present on the previous screen. 
+ self.show_encryption_screen() + for id in [ + "#setup_gdrive_cancel_button", + "#setup_gdrive_generic_input_box", + "#setup_gdrive_no_browser_enter_button", + ]: + try: + widget = self.query_one(id) + await widget.remove() + except NoMatches: + pass + else: + self.input_box.disabled = False + self.enter_button.disabled = False + self.display_failed(output) - success, output = worker.result - if success: - self.show_finish_screen() - else: - self.input_box.disabled = False - self.enter_button.disabled = False - self.display_failed(output) + except Exception as exc: + tb = "".join( + traceback.format_exception(type(exc), exc, exc.__traceback__) + ) + self.display_failed(tb) @work(exclusive=True, thread=True) def setup_gdrive_connection( @@ -288,22 +335,48 @@ def setup_gdrive_connection( ) return success, output + # Set encryption on RClone config # ---------------------------------------------------------------------------------- - # UI Update Methods - # ---------------------------------------------------------------------------------- - def show_finish_screen(self) -> None: + def show_encryption_screen(self): + """Show the screen asking the user whether to encrypt the Rclone password.""" + message = f"{rclone_encryption.get_explanation_message(self.interface.project.cfg)}" + self.update_message_box_message(message) + + yes_button = Button("Yes", id="setup_gdrive_set_encryption_yes_button") + no_button = Button("No", id="setup_gdrive_set_encryption_no_button") + + self.query_one("#setup_gdrive_buttons_horizontal").mount( + yes_button, no_button + ) + + def set_rclone_encryption(self): + """Try and encrypt the Rclone config file and inform the user of success / failure.""" + success, output = self.interface.try_setup_rclone_encryption() + + if success: + self.set_finish_page( + "The encryption was successful. Setup complete!" + ) + else: + message = f"The password set up failed. 
Exception: {output}" + self.update_message_box_message(message) + + def set_finish_page(self, message) -> None: """Show the final screen after successful set up.""" - message = "Setup Complete!" - self.query_one("#setup_gdrive_cancel_button").remove() + self.query_one("#setup_gdrive_set_encryption_yes_button").remove() + self.query_one("#setup_gdrive_set_encryption_no_button").remove() self.update_message_box_message(message) self.query_one("#setup_gdrive_buttons_horizontal").mount( Button("Finish", id="setup_gdrive_finish_button") ) + # UI Update Methods + # ---------------------------------------------------------------------------------- + def display_failed(self, output) -> None: - """Update the message box indicating the set up failed.""" + """Update the message box indicating the set-up failed.""" message = ( f"Google Drive setup failed. Please check your credentials" f"\n\n Traceback: {output}" @@ -328,8 +401,3 @@ def mount_input_box_before_buttons( ) self.input_box.visible = True self.input_box.value = "" - - def remove_yes_no_buttons(self) -> None: - """Remove yes and no buttons.""" - self.query_one("#setup_gdrive_yes_button").remove() - self.query_one("#setup_gdrive_no_button").remove() diff --git a/datashuttle/tui/screens/setup_ssh.py b/datashuttle/tui/screens/setup_ssh.py index 3ab5025e8..3073cc3c3 100644 --- a/datashuttle/tui/screens/setup_ssh.py +++ b/datashuttle/tui/screens/setup_ssh.py @@ -16,12 +16,19 @@ Static, ) +from datashuttle.utils import rclone_encryption + class SetupSshScreen(ModalScreen): """Dialog window that sets up an SSH connection. - This asks to confirm the central hostkey, and takes password to setup - SSH key pair. Under the hood uses `project.setup_ssh_connection()`. + This asks to confirm the central hostkey, and takes password to set up + SSH key pair as well as encrypting the RClone config. + + Due to how textual works, it is simplest for each button press to + trigger an action (e.g. 
set up host key) and then set up the widgets + for the next screen. Then, when the next button is pressed, we can + continue in this way of managing the screens. """ def __init__(self, interface: Interface) -> None: @@ -29,7 +36,7 @@ def __init__(self, interface: Interface) -> None: super(SetupSshScreen, self).__init__() self.interface = interface - self.stage = 0 + self.stage = "init" self.failed_password_attempts = 1 self.key: paramiko.RSAKey @@ -67,19 +74,28 @@ def on_button_pressed(self, event: Button.pressed) -> None: input, multiple attempts are allowed. """ if event.button.id == "setup_ssh_cancel_button": - self.dismiss() + if self.stage == "set_up_encryption": + self.show_connection_successful_message() + else: + self.dismiss() if event.button.id == "setup_ssh_ok_button": - if self.stage == 0: + if self.stage == "init": self.ask_user_to_accept_hostkeys() - elif self.stage == 1: + elif self.stage == "save_hostkeys": self.save_hostkeys_and_prompt_password_input() - elif self.stage == 2: + elif self.stage == "ask_for_encryption": self.use_password_to_setup_ssh_key_pairs() - elif self.stage == 3: + elif self.stage == "set_up_encryption": + self.try_setup_rclone_encryption() + + elif self.stage == "show_success_message": + self.show_connection_successful_message() + + elif self.stage == "finished": self.dismiss() def ask_user_to_accept_hostkeys(self) -> None: @@ -112,7 +128,7 @@ def ask_user_to_accept_hostkeys(self) -> None: self.query_one("#setup_ssh_ok_button").disabled = True self.query_one("#messagebox_message_label").update(message) - self.stage += 1 + self.stage = "save_hostkeys" def save_hostkeys_and_prompt_password_input(self) -> None: """Get the user password for the central server. 
@@ -137,13 +153,14 @@ def save_hostkeys_and_prompt_password_input(self) -> None: self.query_one("#setup_ssh_ok_button").disabled = True self.query_one("#messagebox_message_label").update(message) - self.stage += 1 + self.stage = "ask_for_encryption" def use_password_to_setup_ssh_key_pairs(self) -> None: - """Get the user password for the central server. + """Set up the SSH key pair using the user-supplied password to the central server. - If correct, SSH key pair is set up and 'OK' button changed - to 'Finish'. Otherwise, continue allowing failed password attempts. + Next, set up the request asking if they would like to set + a (separate) password on their RClone config, using the + system credential manager. """ password = self.query_one("#setup_ssh_password_input").value @@ -152,10 +169,16 @@ def use_password_to_setup_ssh_key_pairs(self) -> None: ) if success: - message = "Connection successful! SSH key saved to the RClone config file." - self.query_one("#setup_ssh_ok_button").label = "Finish" - self.query_one("#setup_ssh_cancel_button").disabled = True - self.stage += 1 + message = ( + f"Connection set up successfully.\n" + f"{rclone_encryption.get_explanation_message(self.cfg)}" + ) + self.query_one("#setup_ssh_ok_button").label = "Yes" + self.query_one("#setup_ssh_cancel_button").label = "No" + self.query_one("#setup_ssh_password_input").visible = False + self.stage = ( + "set_up_encryption" # Go to rclone encryption set up screen + ) else: message = ( @@ -166,3 +189,36 @@ def use_password_to_setup_ssh_key_pairs(self) -> None: self.failed_password_attempts += 1 self.query_one("#messagebox_message_label").update(message) + + def try_setup_rclone_encryption(self): + """Try and encrypt the RClone config using the system credential manager. + + If successful, the next screen confirms success. + """ + success, output = self.interface.try_setup_rclone_encryption() + + if success: + message = "Rclone config file was successfully encrypted." 
+ self.query_one("#messagebox_message_label").update(message) + self.query_one("#setup_ssh_ok_button").label = "Ok" + self.query_one("#setup_ssh_cancel_button").remove() + else: + message = f"Encryption failed. Exception: {output}" + self.query_one("#messagebox_message_label").update(message) + + self.stage = "show_success_message" + + def show_connection_successful_message(self): + """Show the final screen indicating the connection was successfully set up.""" + self.query_one("#setup_ssh_ok_button").label = "Finish" + + # Depending on what was the previous screen, `setup_ssh_cancel_button` + # may or may not be displayed. + try: + self.query_one("#setup_ssh_cancel_button").remove() + except BaseException: + pass + + message = "Connection was set up successfully. SSH key saved to the RClone config file." + self.query_one("#messagebox_message_label").update(message) + self.stage = "finished" diff --git a/datashuttle/tui/tabs/transfer.py b/datashuttle/tui/tabs/transfer.py index a6362ad85..07d37c734 100644 --- a/datashuttle/tui/tabs/transfer.py +++ b/datashuttle/tui/tabs/transfer.py @@ -16,6 +16,7 @@ from rich.text import Text from textual import work from textual.containers import Container, Horizontal, Vertical +from textual.css.query import NoMatches from textual.widgets import ( Button, Checkbox, @@ -217,8 +218,6 @@ def on_mount(self) -> None: "#transfer_tab_overwrite_select", "#transfer_tab_dry_run_checkbox", ]: - from textual.css.query import NoMatches - try: # if checkbox is removed by user, hard to predict, skip. self.query_one(id).tooltip = get_tooltip(id) diff --git a/datashuttle/utils/aws.py b/datashuttle/utils/aws.py index 519a2b6db..ba08183cf 100644 --- a/datashuttle/utils/aws.py +++ b/datashuttle/utils/aws.py @@ -10,8 +10,10 @@ def check_if_aws_bucket_exists(cfg: Configs) -> bool: The first part of`cfg["central_path"] should be an existing bucket name. 
""" - output = rclone.call_rclone( - f"lsjson {cfg.get_rclone_config_name()}:", pipe_std=True + output = rclone.call_rclone_for_central_connection( + cfg, + f"lsjson {cfg.rclone.get_rclone_config_name()}: {rclone.get_config_arg(cfg)}", + pipe_std=True, ) files_and_folders = json.loads(output.stdout) diff --git a/datashuttle/utils/data_transfer.py b/datashuttle/utils/data_transfer.py index c21d39bda..cd294670e 100644 --- a/datashuttle/utils/data_transfer.py +++ b/datashuttle/utils/data_transfer.py @@ -99,7 +99,7 @@ def __init__( self.__upload_or_download, self.__top_level_folder, include_list, - cfg.make_rclone_transfer_options( + rclone.make_rclone_transfer_options( overwrite_existing_files, dry_run ), ) diff --git a/datashuttle/utils/folders.py b/datashuttle/utils/folders.py index bf3e6c973..dcb903919 100644 --- a/datashuttle/utils/folders.py +++ b/datashuttle/utils/folders.py @@ -695,10 +695,13 @@ def search_central_via_connection( If `True`, return the full filepath, otherwise return only the folder/file name. 
""" - rclone_config_name = cfg.get_rclone_config_name(cfg["connection_method"]) + rclone_config_name = cfg.rclone.get_rclone_config_name( + cfg["connection_method"] + ) - output = rclone.call_rclone( - f'lsjson {rclone_config_name}:"{search_path.as_posix()}"', + output = rclone.call_rclone_for_central_connection( + cfg, + f'lsjson {rclone_config_name}:"{search_path.as_posix()}" {rclone.get_config_arg(cfg)}', pipe_std=True, ) diff --git a/datashuttle/utils/gdrive.py b/datashuttle/utils/gdrive.py index dcb41b401..499817c46 100644 --- a/datashuttle/utils/gdrive.py +++ b/datashuttle/utils/gdrive.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -8,71 +7,6 @@ from datashuttle.utils import rclone, utils -# ----------------------------------------------------------------------------- -# Helper Functions -# ----------------------------------------------------------------------------- - -# These functions are used by both API and TUI for setting up connections to google drive. - - -def preliminary_for_setup_without_browser( - cfg: Configs, - gdrive_client_secret: str | None, - rclone_config_name: str, - log: bool = True, -) -> str: - """Prepare rclone configuration for Google Drive without using a browser. - - This function prepares the rclone configuration for Google Drive without using a browser. - - The `config_is_local=false` flag tells rclone that the configuration process is being run - on a headless machine which does not have access to a browser. - - The `--non-interactive` flag is used to control rclone's behaviour while running it through - external applications. An `rclone config create` command would assume default values for config - variables in an interactive mode. If the `--non-interactive` flag is provided and rclone needs - the user to input some detail, a JSON blob will be returned with the question in it. 
For this - particular setup, rclone outputs a command for user to run on a machine with a browser. - - This function runs `rclone config create` with the user credentials and returns the rclone's output info. - This output info is presented to the user while asking for a `config_token`. - - Next, the user will run rclone's given command, authenticate with google drive and input the - config token given by rclone for datashuttle to proceed with the setup. - """ - client_id_key_value = ( - f"client_id {cfg['gdrive_client_id']} " - if cfg["gdrive_client_id"] - else " " - ) - client_secret_key_value = ( - f"client_secret {gdrive_client_secret} " - if gdrive_client_secret - else "" - ) - output = rclone.call_rclone( - f"config create " - f"{rclone_config_name} " - f"drive " - f"{client_id_key_value}" - f"{client_secret_key_value}" - f"scope drive " - f"root_folder_id {cfg['gdrive_root_folder_id']} " - f"config_is_local=false " - f"--non-interactive", - pipe_std=True, - ) - - # Extracting rclone's message from the json - output_json = json.loads(output.stdout) - message = output_json["Option"]["Help"] - - if log: - utils.log(message) - - return message - - # ----------------------------------------------------------------------------- # Python API # ----------------------------------------------------------------------------- @@ -108,7 +42,7 @@ def prompt_and_get_config_token( with google drive and input the `config_token` generated by rclone. The `config_token` is then used to complete rclone's config setup for google drive. 
""" - message = preliminary_for_setup_without_browser( + message = rclone.preliminary_setup_gdrive_config_without_browser( cfg, gdrive_client_secret, rclone_config_name, log=log ) input_ = utils.get_user_input( diff --git a/datashuttle/utils/rclone.py b/datashuttle/utils/rclone.py index fe1909119..ad8af6f90 100644 --- a/datashuttle/utils/rclone.py +++ b/datashuttle/utils/rclone.py @@ -1,20 +1,26 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, List, Literal, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional if TYPE_CHECKING: + from pathlib import Path + from subprocess import CompletedProcess + from datashuttle.configs.config_class import Configs - from datashuttle.utils.custom_types import TopLevelFolder + from datashuttle.utils.custom_types import ( + OverwriteExistingFiles, + TopLevelFolder, + ) +import json import os import platform import shlex import subprocess import tempfile -from subprocess import CompletedProcess from datashuttle.configs import canonical_configs -from datashuttle.utils import utils +from datashuttle.utils import rclone_encryption, utils def call_rclone(command: str, pipe_std: bool = False) -> CompletedProcess: @@ -47,7 +53,24 @@ def call_rclone(command: str, pipe_std: bool = False) -> CompletedProcess: return output -def call_rclone_through_script(command: str) -> CompletedProcess: +def call_rclone_for_central_connection( + cfg, command: str, pipe_std: bool = False +) -> CompletedProcess: + """Call RClone when the config file may need to be unencrypted. + + This is a convenience function to call RClone in places where + the config file may need to be unencrypted. This is for connecting + to the central storage through aws, ssh or gdrive. It wraps the + function call in a set-up / teardown of the config password. 
+ """ + return run_function_that_requires_encrypted_rclone_config_access( + cfg, lambda: call_rclone(command, pipe_std) + ) + + +def call_rclone_through_script_for_central_connection( + cfg: Configs, command: str +) -> CompletedProcess: """Call rclone through a script. This is to avoid limits on command-line calls (in particular on Windows). @@ -55,6 +78,9 @@ def call_rclone_through_script(command: str) -> CompletedProcess: Parameters ---------- + cfg + Datashuttle Configs class. + ---------- command Full command to run with RClone. @@ -83,13 +109,22 @@ def call_rclone_through_script(command: str) -> CompletedProcess: if system != "Windows": os.chmod(tmp_script_path, 0o700) - output = subprocess.run( + lambda_func = lambda: subprocess.run( [tmp_script_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, ) + if rclone_encryption.connection_method_requires_encryption( + cfg["connection_method"] + ): + output = run_function_that_requires_encrypted_rclone_config_access( + cfg, lambda_func + ) + else: + output = lambda_func() + if output.returncode != 0: prompt_rclone_download_if_does_not_exist() @@ -99,7 +134,9 @@ def call_rclone_through_script(command: str) -> CompletedProcess: return output -def call_rclone_with_popen(command: str) -> subprocess.Popen: +def call_rclone_with_popen( + command: str, +) -> subprocess.Popen: """Call rclone using `subprocess.Popen` for control over process termination. It is not possible to kill a process while running it using `subprocess.run`. 
@@ -110,34 +147,76 @@ def call_rclone_with_popen(command: str) -> subprocess.Popen: """ command = "rclone " + command process = subprocess.Popen( - shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE + shlex.split(command), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, ) return process -def await_call_rclone_with_popen_raise_on_fail( - process: subprocess.Popen, log: bool = True +def await_call_rclone_with_popen_for_central_connection_raise_on_fail( + cfg: Configs, process: subprocess.Popen, log: bool = True ): """Await rclone the subprocess.Popen call. Calling `process.communicate()` waits for the process to complete and returns the stdout and stderr. """ - stdout, stderr = process.communicate() + lambda_func = lambda: process.communicate() + + stdout, stderr = run_function_that_requires_encrypted_rclone_config_access( + cfg, lambda_func, check_config_exists=False + ) if process.returncode != 0: utils.log_and_raise_error(stderr.decode("utf-8"), ConnectionError) if log: - log_rclone_config_output() + log_rclone_config_output(cfg) + + return stdout, stderr + + +def run_function_that_requires_encrypted_rclone_config_access( + cfg, lambda_func, check_config_exists: bool = True +) -> Any: + """Run command that requires possibly encrypted Rclone config file. + + The Rclone config file may be encrypted for aws, gdrive or ssh connections. + In this case we need to set an environment variable to tell Rclone how + to decrypt the config file (and remove the variable afterwards). + """ + rclone_config_filepath = ( + cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + if check_config_exists and not rclone_config_filepath.is_file(): + raise RuntimeError( + f"The way RClone configs are managed has changed since version v0.7.1\n" + f"Please set up the {cfg['connection_method']} connection again." 
+ ) + + is_encrypted = cfg.rclone.rclone_file_is_encrypted() + + if is_encrypted: + rclone_encryption.set_credentials_as_password_command(cfg) + + try: + results = lambda_func() + finally: + if is_encrypted: + rclone_encryption.remove_credentials_as_password_command() + + return results # ----------------------------------------------------------------------------- -# Setup +# RClone Configs # ----------------------------------------------------------------------------- def setup_rclone_config_for_local_filesystem( + cfg: Configs, rclone_config_name: str, log: bool = True, ) -> None: @@ -157,9 +236,12 @@ def setup_rclone_config_for_local_filesystem( Parameters ---------- + cfg + datashuttle Configs class + rclone_config_name canonical config name, generated by - datashuttle.cfg.get_rclone_config_name() + datashuttle.cfg.rclone.get_rclone_config_name() log whether to log, if True logger must already be initialised. @@ -168,7 +250,7 @@ def setup_rclone_config_for_local_filesystem( call_rclone(f"config create {rclone_config_name} local", pipe_std=True) if log: - log_rclone_config_output() + log_rclone_config_output(cfg) def setup_rclone_config_for_ssh( @@ -190,7 +272,7 @@ def setup_rclone_config_for_ssh( rclone_config_name canonical config name, generated by - datashuttle.cfg.get_rclone_config_name() + datashuttle.cfg.rclone.get_rclone_config_name() private_key_str PEM encoded ssh private key to pass to RClone. 
@@ -201,6 +283,8 @@ def setup_rclone_config_for_ssh( """ key_escaped = private_key_str.replace("\n", "\\n") + cfg.rclone.delete_existing_rclone_config_file() + command = ( f"config create " f"{rclone_config_name} " @@ -208,13 +292,13 @@ def setup_rclone_config_for_ssh( f"host {cfg['central_host_id']} " f"user {cfg['central_host_username']} " f"port {canonical_configs.get_default_ssh_port()} " + f"{get_config_arg(cfg)} " f'-- key_pem "{key_escaped}"' ) - call_rclone(command, pipe_std=True) if log: - log_rclone_config_output() + log_rclone_config_output(cfg) def setup_rclone_config_for_gdrive( @@ -240,7 +324,7 @@ def setup_rclone_config_for_gdrive( rclone_config_name Canonical config name, generated by - datashuttle.cfg.get_rclone_config_name() + datashuttle.cfg.rclone.get_rclone_config_name() gdrive_client_secret Google Drive client secret, mandatory when using a Google Drive client. @@ -266,7 +350,9 @@ def setup_rclone_config_for_gdrive( else "" ) - process = call_rclone_with_popen( + cfg.rclone.delete_existing_rclone_config_file() + + command = ( f"config create " f"{rclone_config_name} " f"drive " @@ -274,12 +360,77 @@ def setup_rclone_config_for_gdrive( f"{client_secret_key_value}" f"scope drive " f"root_folder_id {cfg['gdrive_root_folder_id']} " - f"{extra_args}" + f"{extra_args} " + f"{get_config_arg(cfg)}" ) + process = call_rclone_with_popen(command) + return process +def preliminary_setup_gdrive_config_without_browser( + cfg: Configs, + gdrive_client_secret: str | None, + rclone_config_name: str, + log: bool = True, +) -> str: + """Prepare rclone configuration for Google Drive without using a browser. + + This function prepares the rclone configuration for Google Drive without using a browser. + + The `config_is_local=false` flag tells rclone that the configuration process is being run + on a headless machine which does not have access to a browser. 
+ + The `--non-interactive` flag is used to control Rclone's behaviour while running it through + external applications. An `rclone config create` command would assume default values for config + variables in an interactive mode. If the `--non-interactive` flag is provided and rclone needs + the user to input some detail, a JSON blob will be returned with the question in it. For this + particular setup, rclone outputs a command for user to run on a machine with a browser. + + This function runs `rclone config create` with the user credentials and returns the rclone's output info. + This output info is presented to the user while asking for a `config_token`. + + Next, the user will run rclone's given command, authenticate with google drive and input the + config token given by rclone for datashuttle to proceed with the setup. + """ + client_id_key_value = ( + f"client_id {cfg['gdrive_client_id']} " + if cfg["gdrive_client_id"] + else " " + ) + client_secret_key_value = ( + f"client_secret {gdrive_client_secret} " + if gdrive_client_secret + else "" + ) + + cfg.rclone.delete_existing_rclone_config_file() + + output = call_rclone( + f"config create " + f"{get_config_arg(cfg)} " + f"{rclone_config_name} " + f"drive " + f"{client_id_key_value}" + f"{client_secret_key_value}" + f"scope drive " + f"root_folder_id {cfg['gdrive_root_folder_id']} " + f"config_is_local=false " + f"--non-interactive", + pipe_std=True, + ) + + # Extracting rclone's message from the json + output_json = json.loads(output.stdout) + message = output_json["Option"]["Help"] + + if log: + utils.log(message) + + return message + + def setup_rclone_config_for_aws( cfg: Configs, rclone_config_name: str, @@ -296,7 +447,7 @@ def setup_rclone_config_for_aws( rclone_config_name Canonical RClone config name, generated by - datashuttle.cfg.get_rclone_config_name() + datashuttle.cfg.rclone.get_rclone_config_name() aws_secret_access_key The aws secret access key provided by the user. 
@@ -315,6 +466,8 @@ def setup_rclone_config_for_aws( else f" location_constraint {aws_region}" ) + cfg.rclone.delete_existing_rclone_config_file() + output = call_rclone( "config create " f"{rclone_config_name} " @@ -322,7 +475,8 @@ def setup_rclone_config_for_aws( f"access_key_id {cfg['aws_access_key_id']} " f"secret_access_key {aws_secret_access_key} " f"region {aws_region}" - f"{location_constraint_key_value}", + f"{location_constraint_key_value} " + f"{get_config_arg(cfg)}", pipe_std=True, ) @@ -332,7 +486,21 @@ def setup_rclone_config_for_aws( ) if log: - log_rclone_config_output() + log_rclone_config_output(cfg) + + +def get_config_arg(cfg: Configs) -> str: + """Get the full argument to run Rclone commands with a specific config.""" + if rclone_encryption.connection_method_requires_encryption( + cfg["connection_method"] + ): + rclone_config_path = ( + cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + return f'--config "{rclone_config_path}"' + else: + return "" def check_successful_connection_and_raise_error_on_fail(cfg: Configs) -> None: @@ -351,16 +519,22 @@ def check_successful_connection_and_raise_error_on_fail(cfg: Configs) -> None: else: tempfile_path = (cfg["central_path"] / filename).as_posix() - output = call_rclone( - f"touch {cfg.get_rclone_config_name()}:{tempfile_path}", pipe_std=True + config_name = cfg.rclone.get_rclone_config_name() + + output = call_rclone_for_central_connection( + cfg, + f"touch {config_name}:{tempfile_path} {get_config_arg(cfg)}", + pipe_std=True, ) if output.returncode != 0: utils.log_and_raise_error( output.stderr.decode("utf-8"), ConnectionError ) - output = call_rclone( - f"delete {cfg.get_rclone_config_name()}:{tempfile_path}", pipe_std=True + output = call_rclone_for_central_connection( + cfg, + f"delete {cfg.rclone.get_rclone_config_name()}:{tempfile_path} {get_config_arg(cfg)}", + pipe_std=True, ) if output.returncode != 0: utils.log_and_raise_error( @@ -368,12 +542,25 @@ def 
check_successful_connection_and_raise_error_on_fail(cfg: Configs) -> None: ) -def log_rclone_config_output() -> None: +def get_rclone_config_filepath(cfg: Configs) -> Path: + """Get the path to the central Rclone config for the current `connection_method`.""" + if rclone_encryption.connection_method_requires_encryption( + cfg["connection_method"] + ): + config_filepath = ( + cfg.rclone.get_rclone_central_connection_config_filepath() + ) + else: + output = call_rclone("config file", pipe_std=True) + config_filepath = output.stdout.decode("utf-8") + + return config_filepath + + +def log_rclone_config_output(cfg: Configs) -> None: """Log the output from creating Rclone config.""" - output = call_rclone("config file", pipe_std=True) - utils.log( - f"Successfully created rclone config. {output.stdout.decode('utf-8')}" - ) + config_filepath = get_rclone_config_filepath(cfg) + utils.log(f"Successfully created rclone config. {config_filepath}") def prompt_rclone_download_if_does_not_exist() -> None: @@ -437,7 +624,7 @@ def transfer_data( rclone_options A list of options to pass to Rclone's copy function. - see `cfg.make_rclone_transfer_options()`. + see `make_rclone_transfer_options()`. 
Returns ------- @@ -458,22 +645,46 @@ def transfer_data( extra_arguments = handle_rclone_arguments(rclone_options, include_list) if upload_or_download == "upload": - output = call_rclone_through_script( + output = call_rclone_through_script_for_central_connection( + cfg, f"{rclone_args('copy')} " - f'"{local_filepath}" "{cfg.get_rclone_config_name()}:' - f'{central_filepath}" {extra_arguments}', + f'"{local_filepath}" "{cfg.rclone.get_rclone_config_name()}:' + f'{central_filepath}" {extra_arguments} {get_config_arg(cfg)}', ) elif upload_or_download == "download": - output = call_rclone_through_script( + output = call_rclone_through_script_for_central_connection( + cfg, f"{rclone_args('copy')} " - f'"{cfg.get_rclone_config_name()}:' - f'{central_filepath}" "{local_filepath}" {extra_arguments}', + f'"{cfg.rclone.get_rclone_config_name()}:' + f'{central_filepath}" "{local_filepath}" {extra_arguments} {get_config_arg(cfg)}', ) return output +def make_rclone_transfer_options( + overwrite_existing_files: OverwriteExistingFiles, dry_run: bool +) -> Dict: + """Create a dictionary of rclone transfer options.""" + allowed_overwrite = ["never", "always", "if_source_newer"] + + if overwrite_existing_files not in allowed_overwrite: + utils.log_and_raise_error( + f"`overwrite_existing_files` not " + f"recognised, must be one of: " + f"{allowed_overwrite}", + ValueError, + ) + + return { + "overwrite_existing_files": overwrite_existing_files, + "show_transfer_progress": True, + "transfer_verbosity": "vv", + "dry_run": dry_run, + } + + def get_local_and_central_file_differences( cfg: Configs, top_level_folders_to_check: List[TopLevelFolder], @@ -569,13 +780,26 @@ def perform_rclone_check( "central", top_level_folder ).parent.as_posix() - output = call_rclone( - f"{rclone_args('check')} " - f'"{local_filepath}" ' - f'"{cfg.get_rclone_config_name()}:{central_filepath}"' - f" --combined -", - pipe_std=True, - ) + if rclone_encryption.connection_method_requires_encryption( + 
cfg["connection_method"] + ): + output = call_rclone_for_central_connection( + cfg, + f"{rclone_args('check')} " + f'"{local_filepath}" ' + f'"{cfg.rclone.get_rclone_config_name()}:{central_filepath}" ' + f"--combined - " + f"{get_config_arg(cfg)}", + pipe_std=True, + ) + else: + output = call_rclone( + f"{rclone_args('check')} " + f'"{local_filepath}" ' + f'"{cfg.rclone.get_rclone_config_name()}:{central_filepath}" ' + f"--combined - ", + pipe_std=True, + ) return output.stdout.decode("utf-8") diff --git a/datashuttle/utils/rclone_encryption.py b/datashuttle/utils/rclone_encryption.py new file mode 100644 index 000000000..030a850f7 --- /dev/null +++ b/datashuttle/utils/rclone_encryption.py @@ -0,0 +1,364 @@ +"""Module for encrypting the RClone config file. + +Methods based on: https://rclone.org/docs/#configuration-encryption. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + + from datashuttle.configs.configs_class import Configs + +import os +import platform +import shutil +import subprocess + +from datashuttle.utils import utils + + +def save_credentials_password(cfg: Configs) -> None: + """Use the system password manager to set up a password for the Rclone config file encryption.""" + if platform.system() == "Windows": + set_password_windows(cfg) + elif platform.system() == "Linux": + set_password_linux(cfg) + else: + set_password_macos(cfg) + + +def set_password_windows(cfg: Configs) -> None: + """Generate and securely store a random password in a Windows Credential XML file. + + Use PowerShell to create a random password associated with the name 'rclone'. + The password is stored as a PowerShell `PSCredential` object that can only + be decrypted by the same Windows user account that created it. + + This password is later used to encrypt the Rclone config file. 
+ """ + password_filepath = get_windows_password_filepath(cfg) + + if password_filepath.exists(): + password_filepath.unlink() + + shell = shutil.which("powershell") + if shell is None: + utils.log_and_raise_error( + "powershell.exe not found in PATH (need Windows PowerShell 5.1).", + RuntimeError, + ) + + ps_cmd = ( + "Add-Type -AssemblyName System.Web; " + "New-Object PSCredential 'rclone', " + "(ConvertTo-SecureString ([System.Web.Security.Membership]::GeneratePassword(40,10)) -AsPlainText -Force) " + f"| Export-Clixml -LiteralPath '{password_filepath}'" + ) + + output = subprocess.run( + [shell, "-NoProfile", "-Command", ps_cmd], # type: ignore + capture_output=True, + text=True, + ) + if output.returncode != 0: + utils.log_and_raise_error( + f"\n--- STDOUT ---\n{output.stdout}" + f"\n--- STDERR ---\n{output.stderr}" + "\nCould not set the PSCredential with System.web. See the error message above.", + RuntimeError, + ) + + +def set_password_linux(cfg: Configs) -> None: + """Generate and securely store a random password using the Linux `pass` utility. + + This function generates a random password and stores it in the user's + GPG-encrypted password store via the `pass` command-line tool. + + The `pass` utility must be installed and initialized with a GPG ID on the + current user account (via `pass init `). If it is not initialized, + a RuntimeError will be raised. + """ + output = subprocess.run( + "pass --help", + shell=True, + capture_output=True, + text=True, + ) + if output.returncode != 0: + utils.log_and_raise_error( + "`pass` is required to set password. Install e.g. sudo apt install pass.", + RuntimeError, + ) + + output = subprocess.run( + "pass ls", + shell=True, + capture_output=True, + text=True, + ) + if output.returncode != 0: + if "pass init" in output.stderr: + utils.log_and_raise_error( + "Password store is not initialized. 
" + "Run `pass init ` before using `pass`.", + RuntimeError, + ) + else: + utils.log_and_raise_error( + f"\n--- STDOUT ---\n{output.stdout}" + f"\n--- STDERR ---\n{output.stderr}" + "\nCould not set up password with `pass`. See the error message above.", + RuntimeError, + ) + + output = subprocess.run( + f"echo $(openssl rand -base64 40) | pass insert -m {cfg.rclone.get_rclone_config_name()}", + shell=True, + capture_output=True, + text=True, + ) + if output.returncode != 0: + utils.log_and_raise_error( + f"\n--- STDOUT ---\n{output.stdout}" + f"\n--- STDERR ---\n{output.stderr}" + "\nCould not encrypt the RClone config file. See the error message above.", + RuntimeError, + ) + + +def set_password_macos(cfg: Configs) -> None: + """Generate and store a password using the macOS Keychain. + + This function generates a random password and stores it in the macOS Keychain + using the built-in `security` command-line tool. + + The password is generated using OpenSSL with 40 random base64 characters and + is securely saved to the user's login Keychain. + """ + output = subprocess.run( + f"security add-generic-password -a datashuttle -s {cfg.rclone.get_rclone_config_name()} -w $(openssl rand -base64 40) -U", + shell=True, + capture_output=True, + text=True, + ) + + if output.returncode != 0: + utils.log_and_raise_error( + f"\n--- STDOUT ---\n{output.stdout}" + f"\n--- STDERR ---\n{output.stderr}" + "\nCould not encrypt the RClone config. See the error message above.", + RuntimeError, + ) + + +def set_credentials_as_password_command(cfg: Configs) -> None: + """Configure the RClone password retrieval command based on the operating system. + + This function sets the `RCLONE_PASSWORD_COMMAND` environment variable so that + RClone can securely retrieve stored credentials + + - Windows : Uses PowerShell to decrypt a previously exported `PSCredential` + object from the `.clixml` file created by `set_password_windows()`. 
+ - Linux : Uses the `pass` command-line utility to fetch the stored password + from the user's GPG-encrypted password store. + - macOS : Uses the built-in `security` tool to read the password + from the user's Keychain, associated with the account name `datashuttle` and + the rclone service name. + """ + if platform.system() == "Windows": + password_filepath = get_windows_password_filepath(cfg) + + assert password_filepath.exists(), ( + "Critical error: password file not found when setting password command." + ) + + shell = shutil.which("powershell") + if not shell: + utils.log_and_raise_error( + "powershell.exe not found in PATH", RuntimeError + ) + + # Escape single quotes inside PowerShell string by doubling them + cmd = ( + f'{shell} -NoProfile -Command "Write-Output (' + f"[System.Runtime.InteropServices.Marshal]::PtrToStringAuto(" + f"[System.Runtime.InteropServices.Marshal]::SecureStringToBSTR(" + f"(Import-Clixml -LiteralPath '{password_filepath}' ).Password)))\"" + ) + + os.environ["RCLONE_PASSWORD_COMMAND"] = cmd + + elif platform.system() == "Linux": + os.environ["RCLONE_PASSWORD_COMMAND"] = ( + f"/usr/bin/pass {cfg.rclone.get_rclone_config_name()}" + ) + + elif platform.system() == "Darwin": + os.environ["RCLONE_PASSWORD_COMMAND"] = ( + f"/usr/bin/security find-generic-password -a datashuttle -s {cfg.rclone.get_rclone_config_name()} -w" + ) + + +def run_rclone_config_encrypt(cfg: Configs) -> None: + """Encrypt the rclone config file using an OS-native secret. + + This function: + 1) Generates/stores a random password using the platform-specific backend + (Windows PSCredential, Linux `pass`, or macOS Keychain) via + `save_credentials_password(cfg)`. + 2) Sets `RCLONE_PASSWORD_COMMAND` so rclone can retrieve the secret on demand + via `set_credentials_as_password_command(cfg)`. + 3) Runs `rclone config encryption set --config ` to encrypt the config. 
+ 4) Cleans up by removing the password command environment variable with + `remove_credentials_as_password_command()`. + """ + rclone_config_path = ( + cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + if not rclone_config_path.exists(): + connection_method = cfg["connection_method"] + + utils.log_and_raise_error( + f"Rclone config file for: {connection_method} was not found. " + f"Make sure you set up the connection first with `setup_{connection_method}_connection()`", + RuntimeError, + ) + + save_credentials_password(cfg) + + set_credentials_as_password_command(cfg) + + try: + output = subprocess.run( + f"rclone config encryption set --config {rclone_config_path.as_posix()}", + shell=True, + capture_output=True, + text=True, + ) + if output.returncode != 0: + utils.log_and_raise_error( + f"\n--- STDOUT ---\n{output.stdout}\n" + f"\n--- STDERR ---\n{output.stderr}\n" + "\nCould not encrypt the RClone config. See the error message above.", + RuntimeError, + ) + finally: + remove_credentials_as_password_command() + + +def remove_rclone_encryption(cfg: Configs) -> None: + """Remove encryption from a Rclone config file. + + Set the credentials one last time to remove encryption from + the RClone config file. Once removed, clean up the password + as stored with the system credential manager. + """ + set_credentials_as_password_command(cfg) + + config_filepath = ( + cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + output = subprocess.run( + rf"rclone config encryption remove --config {config_filepath.as_posix()}", + shell=True, + capture_output=True, + text=True, + ) + if output.returncode != 0: + utils.log_and_raise_error( + f"\n--- STDOUT ---\n{output.stdout}" + f"\n--- STDERR ---\n{output.stderr}" + "\nCould not remove the password from the RClone config. 
See the error message above.", + RuntimeError, + ) + + remove_credentials_as_password_command() + + if platform.system() == "Windows": + password_filepath = get_windows_password_filepath(cfg) + if password_filepath.exists(): + password_filepath.unlink() + + elif platform.system() == "Linux": + name = cfg.rclone.get_rclone_config_name() + subprocess.run( + ["pass", "rm", "-f", name], + check=False, + ) + + elif platform.system() == "Darwin": + service = cfg.rclone.get_rclone_config_name() + subprocess.run( + [ + "security", + "delete-generic-password", + "-a", + "datashuttle", + "-s", + service, + ], + check=False, + ) + + utils.log_and_message( + f"Password removed from rclone config file: {config_filepath}" + ) + + +def remove_credentials_as_password_command(): + """Tidy up the rclone password environment variable.""" + if "RCLONE_PASSWORD_COMMAND" in os.environ: + os.environ.pop("RCLONE_PASSWORD_COMMAND") + + +def connection_method_requires_encryption(connection_method: str): + """Check whether the connection method stores sensitive information.""" + return connection_method in ["aws", "gdrive", "ssh"] + + +def get_windows_password_filepath( + cfg: Configs, +) -> Path: + """Get the canonical location where datashuttle stores the windows credentials.""" + assert connection_method_requires_encryption(cfg["connection_method"]) + + # Put this folder next to the project (datashuttle) config file + base_path = cfg.file_path.parent / "credentials" + + base_path.mkdir(exist_ok=True, parents=True) + + return base_path / f"{cfg.rclone.get_rclone_config_name()}.xml" + + +def get_explanation_message( + cfg: Configs, +) -> str: + """Explaining Rclone's default credential storage and OS-specific encryption options. + + Displayed in both the Python API and the TUI. 
+ """ + system_pass_manager = { + "Windows": "PSCredential", + "Linux": "the `pass` program", + "Darwin": "macOS built-in `security` tool", + } + + pass_type = { + "ssh": "your private SSH key", + "aws": "your IAM access key ID and secret access key", + "gdrive": "your Google Drive access token and client secret (if set)", + } + + message = ( + f"By default, RClone stores {pass_type[cfg['connection_method']]} in plain text at the below location:\n\n" + f"{cfg.rclone.get_rclone_central_connection_config_filepath()}\n\n" + f"Would you like to encrypt the RClone config file using {system_pass_manager[platform.system()]}?" + ) + + return message diff --git a/datashuttle/utils/utils.py b/datashuttle/utils/utils.py index ea8c279bf..c6047a3ae 100644 --- a/datashuttle/utils/utils.py +++ b/datashuttle/utils/utils.py @@ -79,6 +79,7 @@ def raise_error(message: str, exception) -> None: if a function call raises an exception in a python environment. """ ds_logger.close_log_filehandler() + raise exception(message) diff --git a/docs/source/pages/get_started/set-up-a-project.md b/docs/source/pages/get_started/set-up-a-project.md index bd94b023c..84a8da396 100644 --- a/docs/source/pages/get_started/set-up-a-project.md +++ b/docs/source/pages/get_started/set-up-a-project.md @@ -546,3 +546,86 @@ project.setup_aws_connection() Running [](setup_aws_connection()) will require entering your `AWS Secret Access Key` and the setup will be completed. + +:::: + + +(password-protection)= +# Encrypting your connection credentials + +Datashuttle uses [RClone](https://rclone.org/) for all data transfers. +RClone stores connection credentials in a +local configuration file that, by default, is not encrypted. + +This file can include: + +- **SSH:** your private SSH key +- **Google Drive:** your OAuth access token and client secret +- **Amazon S3:** your AWS Access Key ID and Secret Access Key + +These are stored in your home directory, which is expected to be secure. 
However, for an +additional layer of security, it is possible to encrypt the Rclone config file using the +system credential manager of your operating system. This file will then be +unreadable for anyone who does not have access to your machine user account. Note that +anyone with access to the machine user account will be able to decrypt the Rclone file. + +For details on setting up encryption, see the section below. On Windows, you will +need to be running in PowerShell, and on Linux you will need the `pass` package installed. + +::::{tab-set} + +:::{tab-item} Windows + +On Windows, the PowerShell `PSCredential` system is used to encrypt the RClone config file. + +- A random password is generated and stored as a `.clixml` credential file. +- The password can only be decrypted by the same Windows user account that created it. +- The encryption and decryption process uses PowerShell, so PowerShell must be available (it will not work from `cmd.exe`). + +When encryption is enabled, RClone automatically retrieves the password from the PSCredential store whenever it runs. + +::: + +:::{tab-item} macOS + +On macOS, the built-in Keychain via the `security` command-line tool is used. + +- A random password is generated using `openssl rand -base64 40`. +- The password is securely stored in your login Keychain under the service name corresponding to your RClone config. +- Only your macOS user account can access this key. + +When you first set up encryption, macOS may prompt you to authorize access to the Keychain. +Once approved, RClone will automatically retrieve the key when needed. + +::: + +:::{tab-item} Linux + +On Linux, the `pass` package is used to manage the encryption. 
You can install it with: + ```bash + sudo apt install pass + ``` + +Next, you need to initialize the password store with your GPG key: + ```bash + pass init YOUR_GPG_ID + ``` + +Once initialized, Datashuttle will: +- Generate a random password with `openssl rand -base64 40` +- Store it securely in the GPG-encrypted password store +- Configure RClone to retrieve it automatically with: + ```bash + /usr/bin/pass RCLONE_CONFIG_NAME + ``` + +::: + +:::: + +## Removing encryption + +Encryption of the Rclone config file used for the central connection +(either SSH, Google Drive or AWS) can be removed with the following command: + +[](remove_rclone_encryption()) diff --git a/pyproject.toml b/pyproject.toml index 2e9402411..415a56cae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -164,6 +164,6 @@ ignore = [ datashuttle = "datashuttle.tui_launcher:main" [tool.codespell] -skip = '.git,*.pdf,*.svg' +skip = '.git,*.pdf,*.svg,*.yml' # # ignore-words-list = '' diff --git a/tests/base.py b/tests/base.py index f0e45588b..1fd9106b7 100644 --- a/tests/base.py +++ b/tests/base.py @@ -4,7 +4,7 @@ from . 
import test_utils -TEST_PROJECT_NAME = "test_project" +TEST_PROJECT_NAME = "ds-unique-test-project-d375gd234vds2f" class BaseTest: diff --git a/tests/test_utils.py b/tests/test_utils.py index bb38c5851..ff9f7849f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,6 @@ import asyncio import copy import glob -import json import logging import os import pathlib @@ -14,7 +13,7 @@ from datashuttle import DataShuttle from datashuttle.configs import canonical_configs, canonical_folders -from datashuttle.utils import ds_logger, rclone +from datashuttle.utils import ds_logger # ----------------------------------------------------------------------------- # Setup and Teardown Test Project @@ -433,36 +432,11 @@ def check_config_file(config_path, *kwargs): assert value == config_yaml[name], f"{name}" -# ----------------------------------------------------------------------------- -# Search -# ----------------------------------------------------------------------------- - - -def recursive_search_central(project: DataShuttle): - """ - A convenience function to search project for files on remote folders - using rclone's recursive search. 
- """ - all_filenames: list[str] = [] - - path_ = (project.cfg["central_path"] / "rawdata").as_posix() - - # -R flag searches recursively - output = rclone.call_rclone( - f"lsjson -R {project.cfg.get_rclone_config_name()}:{path_}", - pipe_std=True, - ) - - all_files_or_folders = json.loads(output.stdout) - - for file_or_folder in all_files_or_folders: - is_dir = file_or_folder.get("IsDir", False) - - if not is_dir: - file_path = file_or_folder["Path"] - all_filenames.append(f"{path_}/{file_path}") +def check_rclone_file_is_encrypted(rclone_config_path): + with open(rclone_config_path, "r", encoding="utf-8") as file: + first_line = file.readline().strip() - return all_filenames + assert first_line == "# Encrypted rclone configuration File" # ----------------------------------------------------------------------------- diff --git a/tests/tests_integration/test_create_folders.py b/tests/tests_integration/test_create_folders.py index 72102d986..a14c6b45d 100644 --- a/tests/tests_integration/test_create_folders.py +++ b/tests/tests_integration/test_create_folders.py @@ -309,7 +309,9 @@ def test_all_top_level_folders(self, project, top_level_folder): @pytest.mark.parametrize("top_level_folder", ["rawdata", "derivatives"]) @pytest.mark.parametrize("return_with_prefix", [True, False]) - def test_get_next_sub(self, project, return_with_prefix, top_level_folder): + def test_get_next_sub__( + self, project, return_with_prefix, top_level_folder + ): """Test that the next subject number is suggested correctly. This takes the union of subjects available in the local and central repository. As such test the case where either are diff --git a/tests/tests_integration/test_local_only_mode.py b/tests/tests_integration/test_local_only_mode.py index 9bef8af16..eb461984b 100644 --- a/tests/tests_integration/test_local_only_mode.py +++ b/tests/tests_integration/test_local_only_mode.py @@ -9,7 +9,7 @@ from .. 
import test_utils from ..base import BaseTest -TEST_PROJECT_NAME = "test_project" +TEST_PROJECT_NAME = "ds-unique-test-project-d375gd234vds2f" class TestLocalOnlyProject(BaseTest): diff --git a/tests/tests_integration/test_logging.py b/tests/tests_integration/test_logging.py index 9b5bd4a0b..5f58591f5 100644 --- a/tests/tests_integration/test_logging.py +++ b/tests/tests_integration/test_logging.py @@ -86,7 +86,7 @@ def clean_project_name(self): Switch on datashuttle logging as required for these tests, then turn back off during tear-down. """ - project_name = "test_project" + project_name = "ds-unique-test-project-d375gd234vds2f" test_utils.delete_project_if_it_exists(project_name) test_utils.set_datashuttle_loggers(disable=False) @@ -290,7 +290,7 @@ def test_logs_upload_and_download( assert "Using config file from" in log assert "--include" in log assert "sub-11/ses-123/anat/**" in log - assert "/central/test_project/rawdata" in log + assert "/central/ds-unique-test-project-d375gd234vds2f/rawdata" in log @pytest.mark.parametrize("upload_or_download", ["upload", "download"]) def test_logs_upload_and_download_folder_or_file( diff --git a/tests/tests_integration/test_rclone_encryption.py b/tests/tests_integration/test_rclone_encryption.py new file mode 100644 index 000000000..50d9dd03c --- /dev/null +++ b/tests/tests_integration/test_rclone_encryption.py @@ -0,0 +1,60 @@ +import os +import textwrap + +from datashuttle.utils import rclone_encryption + +from .. import test_utils +from ..base import BaseTest +from ..tests_transfers.ssh import ssh_test_utils + + +class TestRcloneEncryption(BaseTest): + def test_set_and_remove_rclone_config_encryption(self, project): + """Test that RClone config encryption is set up and torn down correctly. + + First, we generate a mock RClone config file or a mock SSH project. + Then we check datashuttle functions for encryption, decryption and + cleaning up environment variables work as expected. 
+ """ + ssh_test_utils.setup_project_for_ssh( + project, + ) + + rclone_config_path = ( + project.cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + if rclone_config_path.exists(): + rclone_config_path.unlink() + + config_content = textwrap.dedent(f"""\ + [{project.cfg.rclone.get_rclone_config_name()}] + type = sftp + host = ssh.swc.ucl.ac.uk + user = jziminski + port = 22 + key_file = C:/Users/Jzimi/.datashuttle/my_project_name/my_project_name_ssh_key + shell_type = unix + md5sum_command = md5sum + sha1sum_command = sha1sum + """) + + # Write to file + with open(rclone_config_path, "w") as file: + file.write(config_content) + + rclone_encryption.run_rclone_config_encrypt(project.cfg) + + assert "RCLONE_PASSWORD_COMMAND" not in os.environ + + test_utils.check_rclone_file_is_encrypted(rclone_config_path) + + rclone_encryption.remove_rclone_encryption(project.cfg) + + assert "RCLONE_PASSWORD_COMMAND" not in os.environ + + # Read the file contents to check it is no longer encrypted. 
+ with open(rclone_config_path, "r", encoding="utf-8") as f: + first_line = f.readline().strip() + + assert first_line == f"[{project.cfg.rclone.get_rclone_config_name()}]" diff --git a/tests/tests_integration/test_search_methods.py b/tests/tests_integration/test_search_methods.py index a53953146..7f28733ac 100644 --- a/tests/tests_integration/test_search_methods.py +++ b/tests/tests_integration/test_search_methods.py @@ -19,7 +19,7 @@ class TestSubSesSearches(BaseTest): @pytest.mark.parametrize("return_full_path", [True, False]) def test_local_vs_central_search_methods( - self, project, monkeypatch, return_full_path + self, project, monkeypatch, return_full_path, tmp_path ): """ Test the `search_local_filesystem` and `search_central_via_connection` @@ -55,18 +55,23 @@ def test_local_vs_central_search_methods( test_utils.write_file(central_path / path_.parent.parent.parent / f"{path_.parent.parent.name}.md", contents="hello_world",) # fmt: on - # Monkeycatch `get_rclone_config_name` to return `local` and set `local` - # as a rclone config entry associated with the local filesystem. By this - # method we can hijack `search_central_via_connection` to run locally - # (though it is set up in practice to run via ssh, gdrive or aws). + # search_central_via_connection will run the transfer + # function but with additional checks for rclone password + # through `run_function_that_requires_encrypted_rclone_config_access`. + # Here we monkeypatch that to skip all of those checks. 
+ call_rclone(r"config create local local nounc true") + + from datashuttle.utils import rclone + + def mock_rclone_caller(_, func, optional=None): + return func() + monkeypatch.setattr( - project.cfg, - "get_rclone_config_name", - lambda connection_method: "local", + rclone, + "run_function_that_requires_encrypted_rclone_config_access", + mock_rclone_caller, ) - call_rclone("config create local local nounc true") - # Perform a range of checks across folders and files # and check the outputs of both approaches match. # fmt: off diff --git a/tests/tests_integration/test_validation.py b/tests/tests_integration/test_validation.py index 34c5a02fd..f9be7d12b 100644 --- a/tests/tests_integration/test_validation.py +++ b/tests/tests_integration/test_validation.py @@ -5,6 +5,7 @@ import pytest from datashuttle import validate_project_from_path +from datashuttle.configs import canonical_folders from datashuttle.utils import formatting, validation from datashuttle.utils.custom_exceptions import NeuroBlueprintError @@ -787,7 +788,7 @@ def test_name_templates_validate_project(self, project): # Test Quick Validation Function # ---------------------------------------------------------------------------------- - def test_quick_validation(self, mocker, project): + def test_validate_project_from_path(self, mocker, project): project.create_folders("rawdata", "sub-1") os.makedirs(project.cfg["local_path"] / "rawdata" / "sub-02") project.create_folders("derivatives", "sub-1") @@ -803,6 +804,12 @@ def test_quick_validation(self, mocker, project): assert "VALUE_LENGTH" in str(w[1].message) assert len(w) == 2 + # This is used internally to generate a Configs class, + # but should not actually be written to. 
+ assert ( + not canonical_folders.get_internal_datashuttle_from_path().exists() + ) + # For good measure, monkeypatch and change all defaults, # ensuring they are propagated to the validate_project # function (which is tested above) @@ -824,7 +831,7 @@ def test_quick_validation(self, mocker, project): assert kwargs["top_level_folder_list"] == ["derivatives"] assert kwargs["name_templates"] == {"on": False} - def test_quick_validation_top_level_folder(self, project): + def test_validate_from_path_top_level_folder(self, project): """Test that errors are raised as expected on bad project path input. """ diff --git a/tests/tests_regression/test_backwards_compatibility.py b/tests/tests_regression/test_backwards_compatibility.py index cb23165d0..9c1f40f26 100644 --- a/tests/tests_regression/test_backwards_compatibility.py +++ b/tests/tests_regression/test_backwards_compatibility.py @@ -6,7 +6,7 @@ from .. import test_utils -TEST_PROJECT_NAME = "test_project" +TEST_PROJECT_NAME = "ds-unique-test-project-d375gd234vds2f" class TestBackwardsCompatibility: diff --git a/tests/tests_transfers/aws/aws_test_utils.py b/tests/tests_transfers/aws/aws_test_utils.py index fcc86e772..5cbe37787 100644 --- a/tests/tests_transfers/aws/aws_test_utils.py +++ b/tests/tests_transfers/aws/aws_test_utils.py @@ -34,6 +34,15 @@ def setup_aws_connection(project: DataShuttle): The `AWS_SECRET_ACCESS_KEY` is set in the environment by the CI while testing. For testing locally, the developer must set it themselves. 
""" + + def mock_input(_: str) -> str: + return "y" + + import builtins + + original_input = copy.deepcopy(builtins.input) + builtins.input = mock_input # type: ignore + original_get_secret = copy.deepcopy(aws.get_aws_secret_access_key) aws.get_aws_secret_access_key = lambda *args, **kwargs: os.environ[ "AWS_SECRET_ACCESS_KEY" @@ -41,6 +50,7 @@ def setup_aws_connection(project: DataShuttle): project.setup_aws_connection() + builtins.input = original_input aws.get_aws_secret_access_key = original_get_secret diff --git a/tests/tests_transfers/aws/test_aws_suggest_next.py b/tests/tests_transfers/aws/test_aws_suggest_next.py index a694a20cf..5edca41f6 100644 --- a/tests/tests_transfers/aws/test_aws_suggest_next.py +++ b/tests/tests_transfers/aws/test_aws_suggest_next.py @@ -26,8 +26,9 @@ def aws_setup(self, setup_project_paths): yield project - rclone.call_rclone( - f"purge central_{project.project_name}_gdrive:{project.cfg['central_path'].parent}" + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge central_{project.project_name}_aws:{project.cfg['central_path'].parent} {rclone.get_config_arg(project.cfg)}", ) @pytest.mark.asyncio diff --git a/tests/tests_transfers/aws/test_aws_transfer.py b/tests/tests_transfers/aws/test_aws_transfer.py index 348468a1b..cd73004bf 100644 --- a/tests/tests_transfers/aws/test_aws_transfer.py +++ b/tests/tests_transfers/aws/test_aws_transfer.py @@ -27,8 +27,9 @@ def aws_setup(self, pathtable_and_project): yield [pathtable, project] - rclone.call_rclone( - f"purge central_{project.project_name}_aws:{project.cfg['central_path'].parent}" + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge central_{project.project_name}_aws:{project.cfg['central_path'].parent} {rclone.get_config_arg(project.cfg)}", ) @pytest.mark.parametrize( diff --git a/tests/tests_transfers/aws/test_tui_setup_aws.py b/tests/tests_transfers/aws/test_tui_setup_aws.py index e16b9e2f8..29039c77a 100644 --- 
a/tests/tests_transfers/aws/test_tui_setup_aws.py +++ b/tests/tests_transfers/aws/test_tui_setup_aws.py @@ -2,10 +2,12 @@ import pytest +from datashuttle import DataShuttle from datashuttle.tui.app import TuiApp from datashuttle.tui.screens.project_manager import ProjectManagerScreen from datashuttle.utils import rclone, utils +from ... import test_utils from ...tests_tui.tui_base import TuiBase from . import aws_test_utils @@ -31,10 +33,18 @@ def central_path_and_project(self, setup_project_paths): yield central_path, project_name - rclone.call_rclone(f"purge central_{project_name}_aws:{central_path}") + project = DataShuttle(project_name) + + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge central_{project_name}_aws:{central_path} {rclone.get_config_arg(project.cfg)}", + ) @pytest.mark.asyncio - async def test_aws_connection_setup(self, central_path_and_project): + @pytest.mark.parametrize("set_encryption", [True, False]) + async def test_aws_connection_setup( + self, central_path_and_project, set_encryption + ): """Test AWS connection setup via the TUI. AWS connection details are filled in the configs tab. The setup @@ -58,12 +68,40 @@ async def test_aws_connection_setup(self, central_path_and_project): ) assert ( - "AWS Connection Successful!" + "Would you like to encrypt the RClone config file" in pilot.app.screen.query_one( "#setup_aws_messagebox_message" ).renderable ) + if set_encryption: + await self.scroll_to_click_pause(pilot, "#setup_aws_ok_button") + + assert ( + "The Rclone config file was successfully encrypted. Setup complete!" + in pilot.app.screen.query_one( + "#setup_aws_messagebox_message" + ).renderable + ) + + project = pilot.app.screen.interface.project + + test_utils.check_rclone_file_is_encrypted( + project.cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + else: + await self.scroll_to_click_pause( + pilot, "#setup_aws_cancel_button" + ) + + assert ( + "AWS Connection Successful!" 
+ in pilot.app.screen.query_one( + "#setup_aws_messagebox_message" + ).renderable + ) + @pytest.mark.asyncio async def test_aws_connection_setup_failed(self, central_path_and_project): """Test AWS connection setup using an incorrect client secret and check diff --git a/tests/tests_transfers/base_transfer.py b/tests/tests_transfers/base_transfer.py index 0b83fe108..3dc15e51b 100644 --- a/tests/tests_transfers/base_transfer.py +++ b/tests/tests_transfers/base_transfer.py @@ -1,6 +1,7 @@ """ """ import copy +import json import shutil from pathlib import Path @@ -31,7 +32,7 @@ def pathtable_and_project(self, tmpdir_factory): tmp_path = tmpdir_factory.mktemp("test") base_path = tmp_path / "test with space" - test_project_name = "test_file_conflicts" + test_project_name = "ds-unique-test-project-d375gd234vds2f" project = test_utils.setup_project_fixture( base_path, test_project_name @@ -200,7 +201,7 @@ def run_and_check_transfers( # Search the paths that were transferred and tidy them up, # then check against the paths that were expected to be transferred. - transferred_files = test_utils.recursive_search_central(project) + transferred_files = self.recursive_search_central(project) paths_to_transferred_files = self.remove_path_before_rawdata( transferred_files ) @@ -241,8 +242,9 @@ def run_and_check_transfers( # Clean up, removing the temp directories and # resetting the project paths. - rclone.call_rclone( - f"purge {project.cfg.get_rclone_config_name()}:{tmp_central_path.as_posix()}" + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge {project.cfg.rclone.get_rclone_config_name()}:{tmp_central_path.as_posix()} {rclone.get_config_arg(project.cfg)}", ) shutil.rmtree(tmp_local_path) @@ -256,3 +258,30 @@ def remake_logging_path(self, project): local_path location in the test environment. 
""" project.get_logging_path().mkdir(parents=True, exist_ok=True) + + @staticmethod + def recursive_search_central(project): + """ + A convenience function to search project for files on remote folders + using rclone's recursive search. + """ + all_filenames: list[str] = [] + + path_ = (project.cfg["central_path"] / "rawdata").as_posix() + + # -R flag searches recursively + output = rclone.call_rclone_for_central_connection( + project.cfg, + f"lsjson -R {project.cfg.rclone.get_rclone_config_name()}:{path_} {rclone.get_config_arg(project.cfg)}", + pipe_std=True, + ) + all_files_or_folders = json.loads(output.stdout) + + for file_or_folder in all_files_or_folders: + is_dir = file_or_folder.get("IsDir", False) + + if not is_dir: + file_path = file_or_folder["Path"] + all_filenames.append(f"{path_}/{file_path}") + + return all_filenames diff --git a/tests/tests_transfers/gdrive/gdrive_test_utils.py b/tests/tests_transfers/gdrive/gdrive_test_utils.py index 99cabe508..747818fcf 100644 --- a/tests/tests_transfers/gdrive/gdrive_test_utils.py +++ b/tests/tests_transfers/gdrive/gdrive_test_utils.py @@ -35,14 +35,24 @@ def setup_gdrive_connection(project: DataShuttle): connection without a browser. The credentials are set in the environment by the CI. To run tests locally, the developer must set them themselves. 
""" - state = {"first": True} + state = {"count": 0} def mock_input(_: str) -> str: - if state["first"]: - state["first"] = False - return "n" + if state["count"] == 0: + return_value = "n" + state["count"] += 1 + elif state["count"] == 1: + return_value = os.environ["GDRIVE_CONFIG_TOKEN"] + state["count"] += 1 + elif state["count"] == 2: + return_value = "y" + state["count"] += 1 + elif state["count"] == 3: + return_value = "y" else: - return os.environ["GDRIVE_CONFIG_TOKEN"] + raise ValueError(f"return count is {state['count']}") + + return return_value original_input = copy.deepcopy(builtins.input) builtins.input = mock_input # type: ignore diff --git a/tests/tests_transfers/gdrive/test_gdrive_suggest_next.py b/tests/tests_transfers/gdrive/test_gdrive_suggest_next.py index 39a98a6ae..6655ca7d8 100644 --- a/tests/tests_transfers/gdrive/test_gdrive_suggest_next.py +++ b/tests/tests_transfers/gdrive/test_gdrive_suggest_next.py @@ -28,8 +28,9 @@ def gdrive_setup(self, setup_project_paths): yield project - rclone.call_rclone( - f"purge central_{project.project_name}_gdrive:{project.cfg['central_path'].parent}" + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge central_{project.project_name}_gdrive:{project.cfg['central_path'].parent} {rclone.get_config_arg(project.cfg)}", ) @pytest.mark.asyncio diff --git a/tests/tests_transfers/gdrive/test_gdrive_transfer.py b/tests/tests_transfers/gdrive/test_gdrive_transfer.py index ca75f19a6..47fa04fdc 100644 --- a/tests/tests_transfers/gdrive/test_gdrive_transfer.py +++ b/tests/tests_transfers/gdrive/test_gdrive_transfer.py @@ -29,8 +29,9 @@ def gdrive_setup(self, pathtable_and_project): yield [pathtable, project] - rclone.call_rclone( - f"purge central_{project.project_name}_gdrive:{project.cfg['central_path'].parent}" + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge central_{project.project_name}_gdrive:{project.get_central_path()} {rclone.get_config_arg(project.cfg)}", ) 
@pytest.mark.parametrize( diff --git a/tests/tests_transfers/gdrive/test_tui_setup_gdrive.py b/tests/tests_transfers/gdrive/test_tui_setup_gdrive.py index 7c2f89faa..9985ff575 100644 --- a/tests/tests_transfers/gdrive/test_tui_setup_gdrive.py +++ b/tests/tests_transfers/gdrive/test_tui_setup_gdrive.py @@ -2,6 +2,7 @@ import pytest +from datashuttle import DataShuttle from datashuttle.tui.app import TuiApp from datashuttle.tui.screens.project_manager import ProjectManagerScreen from datashuttle.utils import rclone, utils @@ -31,14 +32,23 @@ def central_path_and_project(self, setup_project_paths): yield central_path, project_name - rclone.call_rclone( - f"purge central_{project_name}_gdrive:{central_path}" + project = DataShuttle(project_name) + + rclone.call_rclone_for_central_connection( + project.cfg, + f"purge central_{project_name}_gdrive:{central_path} {rclone.get_config_arg(project.cfg)}", ) - @pytest.mark.parametrize("central_path_none", [True, False]) + @pytest.mark.parametrize( + "parameter_sets", + [ + {"central_path_none": True, "set_encryption": True}, + {"central_path_none": False, "set_encryption": False}, + ], + ) @pytest.mark.asyncio async def test_gdrive_connection_setup_without_browser( - self, central_path_none, central_path_and_project + self, parameter_sets, central_path_and_project ): """Test Google Drive connection setup via the TUI. @@ -47,7 +57,14 @@ async def test_gdrive_connection_setup_without_browser( not possible to authenticate via a browser during tests, the connection setup is tested without a browser. The credentials in the environment are set by the CI. For testing locally, the developer must set these themselves. + + We test the case when central path is None or not, and encryption + is set or not. We don't need to test every combination (these settings + are unrelated) so we test across parameter sets. 
+ """ + central_path_none = parameter_sets["central_path_none"] + set_encryption = parameter_sets["set_encryption"] central_path, project_name = central_path_and_project app = TuiApp() @@ -78,8 +95,9 @@ async def test_gdrive_connection_setup_without_browser( "#setup_gdrive_generic_input_box", os.environ["GDRIVE_CONFIG_TOKEN"], ) + await self.scroll_to_click_pause( - pilot, "#setup_gdrive_enter_button" + pilot, "#setup_gdrive_no_browser_enter_button" ) await test_utils.await_task_by_name_if_present( @@ -87,12 +105,42 @@ async def test_gdrive_connection_setup_without_browser( ) assert ( - "Setup Complete!" + "Would you like to encrypt the RClone config file" in pilot.app.screen.query_one( "#gdrive_setup_messagebox_message" ).renderable ) + if set_encryption: + await self.scroll_to_click_pause( + pilot, "#setup_gdrive_set_encryption_yes_button" + ) + + assert ( + "The encryption was successful. Setup complete!" + in pilot.app.screen.query_one( + "#gdrive_setup_messagebox_message" + ).renderable + ) + + project = pilot.app.screen.interface.project + + test_utils.check_rclone_file_is_encrypted( + project.cfg.rclone.get_rclone_central_connection_config_filepath() + ) + + else: + await self.scroll_to_click_pause( + pilot, "#setup_gdrive_set_encryption_no_button" + ) + + assert ( + "Setup complete!" 
+ in pilot.app.screen.query_one( + "#gdrive_setup_messagebox_message" + ).renderable + ) + @pytest.mark.asyncio async def test_gdrive_connection_setup_incorrect_config_token( self, setup_project_paths @@ -129,8 +177,9 @@ async def test_gdrive_connection_setup_incorrect_config_token( "#setup_gdrive_generic_input_box", "placeholder", ) + await self.scroll_to_click_pause( - pilot, "#setup_gdrive_enter_button" + pilot, "#setup_gdrive_no_browser_enter_button" ) await test_utils.await_task_by_name_if_present( @@ -180,8 +229,9 @@ async def test_gdrive_connection_setup_incorrect_root_folder_id( "#setup_gdrive_generic_input_box", os.environ["GDRIVE_CONFIG_TOKEN"], ) + await self.scroll_to_click_pause( - pilot, "#setup_gdrive_enter_button" + pilot, "#setup_gdrive_no_browser_enter_button" ) await test_utils.await_task_by_name_if_present( @@ -194,6 +244,7 @@ async def test_gdrive_connection_setup_incorrect_root_folder_id( "#gdrive_setup_messagebox_message" ).renderable ) + assert ( "Error 404: File not found" in pilot.app.screen.query_one( @@ -224,25 +275,30 @@ async def test_cancel_gdrive_connection_setup(self, setup_project_paths): ) # Setup connection and cancel midway + await self.setup_gdrive_connection_via_tui(pilot) + assert ( - "Please authenticate through browser" + "Please authenticate through your browser" in pilot.app.screen.query_one( "#gdrive_setup_messagebox_message" ).renderable ) + await self.scroll_to_click_pause( pilot, "#setup_gdrive_cancel_button" ) # Try setting up the connection again await self.setup_gdrive_connection_via_tui(pilot) + assert ( - "Please authenticate through browser" + "Please authenticate through your browser" in pilot.app.screen.query_one( "#gdrive_setup_messagebox_message" ).renderable ) + await self.scroll_to_click_pause( pilot, "#setup_gdrive_cancel_button" ) @@ -310,7 +366,9 @@ async def setup_gdrive_connection_via_tui( "#setup_gdrive_generic_input_box", os.environ["GDRIVE_CLIENT_SECRET"], ) - await 
self.scroll_to_click_pause(pilot, "#setup_gdrive_enter_button") + await self.scroll_to_click_pause( + pilot, "#setup_gdrive_no_browser_enter_button" + ) assert ( "Are you running datashuttle on a machine " @@ -321,6 +379,8 @@ async def setup_gdrive_connection_via_tui( ) if with_browser: - await self.scroll_to_click_pause(pilot, "#setup_gdrive_yes_button") + await self.scroll_to_click_pause( + pilot, "#setup_gdrive_has_browser_yes_button" + ) else: await self.scroll_to_click_pause(pilot, "#setup_gdrive_no_button") diff --git a/tests/tests_transfers/ssh/ssh_test_utils.py b/tests/tests_transfers/ssh/ssh_test_utils.py index b57f18fcc..3ef95796c 100644 --- a/tests/tests_transfers/ssh/ssh_test_utils.py +++ b/tests/tests_transfers/ssh/ssh_test_utils.py @@ -3,7 +3,7 @@ import subprocess import sys -from datashuttle.utils import rclone, ssh, utils +from datashuttle.utils import utils def setup_project_for_ssh( @@ -23,7 +23,7 @@ def setup_project_for_ssh( ) -def setup_ssh_connection(project, setup_ssh_key_pair=True): +def setup_ssh_connection(project): """ Convenience function to verify the server hostkey and ssh key pairs to the Dockerfile image for ssh tests. 
@@ -46,27 +46,13 @@ def setup_ssh_connection(project, setup_ssh_key_pair=True): orig_isatty = copy.deepcopy(sys.stdin.isatty) sys.stdin.isatty = lambda: True - # Run setup - verified = ssh.verify_ssh_central_host_api( - project.cfg["central_host_id"], project.cfg.hostkeys_path, log=True - ) - - if setup_ssh_key_pair: - private_key_str = ssh.setup_ssh_key_api(project.cfg, log=False) - - rclone.setup_rclone_config_for_ssh( - project.cfg, - project.cfg.get_rclone_config_name("ssh"), - private_key_str, - ) + project.setup_ssh_connection() # Restore functions builtins.input = orig_builtin utils.get_connection_secret_from_user = orig_get_secret sys.stdin.isatty = orig_isatty - return verified - def docker_is_running(): if not is_docker_installed(): diff --git a/tests/tests_transfers/ssh/test_ssh_setup.py b/tests/tests_transfers/ssh/test_ssh_setup.py deleted file mode 100644 index 10eeb353b..000000000 --- a/tests/tests_transfers/ssh/test_ssh_setup.py +++ /dev/null @@ -1,81 +0,0 @@ -import builtins -import copy -import platform - -import pytest - -from ... import test_utils -from . import ssh_test_utils -from .base_ssh import BaseSSHTransfer - -TEST_SSH = ssh_test_utils.docker_is_running() - - -@pytest.mark.skipif( - platform.system == "Darwin", reason="Docker set up is not robust on macOS." -) -@pytest.mark.skipif( - not TEST_SSH, - reason="SSH tests are not run as docker is either not installed, " - "running or current user is not in the docker group.", -) -class TestSSH(BaseSSHTransfer): - @pytest.fixture(scope="function") - def project(test, tmp_path, setup_ssh_container_fixture): - """Set up a project with configs for SSH into - the test Dockerfile image. 
- """ - tmp_path = tmp_path / "test with space" - - test_project_name = "test_ssh" - - project = test_utils.setup_project_fixture(tmp_path, test_project_name) - - ssh_test_utils.setup_project_for_ssh( - project, - ) - - yield project - test_utils.teardown_project(project) - - # ----------------------------------------------------------------- - # Test Setup SSH Connection - # ----------------------------------------------------------------- - - @pytest.mark.parametrize("input_", ["n", "o", "@"]) - def test_verify_ssh_central_host_do_not_accept( - self, capsys, project, input_ - ): - """Test that host not accepted if input is not "y".""" - orig_builtin = copy.deepcopy(builtins.input) - builtins.input = lambda _: input_ # type: ignore - - project.setup_ssh_connection() - - builtins.input = orig_builtin - - captured = capsys.readouterr() - - assert "Host not accepted. No connection made.\n" in captured.out - - def test_verify_ssh_central_host_accept(self, capsys, project): - """User is asked to accept the server hostkey. Mock this here - and check hostkey is successfully accepted and written to configs. - """ - test_utils.clear_capsys(capsys) - - verified = ssh_test_utils.setup_ssh_connection( - project, setup_ssh_key_pair=False - ) - - assert verified - captured = capsys.readouterr() - - assert captured.out == "Host accepted.\n" - - with open(project.cfg.hostkeys_path) as file: - hostkey = file.readlines()[0] - - assert ( - f"[{project.cfg['central_host_id']}]:3306 ssh-ed25519 " in hostkey - ) diff --git a/tests/tests_transfers/ssh/test_ssh_suggest_next.py b/tests/tests_transfers/ssh/test_ssh_suggest_next.py index ed6523270..9b87565c4 100644 --- a/tests/tests_transfers/ssh/test_ssh_suggest_next.py +++ b/tests/tests_transfers/ssh/test_ssh_suggest_next.py @@ -27,6 +27,7 @@ def ssh_setup(self, setup_project_paths, setup_ssh_container_fixture): Setup pathtable and project for SSH transfer tests. 
""" project = test_utils.make_project(setup_project_paths["project_name"]) + ssh_test_utils.setup_project_for_ssh( project, ) diff --git a/tests/tests_transfers/ssh/test_ssh_transfer.py b/tests/tests_transfers/ssh/test_ssh_transfer.py index a7f74887e..72b310a81 100644 --- a/tests/tests_transfers/ssh/test_ssh_transfer.py +++ b/tests/tests_transfers/ssh/test_ssh_transfer.py @@ -3,6 +3,7 @@ import pytest +from ... import test_utils from . import ssh_test_utils from .base_ssh import BaseSSHTransfer @@ -157,3 +158,11 @@ def test_ssh_wildcards_3(self, ssh_setup): self.run_and_check_transfers( project, sub_names, ses_names, datatype, expected_transferred_paths ) + + def test_rclone_config_file_encrypted(self, ssh_setup): + """Quick confidence check the set up rclone config is indeed encrypted.""" + pathtable, project = ssh_setup + + test_utils.check_rclone_file_is_encrypted( + project.cfg.rclone.get_rclone_central_connection_config_filepath() + ) diff --git a/tests/tests_tui/tui_base.py b/tests/tests_tui/tui_base.py index a0c8f16d6..8bc443ce9 100644 --- a/tests/tests_tui/tui_base.py +++ b/tests/tests_tui/tui_base.py @@ -43,7 +43,7 @@ async def empty_project_paths(self, tmp_path_factory, monkeypatch): 2) It fails for testing CLI, because CLI spawns a new process in which `get_datashuttle_path()` is not monkeypatched. 
""" - project_name = "my-test-project" + project_name = "ds-unique-test-project-d375gd234vds2f" tmp_path = tmp_path_factory.mktemp("test") tmp_config_path = tmp_path / "config" diff --git a/tests/tests_unit/test_gdrive_preliminary_setup.py b/tests/tests_unit/test_gdrive_preliminary_setup.py index 79924552f..a77faece0 100644 --- a/tests/tests_unit/test_gdrive_preliminary_setup.py +++ b/tests/tests_unit/test_gdrive_preliminary_setup.py @@ -4,7 +4,7 @@ import pytest -from datashuttle.utils import gdrive +from datashuttle.utils import rclone class TestGdrivePreliminarySetup: @@ -16,14 +16,31 @@ class TestGdrivePreliminarySetup: def test_preliminary_setup_for_gdrive( self, client_id, root_folder_id, client_secret ): - """Test the outputs of `preliminary_for_setup_without_browser` and check + """Test the outputs of `preliminary_setup_gdrive_config_without_browser` and check that they contain the correct credentials in the encoded format. """ - mock_configs = { - "gdrive_client_id": client_id, - "gdrive_root_folder_id": root_folder_id, - } - output = gdrive.preliminary_for_setup_without_browser( + from collections import UserDict + from pathlib import Path + + class MockConfigs(UserDict): + def __init__(self, client_id_, root_folder_id_): + super(MockConfigs, self).__init__() + self.data["gdrive_client_id"] = client_id_ + self.data["gdrive_root_folder_id"] = root_folder_id_ + self.data["connection_method"] = "drive" + + class RClone: + def delete_existing_rclone_config_file(self): + pass + + def get_rclone_central_connection_config_filepath(self): + return Path("") + + self.rclone = RClone() + + mock_configs = MockConfigs(client_id, root_folder_id) + + output = rclone.preliminary_setup_gdrive_config_without_browser( mock_configs, client_secret, "test_gdrive_preliminary" )