From 4db0e74f4318107e3f125be259f2d331e70c5c65 Mon Sep 17 00:00:00 2001
From: Guillemo Ramos
Date: Tue, 14 May 2024 15:58:36 +0200
Subject: [PATCH] F #6053: In-place restore of VM backups (Ceph) (#3051)

---
 install.sh                  |   2 +
 src/tm_mad/ceph/restore     | 190 +++++++++++++++++++++++++++++++++++-
 src/tm_mad/lib/ceph.rb      | 127 ++++++++++++++++++++++++
 src/tm_mad/lib/tm_action.rb |  81 ++++++++++++++-
 4 files changed, 398 insertions(+), 2 deletions(-)
 mode change 120000 => 100755 src/tm_mad/ceph/restore
 create mode 100644 src/tm_mad/lib/ceph.rb

diff --git a/install.sh b/install.sh
index d80df4a2d92..a3018bec900 100755
--- a/install.sh
+++ b/install.sh
@@ -1949,6 +1949,7 @@ IPAM_DRIVER_EC2_SCRIPTS="src/ipamm_mad/remotes/aws/register_address_range \
 TM_FILES="src/tm_mad/tm_common.sh"
 
 TM_LIB_FILES="src/tm_mad/lib/kvm.rb \
+              src/tm_mad/lib/ceph.rb \
               src/tm_mad/lib/tm_action.rb \
               src/tm_mad/lib/backup_qcow2.rb \
               src/tm_mad/lib/datastore.rb \
@@ -2114,6 +2115,7 @@ TM_CEPH_FILES="src/tm_mad/ceph/clone \
                src/tm_mad/ceph/mkswap \
                src/tm_mad/ceph/resize \
                src/tm_mad/ceph/resize.ssh \
+               src/tm_mad/ceph/restore \
                src/tm_mad/ceph/prebackup_live \
                src/tm_mad/ceph/prebackup \
                src/tm_mad/ceph/postbackup_live \
diff --git a/src/tm_mad/ceph/restore b/src/tm_mad/ceph/restore
deleted file mode 120000
index 9c454e8cd4d..00000000000
--- a/src/tm_mad/ceph/restore
+++ /dev/null
@@ -1 +0,0 @@
-../common/not_supported.sh
\ No newline at end of file
diff --git a/src/tm_mad/ceph/restore b/src/tm_mad/ceph/restore
new file mode 100755
index 00000000000..4d1459e60bd
--- /dev/null
+++ b/src/tm_mad/ceph/restore
@@ -0,0 +1,189 @@
+#!/usr/bin/env ruby
+
+# -------------------------------------------------------------------------- #
+# Copyright 2002-2023, OpenNebula Project, OpenNebula Systems                #
+#                                                                            #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may    #
+# not use this file except in compliance with the License. You may obtain    #
+# a copy of the License at                                                   #
+#                                                                            #
+# http://www.apache.org/licenses/LICENSE-2.0                                 #
+#                                                                            #
+# Unless required by applicable law or agreed to in writing, software        #
+# distributed under the License is distributed on an "AS IS" BASIS,          #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
+# See the License for the specific language governing permissions and        #
+# limitations under the License.                                             #
+#--------------------------------------------------------------------------- #
+ONE_LOCATION = ENV['ONE_LOCATION']
+
+if !ONE_LOCATION
+    RUBY_LIB_LOCATION = '/usr/lib/one/ruby'
+    GEMS_LOCATION     = '/usr/share/one/gems'
+    VMDIR             = '/var/lib/one'
+    CONFIG_FILE       = '/var/lib/one/config'
+else
+    RUBY_LIB_LOCATION = ONE_LOCATION + '/lib/ruby'
+    GEMS_LOCATION     = ONE_LOCATION + '/share/gems'
+    VMDIR             = ONE_LOCATION + '/var'
+    CONFIG_FILE       = ONE_LOCATION + '/var/config'
+end
+
+# %%RUBYGEMS_SETUP_BEGIN%%
+if File.directory?(GEMS_LOCATION)
+    real_gems_path = File.realpath(GEMS_LOCATION)
+    if !defined?(Gem) || Gem.path != [real_gems_path]
+        $LOAD_PATH.reject! {|l| l =~ /vendor_ruby/ }
+
+        # Suppress warnings from Rubygems
+        # https://github.com/OpenNebula/one/issues/5379
+        begin
+            verb = $VERBOSE
+            $VERBOSE = nil
+            require 'rubygems'
+            Gem.use_paths(real_gems_path)
+        ensure
+            $VERBOSE = verb
+        end
+    end
+end
+# %%RUBYGEMS_SETUP_END%%
+
+$LOAD_PATH << RUBY_LIB_LOCATION
+
+require 'rexml/document'
+require 'json'
+require 'securerandom'
+
+require_relative '../lib/tm_action'
+require_relative '../lib/ceph'
+require_relative '../lib/datastore'
+
+#-------------------------------------------------------------------------------
+# RESTORE vm_id img_id inc_id disk_id
+#   vm_id    ID of the VM whose disks are replaced
+#   img_id   ID of the backup image
+#   inc_id   handed to the backup datastore driver as `ls -i inc_id`
+#   disk_id  ID of the only disk to restore, -1 to restore every listed disk
+#-------------------------------------------------------------------------------
+# dir = ARGV[0].split ':'
+vm_id     = ARGV[1]
+bk_img_id = ARGV[2].to_i
+inc_id    = ARGV[3]
+disk_id   = ARGV[4].to_i
+
+begin
+    action = TransferManager::Action.new(:action_name => 'restore',
+                                         :vm_id => vm_id)
+
+    # --------------------------------------------------------------------------
+    # Backup image information
+    # --------------------------------------------------------------------------
+    bk_img = OpenNebula::Image.new_with_id(bk_img_id, action.one)
+    rc     = bk_img.info
+    raise rc.message.to_s if OpenNebula.is_error?(rc)
+
+    # --------------------------------------------------------------------------
+    # Backup bk_img datastore
+    # --------------------------------------------------------------------------
+    ds_id = bk_img['/IMAGE/DATASTORE_ID'].to_i
+
+    # --------------------------------------------------------------------------
+    # Backup information
+    # --------------------------------------------------------------------------
+
+    # sample output: {"0":"rsync://100//0:3ffce7/var/lib/one/datastores/100/1/3ffce7/disk.0.0"}
+    rc = action.call_ds_driver(ds_id, "ls -i #{inc_id}", :extra_xml => bk_img.to_xml)
+    raise 'cannot list backup contents' unless rc.code == 0
+
+    disk_urls = JSON.parse(rc.stdout)
+    disk_urls = disk_urls.filter {|id, _url| id.to_i == disk_id } if disk_id != -1
+
+    raise 'no disk to restore found in the backup' if disk_urls.empty?
+
+    # --------------------------------------------------------------------------
+    # Restore disk_urls in Host VM folder
+    # --------------------------------------------------------------------------
+    ceph_disks = TransferManager::Ceph::Disk.from_vm(action.vm.template_xml)
+
+    # Check every backed up disk against the VM's RBD disks before uploading
+    # anything: a missing disk found half way through the loop below would
+    # abort the script and leave the images imported so far in the pool
+    unknown = disk_urls.keys.reject {|id| ceph_disks[id.to_i] }
+    raise "VM has no Ceph disk with ID #{unknown.join(', ')}" unless unknown.empty?
+
+    success_disks = []
+    info          = {}
+    disk_urls.each do |id, url|
+        ceph_disk = ceph_disks[id.to_i]
+
+        randsuffix = SecureRandom.hex(5)
+        info[ceph_disk] = {
+            :br  => action.pick_bridge(action.vm["/VM/TEMPLATE/DISK[DISK_ID = #{id}]/DATASTORE_ID"]),
+            :bak => "#{ceph_disk.rbd_image}.backup.#{randsuffix}",
+            :old => "#{ceph_disk.rbd_image}.old.#{randsuffix}"
+        }
+
+        # The temporary files are removed in any case, so the status of the
+        # download/convert/import chain is saved first and returned at the
+        # end; otherwise rc.code below would be the status of `rm`
+        upload_ceph = <<~EOS
+            tmpimg="$(mktemp -t disk#{id}.XXXX)"
+            #{__dir__}/../../datastore/downloader.sh --nodecomp #{url} $tmpimg && \
+            qemu-img convert -m 4 -O raw $tmpimg $tmpimg.raw && \
+            ssh #{info[ceph_disk][:br]} #{ceph_disk.rbd_cmd} import - #{info[ceph_disk][:bak]} < $tmpimg.raw
+            rc=$?
+            rm -f $tmpimg $tmpimg.raw
+            exit $rc
+        EOS
+
+        rc = action.ssh(:host     => nil,
+                        :cmds     => upload_ceph,
+                        :forward  => false,
+                        :nostdout => false,
+                        :nostderr => false)
+
+        break if rc.code != 0
+
+        success_disks << ceph_disk
+    end
+
+    # Rollback and raise error if it was unable to backup all disks
+    if success_disks.length != disk_urls.length
+        success_disks.each do |ceph_disk|
+            cleanup = <<~EOS
+                #{ceph_disk.rbd_cmd} rm #{info[ceph_disk][:bak]}
+            EOS
+            action.ssh(:host     => info[ceph_disk][:br],
+                       :cmds     => cleanup,
+                       :forward  => false,
+                       :nostdout => false,
+                       :nostderr => false)
+        end
+        raise "error uploading backup disk to Ceph (#{success_disks.length}/#{disk_urls.length})"
+    end
+
+    # --------------------------------------------------------------------------
+    # Replace VM disk_urls with backup copies (~prolog)
+    # --------------------------------------------------------------------------
+    success_disks.each do |ceph_disk|
+        move = <<~EOS
+            #{ceph_disk.shdefs}
+
+            #{ceph_disk.rbd_cmd} mv #{ceph_disk.rbd_image} #{info[ceph_disk][:old]} && \
+            #{ceph_disk.rbd_cmd} mv #{info[ceph_disk][:bak]} #{ceph_disk.rbd_image} && \
+            rbd_rm_image #{info[ceph_disk][:old]}
+        EOS
+
+        rc = action.ssh(:host     => info[ceph_disk][:br],
+                        :cmds     => move,
+                        :forward  => false,
+                        :nostdout => false,
+                        :nostderr => false)
+
+        warn 'cannot restore disk backup' if rc.code != 0
+    end
+rescue StandardError => e
+    STDERR.puts "Error restoring VM disks: #{e.message}"
+    exit(1)
+end
diff --git a/src/tm_mad/lib/ceph.rb b/src/tm_mad/lib/ceph.rb
new file mode 100644
index 00000000000..edf9d971180
--- /dev/null
+++ b/src/tm_mad/lib/ceph.rb
@@ -0,0 +1,127 @@
+#!/usr/bin/env ruby
+
+# -------------------------------------------------------------------------- #
+# Copyright 2002-2023, OpenNebula Project, OpenNebula Systems                #
+#                                                                            #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may    #
+# not use this file except in compliance with the License. You may obtain    #
+# a copy of the License at                                                   #
+#                                                                            #
+# http://www.apache.org/licenses/LICENSE-2.0                                 #
+#                                                                            #
+# Unless required by applicable law or agreed to in writing, software        #
+# distributed under the License is distributed on an "AS IS" BASIS,          #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
+# See the License for the specific language governing permissions and        #
+# limitations under the License.                                             #
+#--------------------------------------------------------------------------- #
+
+require 'rexml/document'
+require_relative 'datastore'
+
+module TransferManager
+
+    # Ceph utils
+    class Ceph
+
+        # RBD disk of a VM: image name and rbd command line to operate on it
+        class Disk
+
+            attr_reader :id, :vmid, :source, :clone, :rbd_image, :rbd_cmd
+
+            # @param vmid [Integer]
+            # @param disk_xml [String, REXML::Document, REXML::Element]
+            # @return [Disk]
+            def initialize(vmid, disk_xml)
+                disk_xml = REXML::Document.new(disk_xml) if disk_xml.is_a?(String)
+
+                # DISK_ID, SOURCE... are children of the disk element; a
+                # document has to be searched from its root element
+                disk_xml = disk_xml.root if disk_xml.is_a?(REXML::Document)
+
+                @id     = disk_xml.elements['DISK_ID'].text.to_i
+                @vmid   = vmid
+                @source = disk_xml.elements['SOURCE'].text
+                @clone  = disk_xml.elements['CLONE'].text == 'YES'
+
+                # Cloned disks have an image of their own, named after VM and disk
+                @rbd_image =
+                    if @clone
+                        "#{@source}-#{@vmid}-#{@id}"
+                    else
+                        @source
+                    end
+
+                @rbd_cmd = 'rbd'
+                @rbd_cmd += Ceph.xml_opt(disk_xml, 'CEPH_USER', '--id')
+                @rbd_cmd += Ceph.xml_opt(disk_xml, 'CEPH_KEY', '--keyfile')
+                @rbd_cmd += Ceph.xml_opt(disk_xml, 'CEPH_CONF', '--conf')
+            end
+
+            # @return [String] Shell definitions for functionality related to this disk
+            def shdefs
+                <<~SCRIPT
+                    rbd_rm_image() {
+                        image="$1"
+
+                        snapshots="$(#{@rbd_cmd} snap ls "$image" 2>/dev/null| awk 'NR > 1 {print $2}')"
+                        for snapshot in $snapshots; do
+                            rbd_rm_snapshot "$image@$snapshot"
+                        done
+                        #{@rbd_cmd} rm "$image"
+                    }
+
+                    rbd_rm_snapshot() {
+                        snapshot="$1"
+
+                        children="$(#{@rbd_cmd} children "$snapshot" 2>/dev/null)"
+
+                        for child in $children; do
+                            rbd_rm_image "$child"
+                        done
+
+                        #{@rbd_cmd} snap unprotect "$snapshot"
+                        #{@rbd_cmd} snap rm "$snapshot"
+                    }
+                SCRIPT
+            end
+
+            ####################################################################
+            ## CLASS METHODS
+
+            # @param vm_xml [String, REXML::Document, REXML::Element]
+            # @return [Array(Disk)] indexed VM disks (disk id = position in array)
+            def self.from_vm(vm_xml)
+                vm_xml = REXML::Document.new(vm_xml) if vm_xml.is_a?(String)
+                vm     = vm_xml.root
+                vmid   = vm.elements['VMID'].text
+
+                indexed_disks = []
+                vm.elements.each('DISK[TYPE="RBD"]') do |d|
+                    disk = new(vmid, d)
+                    indexed_disks[disk.id] = disk
+                end
+
+                indexed_disks
+            end
+
+        end
+
+        # Builds the command line fragment for an optional disk attribute
+        # @param disk_xml [REXML::Document, REXML::Element] disk description
+        # @param name [String] child element holding the value
+        # @param opt [String] command line option that takes the value
+        # @return [String] " opt value", or '' when the element is missing or
+        #   empty (never nil, callers append the result with +=)
+        def self.xml_opt(disk_xml, name, opt)
+            opt_val = disk_xml.elements[name].text
+            return '' if opt_val.nil? || opt_val.empty?
+
+            " #{opt} #{opt_val}"
+        rescue StandardError
+            ''
+        end
+
+    end
+
+end
diff --git a/src/tm_mad/lib/tm_action.rb b/src/tm_mad/lib/tm_action.rb
index 305425aaa8e..f9900349008 100644
--- a/src/tm_mad/lib/tm_action.rb
+++ b/src/tm_mad/lib/tm_action.rb
@@ -59,6 +59,9 @@ def initialize(options = {})
             @logger.formatter = proc do |severity, _date, _progname, message|
                 "#{severity}: #{@options[:action_name]}: #{message}"
             end
+
+            # Round robin index
+            @rridx = 0
         end
 
         # Executes cmds in a remote host
@@ -68,7 +71,7 @@ def initialize(options = {})
         # @option options [Integer] :rc_ok successful return code apart from 0
         # @option options [String]  :host hostname or IP of the remote host
         #
-        # @return [GenericCommand] return code of the command
+        # @return [GenericCommand] command result (exit status in #code, output in #stdout)
         def self.ssh(aname, options)
             action = Action.new(:action_name => aname, :vm_id => -1)
             action.ssh(options)
@@ -128,6 +131,82 @@ def ssh(options = {})
             rc
         end
 
+        # Makes a local call to some operation of the given DS driver
+        # @param [Integer] ds_id datastore ID
+        # @param [String] ds_op operation, as well as its arguments (e.g., "cp ")
+        # @param [Hash] extra_data documents appended to the driver input
+        # @option extra_data [String] :extra_xml XML appended as it is
+        # @option extra_data [Integer] :img_id ID of an image whose XML is appended
+        #
+        # @return [GenericCommand] result of the command, as returned by #ssh
+        def call_ds_driver(ds_id, ds_op, extra_data = {})
+            ds = OpenNebula::Datastore.new_with_id(ds_id, @one)
+            rc = ds.info true
+            raise rc.message.to_s if OpenNebula.is_error?(rc)
+
+            # Work on a copy: appending to the caller's :extra_xml string in
+            # place would leak the image XML into it
+            extra_xml = extra_data[:extra_xml].to_s.dup
+
+            if extra_data[:img_id]
+                image = OpenNebula::Image.new_with_id(extra_data[:img_id], @one)
+                rc    = image.info
+                raise rc.message.to_s if OpenNebula.is_error?(rc)
+
+                extra_xml << image.to_xml
+            end
+
+            ds_cmd = "#{__dir__}/../../datastore/#{ds['/DATASTORE/DS_MAD'].downcase}/#{ds_op}"
+
+            # NOTE(review): the blank first and last lines of this document look
+            # like a wrapper element dropped from this copy of the patch (the
+            # three documents need a single root) - confirm against the repository
+            driver_action = <<~EOS
+
+                #{@vm.to_xml}
+                #{ds.to_xml}
+                #{extra_xml}
+
+            EOS
+
+            # The document travels inside single quotes: close the quoting,
+            # emit an escaped quote and reopen it for every quote it contains
+            driver_action = driver_action.gsub("'") { "'\\''" }
+
+            ssh(:host     => nil,
+                :cmds     => "echo '#{driver_action}' | #{ds_cmd}",
+                :forward  => false,
+                :nostdout => false,
+                :nostderr => false)
+        end
+
+        # Select a host from the datastore's BRIDGE_LIST.
+        # Equivalent to `get_destination_host` from datastore_mad/remotes/libfs.sh
+        # @param [Integer] ds_id datastore ID
+        #
+        # @return [String] chosen bridge host
+        # @raise [RuntimeError] if the datastore has no bridge hosts
+        def pick_bridge(ds_id)
+            bridges = get_bridge_list(ds_id)
+            raise "datastore #{ds_id} has an empty BRIDGE_LIST" if bridges.empty?
+
+            bridge = bridges[@rridx % bridges.length]
+            @rridx += 1
+            bridge
+        end
+
+        # Return a datastore's BRIDGE_LIST
+        # @param [Integer] ds_id datastore ID
+        #
+        # @return [[String]] array of bridge hosts, empty if BRIDGE_LIST is not set
+        def get_bridge_list(ds_id)
+            ds = OpenNebula::Datastore.new_with_id(ds_id, @one)
+            rc = ds.info
+            raise rc.message.to_s if OpenNebula.is_error?(rc)
+
+            ds['/DATASTORE/TEMPLATE/BRIDGE_LIST'].to_s.split
+        end
+
         # Creates dst path dir at host.
         # @param [String] dst path to create
         # @param [String] host target host