From 1559fd0d9d070812de4b2c35fa62c1265fe8b311 Mon Sep 17 00:00:00 2001 From: Romain Malmain Date: Wed, 16 Oct 2024 22:20:09 +0200 Subject: [PATCH] wip cow cache rewriting --- block.c | 94 +++ block/block-backend.c | 73 +- include/block/block-common.h | 1 + include/libafl/exit.h | 14 +- include/libafl/syx-snapshot/syx-cow-cache.h | 32 +- include/libafl/syx-snapshot/syx-snapshot.h | 19 +- include/libafl/utils.h | 1 + include/qemu/iov.h | 2 +- libafl/meson.build | 6 +- libafl/syx-snapshot/syx-cow-cache.c | 751 +++++++++++++++----- libafl/syx-snapshot/syx-snapshot.c | 108 ++- libafl/utils.c | 21 + meson.build | 18 +- qapi/block-core.json | 18 + system/vl.c | 5 + util/iov.c | 2 +- 16 files changed, 828 insertions(+), 337 deletions(-) diff --git a/block.c b/block.c index 50bdd197b7a..dd3ec6ecfc7 100644 --- a/block.c +++ b/block.c @@ -54,6 +54,8 @@ #include "qemu/rcu.h" #include "block/coroutines.h" +#include "libafl/syx-snapshot/syx-snapshot.h" + #ifdef CONFIG_BSD #include #include @@ -1259,6 +1261,34 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, *child_flags &= ~BDRV_O_NATIVE_AIO; } +//// --- Begin LibAFL code --- + +/* + * Returns the options and flags that a temporary snapshot should get, based on + * the originally requested flags (the originally requested image will have + * flags like a backing file) + */ +static void bdrv_syx_cow_cache_options(int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ + GLOBAL_STATE_CODE(); + *child_flags = parent_flags; + + /* For temporary files, unconditional cache=unsafe is fine */ + qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); + + /* Copy the read-only and discard options from the parent */ + qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD); + + /* aio=native doesn't work for 
cache.direct=off, so disable it for the + * temporary snapshot */ + *child_flags &= ~BDRV_O_NATIVE_AIO; +} + +//// --- End LibAFL code --- + static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c) { BlockDriverState *parent = c->opaque; @@ -3870,6 +3900,33 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) return bs; } +//// --- Begin LibAFL code --- + +static BlockDriverState *bdrv_append_syx_cow_cache(BlockDriverState *bs, + int flags, + QDict *scc_options, + Error **errp) +{ + BlockDriverState* bs_scc = NULL; + + /* We add a syx-cow-cache layer on top of the node being opened */ + qdict_put_str(scc_options, "driver", "syx-cow-cache"); + qdict_put_str(scc_options, "file", bs->node_name); + + /* Open the syx cow cache */ + bs_scc = bdrv_open(NULL, NULL, scc_options, flags, errp); + scc_options = NULL; + if (!bs_scc) { + goto out; + } + +out: + qobject_unref(scc_options); + return bs_scc; +} + +//// --- End LibAFL code --- + static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, QDict *snapshot_options, @@ -3966,6 +4023,14 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, QDict *snapshot_options = NULL; int snapshot_flags = 0; +//// --- Begin LibAFL code --- + + QDict* scc_options = NULL; + int scc_flags = 0; + bool attach_syx_continue = false; + +//// --- End LibAFL code --- + assert(!child_class || !flags); assert(!child_class == !parent); GLOBAL_STATE_CODE(); @@ -4152,6 +4217,24 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, * (the inverse results in an error message from bdrv_open_common()) */ assert(!(flags & BDRV_O_PROTOCOL) || !file); +//// --- Begin LibAFL code --- + if (!(flags & BDRV_O_NOSYX) && syx_snapshot_use_scc() && !strcmp(drv->format_name, "file") && bs->open_flags & BDRV_O_RDWR) { + if (snapshot_flags) { + error_setg(errp, "Syx snapshots should not be used with any QEMU snapshot option"); + goto close_and_fail; + } + + 
scc_options = qdict_new(); + bdrv_syx_cow_cache_options(&scc_flags, scc_options, + flags, options); + + // qdict_put_bool(options, BDRV_OPT_READ_ONLY, true); + // bs->open_flags &= ~BDRV_O_RDWR; + + attach_syx_continue = true; + } +//// --- End LibAFL code --- + /* Open the image */ ret = bdrv_open_common(bs, file, options, &local_err); if (ret < 0) { @@ -4221,6 +4304,17 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, bs = snapshot_bs; } + if (attach_syx_continue) { + BlockDriverState* scc_bs; + scc_bs = bdrv_append_syx_cow_cache(bs, scc_flags, scc_options, &local_err); + scc_options = NULL; + if (local_err) { + goto close_and_fail; + } + bdrv_unref(bs); + bs = scc_bs; + } + return bs; fail: diff --git a/block/block-backend.c b/block/block-backend.c index df3f3254330..f8ed8957512 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -28,12 +28,6 @@ #include "trace.h" #include "migration/misc.h" -//// --- Begin LibAFL code --- -#ifdef CONFIG_SOFTMMU -#include "libafl/syx-snapshot/syx-snapshot.h" -#endif -//// --- End LibAFL code --- - /* Number of coroutines to reserve per attached device model */ #define COROUTINE_POOL_RESERVATION 64 @@ -48,9 +42,6 @@ typedef struct BlockBackendAioNotifier { struct BlockBackend { char *name; -//// --- Begin LibAFL code --- - guint name_hash; -//// --- End LibAFL code --- int refcnt; BdrvChild *root; AioContext *ctx; /* access with atomic operations only */ @@ -705,12 +696,6 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) error_setg(errp, "Device with id '%s' already exists", name); return false; } -//// --- Begin LibAFL code --- - if (blk_by_name_hash(g_str_hash(name))) { - error_setg(errp, "Device with name hash '%x' already exists", g_str_hash(name)); - return false; - } -//// --- End LibAFL code --- if (bdrv_find_node(name)) { error_setg(errp, "Device name '%s' conflicts with an existing node name", @@ -719,9 +704,6 @@ bool monitor_add_blk(BlockBackend 
*blk, const char *name, Error **errp) } blk->name = g_strdup(name); -//// --- Begin LibAFL code --- - blk->name_hash = g_str_hash(blk->name); -//// --- End LibAFL code --- QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link); return true; } @@ -753,14 +735,6 @@ const char *blk_name(const BlockBackend *blk) return blk->name ?: ""; } -//// --- Begin LibAFL code --- -guint blk_name_hash(const BlockBackend* blk) -{ - IO_CODE(); - return blk->name_hash; -} -//// --- End LibAFL code --- - /* * Return the BlockBackend with name @name if it exists, else null. * @name must not be null. @@ -779,22 +753,6 @@ BlockBackend *blk_by_name(const char *name) return NULL; } -/* - * Return the BlockBackend with name hash @name_hash if it exists, else null. - */ -BlockBackend *blk_by_name_hash(guint name_hash) -{ - BlockBackend *blk = NULL; - - GLOBAL_STATE_CODE(); - while ((blk = blk_next(blk)) != NULL) { - if (name_hash == blk->name_hash) { - return blk; - } - } - return NULL; -} - /* * Return the BlockDriverState attached to @blk if any, else null. 
*/ @@ -1648,21 +1606,8 @@ static void coroutine_fn blk_aio_read_entry(void *opaque) assert(qiov->size == acb->bytes); -//// --- Begin LibAFL code --- -#ifdef CONFIG_SOFTMMU - if (!syx_snapshot_cow_cache_read_entry(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags)) { -#endif -//// --- End LibAFL code --- - rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov, - 0, rwco->flags); -//// --- Begin LibAFL code --- -#ifdef CONFIG_SOFTMMU - } else { - rwco->ret = 0; - } -#endif -//// --- End LibAFL code --- - + rwco->ret = blk_co_do_preadv_part(rwco->blk, rwco->offset, acb->bytes, qiov, + 0, rwco->flags); blk_aio_complete(acb); } @@ -1674,19 +1619,7 @@ static void coroutine_fn blk_aio_write_entry(void *opaque) assert(!qiov || qiov->size == acb->bytes); -//// --- Begin LibAFL code --- -#ifdef CONFIG_SOFTMMU - if (!syx_snapshot_cow_cache_write_entry(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags)) { -#endif -//// --- End LibAFL code --- - rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags); -//// --- Begin LibAFL code --- -#ifdef CONFIG_SOFTMMU - } else { - rwco->ret = 0; - } -#endif -//// --- End LibAFL code --- + rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, qiov, 0, rwco->flags); blk_aio_complete(acb); } diff --git a/include/block/block-common.h b/include/block/block-common.h index a846023a098..be9c67ff2f6 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -226,6 +226,7 @@ typedef enum { writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_NOSYX 0x0040 #define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ #define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ diff --git a/include/libafl/exit.h b/include/libafl/exit.h index e305d9a35d3..b5259591b31 100644 --- 
a/include/libafl/exit.h +++ b/include/libafl/exit.h @@ -36,20 +36,22 @@ struct libafl_exit_reason_breakpoint { }; // A synchronous exit has been triggered. -struct libafl_exit_reason_sync_exit {}; +struct libafl_exit_reason_sync_exit { +}; // A timeout occured and we were asked to exit on timeout -struct libafl_exit_reason_timeout {}; +struct libafl_exit_reason_timeout { +}; struct libafl_exit_reason { enum libafl_exit_reason_kind kind; CPUState* cpu; // CPU that triggered an exit. vaddr next_pc; // The PC that should be stored in the CPU when re-entering. union { - struct libafl_exit_reason_internal internal; // kind == INTERNAL - struct libafl_exit_reason_breakpoint breakpoint; // kind == BREAKPOINT - struct libafl_exit_reason_sync_exit sync_exit; // kind == SYNC_EXIT - struct libafl_exit_reason_timeout timeout; // kind == TIMEOUT + struct libafl_exit_reason_internal internal; // kind == INTERNAL + struct libafl_exit_reason_breakpoint breakpoint; // kind == BREAKPOINT + struct libafl_exit_reason_sync_exit sync_exit; // kind == SYNC_EXIT + struct libafl_exit_reason_timeout timeout; // kind == TIMEOUT } data; }; diff --git a/include/libafl/syx-snapshot/syx-cow-cache.h b/include/libafl/syx-snapshot/syx-cow-cache.h index 2375f0d4011..36234cb1a0b 100644 --- a/include/libafl/syx-snapshot/syx-cow-cache.h +++ b/include/libafl/syx-snapshot/syx-cow-cache.h @@ -11,14 +11,14 @@ #define INITIAL_NB_CHUNKS_PER_DEVICE (1024 * 64) typedef struct SyxCowCacheDevice { - GArray* data; - GHashTable* positions; // blk_offset -> data_position + GArray* data; // [u8] + GHashTable* positions; // blkdev offset (must be aligned on chunk_size) -> + // data offset } SyxCowCacheDevice; -typedef struct SyxCowCacheLayer SyxCowCacheLayer; - typedef struct SyxCowCacheLayer { - GHashTable* cow_cache_devices; // H(device) -> SyxCowCacheDevice + GArray* blks; // [SyxCowCacheDevice] + uint64_t chunk_size; uint64_t max_nb_chunks; @@ -31,21 +31,15 @@ typedef struct SyxCowCache { SyxCowCache* 
syx_cow_cache_new(void); -// lhs <- rhs -// rhs is freed and nulled. -void syx_cow_cache_move(SyxCowCache* lhs, SyxCowCache** rhs); +// Returns a SyxCowCache with a new layer on top. +// Other layers from scc are still present. +SyxCowCache* syx_cow_cache_push(SyxCowCache* scc, uint64_t chunk_size, + uint64_t max_size); -void syx_cow_cache_push_layer(SyxCowCache* scc, uint64_t chunk_size, - uint64_t max_size); -void syx_cow_cache_pop_layer(SyxCowCache* scc); +void syx_cow_cache_pop(SyxCowCache* scc); -void syx_cow_cache_flush_highest_layer(SyxCowCache* scc); +// void syx_cow_cache_pop_layer(SyxCowCache* scc); -void syx_cow_cache_read_entry(SyxCowCache* scc, BlockBackend* blk, - int64_t offset, int64_t bytes, QEMUIOVector* qiov, - size_t qiov_offset, BdrvRequestFlags flags); +void syx_cow_cache_flush_highest_layer(SyxCowCache* scc); -bool syx_cow_cache_write_entry(SyxCowCache* scc, BlockBackend* blk, - int64_t offset, int64_t bytes, - QEMUIOVector* qiov, size_t qiov_offset, - BdrvRequestFlags flags); +void syx_cow_cache_check_files_ro(void); \ No newline at end of file diff --git a/include/libafl/syx-snapshot/syx-snapshot.h b/include/libafl/syx-snapshot/syx-snapshot.h index 5a6cbbc85ce..1639ae7d785 100644 --- a/include/libafl/syx-snapshot/syx-snapshot.h +++ b/include/libafl/syx-snapshot/syx-snapshot.h @@ -18,8 +18,9 @@ #include "libafl/syx-misc.h" -#define SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE 64 -#define SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS (1024 * 1024) +#define SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE 4096 +#define SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS \ + (1024 * (SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE)) typedef struct SyxSnapshotRoot SyxSnapshotRoot; typedef struct SyxSnapshotIncrement SyxSnapshotIncrement; @@ -50,17 +51,17 @@ typedef struct SyxSnapshotState { uint64_t page_size; uint64_t page_mask; + SyxSnapshot* active_snapshot; + // Actively tracked snapshots. 
Their dirty lists will // be updated at each dirty access SyxSnapshotTracker tracked_snapshots; // In use iif syx is initialized with cached_bdrvs flag on. - // It is not updated anymore when an active bdrv cache snapshto is set. SyxCowCache* before_fuzz_cache; + // snapshot used to restore bdrv cache if enabled. SyxSnapshot* active_bdrv_cache_snapshot; - - // Root } SyxSnapshotState; typedef struct SyxSnapshotCheckResult { @@ -73,8 +74,12 @@ void syx_snapshot_init(bool cached_bdrvs); // Snapshot API // -SyxSnapshot* syx_snapshot_new(bool track, bool is_active_bdrv_cache, - DeviceSnapshotKind kind, char** devices); +bool syx_snapshot_use_scc(void); + +SyxCowCache* syx_snapshot_current_scc(void); + +SyxSnapshot* syx_snapshot_new(bool track, DeviceSnapshotKind kind, + char** devices); void syx_snapshot_free(SyxSnapshot* snapshot); diff --git a/include/libafl/utils.h b/include/libafl/utils.h index a4dd40f8c0d..db4d337708c 100644 --- a/include/libafl/utils.h +++ b/include/libafl/utils.h @@ -1,3 +1,4 @@ #pragma once uintptr_t libafl_qemu_host_page_size(void); +void libafl_qemu_backtrace(void); diff --git a/include/qemu/iov.h b/include/qemu/iov.h index 63a1c01965d..48a63dcc15f 100644 --- a/include/qemu/iov.h +++ b/include/qemu/iov.h @@ -237,7 +237,7 @@ size_t qemu_iovec_concat_iov(QEMUIOVector *dst, bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); -size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, +size_t qemu_iovec_to_buf(const QEMUIOVector *qiov, size_t offset, void *buf, size_t bytes); size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, const void *buf, size_t bytes); diff --git a/libafl/meson.build b/libafl/meson.build index b6d1dc52e0a..f6aa88930a6 100644 --- a/libafl/meson.build +++ b/libafl/meson.build @@ -1,4 +1,4 @@ -specific_ss.add(files( +libafl_ss.add(files( 'cpu.c', 'exit.c', 'hook.c', @@ -19,7 +19,7 @@ 
specific_ss.add(files( 'hooks/thread.c', )) -specific_ss.add(when : 'CONFIG_SOFTMMU', if_true : [files( +libafl_ss.add(when : 'CONFIG_SOFTMMU', if_true : [files( 'system.c', 'qemu_snapshot.c', 'syx-snapshot/device-save.c', @@ -28,7 +28,7 @@ specific_ss.add(when : 'CONFIG_SOFTMMU', if_true : [files( 'syx-snapshot/channel-buffer-writeback.c', )]) -specific_ss.add(when : 'CONFIG_USER_ONLY', if_true : [files( +libafl_ss.add(when : 'CONFIG_USER_ONLY', if_true : [files( 'user.c', 'hooks/syscall.c', )]) diff --git a/libafl/syx-snapshot/syx-cow-cache.c b/libafl/syx-snapshot/syx-cow-cache.c index 10f5de0fe2b..d1597fe2896 100644 --- a/libafl/syx-snapshot/syx-cow-cache.c +++ b/libafl/syx-snapshot/syx-cow-cache.c @@ -1,259 +1,680 @@ #include "libafl/syx-snapshot/syx-cow-cache.h" +#include "block/block_int-common.h" #include "sysemu/block-backend.h" +#include "block/qdict.h" +#include "block/block_int.h" +#include "qemu/option.h" +#include "qemu/cutils.h" + +#include "libafl/syx-snapshot/syx-snapshot.h" + +#include +#include + +#define IS_POWER_OF_TWO(x) (__builtin_popcountll(x) == 1) + +typedef struct BDRVSyxCowCacheState { + uint32_t id; + BlockDriverState* clone_bs; +} BDRVSyxCowCacheState; + +// Checks that every opened file is opened in read-only mode. +// It's a sanity check for the cow-cache mode, no file should ever be opened in +// RW mode It's because the LibAFL QEMU process can open these files multiple +// times in different processes to enable multi-core fuzzing. Another good +// side-effect is the fact that disks will never be polluted by a fuzzing run, +// the disk remains unchanged. 
+void syx_cow_cache_check_files_ro(void) +{ + BlockDriverState* bs = NULL; + const BdrvChild* root = NULL; + DIR* d; + struct dirent* dir; + struct stat st; + char path[PATH_MAX] = {0}; + char real_path[PATH_MAX] = {0}; + + d = opendir("/proc/self/fd"); + assert(d); + + BlockBackend* blk = NULL; + while ((blk = blk_all_next(blk)) != NULL) { + if ((root = blk_root(blk))) { + bs = root->bs; + assert(bs); + + while ((dir = readdir(d)) != NULL) { + int fd = atoi(dir->d_name); + if (fd > 2) { + assert(fstat(fd, &st) == 0); + if (S_ISREG(st.st_mode)) { + strcpy(path, "/proc/self/fd/"); + strcat(path, dir->d_name); + ssize_t nb_bytes = readlink(path, real_path, PATH_MAX); + assert(nb_bytes > 0); + real_path[nb_bytes] = '\0'; + + if (!strcmp(bs->filename, real_path)) { + int res = fcntl(fd, F_GETFL); + if ((res & O_ACCMODE) != O_RDONLY) { + fprintf( + stderr, + "A file opened by QEMU is in RW mode: " + "%s.\nThis is a bug, please report it.\n", + bs->filename); + abort(); + } + } + } + } + } -#define IS_POWER_OF_TWO(x) ((x != 0) && ((x & (x - 1)) == 0)) + rewinddir(d); + } + } + + closedir(d); +} SyxCowCache* syx_cow_cache_new(void) { - SyxCowCache* cache = g_new0(SyxCowCache, 2); + SyxCowCache* cache = g_new0(SyxCowCache, 1); QTAILQ_INIT(&cache->layers); return cache; } +static void syx_cow_cache_add_blk(SyxCowCache* scc) +{ + SyxCowCacheLayer* layer; + SyxCowCacheDevice dev; + + layer = QTAILQ_FIRST(&scc->layers); + assert(layer); + + dev.data = g_array_sized_new(false, false, layer->chunk_size, + INITIAL_NB_CHUNKS_PER_DEVICE); + dev.positions = + g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); + + g_array_append_val(layer->blks, dev); +} + static gchar* g_array_element_ptr(GArray* array, guint position) { assert(position < array->len); return array->data + position * g_array_get_element_size(array); } -void syx_cow_cache_push_layer(SyxCowCache* scc, uint64_t chunk_size, - uint64_t max_size) +SyxCowCache* syx_cow_cache_push(SyxCowCache* scc, uint64_t 
chunk_size, + uint64_t max_size) { SyxCowCacheLayer* new_layer = g_new0(SyxCowCacheLayer, 1); + SyxCowCache* new_scc = syx_cow_cache_new(); - new_layer->cow_cache_devices = - g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); + // Re-insert older layers + SyxCowCacheLayer* layer; + QTAILQ_FOREACH(layer, &scc->layers, next) + { + QTAILQ_INSERT_HEAD(&new_scc->layers, layer, next); + } + + // Init new layer + new_layer->blks = g_array_new(false, false, sizeof(SyxCowCacheDevice)); new_layer->chunk_size = chunk_size; new_layer->max_nb_chunks = max_size; assert(IS_POWER_OF_TWO(chunk_size)); assert(!(max_size % chunk_size)); + // Insert new layer at the top QTAILQ_INSERT_HEAD(&scc->layers, new_layer, next); -} -void syx_cow_cache_pop_layer(SyxCowCache* scc) -{ - // TODO + return new_scc; } -static void flush_device_layer(gpointer _blk_name_hash, gpointer cache_device, - gpointer _user_data) -{ - SyxCowCacheDevice* sccd = (SyxCowCacheDevice*)cache_device; - - g_hash_table_remove_all(sccd->positions); - g_array_set_size(sccd->data, 0); -} +void syx_cow_cache_pop(SyxCowCache* scc) { assert(false && "TODO"); } void syx_cow_cache_flush_highest_layer(SyxCowCache* scc) { - SyxCowCacheLayer* highest_layer = QTAILQ_FIRST(&scc->layers); + SyxCowCacheLayer* layer = QTAILQ_FIRST(&scc->layers); + SyxCowCacheDevice* blk; - // highest_layer->cow_cache_devices - g_hash_table_foreach(highest_layer->cow_cache_devices, flush_device_layer, - NULL); -} + for (int i = 0; i < layer->blks->len; ++i) { + blk = &g_array_index(layer->blks, SyxCowCacheDevice, i); -void syx_cow_cache_move(SyxCowCache* lhs, SyxCowCache** rhs) -{ - lhs->layers = (*rhs)->layers; - g_free(*rhs); - *rhs = NULL; + g_hash_table_remove_all(blk->positions); + g_array_set_size(blk->data, 0); + } } -static bool read_chunk_from_cache_layer_device(SyxCowCacheDevice* sccd, - QEMUIOVector* qiov, - size_t qiov_offset, - uint64_t blk_offset) +// Returns a pointer to chunk to read from, if it exists. 
+// If nothing was found in the cache for the given offset, NULL is returned. +static void* coroutine_fn get_read_chunk(SyxCowCacheDevice* sccd, int64_t aligned_offset) { + const int64_t chunk_size = g_array_get_element_size(sccd->data); + assert(QEMU_IS_ALIGNED(aligned_offset, chunk_size)); gpointer data_position = NULL; - bool found = g_hash_table_lookup_extended( - sccd->positions, GUINT_TO_POINTER(blk_offset), NULL, &data_position); - // cache hit + bool found = g_hash_table_lookup_extended(sccd->positions, + GUINT_TO_POINTER(aligned_offset), + NULL, &data_position); + if (found) { - void* data_position_ptr = - g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position)); - assert(qemu_iovec_from_buf(qiov, qiov_offset, data_position_ptr, - g_array_get_element_size(sccd->data)) == - g_array_get_element_size(sccd->data)); + return g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position)); } - return found; + return NULL; } -// len must be smaller than nb bytes to next aligned to chunk of blk_offset. 
-// static void write_to_cache_layer_device_unaligned(SyxCowCacheDevice* sccd, -// QEMUIOVector* qiov, size_t qiov_offset, uint64_t blk_offset, uint64_t len) -// { -// const uint64_t chunk_size = g_array_get_element_size(sccd->data); -// -// assert(ROUND_UP(blk_offset, chunk_size) - blk_offset <= len); -// assert(IS_POWER_OF_TWO(chunk_size)); -// -// uint64_t blk_offset_aligned = ROUND_DOWN(blk_offset, chunk_size); -// -// gpointer data_position = NULL; -// bool found = g_hash_table_lookup_extended(sccd->positions, -// GUINT_TO_POINTER(blk_offset_aligned), NULL, &data_position); -// -// if (!found) { -// data_position = GUINT_TO_POINTER(sccd->data->len); -// sccd->data = g_array_set_size(sccd->data, sccd->data->len + 1); -// g_hash_table_insert(sccd->positions, GUINT_TO_POINTER(blk_offset), -// data_position); -// } -// -// void* data_position_ptr = g_array_element_ptr(sccd->data, -// GPOINTER_TO_UINT(data_position)); -// -// assert(qemu_iovec_to_buf(qiov, qiov_offset, data_position_ptr, -// g_array_get_element_size(sccd->data)) == -// g_array_get_element_size(sccd->data)); -// } - -// cache layer is allocated and all the basic checks are already done. -static void write_chunk_to_cache_layer_device(SyxCowCacheDevice* sccd, - QEMUIOVector* qiov, - size_t qiov_offset, - uint64_t blk_offset) +// returns a pointer to write the chunk to. +// if it does not exist and child is non-NULL, it is prefilled with child data. +// in other words, it is guaranteed to be valid to write to the pointer with +// chunk_size bytes child should be NULL iif it is planned to fully fill the +// chunk after the call. 
+static void* coroutine_fn reserve_write_chunk(SyxCowCacheDevice* sccd, BdrvChild* child, + int64_t aligned_offset) { - const uint64_t chunk_size = g_array_get_element_size(sccd->data); - + const int64_t chunk_size = g_array_get_element_size(sccd->data); + assert(QEMU_IS_ALIGNED(aligned_offset, chunk_size)); gpointer data_position = NULL; - bool found = g_hash_table_lookup_extended( - sccd->positions, GUINT_TO_POINTER(blk_offset), NULL, &data_position); + + bool found = g_hash_table_lookup_extended(sccd->positions, + GUINT_TO_POINTER(aligned_offset), + NULL, &data_position); if (!found) { + printf("\t\tAddr 0x%lx: not found\n", aligned_offset); data_position = GUINT_TO_POINTER(sccd->data->len); sccd->data = g_array_set_size(sccd->data, sccd->data->len + 1); - g_hash_table_insert(sccd->positions, GUINT_TO_POINTER(blk_offset), + g_hash_table_insert(sccd->positions, GUINT_TO_POINTER(aligned_offset), data_position); + + if (child) { + bdrv_co_pread(child, aligned_offset, chunk_size, data_position, 0); + } + } + + return g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position)); +} + +static void coroutine_fn write_chunk_to_cache_layer_device(SyxCowCacheDevice* sccd, + const QEMUIOVector* qiov, + BdrvChild* child, + const int64_t offset, const int64_t bytes, + const int64_t chunk_size) +{ + int64_t size_written = 0; + int64_t size_to_write; + void* data_position; + int64_t offset_begin_aligned, offset_begin_remainder, offset_end_aligned, + offset_end_remainder, offset_aligned_size, nb_middle_chunks; + + // chunk size should be a power of 2 + assert(IS_POWER_OF_TWO(chunk_size)); + + offset_begin_aligned = ROUND_DOWN(offset, chunk_size); + offset_begin_remainder = offset % chunk_size; + + // number of chunks that can be written without alignment issues + nb_middle_chunks = + (ROUND_DOWN(offset + bytes, chunk_size) - ROUND_UP(offset, chunk_size)) / chunk_size; + + offset_end_aligned = ROUND_DOWN(offset + bytes, chunk_size); + offset_end_remainder = (offset + bytes) 
% chunk_size; + + // total size effectively reserved in the cache buffer + offset_aligned_size = + ROUND_UP(offset + bytes, chunk_size) - offset_begin_aligned; + + assert((offset_aligned_size % chunk_size) == + 0); // aligned size should be... aligned + + // Handle unaligned start + if (offset_begin_remainder) { + size_to_write = + MIN((offset_begin_aligned + chunk_size) - offset, bytes); + + data_position = + reserve_write_chunk(sccd, child, offset_begin_aligned); + + printf("\t[unaligned begin] Chunk write @addr 0x%lx\n", offset_begin_aligned); + + qemu_iovec_to_buf(qiov, size_written, + data_position + offset_begin_remainder, + size_to_write); + + size_written += size_to_write; + + if (size_written == bytes) { + goto end; + } + } + + // write every chunk until (potentially) unaligned end. + for (int64_t i = 0; i < nb_middle_chunks; ++i) { + printf("\tChunk write @addr 0x%lx\n", offset + size_written); + + // get cache pointer, either fresh of already allocated + data_position = + reserve_write_chunk(sccd, NULL, offset + size_written); + + // overwrite cache with full chunk + assert(size_written <= qiov->size); + qemu_iovec_to_buf(qiov, size_written, data_position, chunk_size); + + // Update size_written + size_written += chunk_size; } - void* data_position_ptr = - g_array_element_ptr(sccd->data, GPOINTER_TO_UINT(data_position)); + // Handle unaligned end + if (offset_end_remainder) { + size_to_write = bytes - size_written; + + printf("\t[unaligned end] Chunk write @addr 0x%lx (size %ld)\n", offset_end_aligned, size_to_write); + + data_position = + reserve_write_chunk(sccd, child, offset_end_aligned); - assert(qemu_iovec_to_buf(qiov, qiov_offset, data_position_ptr, - chunk_size) == chunk_size); + printf("\t[unaligned end] Writting %ld bytes to data_position (offset %ld)...\n", size_to_write, size_written); + qemu_iovec_to_buf(qiov, size_written, data_position, size_to_write); + + size_written += size_to_write; + } + +end: + assert(size_written == bytes); } 
-static bool read_chunk_from_cache_layer(SyxCowCacheLayer* sccl, - BlockBackend* blk, QEMUIOVector* qiov, - size_t qiov_offset, uint64_t blk_offset) + +static void coroutine_fn read_chunk_from_cache_layer_device(SyxCowCacheDevice* sccd, + QEMUIOVector* qiov, + const int64_t offset, const int64_t bytes, + const uint64_t chunk_size) { - assert(!(qiov->size % sccl->chunk_size)); + printf("\t\tread chunk (chunk size: 0x%lx)\n", chunk_size); + int64_t size_read = 0; + int64_t size_to_read; + void* data_position; + int64_t offset_begin_aligned, offset_begin_remainder, offset_end_aligned, + offset_end_remainder, offset_aligned_size, nb_middle_chunks; + + // chunk size should be a power of 2 + assert(IS_POWER_OF_TWO(chunk_size)); + + offset_begin_aligned = ROUND_DOWN(offset, chunk_size); + offset_begin_remainder = offset % chunk_size; + + // number of chunks that can be read without alignment issues + nb_middle_chunks = + (ROUND_DOWN(offset + bytes, chunk_size) - ROUND_UP(offset, chunk_size)) / chunk_size; + + offset_end_aligned = ROUND_DOWN(offset + bytes, chunk_size); + offset_end_remainder = (offset + bytes) % chunk_size; + + // total size effectively reserved in the cache buffer + offset_aligned_size = + ROUND_UP(offset + bytes, chunk_size) - offset_begin_aligned; + + assert((offset_aligned_size % chunk_size) == 0); // aligned size should be... 
aligned + + // Handle unaligned start + printf("A\n"); + if (offset_begin_remainder) { + size_to_read = + MIN((offset_begin_aligned + chunk_size) - offset, bytes); + + printf("\t[unaligned begin] Read chunk @addr 0x%lx (size %ld)\n", offset_end_aligned, size_to_read); + + data_position = + get_read_chunk(sccd, offset_begin_aligned); + + if (data_position) { + printf("[read] cache hit!\n"); + qemu_iovec_from_buf(qiov, size_read, + data_position + offset_begin_remainder, + size_to_read); + } + + size_read += size_to_read; + + if (size_read == bytes) { + return; + } + } + + printf("B\n"); + // write every chunk until (potentially) unaligned end. + for (int64_t i = 0; i < nb_middle_chunks; ++i) { + data_position = + get_read_chunk(sccd, offset + size_read); + + printf("\tRead chunk @addr 0x%lx (size %ld)\n", offset + size_read, chunk_size); - SyxCowCacheDevice* cache_entry = g_hash_table_lookup( - sccl->cow_cache_devices, GINT_TO_POINTER(blk_name_hash(blk))); + // Cache hit, we must update the qiov + if (data_position) { + // printf("cache hit!\n"); + qemu_iovec_from_buf(qiov, size_read, + data_position, + chunk_size); + } - // return early if nothing is registered - if (!cache_entry) { - return false; + size_read += chunk_size; } + printf("C. offset_end_remainder: %ld. 
offset + bytes = %ld\n", offset_end_remainder, offset + bytes); + // Handle unaligned end + if (offset_end_remainder) { + printf("READ REMINDER....\n"); + size_to_read = bytes - size_read; + assert(size_to_read == offset_end_remainder); + + printf("\tRead chunk @addr 0x%lx (size %ld)\n", offset_end_aligned, chunk_size); + + data_position = + get_read_chunk(sccd, offset_end_aligned); + + // Cache hit, we must update the qiov + if (data_position) { + printf("cache hit!\n"); + qemu_iovec_from_buf(qiov, size_read, + data_position, + size_to_read); + } + + size_read += size_to_read; + } + + printf("D\n"); + assert(size_read == bytes); +} + +static void coroutine_fn write_chunk_to_cache_layer(SyxCowCacheLayer* sccl, BdrvChild* child, const uint32_t id, + const QEMUIOVector* qiov, const int64_t offset, + const int64_t bytes) +{ + assert(id < sccl->blks->len); + SyxCowCacheDevice* cache_entry = + &g_array_index(sccl->blks, SyxCowCacheDevice, id); + assert(cache_entry && cache_entry->data); + + // write qiov to cached pages in current layer. + write_chunk_to_cache_layer_device(cache_entry, qiov, child, offset, bytes, + sccl->chunk_size); +} + +static void coroutine_fn read_chunk_from_cache_layer(SyxCowCacheLayer* sccl, const uint32_t id, + QEMUIOVector* qiov, const int64_t offset, + const int64_t bytes) +{ + assert(id < sccl->blks->len); + SyxCowCacheDevice* cache_entry = + &g_array_index(sccl->blks, SyxCowCacheDevice, id); assert(cache_entry && cache_entry->data); // try to read cached pages in current layer if something is registered. - return read_chunk_from_cache_layer_device(cache_entry, qiov, qiov_offset, - blk_offset); + read_chunk_from_cache_layer_device(cache_entry, qiov, offset, bytes, + sccl->chunk_size); } -// Returns false if could not write to current layer. 
-static bool write_to_cache_layer(SyxCowCacheLayer* sccl, BlockBackend* blk, - int64_t offset, int64_t bytes, - QEMUIOVector* qiov) + +static int syx_cow_cache_do_preadv(BlockDriverState* bs, int64_t offset, int64_t bytes, + QEMUIOVector* qiov, BdrvRequestFlags flags) { - if (qiov->size % sccl->chunk_size) { - // todo: determine if it is worth developing an unaligned access - // version. - printf("error: 0x%zx %% 0x%lx == 0x%lx\n", qiov->size, sccl->chunk_size, - qiov->size % sccl->chunk_size); - exit(1); + BDRVSyxCowCacheState* s = bs->opaque; + SyxCowCache* scc = syx_snapshot_current_scc(); + SyxCowCacheLayer* layer = QTAILQ_FIRST(&scc->layers); + size_t qiov_sz; + + printf("[%d] Read @addr 0x%lx -> 0x%lx\n", s->id, offset, offset + bytes); + for (int64_t i = ROUND_DOWN(offset, layer->chunk_size); i < ROUND_UP(offset + bytes, layer->chunk_size); i += layer->chunk_size) { + printf("[%d]\tReading @addr 0x%lx\n", s->id, i); } - SyxCowCacheDevice* cache_entry = g_hash_table_lookup( - sccl->cow_cache_devices, GINT_TO_POINTER(blk_name_hash(blk))); - - if (unlikely(!cache_entry)) { - cache_entry = g_new0(SyxCowCacheDevice, 1); - cache_entry->data = g_array_sized_new(false, false, sccl->chunk_size, - INITIAL_NB_CHUNKS_PER_DEVICE); - cache_entry->positions = - g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, NULL); - g_hash_table_insert(sccl->cow_cache_devices, - GINT_TO_POINTER(blk_name_hash(blk)), cache_entry); + + qiov_sz = iov_size(qiov->iov, qiov->niov); + assert(qiov_sz == qiov->size); + + assert(scc); + + // First read the backing block device normally. + bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); + + // Then fix the result with the chunks that have been written before. + // Start from the oldest layer, and go towards most recent layers + // use the real nb of bytes read, it could be lower than bytes with some + // blkdevs. + // A better strategy could be to go from top to bottom, and only read once for each sector. 
It is not + // so easy though, since unaligned start / end would require special treatment. For "full" chunks, this works. + QTAILQ_FOREACH_REVERSE(layer, &scc->layers, next) + { + read_chunk_from_cache_layer(layer, s->id, qiov, offset, bytes); } - assert(cache_entry && cache_entry->data); + return bytes; +} + +// ----- QEMU plug ----- +// TODO: cleanup, move in another directory + +static int syx_cow_cache_open(BlockDriverState* bs, QDict* options, int _flags, + Error** errp) +{ + BDRVSyxCowCacheState* state = bs->opaque; + SyxCowCache* current_cache = syx_snapshot_current_scc(); + char tmp[32]; - if (cache_entry->data->len + (qiov->size / sccl->chunk_size) > - sccl->max_nb_chunks) { - return false; + static int ctr = 0; + state->id = ctr++; + + assert(current_cache); + syx_cow_cache_add_blk(current_cache); + + // Open child bdrv (files only are supported atm) + int ret = bdrv_open_file_child(NULL, options, "file", bs, errp); + if (ret < 0) { + return ret; } - // write cached page - uint64_t blk_offset = offset; - size_t qiov_offset = 0; - for (; qiov_offset < qiov->size; - blk_offset += sccl->chunk_size, qiov_offset += sccl->chunk_size) { - write_chunk_to_cache_layer_device(cache_entry, qiov, qiov_offset, - blk_offset); + assert(bs->file); + assert(!strcmp(bs->file->bs->drv->format_name, "file")); + assert(sizeof(tmp) == sizeof(bs->node_name)); + + // exchange node names so that future references to the file falls back to + // our hook + pstrcpy(tmp, sizeof(bs->node_name), bs->node_name); + pstrcpy(bs->node_name, sizeof(bs->node_name), bs->file->bs->node_name); + pstrcpy(bs->file->bs->node_name, sizeof(bs->node_name), tmp); + + // child should never have 'write' or 'write_unchanged' permission + assert(!(bs->file->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))); + + // Reuse file parameters + bs->total_sectors = bs->file->bs->total_sectors; + bs->supported_read_flags = bs->file->bs->supported_read_flags; + bs->supported_write_flags = + 
BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = + BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + printf("\t[%d] linked to %s\n", state->id, bs->file->bs->filename); + + // debug, to remove + { + char clone_file[PATH_MAX + 32]; + Error* err = NULL; + sprintf(clone_file, "%s.clone", bs->file->bs->filename); + QDict *clone_options = qdict_new(); + qdict_put_str(clone_options, "driver", "file"); + qdict_put_str(clone_options, "filename", clone_file); + + BlockDriverState* clone = bdrv_open(NULL, NULL, clone_options, BDRV_O_RDWR | BDRV_O_NOSYX, &err); + if (clone == NULL) { + assert(false); + } + state->clone_bs = clone; } - return true; + return 0; } -void syx_cow_cache_read_entry(SyxCowCache* scc, BlockBackend* blk, - int64_t offset, int64_t bytes, QEMUIOVector* qiov, - size_t _qiov_offset, BdrvRequestFlags flags) -{ - SyxCowCacheLayer* layer; - uint64_t blk_offset = offset; - size_t qiov_offset = 0; - uint64_t chunk_size = 0; +static void syx_cow_cache_check_bs_equality(BlockDriverState* bs, int64_t offset, int64_t bytes, int64_t chunk_size) { + BDRVSyxCowCacheState* s = bs->opaque; + QEMUIOVector qiov_scc, qiov_clone; - // printf("[%s] Read 0x%zx bytes @addr %lx\n", blk_name(blk), qiov->size, - // offset); + char* buf_scc = malloc(bytes); + qemu_iovec_init_buf(&qiov_scc, buf_scc, bytes); - // First read the backing block device normally. - assert(blk_co_preadv(blk, offset, bytes, qiov, flags) >= 0); - - // Then fix the chunks that have been read from before. 
- if (!QTAILQ_EMPTY(&scc->layers)) { - for (; qiov_offset < qiov->size; - blk_offset += chunk_size, qiov_offset += chunk_size) { - QTAILQ_FOREACH(layer, &scc->layers, next) - { - chunk_size = layer->chunk_size; - if (read_chunk_from_cache_layer(layer, blk, qiov, qiov_offset, - blk_offset)) { - break; - } + char* buf_clone = malloc(bytes); + qemu_iovec_init_buf(&qiov_clone, buf_clone, bytes); + + printf("Comparison starts...\n"); + + // read scc + syx_cow_cache_do_preadv(bs, offset, bytes, &qiov_scc, 0); + + // read clone + s->clone_bs->drv->bdrv_co_preadv(s->clone_bs, offset, bytes, &qiov_clone, 0); + + // compare QIOVs + char* buf_scc_out = malloc(bytes); + qemu_iovec_to_buf(&qiov_scc, 0, buf_scc_out, bytes); + + char* buf_clone_out = malloc(bytes); + qemu_iovec_to_buf(&qiov_clone, 0, buf_clone_out, bytes); + + if (memcmp(buf_scc_out, buf_clone_out, bytes) != 0) { + for (int i = 0; i < bytes; ++i) { + if (buf_scc_out[i] != buf_clone_out[i]) { + printf("\t\tdifference on chunk 0x%lx\n", ROUND_DOWN(offset + i, chunk_size)); } } + + // assert(false && "Bug in the syx-cow-cache bdrv found."); + printf("Bug in the syx-cow-cache bdrv found.\n"); + while (true) { + sleep(1); + } } + + printf("Comparison successful.\n"); + + free(buf_scc); + free(buf_clone); + free(buf_scc_out); + free(buf_clone_out); } -bool syx_cow_cache_write_entry(SyxCowCache* scc, BlockBackend* blk, - int64_t offset, int64_t bytes, - QEMUIOVector* qiov, size_t qiov_offset, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +syx_cow_cache_co_preadv(BlockDriverState* bs, int64_t offset, int64_t bytes, + QEMUIOVector* qiov, BdrvRequestFlags flags) { - SyxCowCacheLayer* layer; + SyxCowCache* scc = syx_snapshot_current_scc(); + SyxCowCacheLayer* layer = QTAILQ_FIRST(&scc->layers); - // printf("[%s] Write 0x%zx bytes @addr %lx\n", blk_name(blk), qiov->size, - // offset); + syx_cow_cache_check_bs_equality(bs, offset, bytes, layer->chunk_size); + + syx_cow_cache_do_preadv(bs, offset, bytes, 
qiov, flags); + + // debug, to remove... + syx_cow_cache_check_bs_equality(bs, offset, bytes, layer->chunk_size); + + return bytes; +} + +static int coroutine_fn GRAPH_RDLOCK +syx_cow_cache_co_pwritev(BlockDriverState* bs, int64_t offset, int64_t bytes, + QEMUIOVector* qiov, BdrvRequestFlags flags) +{ + BDRVSyxCowCacheState* s = bs->opaque; + SyxCowCache* scc = syx_snapshot_current_scc(); + SyxCowCacheLayer* layer; layer = QTAILQ_FIRST(&scc->layers); - if (layer) { - assert(write_to_cache_layer(layer, blk, offset, bytes, qiov)); - return true; - } else { - return false; + assert(layer); + + for (int64_t i = ROUND_DOWN(offset, layer->chunk_size); i < ROUND_UP(offset + bytes, layer->chunk_size); i += layer->chunk_size) { + printf("[%d]\tWriting chunk @ 0x%lx\n", s->id, i); } + + syx_cow_cache_check_bs_equality(bs, offset, bytes, layer->chunk_size); + + s->clone_bs->drv->bdrv_co_pwritev(s->clone_bs, offset, bytes, qiov, flags); + + write_chunk_to_cache_layer(layer, bs->file, s->id, qiov, offset, bytes); + + syx_cow_cache_check_bs_equality(bs, offset, bytes, layer->chunk_size); + + return bytes; } + +static int coroutine_fn GRAPH_RDLOCK syx_cow_cache_co_pwrite_zeroes( + BlockDriverState* bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags) +{ + printf("WRITE ZEROES\n"); + abort(); + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + +static int coroutine_fn GRAPH_RDLOCK +syx_cow_cache_co_flush(BlockDriverState* bs) +{ + if (!bs->file) { + return 0; + } + + return bdrv_co_flush(bs->file->bs); +} + +static int coroutine_fn GRAPH_RDLOCK +syx_cow_cache_co_pdiscard(BlockDriverState* bs, int64_t offset, int64_t bytes) +{ + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + +static void GRAPH_RDLOCK +syx_cow_cache_child_perm(BlockDriverState* bs, BdrvChild* c, BdrvChildRole role, + BlockReopenQueue* reopen_queue, uint64_t perm, + uint64_t shared, uint64_t* nperm, uint64_t* nshared) +{ + assert(role & BDRV_CHILD_FILTERED); + + bdrv_default_perms(bs, 
c, role, reopen_queue, perm, shared, nperm, nshared); + + // We do not need 'write' and 'write_unchanged' permissions, the child is + // read-only anyway. + // *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED); +} + +static BlockDriver bdrv_syx_cow_cache = { + .format_name = "syx-cow-cache", + .instance_size = sizeof(BDRVSyxCowCacheState), + + .bdrv_open = syx_cow_cache_open, + // .bdrv_close = syx_cow_cache_close, + + .bdrv_co_preadv = syx_cow_cache_co_preadv, + .bdrv_co_pwritev = syx_cow_cache_co_pwritev, + .bdrv_co_pwrite_zeroes = syx_cow_cache_co_pwrite_zeroes, + .bdrv_co_pdiscard = syx_cow_cache_co_pdiscard, + .bdrv_co_flush = syx_cow_cache_co_flush, + + // .bdrv_co_preadv_snapshot = syx_cow_cache_co_preadv_snapshot, + // .bdrv_co_pdiscard_snapshot = syx_cow_cache_co_pdiscard_snapshot, + // .bdrv_co_snapshot_block_status = syx_cow_cache_co_snapshot_block_status, + + // .bdrv_refresh_filename = syx_cow_cache_refresh_filename, + + .bdrv_child_perm = syx_cow_cache_child_perm, + + .is_filter = true, +}; + +static void syx_cow_cache_init(void) { bdrv_register(&bdrv_syx_cow_cache); } + +block_init(syx_cow_cache_init); diff --git a/libafl/syx-snapshot/syx-snapshot.c b/libafl/syx-snapshot/syx-snapshot.c index 7eb4ad79e8a..9f4271c255e 100644 --- a/libafl/syx-snapshot/syx-snapshot.c +++ b/libafl/syx-snapshot/syx-snapshot.c @@ -144,7 +144,7 @@ struct rb_check_memory_args { uint64_t nb_inconsistent_pages; // OUT }; -void syx_snapshot_init(bool cached_bdrvs) +void syx_snapshot_init(bool use_syx_cow_cache) { uint64_t page_size = TARGET_PAGE_SIZE; @@ -153,18 +153,33 @@ void syx_snapshot_init(bool cached_bdrvs) syx_snapshot_state.tracked_snapshots = syx_snapshot_tracker_init(); - if (cached_bdrvs) { + if (use_syx_cow_cache) { syx_snapshot_state.before_fuzz_cache = syx_cow_cache_new(); - syx_cow_cache_push_layer(syx_snapshot_state.before_fuzz_cache, - SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE, - SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS); + 
syx_cow_cache_push(syx_snapshot_state.before_fuzz_cache, + SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE, + SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS); } syx_snapshot_state.is_enabled = false; } -SyxSnapshot* syx_snapshot_new(bool track, bool is_active_bdrv_cache, - DeviceSnapshotKind kind, char** devices) +bool syx_snapshot_use_scc(void) +{ + return syx_snapshot_state.before_fuzz_cache != NULL; + // return false; +} + +SyxCowCache* syx_snapshot_current_scc(void) +{ + if (syx_snapshot_state.active_snapshot) { + return syx_snapshot_state.active_snapshot->bdrvs_cow_cache; + } else { + return syx_snapshot_state.before_fuzz_cache; + } +} + +SyxSnapshot* syx_snapshot_new(bool track, DeviceSnapshotKind kind, + char** devices) { SyxSnapshot* snapshot = g_new0(SyxSnapshot, 1); @@ -173,19 +188,24 @@ SyxSnapshot* syx_snapshot_new(bool track, bool is_active_bdrv_cache, snapshot->rbs_dirty_list = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)g_hash_table_remove_all); - snapshot->bdrvs_cow_cache = syx_cow_cache_new(); - - if (is_active_bdrv_cache) { - syx_cow_cache_move(snapshot->bdrvs_cow_cache, - &syx_snapshot_state.before_fuzz_cache); - syx_snapshot_state.active_bdrv_cache_snapshot = snapshot; - } else { - syx_cow_cache_push_layer(snapshot->bdrvs_cow_cache, - SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE, - SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS); - } + snapshot->bdrvs_cow_cache = + syx_cow_cache_push(syx_snapshot_state.before_fuzz_cache, + SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE, + SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS); + + // if (is_active_bdrv_cache) { + // syx_cow_cache_move(snapshot->bdrvs_cow_cache, + // &syx_snapshot_state.before_fuzz_cache); + // current_layer = snapshot->bdrvs_cow_cache->layers; + // syx_snapshot_state.active_bdrv_cache_snapshot = snapshot; + // } else { + // current_layer = syx_cow_cache_push_layer(snapshot->bdrvs_cow_cache, + // SYX_SNAPSHOT_COW_CACHE_DEFAULT_CHUNK_SIZE, + // 
SYX_SNAPSHOT_COW_CACHE_DEFAULT_MAX_BLOCKS); + // } if (track) { + syx_snapshot_state.active_snapshot = snapshot; syx_snapshot_track(&syx_snapshot_state.tracked_snapshots, snapshot); } @@ -521,13 +541,16 @@ static inline void syx_snapshot_dirty_list_add_internal(RAMBlock* rb, } } -bool syx_snapshot_is_enabled(void) { return syx_snapshot_state.is_enabled; } +bool syx_snapshot_is_enabled(void) +{ + // return syx_snapshot_state.is_enabled; + return true; +} /* // TODO: Check if using this method is better for performances. // The implementation is pretty bad, it would be nice to store host addr -directly for -// the memcopy happening later on. +// directly for the memcopy happening later on. __attribute__((target("no-3dnow,no-sse,no-mmx"),no_caller_saved_registers)) void syx_snapshot_dirty_list_add_tcg_target(uint64_t dummy, void* host_addr) { // early check to know whether we should log the page access or not @@ -742,46 +765,3 @@ void syx_snapshot_root_restore(SyxSnapshot* snapshot) bql_unlock(); } } - -bool syx_snapshot_cow_cache_read_entry(BlockBackend* blk, int64_t offset, - int64_t bytes, QEMUIOVector* qiov, - size_t qiov_offset, - BdrvRequestFlags flags) -{ - if (!syx_snapshot_state.active_bdrv_cache_snapshot) { - if (syx_snapshot_state.before_fuzz_cache) { - syx_cow_cache_read_entry(syx_snapshot_state.before_fuzz_cache, blk, - offset, bytes, qiov, qiov_offset, flags); - return true; - } - - return false; - } else { - syx_cow_cache_read_entry( - syx_snapshot_state.active_bdrv_cache_snapshot->bdrvs_cow_cache, blk, - offset, bytes, qiov, qiov_offset, flags); - return true; - } -} - -bool syx_snapshot_cow_cache_write_entry(BlockBackend* blk, int64_t offset, - int64_t bytes, QEMUIOVector* qiov, - size_t qiov_offset, - BdrvRequestFlags flags) -{ - if (!syx_snapshot_state.active_bdrv_cache_snapshot) { - if (syx_snapshot_state.before_fuzz_cache) { - assert(syx_cow_cache_write_entry( - syx_snapshot_state.before_fuzz_cache, blk, offset, bytes, qiov, - qiov_offset, 
flags)); - return true; - } - - return false; - } else { - assert(syx_cow_cache_write_entry( - syx_snapshot_state.active_bdrv_cache_snapshot->bdrvs_cow_cache, blk, - offset, bytes, qiov, - qiov_offset, flags)); - return true; - } -} diff --git a/libafl/utils.c b/libafl/utils.c index 427b0a44d5d..1ba49db6a4f 100644 --- a/libafl/utils.c +++ b/libafl/utils.c @@ -1,7 +1,28 @@ #include "qemu/osdep.h" #include "libafl/utils.h" +#include <execinfo.h> + +#define MAX_NB_ADDRESSES 32 uintptr_t libafl_qemu_host_page_size(void) { return qemu_real_host_page_size(); } + +void libafl_qemu_backtrace(void) +{ + void* addresses[MAX_NB_ADDRESSES] = {0}; + + int nb_addresses = backtrace(addresses, MAX_NB_ADDRESSES); + char** symbols = backtrace_symbols(addresses, nb_addresses); + + for (int i = 0; i < nb_addresses; ++i) { + fprintf(stderr, "[%p] %s\n", addresses[i], symbols[i]); + } + + if (nb_addresses == MAX_NB_ADDRESSES) { + fprintf(stderr, "... and continues...\n"); + } + + free(symbols); +} \ No newline at end of file diff --git a/meson.build b/meson.build index 7e7790ea27b..61ff63deded 100644 --- a/meson.build +++ b/meson.build @@ -3433,6 +3433,9 @@ util_ss = ss.source_set() qtest_module_ss = ss.source_set() tcg_module_ss = ss.source_set() +# libafl module +libafl_ss = ss.source_set() + modules = {} target_modules = {} hw_arch = {} @@ -3930,9 +3933,22 @@ foreach target : target_dirs arch_srcs += target_specific.sources() arch_deps += target_specific.dependencies() + libafl_ss = libafl_ss.apply(config_target, strict: false) + liblibafl = static_library('libafl-' + target, + sources: libafl_ss.sources() + genh, + dependencies: arch_deps, + objects: objects, + include_directories: target_inc, + c_args: c_args, + build_by_default: false, + name_suffix: 'fa', + pic: 'AS_SHARED_LIB' in config_host) + libafl = declare_dependency(link_whole: [liblibafl], + link_args: '@libafl.syms') + lib = static_library('qemu-' + target, sources: arch_srcs + genh, - dependencies: arch_deps, + dependencies: 
arch_deps + libafl, objects: objects, include_directories: target_inc, c_args: c_args, diff --git a/qapi/block-core.json b/qapi/block-core.json index 4b18e01b859..158b04de373 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3189,6 +3189,7 @@ 'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs', 'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg', 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster', + 'syx-cow-cache', {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' }, {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' }, 'http', 'https', @@ -3613,6 +3614,22 @@ '*encrypt': 'BlockdevQcow2Encryption', '*data-file': 'BlockdevRef' } } + +## +# @BlockdevOptionsSyxCowCache: +# +# Driver specific block device options for Syx Cow Cache. It is intended +# to be used as a wrapper around other devices transparently. This should +# never be invoked directly by a user. +# +# Since: 9.0.2 +## +{ + 'struct': 'BlockdevOptionsSyxCowCache', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { } +} + ## # @SshHostKeyCheckMode: # @@ -4734,6 +4751,7 @@ 'if': 'CONFIG_REPLICATION' }, 'snapshot-access': 'BlockdevOptionsGenericFormat', 'ssh': 'BlockdevOptionsSsh', + 'syx-cow-cache': 'BlockdevOptionsSyxCowCache', 'throttle': 'BlockdevOptionsThrottle', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', diff --git a/system/vl.c b/system/vl.c index c6442229824..b70a97b3104 100644 --- a/system/vl.c +++ b/system/vl.c @@ -134,6 +134,8 @@ #include "qemu/guest-random.h" #include "qemu/keyval.h" +#include "libafl/syx-snapshot/syx-cow-cache.h" + #define MAX_VIRTIO_CONSOLES 1 typedef struct BlockdevOptionsQueueEntry { @@ -3668,7 +3670,9 @@ void qemu_init(int argc, char **argv) qemu_disable_default_devices(); qemu_setup_display(); qemu_create_default_devices(); + syx_cow_cache_check_files_ro(); qemu_create_early_backends(); + syx_cow_cache_check_files_ro(); qemu_apply_legacy_machine_options(machine_opts_dict); 
qemu_apply_machine_options(machine_opts_dict); @@ -3710,6 +3714,7 @@ void qemu_init(int argc, char **argv) * over memory-backend-file objects). */ qemu_create_late_backends(); + syx_cow_cache_check_files_ro(); phase_advance(PHASE_LATE_BACKENDS_CREATED); /* diff --git a/util/iov.c b/util/iov.c index 7e73948f5e3..980e4c5e464 100644 --- a/util/iov.c +++ b/util/iov.c @@ -481,7 +481,7 @@ void qemu_iovec_reset(QEMUIOVector *qiov) qiov->size = 0; } -size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, +size_t qemu_iovec_to_buf(const QEMUIOVector *qiov, size_t offset, void *buf, size_t bytes) { return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);