From 7d738a7f4be4119c53753a4f395f769206a6f322 Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Thu, 31 Aug 2023 18:07:43 +0300 Subject: [PATCH 1/2] Store and fetch relation keys from fork --- src/access/pg_tde_tdemap.c | 179 ++++++++++++++++++++++++++--- src/access/pg_tdeam.c | 4 +- src/access/pg_tdeam_handler.c | 14 +-- src/encryption/enc_tuple.c | 11 +- src/include/access/pg_tde_tdemap.h | 43 ++++++- src/include/encryption/enc_tuple.h | 6 +- 6 files changed, 224 insertions(+), 33 deletions(-) diff --git a/src/access/pg_tde_tdemap.c b/src/access/pg_tde_tdemap.c index ac66a6a7..c4ad3934 100644 --- a/src/access/pg_tde_tdemap.c +++ b/src/access/pg_tde_tdemap.c @@ -10,11 +10,19 @@ *------------------------------------------------------------------------- */ +#define TDE_FORK_DEBUG 1 + #include "postgres.h" #include "access/pg_tde_tdemap.h" #include "transam/pg_tde_xact_handler.h" #include "storage/fd.h" #include "utils/wait_event.h" +#include "utils/memutils.h" + +#include "access/pg_tde_tdemap.h" + +#include +#include /* * Creates a relation fork file relfilenode.tde that contains the @@ -23,35 +31,106 @@ void pg_tde_create_key_fork(const RelFileLocator *newrlocator, Relation rel) { - char *rel_file_path; - char *key_file_path; - File file = -1; - char enc_key[256]; /* Dummy key */ + /* TODO: should be a user defined */ + static const char *MasterKeyName = "master-key"; + + char *rel_file_path; + char *key_file_path; + File file = -1; + InternalKey int_key = {0}; + RelKeysData *data; + size_t sz; /* We get a relation name for MAIN fork and manually append the * .tde postfix to the file name */ rel_file_path = relpathperm(*newrlocator, MAIN_FORKNUM); - key_file_path = psprintf("%s.tde", rel_file_path); + key_file_path = psprintf("%s."TDE_FORK_EXT, rel_file_path); pfree(rel_file_path); file = PathNameOpenFile(key_file_path, O_RDWR | O_CREAT | PG_BINARY); if (file < 0) - { ereport(FATAL, - (errcode_for_file_access(), - errmsg("could not open tde key file %s", 
key_file_path))); - } - /* TODO: - * For now just write a dummy data to the file. We will write the actual - * key later. - */ - snprintf(enc_key, sizeof(enc_key), "Percona TDE Dummy key for relation:%s", RelationGetRelationName(rel)); - if (FileWrite(file, enc_key, sizeof(enc_key), - 0, WAIT_EVENT_DATA_FILE_WRITE) != sizeof(enc_key)) - ereport(FATAL, (errcode_for_file_access(), - errmsg("Could not write key data to file: %s", - key_file_path))); + (errcode_for_file_access(), + errmsg("could not open tde key file \"%s\": %m", + key_file_path))); + + + if (!RAND_bytes(int_key.key, INTERNAL_KEY_LEN)) + ereport(FATAL, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not generate internal key for relation \"%s\": %s", + RelationGetRelationName(rel), ERR_error_string(ERR_get_error(), NULL)))); + +#if TDE_FORK_DEBUG + ereport(DEBUG2, + (errmsg("internal_key: %s", tde_sprint_key(&int_key)))); +#endif + + data = (RelKeysData *) palloc(SizeOfRelKeysData(2)); + + strcpy(data->master_key_name, MasterKeyName); + data->internal_key[0] = int_key; + data->internal_keys_len = 1; + + sz = SizeOfRelKeysData(data->internal_keys_len); + if (FileWrite(file, data, sz, 0, WAIT_EVENT_DATA_FILE_WRITE) != sz) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not write key data to file \"%s\": %m", + key_file_path))); + + pfree(key_file_path); + pfree(data); + FileClose(file); +} + +/* + * Reads tde keys for the relation from a fork file. 
+ */ +RelKeysData * +pg_tde_get_keys_from_fork(const RelFileLocator *rlocator) +{ + char *rel_file_path; + char *key_file_path; + File file = -1; + Size sz; + int nbytes; + RelKeysData *keys; + + rel_file_path = relpathperm(*rlocator, MAIN_FORKNUM); + key_file_path = psprintf("%s."TDE_FORK_EXT, rel_file_path); + pfree(rel_file_path); + + file = PathNameOpenFile(key_file_path, O_RDONLY | PG_BINARY); + if (file < 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not open tde key file \"%s\": %m", + key_file_path))); + + + sz = (Size) FileSize(file); + keys = (RelKeysData *) MemoryContextAlloc(TopMemoryContext, sz); + + nbytes = FileRead(file, keys, sz, 0, WAIT_EVENT_DATA_FILE_READ); + if (nbytes < 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not read key data file \"%s\": %m", + key_file_path))); + else if (nbytes < SizeOfRelKeysData(1) || + (nbytes - SizeOfRelKeysDataHeader) % sizeof(InternalKey) != 0) + ereport(FATAL, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted key data in file \"%s\"", + key_file_path))); + +#if TDE_FORK_DEBUG + for (Size i = 0; i < keys->internal_keys_len; i++) + ereport(DEBUG2, + (errmsg("fork file keys: [%lu] %s: %s", i+1, keys->master_key_name, tde_sprint_key(&keys->internal_key[i])))); +#endif /* Register the file for delete in case transaction Aborts */ RegisterFileForDeletion(key_file_path, false); @@ -59,4 +138,66 @@ pg_tde_create_key_fork(const RelFileLocator *newrlocator, Relation rel) pfree(key_file_path); /* For now just close the key file.*/ FileClose(file); + + return keys; +} + +/* Head of the keys cache (linked list) */ +RelKeys *tde_rel_keys_map = NULL; + +/* + * Returns TDE keys for a given relation. + * First it looks in a cache. If nothing found in the cache, it reads data from + * the tde fork file and populates cache. 
+ */ +RelKeysData * +GetRelationKeys(Relation rel) +{ + RelKeys *curr; + RelKeys *prev; + RelKeys *new; + RelKeysData *keys; + + Oid rel_id = RelationGetRelid(rel); + for (curr = tde_rel_keys_map; curr != NULL; curr = curr->next) + { + if (curr->rel_id == rel_id) { +#if TDE_FORK_DEBUG + ereport(DEBUG2, + (errmsg("TDE: cache hit, \"%s\" %s | rel %s (%d)", + curr->keys->master_key_name, + tde_sprint_key(&curr->keys->internal_key[0]), + RelationGetRelationName(rel), + rel_id))); +#endif + return curr->keys; + } + prev = curr; + } + + keys = pg_tde_get_keys_from_fork(&rel->rd_locator); + new = (RelKeys *) MemoryContextAlloc(TopMemoryContext, sizeof(RelKeys)); + new->rel_id = rel_id; + new->keys = keys; + + if (tde_rel_keys_map == NULL) + tde_rel_keys_map = new; + else + prev->next = new; + + return keys; +} + +const char * +tde_sprint_key(InternalKey *k) +{ + static char buf[256]; + int i; + + for (i = 0; i < sizeof(k->key); i++) + sprintf(buf+i, "%02X", k->key[i]); + + sprintf(buf+i, "[%lu, %lu]", k->start_loc, k->end_loc); + + return buf; } diff --git a/src/access/pg_tdeam.c b/src/access/pg_tdeam.c index e1a264dc..aef070a1 100644 --- a/src/access/pg_tdeam.c +++ b/src/access/pg_tdeam.c @@ -1168,7 +1168,7 @@ pg_tde_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot pgstat_count_pg_tde_getnext(scan->rs_base.rs_rd); - PGTdeExecStoreBufferHeapTuple(&scan->rs_ctup, slot, + PGTdeExecStoreBufferHeapTuple(sscan->rs_rd, &scan->rs_ctup, slot, scan->rs_cbuf); return true; } @@ -1316,7 +1316,7 @@ pg_tde_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, */ pgstat_count_pg_tde_getnext(scan->rs_base.rs_rd); - PGTdeExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf); + PGTdeExecStoreBufferHeapTuple(sscan->rs_rd, &scan->rs_ctup, slot, scan->rs_cbuf); return true; } diff --git a/src/access/pg_tdeam_handler.c b/src/access/pg_tdeam_handler.c index 58d795db..23a6514b 100644 --- a/src/access/pg_tdeam_handler.c +++ 
b/src/access/pg_tdeam_handler.c @@ -169,7 +169,7 @@ pg_tdeam_index_fetch_tuple(struct IndexFetchTableData *scan, *call_again = !IsMVCCSnapshot(snapshot); slot->tts_tableOid = RelationGetRelid(scan->rel); - PGTdeExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf); + PGTdeExecStoreBufferHeapTuple(scan->rel, &bslot->base.tupdata, slot, hscan->xs_cbuf); } else { @@ -201,7 +201,7 @@ pg_tdeam_fetch_row_version(Relation relation, if (pg_tde_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false)) { /* store in slot, transferring existing pin */ - PGTdeExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer); + PGTdeExecStorePinnedBufferHeapTuple(relation, &bslot->base.tupdata, slot, buffer); slot->tts_tableOid = RelationGetRelid(relation); return true; @@ -575,7 +575,7 @@ pg_tdeam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, tuple->t_tableOid = slot->tts_tableOid; /* store in slot, transferring existing pin */ - PGTdeExecStorePinnedBufferHeapTuple(tuple, slot, buffer); + PGTdeExecStorePinnedBufferHeapTuple(relation, tuple, slot, buffer); return result; } @@ -1165,7 +1165,7 @@ pg_tdeam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, if (sample_it) { - PGTdeExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf); + PGTdeExecStoreBufferHeapTuple(scan->rs_rd, targtuple, slot, hscan->rs_cbuf); hscan->rs_cindex++; /* note that we leave the buffer locked here! */ @@ -1645,7 +1645,7 @@ pg_tdeam_index_build_range_scan(Relation heapRelation, MemoryContextReset(econtext->ecxt_per_tuple_memory); /* Set up for predicate or expression evaluation */ - PGTdeExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf); + PGTdeExecStoreBufferHeapTuple(heapRelation, heapTuple, slot, hscan->rs_cbuf); /* * In a partial index, discard tuples that don't satisfy the @@ -2279,7 +2279,7 @@ pg_tdeam_scan_bitmap_next_tuple(TableScanDesc scan, * Set up the result slot to point to this tuple. 
Note that the slot * acquires a pin on the buffer. */ - PGTdeExecStoreBufferHeapTuple(&hscan->rs_ctup, + PGTdeExecStoreBufferHeapTuple(scan->rs_rd, &hscan->rs_ctup, slot, hscan->rs_cbuf); @@ -2433,7 +2433,7 @@ pg_tdeam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, if (!pagemode) LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); - PGTdeExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf); + PGTdeExecStoreBufferHeapTuple(scan->rs_rd, tuple, slot, hscan->rs_cbuf); /* Count successfully-fetched tuples as heap fetches */ pgstat_count_pg_tde_getnext(scan->rs_rd); diff --git a/src/encryption/enc_tuple.c b/src/encryption/enc_tuple.c index e006f58c..1d0abe4a 100644 --- a/src/encryption/enc_tuple.c +++ b/src/encryption/enc_tuple.c @@ -4,6 +4,7 @@ #include "postgres.h" #include "utils/memutils.h" +#include "access/pg_tde_tdemap.h" #include "encryption/enc_tuple.h" #include "encryption/enc_aes.h" #include "storage/bufmgr.h" @@ -130,21 +131,27 @@ PGTdePageAddItemExtended(Oid oid, } TupleTableSlot * -PGTdeExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer) +PGTdeExecStoreBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer) { Page pageHeader; pageHeader = BufferGetPage(buffer); PGTdeDecryptTupData(BufferGetBlockNumber(buffer), pageHeader, tuple); + /* TODO: use the keys in the appropriate place */ + RelKeysData *keys = GetRelationKeys(rel); + return ExecStoreBufferHeapTuple(tuple, slot, buffer); } TupleTableSlot * -PGTdeExecStorePinnedBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer) +PGTdeExecStorePinnedBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer) { Page pageHeader; + /* TODO: use the keys in the appropriate place */ + RelKeysData *keys = GetRelationKeys(rel); + pageHeader = BufferGetPage(buffer); PGTdeDecryptTupData(BufferGetBlockNumber(buffer), pageHeader, tuple); diff --git a/src/include/access/pg_tde_tdemap.h b/src/include/access/pg_tde_tdemap.h 
index f8f53b97..4391e241 100644 --- a/src/include/access/pg_tde_tdemap.h +++ b/src/include/access/pg_tde_tdemap.h @@ -11,6 +11,47 @@ #include "utils/rel.h" #include "storage/relfilelocator.h" -extern void pg_tde_create_key_fork(const RelFileLocator *newrlocator, Relation rel); +#define TDE_FORK_EXT "tde" + +#define INTERNAL_KEY_LEN 16 +typedef struct InternalKey +{ + uint8 key[INTERNAL_KEY_LEN]; + /* start and end range of the key + * (start_loc == 0 && end_loc == 0) -> the key if for the whole file + */ + Size start_loc; + Size end_loc; +} InternalKey; + +#define MASTER_KEY_LEN 256 +typedef struct RelKeysData +{ + char master_key_name[MASTER_KEY_LEN]; + Size internal_keys_len; + InternalKey internal_key[FLEXIBLE_ARRAY_MEMBER]; +} RelKeysData; + +#define SizeOfRelKeysDataHeader offsetof(RelKeysData, internal_key) +#define SizeOfRelKeysData(keys_num) \ + (SizeOfRelKeysDataHeader + sizeof(InternalKey) * keys_num) +/* Relation keys cache. + * + * TODO: For now it is just a linked list. Data can only be added w/o any + * ability to remove or change it. Also concider usage of more efficient data + * struct (hash map) in the shared memory(?) - currently allocated in the + * TopMemoryContext of the process. 
+ */ +typedef struct RelKeys +{ + Oid rel_id; + RelKeysData *keys; + struct RelKeys *next; +} RelKeys; + +extern void pg_tde_create_key_fork(const RelFileLocator *newrlocator, Relation rel); +extern RelKeysData *pg_tde_get_keys_from_fork(const RelFileLocator *rlocator); +extern RelKeysData *GetRelationKeys(Relation rel); +const char * tde_sprint_key(InternalKey *k); #endif /* PG_TDE_MAP_H */ \ No newline at end of file diff --git a/src/include/encryption/enc_tuple.h b/src/include/encryption/enc_tuple.h index 52e87555..c996d040 100644 --- a/src/include/encryption/enc_tuple.h +++ b/src/include/encryption/enc_tuple.h @@ -1,6 +1,8 @@ #pragma once +#include "utils/rel.h" + #include "storage/bufpage.h" #include "executor/tuptable.h" @@ -14,6 +16,6 @@ PGTdePageAddItemExtended(Oid oid, BlockNumber bn, Page page, /* Wrapper functions for reading decrypted tuple into a given slot */ TupleTableSlot * -PGTdeExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer); +PGTdeExecStoreBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer); TupleTableSlot * -PGTdeExecStorePinnedBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer); +PGTdeExecStorePinnedBufferHeapTuple(Relation rel, HeapTuple tuple, TupleTableSlot *slot, Buffer buffer); From e4fae93147c0dc3c2fdaf52b44293825667701d8 Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Fri, 1 Sep 2023 13:27:12 +0300 Subject: [PATCH 2/2] chores and comments --- src/access/pg_tde_tdemap.c | 6 ++++++ src/include/access/pg_tde_tdemap.h | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/access/pg_tde_tdemap.c b/src/access/pg_tde_tdemap.c index c4ad3934..efb25178 100644 --- a/src/access/pg_tde_tdemap.c +++ b/src/access/pg_tde_tdemap.c @@ -74,6 +74,9 @@ pg_tde_create_key_fork(const RelFileLocator *newrlocator, Relation rel) data->internal_keys_len = 1; sz = SizeOfRelKeysData(data->internal_keys_len); + /* + * TODO: internal key(s) should be 
encrypted + */ if (FileWrite(file, data, sz, 0, WAIT_EVENT_DATA_FILE_WRITE) != sz) ereport(FATAL, (errcode_for_file_access(), @@ -113,6 +116,9 @@ pg_tde_get_keys_from_fork(const RelFileLocator *rlocator) sz = (Size) FileSize(file); keys = (RelKeysData *) MemoryContextAlloc(TopMemoryContext, sz); + /* + * TODO: internal key(s) should be encrypted + */ nbytes = FileRead(file, keys, sz, 0, WAIT_EVENT_DATA_FILE_READ); if (nbytes < 0) ereport(FATAL, diff --git a/src/include/access/pg_tde_tdemap.h b/src/include/access/pg_tde_tdemap.h index 4391e241..6b5834ff 100644 --- a/src/include/access/pg_tde_tdemap.h +++ b/src/include/access/pg_tde_tdemap.h @@ -17,17 +17,17 @@ typedef struct InternalKey { uint8 key[INTERNAL_KEY_LEN]; - /* start and end range of the key - * (start_loc == 0 && end_loc == 0) -> the key if for the whole file + /* a start and end range of the key + * (start_loc == 0 && end_loc == 0) -> the key is for the whole file */ Size start_loc; Size end_loc; } InternalKey; -#define MASTER_KEY_LEN 256 +#define MASTER_KEY_NAME_LEN 256 typedef struct RelKeysData { - char master_key_name[MASTER_KEY_LEN]; + char master_key_name[MASTER_KEY_NAME_LEN]; Size internal_keys_len; InternalKey internal_key[FLEXIBLE_ARRAY_MEMBER]; } RelKeysData; @@ -39,7 +39,7 @@ typedef struct RelKeysData /* Relation keys cache. * * TODO: For now it is just a linked list. Data can only be added w/o any - * ability to remove or change it. Also concider usage of more efficient data + * ability to remove or change it. Also consider usage of more efficient data * struct (hash map) in the shared memory(?) - currently allocated in the * TopMemoryContext of the process. */