From ce56e9ec1b1c886755b263e3881e6e618cfba4cc Mon Sep 17 00:00:00 2001 From: Konstantin Kushnir Date: Sat, 20 Jul 2024 15:04:20 +0300 Subject: [PATCH] Refactor storage format (#43) --- ChangeLog | 3 + TODO.txt | 16 + configure | 2 +- configure.in | 2 +- cookfswriter/cookfswriter.tcl | 65 ++- generic/cookfs.h | 15 + generic/pageObj.c | 33 +- generic/pageObj.h | 6 + generic/pages.c | 680 ++++++++++++++---------- generic/pages.h | 3 +- generic/pagesAsync.c | 475 +++++++++++++++++ generic/pagesAsync.h | 20 + generic/pagesCmd.c | 19 +- generic/pagesCompr.c | 962 ++++++++-------------------------- generic/pagesCompr.h | 27 +- generic/pagesComprBrotli.c | 135 ++--- generic/pagesComprBrotli.h | 8 +- generic/pagesComprBz2.c | 142 ++--- generic/pagesComprBz2.h | 8 +- generic/pagesComprCustom.c | 120 +++++ generic/pagesComprCustom.h | 21 + generic/pagesComprLzma.c | 192 ++----- generic/pagesComprLzma.h | 8 +- generic/pagesComprZlib.c | 279 ++++------ generic/pagesComprZlib.h | 8 +- generic/pagesComprZstd.c | 144 ++--- generic/pagesComprZstd.h | 8 +- generic/pagesInt.h | 22 +- generic/pgindex.c | 360 +++++++++++++ generic/pgindex.h | 43 ++ generic/vfsCmd.c | 18 +- scripts/pages.tcl | 300 ++++++++--- tests/compression.test | 38 +- tests/crypto.test | 6 +- tests/pages.test | 42 +- tests/pagesAsync.test | 2 +- tests/vfs.test | 50 +- 37 files changed, 2451 insertions(+), 1831 deletions(-) create mode 100644 generic/pagesAsync.c create mode 100644 generic/pagesAsync.h create mode 100644 generic/pagesComprCustom.c create mode 100644 generic/pagesComprCustom.h create mode 100644 generic/pgindex.c create mode 100644 generic/pgindex.h diff --git a/ChangeLog b/ChangeLog index 14ac03d..672555f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +2024-07-20 Konstantin Kushnir + * Refactor storage format + 2024-07-08 Konstantin Kushnir * Add procedures for AES encryption/decryption * Rename crypt->crypto diff --git a/TODO.txt b/TODO.txt index fb673a0..f627e5d 100644 --- a/TODO.txt +++ 
b/TODO.txt @@ -1,2 +1,18 @@ * Add support for encrypted/password protected files * Consider mapping archive to memory to improve performance on readonly VFS +* Add ability to disable custom compression to reduce size footprint when it is not needed +* Add ability to disable async compression to reduce size footprint when it is not needed +* Add MD5 hash validation on load for pgindex data +* Add MD5 hash validation on load for fsindex data +* Add MD5 hash validation on load for all pages data +* Update bzip2 and use it as a submodule. Perhaps this will get rid of its compile-time warnings. + +== Examples + +* Add simple example of tclsh with cookfs. Compare with tclsh9+zipfs (size and +loading speed). This example will cover cookfs usage from C side. + +* Add an example of a Tcl archiver application that uses the cookfs format. +It will be like any other archiver and will support file +compression/decompression. This example will cover cookfs usage from Tcl side. + diff --git a/configure b/configure index 76d05e4..e164c04 100755 --- a/configure +++ b/configure @@ -8754,7 +8754,7 @@ if test ${USECPAGES} = yes; then COOKFS_PKGCONFIG_USECPAGES=1 COOKFS_PKGCONFIG_FEATURE_ASIDE=1 - vars="pageObj.c pages.c pagesCompr.c pagesComprZlib.c pagesCmd.c" + vars="pgindex.c pageObj.c pages.c pagesCompr.c pagesAsync.c pagesComprZlib.c pagesComprCustom.c pagesCmd.c" for i in $vars; do case $i in \$*) diff --git a/configure.in b/configure.in index 7c5e417..7337fba 100644 --- a/configure.in +++ b/configure.in @@ -143,7 +143,7 @@ if test ${USECPAGES} = yes; then AC_DEFINE(COOKFS_USECPAGES) COOKFS_PKGCONFIG_USECPAGES=1 COOKFS_PKGCONFIG_FEATURE_ASIDE=1 - TEA_ADD_SOURCES([pageObj.c pages.c pagesCompr.c pagesComprZlib.c pagesCmd.c]) + TEA_ADD_SOURCES([pgindex.c pageObj.c pages.c pagesCompr.c pagesAsync.c pagesComprZlib.c pagesComprCustom.c pagesCmd.c]) # enable bz2 files only if pages are handled using C if test ${USEBZ2} = yes; then diff --git a/cookfswriter/cookfswriter.tcl 
b/cookfswriter/cookfswriter.tcl index 2904699..e136fe4 100644 --- a/cookfswriter/cookfswriter.tcl +++ b/cookfswriter/cookfswriter.tcl @@ -6,6 +6,35 @@ namespace eval cookfs {} +proc cookfs::createArchivePageIndex {pagelist} { + set rc "" + + # add page count + append rc [binary format I [llength $pagelist]] + + set data_compression "" + set data_compressionLevel "" + set data_encryption "" + set data_sizeCompressed "" + set data_sizeUncompressed "" + set data_hash "" + + foreach page $pagelist { + append data_compression [binary format c 0] + append data_compressionLevel [binary format c 0] + append data_encryption [binary format c 0] + append data_sizeCompressed [binary format I [string length $page]] + append data_sizeUncompressed [binary format I [string length $page]] + append data_hash [binary format c16 {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0}] + } + + append rc $data_compression $data_compressionLevel $data_encryption + append rc $data_sizeCompressed $data_sizeUncompressed $data_hash + + return $rc +} + + proc cookfs::createArchiveFileIndex {filelist} { set rc "" @@ -150,24 +179,34 @@ proc cookfs::createArchive {archivefile filelist {bootstrap ""}} { } fconfigure $fh -translation binary foreach page $pagelist { - puts -nonewline $fh \u0000$page + puts -nonewline $fh $page } - # add fake md5 indexes - foreach page $pagelist { - puts -nonewline $fh [binary format IIII 0 0 0 0] - } + set pgindexdata [createArchivePageIndex $pagelist] + puts -nonewline $fh $pgindexdata - # add page indexes - foreach page $pagelist { - puts -nonewline $fh [binary format I [expr {[string length $page] + 1}]] - } + set fsindexdata "CFS2.200[createArchiveFileIndex $fileindex]" + puts -nonewline $fh $fsindexdata + + # write archive footer + # base compression type + base compression level + encryption + puts -nonewline $fh [binary format ccc 0 0 0] + + # write pgindex info: + # compression + compression level + hash(16 bytes) + size compressed + size uncompressed + puts -nonewline $fh 
[binary format ccc16II \ + 0 0 {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \ + [string length $pgindexdata] [string length $pgindexdata]] + + # write fsindex info: + # compression + compression level + hash(16 bytes) + size compressed + size uncompressed + puts -nonewline $fh [binary format ccc16II \ + 0 0 {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \ + [string length $fsindexdata] [string length $fsindexdata]] - # TODO: add index - set indexdata "\u0000CFS2.200[createArchiveFileIndex $fileindex]" + # write signature + puts -nonewline $fh "CFS0003" - puts -nonewline $fh $indexdata - puts -nonewline $fh [binary format IIcca* [string length $indexdata] [llength $pagelist] 0 0 CFS0002] close $fh } diff --git a/generic/cookfs.h b/generic/cookfs.h index 019d0c0..1db1921 100644 --- a/generic/cookfs.h +++ b/generic/cookfs.h @@ -20,6 +20,7 @@ #include #ifdef COOKFS_INTERNAL_DEBUG + #ifndef __FUNCTION_NAME__ #ifdef _WIN32 // WINDOWS #define __FUNCTION_NAME__ __FUNCTION__ @@ -27,6 +28,20 @@ #define __FUNCTION_NAME__ __func__ #endif #endif + +// This is an experiment to print debug messages indented according to +// the current stack depth. The -funwind-tables compiler key must be used +// for the backtrace() function to work. +// +// This feature is not currently used, but may be used in the future. 
+// +// #include <execinfo.h> +// static inline int ___get_stack_depth() { +// void *buffer[200]; +// return backtrace(buffer, 200); +// } +// #define CookfsLog(a) {printf("%d ", ___get_stack_depth()); a; printf("\n"); fflush(stdout);} + // #define CookfsLog(a) {printf("[%p] ", (void *)Tcl_GetCurrentThread()); a; printf("\n"); fflush(stdout);} // #define CookfsLog2(a) {printf("[%p] ", (void *)Tcl_GetCurrentThread()); printf("%s: ", __FUNCTION_NAME__); a; printf("\n"); fflush(stdout);} #define CookfsLog(a) {a; printf("\n"); fflush(stdout);} diff --git a/generic/pageObj.c b/generic/pageObj.c index cb2337a..71286dc 100644 --- a/generic/pageObj.c +++ b/generic/pageObj.c @@ -12,13 +12,12 @@ void Cookfs_PageObjIncrRefCount(Cookfs_PageObj pg) { Cookfs_PageObjStruct *ps = (Cookfs_PageObjStruct *)(pg - sizeof(Cookfs_PageObjStruct)); - // CookfsLog(printf("Cookfs_PageObjIncrRefCount: %p", (void *)pg)); + // CookfsLog2(printf("%p (allocated at %p)", (void *)pg, (void *)ps)); #ifdef TCL_THREADS Tcl_MutexLock(&ps->mx); #endif /* TCL_THREADS */ ps->refCount++; - // CookfsLog(printf("Cookfs_PageObjIncrRefCount: %p - count:%d", - // (void *)pg, ps->refCount)); + // CookfsLog2(printf("%p - count:%d", (void *)pg, ps->refCount)); #ifdef TCL_THREADS Tcl_MutexUnlock(&ps->mx); #endif /* TCL_THREADS */ @@ -27,13 +26,16 @@ void Cookfs_PageObjIncrRefCount(Cookfs_PageObj pg) { void Cookfs_PageObjDecrRefCount(Cookfs_PageObj pg) { Cookfs_PageObjStruct *ps = (Cookfs_PageObjStruct *)(pg - sizeof(Cookfs_PageObjStruct)); - // CookfsLog(printf("Cookfs_PageObjDecrRefCount: release %p", (void *)pg)); + // CookfsLog2(printf("%p (allocated at %p)", (void *)pg, (void *)ps)); #ifdef TCL_THREADS Tcl_MutexLock(&ps->mx); #endif /* TCL_THREADS */ + // There should not be Cookfs_PageObjDecrRefCount() without + // a corresponding Cookfs_PageObjIncrRefCount() that was called before it. + // Throw an error if refcount is less than or equal to zero. 
+ assert(ps->refCount > 0); ps->refCount--; - // CookfsLog(printf("Cookfs_PageObjDecrRefCount: %p - count:%d", - // (void *)pg, ps->refCount)); + // CookfsLog2(printf("%p - count:%d", (void *)pg, ps->refCount)); #ifdef TCL_THREADS Tcl_MutexUnlock(&ps->mx); #endif /* TCL_THREADS */ @@ -41,7 +43,7 @@ void Cookfs_PageObjDecrRefCount(Cookfs_PageObj pg) { #ifdef TCL_THREADS Tcl_MutexFinalize(&ps->mx); #endif /* TCL_THREADS */ - // CookfsLog(printf("Cookfs_PageObjDecrRefCount: release %p", (void *)pg)); + // CookfsLog2(printf("release %p", (void *)pg)); ckfree(ps); } } @@ -59,7 +61,7 @@ static Tcl_Size Cookfs_PageObjCalculateSize(Tcl_Size size) { } Cookfs_PageObj Cookfs_PageObjAlloc(Tcl_Size size) { - CookfsLog2(printf("enter...")); + // CookfsLog2(printf("enter...")); Tcl_Size bufferSize = Cookfs_PageObjCalculateSize(size); Cookfs_PageObj p = ckalloc(bufferSize + sizeof(Cookfs_PageObjStruct)); if (p != NULL) { @@ -75,13 +77,14 @@ Cookfs_PageObj Cookfs_PageObjAlloc(Tcl_Size size) { #endif /* COOKFS_USECCRYPTO */ p += sizeof(Cookfs_PageObjStruct); } - CookfsLog(printf("Cookfs_PageObjAlloc: return %p", (void *)p)); + CookfsLog(printf("Cookfs_PageObjAlloc: return %p (allocated at %p)", + (void *)p, (void *)(p - sizeof(Cookfs_PageObjStruct)))); return p; } -Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj) { - Tcl_Size size; - unsigned char *bytes = Tcl_GetByteArrayFromObj(obj, &size); +Cookfs_PageObj Cookfs_PageObjNewFromString(const unsigned char *bytes, + Tcl_Size size) +{ Cookfs_PageObj rc = Cookfs_PageObjAlloc(size); if (rc != NULL) { memcpy(rc, bytes, size); @@ -89,6 +92,12 @@ Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj) { return rc; } +Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj) { + Tcl_Size size; + const unsigned char *bytes = Tcl_GetByteArrayFromObj(obj, &size); + return Cookfs_PageObjNewFromString(bytes, size); +} + #ifdef COOKFS_USECCRYPTO Cookfs_PageObj Cookfs_PageObjNewFromByteArrayIV(Tcl_Obj *obj) { diff --git 
a/generic/pageObj.h b/generic/pageObj.h index 5260b14..46dd84c 100644 --- a/generic/pageObj.h +++ b/generic/pageObj.h @@ -44,11 +44,17 @@ void Cookfs_PageObjDecrRefCount(Cookfs_PageObj pg); (((Cookfs_PageObjStruct *)((Cookfs_PageObj)(p) - \ sizeof(Cookfs_PageObjStruct)))->effectiveSize) +#define Cookfs_PageObjSetSize(p,n) \ + (((Cookfs_PageObjStruct *)((Cookfs_PageObj)(p) - \ + sizeof(Cookfs_PageObjStruct)))->effectiveSize) = (n) + #define Cookfs_PageObjCopyAsByteArray(p) \ Tcl_NewByteArrayObj(p, Cookfs_PageObjSize(p)) Cookfs_PageObj Cookfs_PageObjAlloc(Tcl_Size size); Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj); +Cookfs_PageObj Cookfs_PageObjNewFromString(const unsigned char *bytes, + Tcl_Size size); #ifdef COOKFS_USECCRYPTO diff --git a/generic/pages.c b/generic/pages.c index 20df204..400fd3c 100644 --- a/generic/pages.c +++ b/generic/pages.c @@ -11,8 +11,31 @@ #include "pages.h" #include "pagesInt.h" #include "pagesCompr.h" - -#define COOKFS_SUFFIX_BYTES 17 +#include "pagesAsync.h" + +// 1 byte - base compression +// 1 byte - base compression level +// 1 byte - encryption +// 26 bytes - pgindex info (1 byte compression + 1 byte level + 16 bytes MD5 hash + 4 bytes size compressed + 4 bytes size uncompressed ) +// 26 bytes - fsindex info (1 byte compression + 1 byte level + 16 bytes MD5 hash + 4 bytes size compressed + 4 bytes size uncompressed ) +// 7 bytes - signature +// Total: 62 bytes +#define COOKFS_SUFFIX_BYTES (1 + 1 + 1 + 26 * 2 + COOKFS_SIGNATURE_LENGTH) +// Offsets +#define COOKFS_SUFFIX_OFFSET_BASE_COMPRESSION 0 +#define COOKFS_SUFFIX_OFFSET_BASE_LEVEL (COOKFS_SUFFIX_OFFSET_BASE_COMPRESSION + 1) +#define COOKFS_SUFFIX_OFFSET_ENCRYPTION (COOKFS_SUFFIX_OFFSET_BASE_LEVEL + 1) +#define COOKFS_SUFFIX_OFFSET_PGINDEX_COMPRESSION (COOKFS_SUFFIX_OFFSET_ENCRYPTION + 1) +#define COOKFS_SUFFIX_OFFSET_PGINDEX_LEVEL (COOKFS_SUFFIX_OFFSET_PGINDEX_COMPRESSION + 1) +#define COOKFS_SUFFIX_OFFSET_PGINDEX_HASH (COOKFS_SUFFIX_OFFSET_PGINDEX_LEVEL + 1) 
+#define COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_COMPR (COOKFS_SUFFIX_OFFSET_PGINDEX_HASH + 16) +#define COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_UNCOMPR (COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_COMPR + 4) +#define COOKFS_SUFFIX_OFFSET_FSINDEX_COMPRESSION (COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_UNCOMPR + 4) +#define COOKFS_SUFFIX_OFFSET_FSINDEX_LEVEL (COOKFS_SUFFIX_OFFSET_FSINDEX_COMPRESSION + 1) +#define COOKFS_SUFFIX_OFFSET_FSINDEX_HASH (COOKFS_SUFFIX_OFFSET_FSINDEX_LEVEL + 1) +#define COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_COMPR (COOKFS_SUFFIX_OFFSET_FSINDEX_HASH + 16) +#define COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_UNCOMPR (COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_COMPR + 4) +#define COOKFS_SUFFIX_OFFSET_SIGNATURE (COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_UNCOMPR + 4) // read by 512kb chunks #define COOKFS_SEARCH_STAMP_CHUNK 524288 @@ -25,7 +48,6 @@ static Cookfs_PageObj CookfsPagesPageGetInt(Cookfs_Pages *p, int index, Tcl_Obj **err); static void CookfsPagesPageCacheMoveToTop(Cookfs_Pages *p, int index); static int CookfsReadIndex(Tcl_Interp *interp, Cookfs_Pages *p, Tcl_Obj **err); -static void CookfsPagesPageExtendIfNeeded(Cookfs_Pages *p, int count); static void CookfsTruncateFileIfNeeded(Cookfs_Pages *p, Tcl_WideInt targetOffset); static Tcl_WideInt Cookfs_PageSearchStamp(Cookfs_Pages *p); static void Cookfs_PagesFree(Cookfs_Pages *p); @@ -126,7 +148,8 @@ void Cookfs_PagesLockExclusive(Cookfs_Pages *p) { } int Cookfs_PagesGetLength(Cookfs_Pages *p) { - return p->dataNumPages; + Cookfs_PagesWantRead(p); + return Cookfs_PgIndexGetLength(p->pagesIndex); } /* @@ -282,7 +305,8 @@ Cookfs_Pages *Cookfs_PagesGetHandle(Tcl_Interp *interp, const char *cmdName) { *---------------------------------------------------------------------- */ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, - int fileReadOnly, int fileCompression, int fileCompressionLevel, + int fileReadOnly, int baseCompression, int baseCompressionLevel, + int currentCompression, int currentCompressionLevel, char *fileSignature, 
int useFoffset, Tcl_WideInt foffset, int isAside, int asyncDecompressQueueSize, Tcl_Obj *compressCommand, Tcl_Obj *decompressCommand, @@ -330,21 +354,21 @@ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, // Split the signature into 2 strings so we don't find that whole string // when searching for the signature memcpy(rc->fileSignature, "CFS", 3); - memcpy(rc->fileSignature + 3, "0002", 4); + memcpy(rc->fileSignature + 3, "0003", 4); } // Split the stamp into 2 strings so we don't find that whole string // when searching for the stamp memcpy(rc->fileStamp, "CFS", 3); - memcpy(rc->fileStamp + 3, "S002", 4); + memcpy(rc->fileStamp + 3, "S003", 4); /* initialize parameters */ rc->fileLastOp = COOKFS_LASTOP_UNKNOWN; - rc->fileCompression = fileCompression; - rc->fileCompressionLevel = fileCompressionLevel; - rc->dataNumPages = 0; - rc->dataPagesDataSize = 256; - rc->dataPagesSize = (int *) ckalloc(rc->dataPagesDataSize * sizeof(int)); - rc->dataPagesMD5 = (unsigned char *) ckalloc(rc->dataPagesDataSize * 16); + rc->baseCompression = baseCompression; + rc->baseCompressionLevel = baseCompressionLevel; + rc->currentCompression = currentCompression; + rc->currentCompressionLevel = currentCompressionLevel; + rc->encryption = COOKFS_ENCRYPT_NONE; + rc->pagesIndex = NULL; rc->dataAsidePages = NULL; rc->dataPagesIsAside = isAside; @@ -385,7 +409,7 @@ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, CookfsLog(printf("Opening file %s as %s with compression %d level %d", Tcl_GetStringFromObj(fileName, NULL), (rc->fileReadOnly ? 
"rb" : "ab+"), - fileCompression, fileCompressionLevel)); + baseCompression, baseCompressionLevel)); /* open file for reading / writing */ CookfsLog(printf("Cookfs_PagesInit - Tcl_FSOpenFileChannel")) @@ -444,13 +468,10 @@ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, } else { rc->isFirstWrite = 1; rc->dataInitialOffset = Tcl_Seek(rc->fileChannel, 0, SEEK_END); - rc->dataAllPagesSize = 0; - rc->dataNumPages = 0; rc->pagesUptodate = 0; rc->indexChanged = 1; rc->shouldTruncate = 1; } - CookfsLog(printf("Index not read!")) } else { rc->pagesUptodate = 1; @@ -458,15 +479,34 @@ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, rc->shouldTruncate = 1; } - /* force compression since we want to use target compression anyway */ - if (!rc->fileReadOnly) { - rc->fileCompression = fileCompression; - rc->fileCompressionLevel = fileCompressionLevel; + if (rc->pagesIndex == NULL) { + CookfsLog2(printf("pgindex is not defined, initialize a new one")); + rc->pagesIndex = Cookfs_PgIndexInit(0); + } + + if (rc->baseCompression == -1 || rc->baseCompressionLevel == -1) { + // Cookfs_CompressionFromObj() returns the default compression + // type/level when NULL is passed as input compression name. 
+ Cookfs_CompressionFromObj(NULL, NULL, &rc->baseCompression, + &rc->baseCompressionLevel); + CookfsLog2(printf("base compression is not defined, setting to" + " the default: compression: %d level %d", rc->baseCompression, + rc->baseCompressionLevel)); + } else { + CookfsLog2(printf("base compression is defined: compression: %d" + " level %d", rc->baseCompression, rc->baseCompressionLevel)); } - CookfsLog(printf("Opening file %s - compression %d level %d", - Tcl_GetStringFromObj(fileName, NULL), - rc->fileCompression, rc->fileCompressionLevel)); + if (rc->currentCompression == -1 || rc->currentCompressionLevel == -1) { + rc->currentCompression = rc->baseCompression; + rc->currentCompressionLevel = rc->baseCompressionLevel; + CookfsLog2(printf("current compression is not defined, setting to" + " the same values as base: compression: %d level %d", + rc->currentCompression, rc->currentCompressionLevel)); + } else { + CookfsLog2(printf("current compression is defined: compression: %d" + " level %d", rc->baseCompression, rc->baseCompressionLevel)); + } return rc; } @@ -497,9 +537,7 @@ Tcl_WideInt Cookfs_PagesClose(Cookfs_Pages *p) { CookfsLog(printf("Cookfs_PagesClose - Pages up to date = %d, Index changed = %d", p->pagesUptodate, p->indexChanged)) /* if changes were made, save them to disk */ if ((!p->pagesUptodate) || (p->indexChanged)) { - int indexSize = 0; unsigned char buf[COOKFS_SUFFIX_BYTES]; - Tcl_Obj *obj; /* ensure all async pages are written */ while(Cookfs_AsyncCompressWait(p, 1)) {}; @@ -510,51 +548,104 @@ Tcl_WideInt Cookfs_PagesClose(Cookfs_Pages *p) { // Add initial stamp if needed Cookfs_PageAddStamp(p, 0); - /* seek to proper position */ - Cookfs_SeekToPage(p, p->dataNumPages); - - if (p->dataNumPages > 0) { - unsigned char *bufSizes; - /* add MD5 information */ - obj = Tcl_NewByteArrayObj(p->dataPagesMD5, p->dataNumPages * 16); - Tcl_IncrRefCount(obj); - Tcl_WriteObj(p->fileChannel, obj); - Tcl_DecrRefCount(obj); - - /* add page size information */ - 
bufSizes = (unsigned char *) ckalloc(p->dataNumPages * 4); - Cookfs_Int2Binary(p->dataPagesSize, bufSizes, p->dataNumPages); - obj = Tcl_NewByteArrayObj(bufSizes, p->dataNumPages * 4); - Tcl_IncrRefCount(obj); - Tcl_WriteObj(p->fileChannel, obj); - Tcl_DecrRefCount(obj); - ckfree((void *) bufSizes); + // Fill in the basic data for the file system suffix + buf[COOKFS_SUFFIX_OFFSET_BASE_COMPRESSION] = p->baseCompression; + buf[COOKFS_SUFFIX_OFFSET_BASE_LEVEL] = p->baseCompressionLevel; + buf[COOKFS_SUFFIX_OFFSET_ENCRYPTION] = 0; + memcpy(&buf[COOKFS_SUFFIX_OFFSET_SIGNATURE], p->fileSignature, + COOKFS_SIGNATURE_LENGTH); + + // Make sure we use base compression and compression level for + // pgindex/fsindex data + p->currentCompression = p->baseCompression; + p->currentCompressionLevel = p->baseCompressionLevel; + + // First, we get a dump of pages index. Then we add the dump of + // the pages index and fsindex as additional pages to the pages index. + // This will allow us to use Cookfs_Write...() functions to write + // data and get the properties of the written data (compression, + // compression level, size of the written data) + + int indexSizeCompressed; + int indexSizeUncompressed; + + if (Cookfs_PagesGetLength(p) > 0) { + + CookfsLog2(printf("write pgindex data...")); + Cookfs_PageObj pgindexExportObj = Cookfs_PgIndexExport(p->pagesIndex); + indexSizeUncompressed = Cookfs_PageObjSize(pgindexExportObj); + + Cookfs_MD5(pgindexExportObj, indexSizeUncompressed, + &buf[COOKFS_SUFFIX_OFFSET_PGINDEX_HASH]); + + int pgindexIndex = Cookfs_PgIndexAddPage(p->pagesIndex, 0, 0, 0, + -1, indexSizeUncompressed, + &buf[COOKFS_SUFFIX_OFFSET_PGINDEX_HASH]); + + indexSizeCompressed = Cookfs_WritePageObj(p, pgindexIndex, + pgindexExportObj); + + Cookfs_PageObjBounceRefCount(pgindexExportObj); + + if (indexSizeCompressed < 0) { + Tcl_Panic("Unable to compress pgindex"); + } + + buf[COOKFS_SUFFIX_OFFSET_PGINDEX_COMPRESSION] = + Cookfs_PgIndexGetCompression(p->pagesIndex, 
pgindexIndex) & 0xFF; + buf[COOKFS_SUFFIX_OFFSET_PGINDEX_LEVEL] = + Cookfs_PgIndexGetCompression(p->pagesIndex, pgindexIndex) & 0xFF; + + } else { + CookfsLog2(printf("pgindex data is empty")); + indexSizeCompressed = 0; + indexSizeUncompressed = 0; } - /* write index */ + Cookfs_Int2Binary(&indexSizeCompressed, + &buf[COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_COMPR], 1); + Cookfs_Int2Binary(&indexSizeUncompressed, + &buf[COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_UNCOMPR], 1); + if (p->dataIndex != NULL) { - indexSize = Cookfs_WritePageObj(p, -1, p->dataIndex, NULL); - if (indexSize < 0) { - /* TODO: handle index writing issues better */ - Tcl_Panic("Unable to compress index"); + + CookfsLog2(printf("write fsindex data...")); + indexSizeUncompressed = Cookfs_PageObjSize(p->dataIndex); + + Cookfs_MD5(p->dataIndex, indexSizeUncompressed, + &buf[COOKFS_SUFFIX_OFFSET_FSINDEX_HASH]); + + int fsindexIndex = Cookfs_PgIndexAddPage(p->pagesIndex, 0, 0, 0, + -1, indexSizeUncompressed, + &buf[COOKFS_SUFFIX_OFFSET_FSINDEX_HASH]); + + indexSizeCompressed = Cookfs_WritePageObj(p, fsindexIndex, + p->dataIndex); + if (indexSizeCompressed < 0) { + Tcl_Panic("Unable to compress fsindex"); } + + buf[COOKFS_SUFFIX_OFFSET_FSINDEX_COMPRESSION] = + Cookfs_PgIndexGetCompression(p->pagesIndex, fsindexIndex) & 0xFF; + buf[COOKFS_SUFFIX_OFFSET_FSINDEX_LEVEL] = + Cookfs_PgIndexGetCompression(p->pagesIndex, fsindexIndex) & 0xFF; + + } else { + CookfsLog2(printf("fsindex data is empty")); + indexSizeCompressed = 0; + indexSizeUncompressed = 0; } - CookfsLog(printf("Cookfs_PagesClose - Offset write: %d", (int) Tcl_Seek(p->fileChannel, 0, SEEK_CUR))) + Cookfs_Int2Binary(&indexSizeCompressed, + &buf[COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_COMPR], 1); + Cookfs_Int2Binary(&indexSizeUncompressed, + &buf[COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_UNCOMPR], 1); - /* provide index size and number of pages */ - Cookfs_Int2Binary(&indexSize, buf, 1); - Cookfs_Int2Binary(&(p->dataNumPages), buf + 4, 1); + CookfsLog2(printf("offset write 
suffix: %" TCL_LL_MODIFIER "d", + Tcl_Tell(p->fileChannel))); - /* provide compression type and file signature */ - buf[8] = p->fileCompression; - buf[9] = p->fileCompressionLevel; - memcpy(buf + 10, p->fileSignature, 7); + Tcl_Write(p->fileChannel, (char *)buf, COOKFS_SUFFIX_BYTES); - obj = Tcl_NewByteArrayObj(buf, COOKFS_SUFFIX_BYTES); - Tcl_IncrRefCount(obj); - Tcl_WriteObj(p->fileChannel, obj); - Tcl_DecrRefCount(obj); p->foffset = Tcl_Tell(p->fileChannel); CookfsTruncateFileIfNeeded(p, p->foffset); @@ -674,9 +765,10 @@ void Cookfs_PagesFini(Cookfs_Pages *p) { } /* clean up pages data */ - CookfsLog(printf("Cleaning up pages MD5/size")) - ckfree((void *) p->dataPagesSize); - ckfree((void *) p->dataPagesMD5); + CookfsLog(printf("Cleaning up pages index")) + if (p->pagesIndex != NULL) { + Cookfs_PgIndexFini(p->pagesIndex); + } if (p->commandToken != NULL) { CookfsLog(printf("Cleaning tcl command")); @@ -960,7 +1052,6 @@ int Cookfs_PageAddRaw(Cookfs_Pages *p, unsigned char *bytes, int objLength, int idx; int dataSize; unsigned char md5sum[16]; - unsigned char *pageMD5 = p->dataPagesMD5; CookfsLog(printf("Cookfs_PageAdd: new page with [%d] bytes", objLength)) @@ -999,46 +1090,49 @@ int Cookfs_PageAddRaw(Cookfs_Pages *p, unsigned char *bytes, int objLength, } /* see if this entry already exists */ - CookfsLog(printf("Cookfs_PageAdd: Matching page (size=%d bytes)", objLength)) - for (idx = 0; idx < p->dataNumPages; idx++, pageMD5 += 16) { - if (memcmp(pageMD5, md5sum, 16) == 0) { - /* even if MD5 checksums are the same, we still need to validate contents of the page */ - Cookfs_PageObj otherPageData; - int isMatched = 0; - - CookfsLog(printf("Cookfs_PageAdd: Comparing page %d", idx)) - - /* use -1000 weight as it is temporary page and we don't really need it in cache */ - otherPageData = Cookfs_PageGet(p, idx, -1000, err); - // Do not increment refcount for otherPageData, Cookfs_PageGet() - // returns a page with refcount=1. 
- - /* fail in case when decompression is not available - * - * if page with same checksum was found, verify its contents as we - * do not rely on MD5 checksum - this avoids issue with MD5 collissions */ - if (otherPageData == NULL) { - CookfsLog(printf("Cookfs_PageAdd: Unable to verify page with same MD5 checksum")); - return -1; - } else { - if (Cookfs_PageObjSize(otherPageData) != objLength) { - CookfsLog(printf("Cookfs_PageAdd: the length doesn't match")) - } else if (memcmp(bytes, otherPageData, objLength) != 0) { - CookfsLog(printf("Cookfs_PageAdd: the data doesn't match")) - } else { - isMatched = 1; - } - Cookfs_PageObjDecrRefCount(otherPageData); - } + CookfsLog(printf("Cookfs_PageAdd: Matching page (size=%d bytes)", objLength)); + idx = 0; + while (Cookfs_PgIndexSearchByMD5(p->pagesIndex, md5sum, objLength, &idx)) { + /* even if MD5 checksums are the same, we still need to validate contents of the page */ + Cookfs_PageObj otherPageData; + int isMatched = 0; + + CookfsLog(printf("Cookfs_PageAdd: Comparing page %d", idx)) + + /* use -1000 weight as it is temporary page and we don't really need it in cache */ + otherPageData = Cookfs_PageGet(p, idx, -1000, err); + // Do not increment refcount for otherPageData, Cookfs_PageGet() + // returns a page with refcount=1. + + /* fail in case when decompression is not available + * + * if page with same checksum was found, verify its contents as we + * do not rely on MD5 checksum - this avoids issue with MD5 collisions */ + if (otherPageData == NULL) { + CookfsLog(printf("Cookfs_PageAdd: Unable to verify page with same MD5 checksum")); + return -1; + } else { + // We are sure we will not go out of bounds here because + // Cookfs_PgIndexSearchByMD5() returns true only if the page + // matches not only MD5, but also if its size is equal to + // objLength. 
+ if (memcmp(bytes, otherPageData, objLength) != 0) { + CookfsLog(printf("Cookfs_PageAdd: the data doesn't match")) + } else { + isMatched = 1; + } + Cookfs_PageObjDecrRefCount(otherPageData); + } - if (isMatched) { - CookfsLog(printf("Cookfs_PageAdd: Matched page (size=%d bytes) as %d", objLength, idx)) - if (p->dataPagesIsAside) { - idx |= COOKFS_PAGES_ASIDE; - } - return idx; - } - } + if (isMatched) { + CookfsLog(printf("Cookfs_PageAdd: Matched page (size=%d bytes) as %d", objLength, idx)) + if (p->dataPagesIsAside) { + idx |= COOKFS_PAGES_ASIDE; + } + return idx; + } + + idx++; } /* if this page has an aside page set up, ask it to add new page */ @@ -1057,20 +1151,12 @@ int Cookfs_PageAddRaw(Cookfs_Pages *p, unsigned char *bytes, int objLength, return -1; } - /* store index for new page, increment number of pages */ - idx = p->dataNumPages; - (p->dataNumPages)++; - - /* reallocate list of page offsets if exceeded */ - CookfsPagesPageExtendIfNeeded(p, p->dataNumPages); - - memcpy(p->dataPagesMD5 + (idx * 16), md5sum, 16); - - CookfsLog(printf("MD5sum is %08x%08x%08x%08x\n", ((int *) md5sum)[0], ((int *) md5sum)[1], ((int *) md5sum)[2], ((int *) md5sum)[3])) + // Real compression, compressionLevel and sizeUncompressed will be updated + // by Cookfs_WritePage() + idx = Cookfs_PgIndexAddPage(p->pagesIndex, 0, 0, 0, -1, objLength, md5sum); if (Cookfs_AsyncPageAdd(p, idx, bytes, objLength)) { p->pagesUptodate = 0; - p->dataPagesSize[idx] = -1; } else { dataSize = Cookfs_WritePage(p, idx, bytes, objLength, NULL); if (dataSize < 0) { @@ -1079,7 +1165,6 @@ int Cookfs_PageAddRaw(Cookfs_Pages *p, unsigned char *bytes, int objLength, return -1; } p->pagesUptodate = 0; - p->dataPagesSize[idx] = dataSize; } if (p->dataPagesIsAside) { @@ -1130,7 +1215,7 @@ Cookfs_PageObj Cookfs_PageGet(Cookfs_Pages *p, int index, int weight, CookfsLog(printf("Cookfs_PageGet: index [%d] with weight [%d]", index, weight)) - for (; preloadIndex < p->dataNumPages ; preloadIndex++) { + for (; 
preloadIndex < Cookfs_PagesGetLength(p) ; preloadIndex++) { if (!Cookfs_AsyncPagePreload(p, preloadIndex)) { break; } @@ -1147,7 +1232,7 @@ Cookfs_PageObj Cookfs_PageGet(Cookfs_Pages *p, int index, int weight, Cookfs_AsyncDecompressWaitIfLoading(p, index); - for (; preloadIndex < p->dataNumPages ; preloadIndex++) { + for (; preloadIndex < Cookfs_PagesGetLength(p) ; preloadIndex++) { if (!Cookfs_AsyncPagePreload(p, preloadIndex)) { break; } @@ -1162,6 +1247,7 @@ Cookfs_PageObj Cookfs_PageGet(Cookfs_Pages *p, int index, int weight, #endif /* TCL_THREADS */ if (rc != NULL) { + Cookfs_PageObjIncrRefCount(rc); CookfsLog(printf("Cookfs_PageGet: Returning from cache [%p]", (void *)rc)); goto done; } @@ -1181,9 +1267,6 @@ Cookfs_PageObj Cookfs_PageGet(Cookfs_Pages *p, int index, int weight, } done: - if (rc != NULL) { - Cookfs_PageObjIncrRefCount(rc); - } return rc; } @@ -1758,9 +1841,11 @@ void Cookfs_PagesSetCacheSize(Cookfs_Pages *p, int size) { */ Tcl_WideInt Cookfs_GetFilesize(Cookfs_Pages *p) { - Tcl_WideInt rc; Cookfs_PagesWantRead(p); - rc = Cookfs_PagesGetPageOffset(p, p->dataNumPages); + int pagesCount = Cookfs_PgIndexGetLength(p->pagesIndex); + CookfsLog2(printf("enter, total pages count: %d", pagesCount)); + Tcl_WideInt rc = Cookfs_PagesGetPageOffset(p, pagesCount); + CookfsLog2(printf("return %" TCL_LL_MODIFIER "d", rc)); return rc; } @@ -1832,9 +1917,9 @@ void Cookfs_PagesSetAlwaysCompress(Cookfs_Pages *p, int alwaysCompress) { int Cookfs_PagesGetCompression(Cookfs_Pages *p, int *fileCompressionLevel) { Cookfs_PagesWantRead(p); if (fileCompressionLevel != NULL) { - *fileCompressionLevel = p->fileCompressionLevel; + *fileCompressionLevel = p->currentCompressionLevel; } - return p->fileCompression; + return p->currentCompression; } @@ -1858,13 +1943,13 @@ void Cookfs_PagesSetCompression(Cookfs_Pages *p, int fileCompression, int fileCompressionLevel) { Cookfs_PagesWantWrite(p); - if (p->fileCompression != fileCompression || - p->fileCompressionLevel != 
fileCompressionLevel) + if (p->currentCompression != fileCompression || + p->currentCompressionLevel != fileCompressionLevel) { // ensure all async pages are written while(Cookfs_AsyncCompressWait(p, 1)) {}; - p->fileCompression = fileCompression; - p->fileCompressionLevel = fileCompressionLevel; + p->currentCompression = fileCompression; + p->currentCompressionLevel = fileCompressionLevel; } } @@ -1888,11 +1973,9 @@ void Cookfs_PagesSetCompression(Cookfs_Pages *p, int fileCompression, Tcl_WideInt Cookfs_PagesGetPageOffset(Cookfs_Pages *p, int idx) { Cookfs_PagesWantRead(p); - /* TODO: optimize by cache'ing each N-th entry and start from there */ - int i; Tcl_WideInt rc = p->dataInitialOffset; - for (i = 0; i < idx; i++) { - rc += p->dataPagesSize[i]; + if (idx != 0) { + rc += Cookfs_PgIndexGetStartOffset(p->pagesIndex, idx); } return rc; } @@ -1950,20 +2033,27 @@ static Cookfs_PageObj CookfsPagesPageGetInt(Cookfs_Pages *p, int index, } /* if index is larger than number of pages, fail */ - if (index >= p->dataNumPages) { - CookfsLog(printf("GetInt failed: %d >= %d", index, p->dataNumPages)) + if (index >= Cookfs_PagesGetLength(p)) { + CookfsLog(printf("GetInt failed: %d >= %d", index, Cookfs_PagesGetLength(p))); return NULL; } buffer = Cookfs_AsyncPageGet(p, index); if (buffer != NULL) { + Cookfs_PageObjIncrRefCount(buffer); + CookfsLog2(printf("return: result from Cookfs_AsyncPageGet()")); return buffer; } #ifdef TCL_THREADS Tcl_MutexLock(&p->mxIO); #endif /* TCL_THREADS */ - buffer = Cookfs_ReadPage(p, index, -1, 1, COOKFS_COMPRESSION_ANY, err); + buffer = Cookfs_ReadPage(p, index, + Cookfs_PgIndexGetCompression(p->pagesIndex, index), + Cookfs_PgIndexGetSizeCompressed(p->pagesIndex, index), + Cookfs_PgIndexGetSizeUncompressed(p->pagesIndex, index), + Cookfs_PgIndexGetHashMD5(p->pagesIndex, index), + 1, err); #ifdef TCL_THREADS Tcl_MutexUnlock(&p->mxIO); #endif /* TCL_THREADS */ @@ -2007,20 +2097,23 @@ static Cookfs_PageObj CookfsPagesPageGetInt(Cookfs_Pages *p, 
int index, */ static int CookfsReadIndex(Tcl_Interp *interp, Cookfs_Pages *p, Tcl_Obj **err) { - unsigned char *bytes = NULL; - int count = 0; - int i; - int pageCount = 0; - int indexLength = 0; - int pageCompression = 0; - int pageCompressionLevel = 0; - Tcl_WideInt fileSize = 0; + Tcl_WideInt seekOffset = 0; - Tcl_Obj *buffer = NULL; + unsigned char buf[COOKFS_SUFFIX_BYTES]; UNUSED(err); - CookfsLog(printf("CookfsReadIndex 0 - %d", p->useFoffset)) + CookfsLog2(printf("base offset is %d", p->useFoffset)); + + if (p->dataIndex != NULL) { + Cookfs_PageObjDecrRefCount(p->dataIndex); + p->dataIndex = NULL; + } + + if (p->pagesIndex != NULL) { + Cookfs_PgIndexFini(p->pagesIndex); + p->pagesIndex = NULL; + } /* seek to beginning of suffix */ if (p->useFoffset) { @@ -2038,10 +2131,11 @@ static int CookfsReadIndex(Tcl_Interp *interp, Cookfs_Pages *p, Tcl_Obj **err) { CookfsLog(printf("CookfsReadIndex lookup seekOffset = %d", ((int) seekOffset))) Tcl_Seek(p->fileChannel, seekOffset, SEEK_SET); byteObj = Tcl_NewObj(); + Tcl_IncrRefCount(byteObj); if (Tcl_ReadChars(p->fileChannel, byteObj, 65536, 0) > 0) { Tcl_Size size; - bytes = Tcl_GetByteArrayFromObj(byteObj, &size); - for (i = 0 ; i <= (size - COOKFS_SIGNATURE_LENGTH) ; i++) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(byteObj, &size); + for (int i = 0 ; i <= (size - COOKFS_SIGNATURE_LENGTH) ; i++) { if (bytes[i] == p->fileSignature[0]) { if (memcmp(bytes + i, p->fileSignature, COOKFS_SIGNATURE_LENGTH) == 0) { lastMatch = bytes + i; @@ -2055,11 +2149,15 @@ static int CookfsReadIndex(Tcl_Interp *interp, Cookfs_Pages *p, Tcl_Obj **err) { CookfsLog(printf("CookfsReadIndex lookup done seekOffset = %d", ((int) seekOffset))) } } - Tcl_IncrRefCount(byteObj); Tcl_DecrRefCount(byteObj); if (lastMatch == NULL) { p->foffset = Tcl_Seek(p->fileChannel, 0, SEEK_END); CookfsLog(printf("CookfsReadIndex lookup failed")) + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": signature not 
found", -1)); + } + return 0; } } if (seekOffset >= 0) { @@ -2074,184 +2172,186 @@ static int CookfsReadIndex(Tcl_Interp *interp, Cookfs_Pages *p, Tcl_Obj **err) { } return 0; } - fileSize = seekOffset + COOKFS_SUFFIX_BYTES; - CookfsLog(printf("Size=%d", ((int) fileSize))) - /* read 16 bytes from end of cookfs archive */ - buffer = Tcl_NewObj(); - Tcl_IncrRefCount(buffer); - count = Tcl_ReadChars(p->fileChannel, buffer, COOKFS_SUFFIX_BYTES, 0); + /* read suffix from end of cookfs archive */ + int count = Tcl_Read(p->fileChannel, (char *)buf, COOKFS_SUFFIX_BYTES); if (count != COOKFS_SUFFIX_BYTES) { - Tcl_DecrRefCount(buffer); - CookfsLog(printf("Failed to read entire index tail: %d / %d", count, COOKFS_SUFFIX_BYTES)) - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG ": unable to read index suffix", -1)); - } - return 0; + CookfsLog(printf("Failed to read entire index tail: %d / %d", count, + COOKFS_SUFFIX_BYTES)); + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": unable to read index suffix", -1)); + } + return 0; } - bytes = Tcl_GetByteArrayFromObj(buffer, NULL); - if (memcmp(bytes + 10, p->fileSignature, COOKFS_SIGNATURE_LENGTH) != 0) { - Tcl_DecrRefCount(buffer); - CookfsLog(printf("Invalid file signature found")) - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG ": invalid file signature", -1)); - } - return 0; + + if (memcmp(&buf[COOKFS_SUFFIX_OFFSET_SIGNATURE], p->fileSignature, + COOKFS_SIGNATURE_LENGTH) != 0) + { + CookfsLog(printf("Invalid file signature found")); + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": invalid file signature", -1)); + } + return 0; } - /* get default compression, index length and number of pages */ - pageCompression = bytes[8] & 0xff; - pageCompressionLevel = bytes[9] & 0xff; - p->fileCompression = pageCompression; - p->fileCompressionLevel = 
pageCompressionLevel; - Cookfs_Binary2Int(bytes, &indexLength, 1); - Cookfs_Binary2Int(bytes + 4, &pageCount, 1); - CookfsLog(printf("Pages=%d; compression=%d level=%d", pageCount, - pageCompression, pageCompressionLevel)) - Tcl_DecrRefCount(buffer); + /* get default compression and encryption */ + p->baseCompression = buf[COOKFS_SUFFIX_OFFSET_BASE_COMPRESSION] & 0xff; + p->baseCompressionLevel = buf[COOKFS_SUFFIX_OFFSET_BASE_LEVEL] & 0xff; + p->encryption = buf[COOKFS_SUFFIX_OFFSET_ENCRYPTION] & 0xff; - CookfsLog(printf("indexLength=%d pageCount=%d foffset=%d", indexLength, pageCount, p->useFoffset)) + /* get pgindex and fsindex sizes */ + int pgindexSizeCompressed; + Cookfs_Binary2Int(&buf[COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_COMPR], + &pgindexSizeCompressed, 1); - /* read files index */ + int fsindexSizeCompressed; + Cookfs_Binary2Int(&buf[COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_COMPR], + &fsindexSizeCompressed, 1); - /* seek to beginning of index, depending on if foffset was specified */ - if (Tcl_Seek(p->fileChannel, p->foffset, SEEK_SET) < 0) { - goto indexReadError; - } - if (Tcl_Seek(p->fileChannel, -COOKFS_SUFFIX_BYTES - indexLength, SEEK_CUR) < 0) { - goto indexReadError; + // Validate the sizes of pgindex and fsindex. We must have enough bytes + // in the file before the cookfs suffix. 
+ if ((pgindexSizeCompressed + fsindexSizeCompressed + + COOKFS_SUFFIX_BYTES) > p->foffset) + { + CookfsLog(printf("there are enough bytes in the file, pgindex size:" + " %d, fsindex size: %d, suffix size: %d, suffix offset: %" + TCL_LL_MODIFIER "d", pgindexSizeCompressed, fsindexSizeCompressed, + (int)COOKFS_SUFFIX_BYTES, p->foffset)); + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": failed to read index", -1)); + } + return 0; } - CookfsLog(printf("IndexOffset Read = %d", (int) seekOffset)) - if (p->dataIndex != NULL) { - Cookfs_PageObjDecrRefCount(p->dataIndex); + if (pgindexSizeCompressed == 0 && fsindexSizeCompressed == 0) { + CookfsLog2(printf("both pgindex and fsindex are empty and skipped")); + goto skipFsindex; } - p->dataIndex = Cookfs_ReadPage(p, -1, indexLength, 1, COOKFS_COMPRESSION_ANY, NULL); + CookfsLog2(printf("try to seek to index data...")); - if (p->dataIndex != NULL) { - goto indexReadOk; + /* seek to beginning of index data */ + if (Tcl_Seek(p->fileChannel, -COOKFS_SUFFIX_BYTES - pgindexSizeCompressed + - fsindexSizeCompressed, SEEK_CUR) < 0) + { + CookfsLog2(printf("unable to seek to index data")); + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": unable to seek to index data", -1)); + } + return 0; } -indexReadError: - - CookfsLog(printf("Unable to read index")) - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG ": unable to read index", -1)); + if (pgindexSizeCompressed == 0) { + CookfsLog2(printf("pgindex is empty and skipped")); + goto skipPgindex; } - return 0; -indexReadOk: + CookfsLog2(printf("read pgindex, size: %d", pgindexSizeCompressed)); - Cookfs_PageObjIncrRefCount(p->dataIndex); - /* read page MD5 checksums and pages */ - /* seek to beginning of data, depending on if foffset was specified */ - Tcl_Seek(p->fileChannel, p->foffset, SEEK_SET); - seekOffset = Tcl_Seek(p->fileChannel, -COOKFS_SUFFIX_BYTES 
- (pageCount * 20) - indexLength, SEEK_CUR); + int pgindexSizeUncompressed; + Cookfs_Binary2Int(&buf[COOKFS_SUFFIX_OFFSET_PGINDEX_SIZE_UNCOMPR], + &pgindexSizeUncompressed, 1); + int pgindexCompression = buf[COOKFS_SUFFIX_OFFSET_PGINDEX_COMPRESSION] + & 0xff; + unsigned char *pgindexHashMD5 = &buf[COOKFS_SUFFIX_OFFSET_PGINDEX_HASH]; - /* if seeking fails, we assume no suffix exists */ - if (seekOffset < 0) { - CookfsLog(printf("Unable to seek for reading page sizes")) - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG ": page sizes not found", -1)); - } - return 0; + Cookfs_PageObj pgindexDataObj = Cookfs_ReadPage(p, -1, pgindexCompression, + pgindexSizeCompressed, pgindexSizeUncompressed, pgindexHashMD5, 1, err); + + if (pgindexDataObj == NULL) { + CookfsLog2(printf("unable to read or decompress pgindex")); + goto pgindexReadError; } - /* extend pages buffer if needed */ - CookfsPagesPageExtendIfNeeded(p, pageCount); + p->pagesIndex = Cookfs_PgIndexImport(pgindexDataObj, + Cookfs_PageObjSize(pgindexDataObj), NULL); - /* read MD5 checksums */ - buffer = Tcl_NewObj(); - Tcl_IncrRefCount(buffer); + Cookfs_PageObjDecrRefCount(pgindexDataObj); - count = Tcl_ReadChars(p->fileChannel, buffer, 16 * pageCount, 0); - if (count != (16 * pageCount)) { - Tcl_DecrRefCount(buffer); - CookfsLog(printf("Failed to read md5 checksums")) - if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG ": unable to read page checksums", -1)); - } - return 0; + if (p->pagesIndex == NULL) { + goto pgindexReadError; } - bytes = Tcl_GetByteArrayFromObj(buffer, NULL); - memcpy(p->dataPagesMD5, bytes, 16 * pageCount); - Tcl_DecrRefCount(buffer); - - /* read page sizes */ - buffer = Tcl_NewObj(); - Tcl_IncrRefCount(buffer); - count = Tcl_ReadChars(p->fileChannel, buffer, 4 * pageCount, 0); - if (count != (4 * pageCount)) { - Tcl_DecrRefCount(buffer); - CookfsLog(printf("Failed to read page buffer")) - if (interp != NULL) { - 
Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG ": unable to read page sizes", -1)); - } - return 0; + + // We have successfully read pgindex. Let's continue with fsindex. + + goto skipPgindex; + +pgindexReadError: + + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": unable to read pgindex", -1)); } - bytes = Tcl_GetByteArrayFromObj(buffer, NULL); - Cookfs_Binary2Int(bytes, p->dataPagesSize, pageCount); - Tcl_DecrRefCount(buffer); + return 0; - CookfsLog(printf("Cookfs ReadIndex first page size=%d", (pageCount > 0) ? p->dataPagesSize[0] : -1)) +skipPgindex: - /* set this to 0 so we can calculate actual size of all pages */ - p->dataInitialOffset = 0; - p->dataNumPages = pageCount; + if (fsindexSizeCompressed == 0) { + CookfsLog2(printf("fsindex is empty and skipped")); + goto skipFsindex; + } + + CookfsLog2(printf("read fsindex, size: %d", fsindexSizeCompressed)); - /* calculate size of all pages by requesting offset for page after the last existing page */ - p->dataAllPagesSize = Cookfs_PagesGetPageOffset(p, pageCount); + int fsindexSizeUncompressed; + Cookfs_Binary2Int(&buf[COOKFS_SUFFIX_OFFSET_FSINDEX_SIZE_UNCOMPR], + &fsindexSizeUncompressed, 1); + int fsindexCompression = buf[COOKFS_SUFFIX_OFFSET_FSINDEX_COMPRESSION] + & 0xff; + unsigned char *fsindexHashMD5 = &buf[COOKFS_SUFFIX_OFFSET_FSINDEX_HASH]; - /* calculate offset from data - offset to end of archive - * deducted by all index elements size and size of all pages */ - p->dataInitialOffset = fileSize - - (COOKFS_SUFFIX_BYTES + p->dataAllPagesSize + (p->dataNumPages * 20) + indexLength); + p->dataIndex = Cookfs_ReadPage(p, -1, fsindexCompression, + fsindexSizeCompressed, fsindexSizeUncompressed, fsindexHashMD5, 1, err); - CookfsLog(printf("Pages size=%d offset=%d", (int) p->dataAllPagesSize, (int) p->dataInitialOffset)) - for (i = 0; i < pageCount; i++) { - CookfsLog(printf("Offset %d is %d", i, (int) Cookfs_PagesGetPageOffset(p, i))) + if 
(p->dataIndex == NULL) { + CookfsLog2(printf("unable to read or decompress fsindex")); + goto fsindexReadError; } - return 1; -} + // We have successfully read fsindex. Let's continue. -/* - *---------------------------------------------------------------------- - * - * CookfsPagesPageExtendIfNeeded -- - * - * Reallocate dataPagesSize and dataPagesMD5 to fit count number - * of pages; reallocation is only made if current number of memory - * is smaller than count - * - * Results: - * None - * - * Side effects: - * dataPagesSize and dataPagesMD5 might be moved to new location(s) - * - *---------------------------------------------------------------------- - */ + goto skipFsindex; -static void CookfsPagesPageExtendIfNeeded(Cookfs_Pages *p, int count) { - int changed = 0; - CookfsLog(printf("CookfsPagesPageExtendIfNeeded(%d vs %d)", p->dataPagesDataSize, count)) +fsindexReadError: - /* find new data size that fits required number of pages */ - while (p->dataPagesDataSize < count) { - changed = 1; - p->dataPagesDataSize += p->dataPagesDataSize; + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": unable to read fsindex", -1)); } + return 0; - /* if changed, reallocate both structures */ - CookfsLog(printf("CookfsPagesPageExtendIfNeeded(%d vs %d) -> %d", p->dataPagesDataSize, count, changed)) - if (changed) { - p->dataPagesSize = (int *) ckrealloc((void *) p->dataPagesSize, - p->dataPagesDataSize * sizeof(int)); - p->dataPagesMD5 = (unsigned char *) ckrealloc((void *) p->dataPagesMD5, - p->dataPagesDataSize * 16); +skipFsindex: + + // Calculate the initial offset for the pages. First, let's find the offset + // of the end of the pages based on the end offset and subtracting + // the size of pgindex/fsindex, as well as subtracting the size of + // the cookfs suffix. 
+ p->dataInitialOffset = p->foffset - pgindexSizeCompressed - + fsindexSizeCompressed - COOKFS_SUFFIX_BYTES; + + // If we have page data, subtract the size of all pages. + if (p->pagesIndex != NULL) { + p->dataInitialOffset -= Cookfs_PgIndexGetStartOffset(p->pagesIndex, + Cookfs_PgIndexGetLength(p->pagesIndex)); + } + + if (p->dataInitialOffset < 0) { + CookfsLog2(printf("ERROR: file doesn't have enough bytes for all" + " pages, calculated initial offset is %" TCL_LL_MODIFIER "d", + p->dataInitialOffset)); + p->dataInitialOffset = 0; + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_NewStringObj(COOKFS_PAGES_ERRORMSG + ": file does not contain enough bytes for all pages", -1)); + } + return 0; } + + return 1; } diff --git a/generic/pages.h b/generic/pages.h index fdbed4f..86a3fd5 100644 --- a/generic/pages.h +++ b/generic/pages.h @@ -12,7 +12,8 @@ typedef struct _Cookfs_Pages Cookfs_Pages; Cookfs_Pages *Cookfs_PagesGetHandle(Tcl_Interp *interp, const char *cmdName); Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, - int fileReadOnly, int fileCompression, int fileCompressionLevel, + int fileReadOnly, int baseCompression, int baseCompressionLevel, + int currentCompression, int currentCompressionLevel, char *fileSignature, int useFoffset, Tcl_WideInt foffset, int isAside, int asyncDecompressQueueSize, Tcl_Obj *compressCommand, Tcl_Obj *decompressCommand, Tcl_Obj *asyncCompressCommand, diff --git a/generic/pagesAsync.c b/generic/pagesAsync.c new file mode 100644 index 0000000..02d0422 --- /dev/null +++ b/generic/pagesAsync.c @@ -0,0 +1,475 @@ +/* + * pagesAsync.c + * + * Provides functions for pages compression + * + * (c) 2010-2011 Wojciech Kocjan, Pawel Salawa + * (c) 2011-2014 Wojciech Kocjan + * (c) 2024 Konstantin Kushnir + */ + +#include "cookfs.h" +#include "pagesAsync.h" +#include "pagesInt.h" +#include "pagesCompr.h" + +static Tcl_Obj *CookfsRunAsyncCompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg); +static Tcl_Obj 
*CookfsRunAsyncDecompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg); + +/* + *---------------------------------------------------------------------- + * + * Cookfs_AsyncPageGet -- + * + * Check if page is currently processed as async compression page + * or is pending async decompression and return it if it is. + * + * Results: + * Page contents if found; NULL if not found; + * The page contents' ref counter is increased before returning + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +Cookfs_PageObj Cookfs_AsyncPageGet(Cookfs_Pages *p, int idx) { + if ((p->currentCompression == COOKFS_COMPRESSION_CUSTOM) && (p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { + int i; + for (i = 0 ; i < p->asyncPageSize ; i++) { + if (p->asyncPage[i].pageIdx == idx) { + return Cookfs_PageObjNewFromByteArray(p->asyncPage[i].pageContents);; + } + } + } + if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { + int i; + for (i = 0 ; i < p->asyncDecompressQueue ; i++) { + if (p->asyncDecompressIdx[i] == idx) { + while (p->asyncDecompressIdx[i] == idx) { + Cookfs_AsyncDecompressWait(p, idx, 1); + } + /* don't modify here cache entry weight, it will be set by Cookfs_PageGet */ + return Cookfs_PageCacheGet(p, idx, 0, 0); + } + } + } + return NULL; +} + + +/* + *---------------------------------------------------------------------- + * + * Cookfs_AsyncPageAdd -- + * + * Add page to be asynchronously processed if enabled. 
+ * + * Results: + * Whether async compression is enabled or not + * + * Side effects: + * Checks if other pages have been processed and may remove + * other pages from the processing queue + * + *---------------------------------------------------------------------- + */ + +int Cookfs_AsyncPageAdd(Cookfs_Pages *p, int idx, unsigned char *bytes, int dataSize) { + if ((p->currentCompression == COOKFS_COMPRESSION_CUSTOM) && (p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { + Tcl_Obj *newObjObj; + int asyncIdx; + // retrieve any already processed queues + while (Cookfs_AsyncCompressWait(p, 0)) {} + while (p->asyncPageSize >= COOKFS_PAGES_MAX_ASYNC) { + Cookfs_AsyncCompressWait(p, 0); + } + asyncIdx = p->asyncPageSize++; + newObjObj = Tcl_NewByteArrayObj(bytes, dataSize); + // copy the object to avoid increased memory usage + Tcl_IncrRefCount(newObjObj); + p->asyncPage[asyncIdx].pageIdx = idx; + p->asyncPage[asyncIdx].pageContents = newObjObj; + CookfsRunAsyncCompressCommand(p, p->asyncCommandProcess, idx, newObjObj); + return 1; + } else { + return 0; + } +} + + +/* + *---------------------------------------------------------------------- + * + * Cookfs_AsyncCompressWait -- + * + * Wait / check whether an asynchronous page has already been + * processed; if require is set, this indicates cookfs is + * finalizing and data is required + * + * Results: + * Whether further clal to this API is needed or not + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +int Cookfs_AsyncCompressWait(Cookfs_Pages *p, int require) { + // TODO: properly throw errors here? 
+ if ((p->currentCompression == COOKFS_COMPRESSION_CUSTOM) && (p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { + Tcl_Obj *result, *resObj; + Tcl_Size resultLength; + int i = 0; + int idx = -1; + + if (p->asyncPageSize == 0) { + if (!require) { + return 0; + } + } else { + idx = p->asyncPage[0].pageIdx; + } + + result = CookfsRunAsyncCompressCommand(p, p->asyncCommandWait, idx, Tcl_NewIntObj(require)); + if (result == NULL) { + resultLength = 0; + } else if (Tcl_ListObjLength(NULL, result, &resultLength) != TCL_OK) { + Tcl_DecrRefCount(result); + resultLength = 0; + } + + if (resultLength > 0) { + if (Tcl_ListObjIndex(NULL, result, 0, &resObj) != TCL_OK) { + Tcl_DecrRefCount(result); + return 0; + } + + if (Tcl_GetIntFromObj(NULL, resObj, &i) != TCL_OK) { + Tcl_DecrRefCount(result); + return 0; + } + + // TODO: throw error? + if (i != idx) { + Tcl_DecrRefCount(result); + return 1; + } + + if (Tcl_ListObjIndex(NULL, result, 1, &resObj) != TCL_OK) { + Tcl_DecrRefCount(result); + return 0; + } + + Tcl_IncrRefCount(resObj); + Cookfs_WriteTclObj(p, idx, p->asyncPage[0].pageContents, resObj); + Tcl_DecrRefCount(resObj); + Tcl_DecrRefCount(result); + + (p->asyncPageSize)--; + Tcl_DecrRefCount(p->asyncPage[0].pageContents); + for (i = 0 ; i < p->asyncPageSize ; i++) { + p->asyncPage[i].pageIdx = p->asyncPage[i + 1].pageIdx; + p->asyncPage[i].pageContents = p->asyncPage[i + 1].pageContents; + } + + return (p->asyncPageSize > 0); + } else { + if (p->asyncPageSize > 0) { + return require; + } else { + return 0; + } + } + } else { + return 0; + } +} + + +/* + *---------------------------------------------------------------------- + * + * Cookfs_AsyncCompressFinalize -- + * + * Call code to finalize async compression + * + * Results: + * None + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + + +void Cookfs_AsyncCompressFinalize(Cookfs_Pages *p) { + if ((p->asyncCompressCommandPtr != 
NULL) && (p->asyncCompressCommandLen > 3)) { + CookfsRunAsyncCompressCommand(p, p->asyncCommandFinalize, -1, Tcl_NewIntObj(1)); + } +} + + +// TODO: document +int Cookfs_AsyncPagePreload(Cookfs_Pages *p, int idx) { + CookfsLog(printf("Cookfs_AsyncPagePreload: index [%d]", idx)) + if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { + Tcl_Obj *dataObj; + int i; + + for (i = 0 ; i < p->asyncDecompressQueue ; i++) { + if (p->asyncDecompressIdx[i] == idx) { + CookfsLog(printf("Cookfs_AsyncPagePreload: return 1 - Page %d already in async decompress queue", i)) + return 1; + } + } + + /* don't modify page weight in cache, as here we are just checking if it is already loaded */ + if (Cookfs_PageCacheGet(p, idx, 0, 0) != NULL) { + // page already in cache and we just moved it to top; do nothing + CookfsLog(printf("Cookfs_AsyncPagePreload: return 1 - Page already in cache and we just moved it to top")) + return 1; + } + + // if queue is full, do not preload + if (p->asyncDecompressQueue >= p->asyncDecompressQueueSize) { + CookfsLog(printf("Cookfs_AsyncPagePreload: return 0 - Queue is full, do not preload")) + return 0; + } + + CookfsLog(printf("Cookfs_AsyncPagePreload: Reading page %d for async decompress", idx)) + // TODO: do something with possible error message + Cookfs_PageObj dataPageObj = Cookfs_ReadPage(p, idx, + Cookfs_PgIndexGetCompression(p->pagesIndex, idx), + Cookfs_PgIndexGetSizeCompressed(p->pagesIndex, idx), + Cookfs_PgIndexGetSizeUncompressed(p->pagesIndex, idx), + Cookfs_PgIndexGetHashMD5(p->pagesIndex, idx), + 0, NULL); + if (dataPageObj == NULL) { + CookfsLog(printf("Cookfs_AsyncPagePreload: ERROR: Cookfs_ReadPage returned NULL, return 1")); + return 1; + } + + dataObj = Cookfs_PageObjCopyAsByteArray(dataPageObj); + Cookfs_PageObjDecrRefCount(dataPageObj); + + if (dataObj == NULL) { + CookfsLog(printf("Cookfs_AsyncPagePreload: ERROR: failed to convert Tcl_Obj->PageObj, return 1")); + 
return 1; + } + + Tcl_IncrRefCount(dataObj); + p->asyncDecompressIdx[p->asyncDecompressQueue] = idx; + p->asyncDecompressQueue++; + CookfsLog(printf("Adding page %d for async decompress", idx)) + CookfsRunAsyncDecompressCommand(p, p->asyncCommandProcess, idx, dataObj); + Tcl_DecrRefCount(dataObj); + + CookfsLog(printf("Cookfs_AsyncPagePreload: return 1")) + return 1; + } + CookfsLog(printf("Cookfs_AsyncPagePreload: return 0")) + return 0; +} + + +// TODO: document +void Cookfs_AsyncDecompressWaitIfLoading(Cookfs_Pages *p, int idx) { + if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { + int i; + + for (i = 0 ; i < p->asyncDecompressQueue ; i++) { + if (p->asyncDecompressIdx[i] == idx) { + Cookfs_AsyncDecompressWait(p, idx, 1); + break; + } + } + } +} + + +// TODO: document +int Cookfs_AsyncDecompressWait(Cookfs_Pages *p, int idx, int require) { + if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { + Tcl_Obj *result, *resObj; + Tcl_Size resultLength; + int i = 0; + + if (p->asyncDecompressQueue == 0) { + if (!require) { + return 0; + } + } + + CookfsLog(printf("Cookfs_AsyncDecompressWait: calling callback")) + + result = CookfsRunAsyncDecompressCommand(p, p->asyncCommandWait, idx, Tcl_NewIntObj(require)); + if ((result == NULL) || (Tcl_ListObjLength(NULL, result, &resultLength) != TCL_OK)) { + resultLength = 0; + } + + if (resultLength >= 2) { + int j, k; + if (Tcl_ListObjIndex(NULL, result, 0, &resObj) != TCL_OK) { + return 0; + } + + if (Tcl_GetIntFromObj(NULL, resObj, &i) != TCL_OK) { + return 0; + } + + if (Tcl_ListObjIndex(NULL, result, 1, &resObj) != TCL_OK) { + return 0; + } + + CookfsLog(printf("Cookfs_AsyncDecompressWait: callback returned data for %d", i)) + Tcl_IncrRefCount(resObj); + Cookfs_PageObj pageObj = Cookfs_PageObjNewFromByteArray(resObj); + Tcl_DecrRefCount(resObj); + if (pageObj != NULL) { + 
Cookfs_PageObjIncrRefCount(pageObj); + /* + Set the page weight to 1000 because it should be cached and used further. + If it will be displaced by other weighty pages, then preloading makes no sense. + Real page weight will be set by Cookfs_PageGet + */ + Cookfs_PageCacheSet(p, i, pageObj, 1000); + Cookfs_PageObjDecrRefCount(pageObj); + } + + Tcl_DecrRefCount(result); + + CookfsLog(printf("Cookfs_AsyncDecompressWait: cleaning up decompression queue")) + for (j = 0 ; j < p->asyncDecompressQueue ; j++) { + if (p->asyncDecompressIdx[j] == i) { + for (k = j ; k < (p->asyncDecompressQueue - 1) ; k++) { + p->asyncDecompressIdx[k] = p->asyncDecompressIdx[k + 1]; + } + (p->asyncDecompressQueue)--; + // needed to properly detect it in Cookfs_AsyncPageGet + p->asyncDecompressIdx[p->asyncDecompressQueue] = -1; + break; + } + } + + CookfsLog(printf("Cookfs_AsyncDecompressWait: cleaning up decompression queue done")) + + return (p->asyncDecompressQueue > 0); + } else { + if (result != NULL) { + Tcl_DecrRefCount(result); + } + if (p->asyncDecompressQueue > 0) { + return require; + } else { + return 0; + } + } + } else { + return 0; + } +} + + +// TODO: document +void Cookfs_AsyncDecompressFinalize(Cookfs_Pages *p) { + if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { + CookfsRunAsyncDecompressCommand(p, p->asyncCommandFinalize, -1, Tcl_NewIntObj(1)); + } +} + +/* + *---------------------------------------------------------------------- + * + * CookfsRunAsyncCompressCommand -- + * + * Helper to run the async compress command with specified + * arguments in interp from Cookfs_Pages object + * + * Reverts Tcl interpreter's result to one before + * this function was called. 
+ * + * Results: + * result from command invocation or NULL in case of failure + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static Tcl_Obj *CookfsRunAsyncCompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg) { + Tcl_Obj *prevResult, *data; + prevResult = Tcl_GetObjResult(p->interp); + Tcl_IncrRefCount(prevResult); + p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 3] = cmd; + p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2] = Tcl_NewIntObj(idx); + p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1] = arg; + Tcl_IncrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2]); + Tcl_IncrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1]); + if (Tcl_EvalObjv(p->interp, p->asyncCompressCommandLen, p->asyncCompressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { + Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2]); + Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1]); + return NULL; + } + Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2]); + Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1]); + p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 3] = NULL; + p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2] = NULL; + p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1] = NULL; + data = Tcl_GetObjResult(p->interp); + Tcl_IncrRefCount(data); + Tcl_SetObjResult(p->interp, prevResult); + Tcl_DecrRefCount(prevResult); + return data; +} + + +/* + *---------------------------------------------------------------------- + * + * CookfsRunAsyncDecompressCommand -- + * + * Helper to run the async decompress command with specified + * arguments in interp from Cookfs_Pages object + * + * Reverts Tcl interpreter's result to one before + * this function was called. 
+ * + * Results: + * result from command invocation or NULL in case of failure + * + * Side effects: + * None + * + *---------------------------------------------------------------------- + */ + +static Tcl_Obj *CookfsRunAsyncDecompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg) { + Tcl_Obj *prevResult, *data; + prevResult = Tcl_GetObjResult(p->interp); + Tcl_IncrRefCount(prevResult); + p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 3] = cmd; + p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2] = Tcl_NewIntObj(idx); + p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1] = arg; + Tcl_IncrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2]); + Tcl_IncrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1]); + if (Tcl_EvalObjv(p->interp, p->asyncDecompressCommandLen, p->asyncDecompressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { + Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2]); + Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1]); + return NULL; + } + Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2]); + Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1]); + p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 3] = NULL; + p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2] = NULL; + p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1] = NULL; + data = Tcl_GetObjResult(p->interp); + Tcl_IncrRefCount(data); + Tcl_SetObjResult(p->interp, prevResult); + Tcl_DecrRefCount(prevResult); + return data; +} diff --git a/generic/pagesAsync.h b/generic/pagesAsync.h new file mode 100644 index 0000000..fc3e319 --- /dev/null +++ b/generic/pagesAsync.h @@ -0,0 +1,20 @@ +/* + (c) 2010-2014 Wojciech Kocjan, Pawel Salawa + (c) 2024 Konstantin Kushnir +*/ + +#ifndef COOKFS_PAGESASYNC_H +#define COOKFS_PAGESASYNC_H 1 + +#include 
"pages.h" + +Cookfs_PageObj Cookfs_AsyncPageGet(Cookfs_Pages *p, int idx); +int Cookfs_AsyncPageAdd(Cookfs_Pages *p, int idx, unsigned char *bytes, int dataSize); +void Cookfs_AsyncDecompressWaitIfLoading(Cookfs_Pages *p, int idx); +int Cookfs_AsyncCompressWait(Cookfs_Pages *p, int require); +void Cookfs_AsyncCompressFinalize(Cookfs_Pages *p); +int Cookfs_AsyncPagePreload(Cookfs_Pages *p, int idx); +int Cookfs_AsyncDecompressWait(Cookfs_Pages *p, int idx, int require); +void Cookfs_AsyncDecompressFinalize(Cookfs_Pages *p); + +#endif /* COOKFS_PAGESASYNC_H */ diff --git a/generic/pagesCmd.c b/generic/pagesCmd.c index ee5ecbc..b1141e5 100644 --- a/generic/pagesCmd.c +++ b/generic/pagesCmd.c @@ -170,8 +170,8 @@ static int CookfsRegisterPagesObjectCmd(ClientData clientData, Tcl_Interp *inter Cookfs_Pages *pages; int idx; int oReadOnly = 0; - int oCompression; - int oCompressionLevel; + int oCompression = -1; + int oCompressionLevel = -1; int tobjc = objc; int oCachesize = -1; int useFoffset = 0; @@ -308,8 +308,12 @@ static int CookfsRegisterPagesObjectCmd(ClientData clientData, Tcl_Interp *inter tobjv++; } - if (Cookfs_CompressionFromObj(interp, compression, &oCompression, &oCompressionLevel) != TCL_OK) { - return TCL_ERROR; + if (compression != NULL) { + if (Cookfs_CompressionFromObj(interp, compression, &oCompression, + &oCompressionLevel) != TCL_OK) + { + return TCL_ERROR; + } } if (tobjc != 2) { @@ -319,8 +323,8 @@ static int CookfsRegisterPagesObjectCmd(ClientData clientData, Tcl_Interp *inter /* Create cookfs instance */ Tcl_Obj *err = NULL; pages = Cookfs_PagesInit(interp, tobjv[1], oReadOnly, oCompression, - oCompressionLevel, NULL, useFoffset, foffset, 0, - asyncDecompressQueueSize, compressCmd, decompressCmd, + oCompressionLevel, oCompression, oCompressionLevel, NULL, useFoffset, + foffset, 0, asyncDecompressQueueSize, compressCmd, decompressCmd, asyncCompressCmd, asyncDecompressCmd, &err); if (err != NULL) { Tcl_SetObjResult(interp, err); @@ -798,7 +802,8 @@ 
static int CookfsPagesCmdAside(Cookfs_Pages *pages, Tcl_Interp *interp, int objc the corresponding error message below if Cookfs_PagesInit() failed. */ asidePages = Cookfs_PagesInit(pages->interp, objv[2], 0, - pages->fileCompression, pages->fileCompressionLevel, + pages->baseCompression, pages->baseCompressionLevel, + pages->currentCompression, pages->currentCompressionLevel, NULL, 0, 0, 1, 0, NULL, NULL, NULL, NULL, NULL); if (asidePages == NULL) { diff --git a/generic/pagesCompr.c b/generic/pagesCompr.c index 9c6f1cf..057efc6 100644 --- a/generic/pagesCompr.c +++ b/generic/pagesCompr.c @@ -13,6 +13,7 @@ #include "pagesInt.h" #include "pagesCompr.h" #include "pagesComprZlib.h" +#include "pagesComprCustom.h" #ifdef COOKFS_USEBZ2 #include "pagesComprBz2.h" #endif @@ -28,13 +29,9 @@ /* declarations of static and/or internal functions */ static Tcl_Obj **CookfsCreateCompressionCommand(Tcl_Interp *interp, Tcl_Obj *cmd, int *lenPtr, int additionalElements); -static Cookfs_PageObj CookfsReadPageCustom(Cookfs_Pages *p, int size, Tcl_Obj **err); -static int CookfsWritePageCustom(Cookfs_Pages *p, unsigned char *bytes, int origSize); #ifdef USE_VFS_COMMANDS_FOR_ZIP static int CookfsCheckCommandExists(Tcl_Interp *interp, const char *commandName); #endif -static Tcl_Obj *CookfsRunAsyncCompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg); -static Tcl_Obj *CookfsRunAsyncDecompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg); /* compression data */ const char *cookfsCompressionOptions[] = { @@ -209,9 +206,11 @@ int Cookfs_CompressionFromObj(Tcl_Interp *interp, Tcl_Obj *obj, goto error; } if (compressionLevel < -1 || compressionLevel > 255) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("the compression level" - " is expected to be an integer between -1 and 255," - " but got \"%d\"", compressionLevel)); + if (interp != NULL) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf("the compression level" + " is expected to be an integer between -1 and 255," + " 
but got \"%d\"", compressionLevel)); + } goto error; } Tcl_DecrRefCount(level); @@ -468,102 +467,136 @@ int Cookfs_SetCompressCommands(Cookfs_Pages *p, Tcl_Obj *compressCommand, Tcl_Ob *---------------------------------------------------------------------- */ -Cookfs_PageObj Cookfs_ReadPage(Cookfs_Pages *p, int idx, int size, int decompress, int compressionType, Tcl_Obj **err) { +Cookfs_PageObj Cookfs_ReadPage(Cookfs_Pages *p, int idx, int compression, + int sizeCompressed, int sizeUncompressed, unsigned char *md5hash, + int decompress, Tcl_Obj **err) +{ - if (idx >= p->dataNumPages) { - return NULL; - } + UNUSED(md5hash); p->fileLastOp = COOKFS_LASTOP_READ; - CookfsLog(printf("Cookfs_ReadPage #%d size:%d compression:%d", idx, size, p->fileCompression)) - if (size == 0) { - /* if page was empty, no need to read anything */ - return Cookfs_PageObjAlloc(0); - } else { - /* read compression algorithm first */ - Tcl_Obj *byteObj; - byteObj = Tcl_NewObj(); - if (idx >= 0) { - Tcl_WideInt offset = Cookfs_PagesGetPageOffset(p, idx); - Tcl_Seek(p->fileChannel, offset, SEEK_SET); - if (size == -1) { - size = p->dataPagesSize[idx]; - } - } - if (Tcl_ReadChars(p->fileChannel, byteObj, 1, 0) != 1) { - CookfsLog(printf("Unable to read compression mark")) - Tcl_IncrRefCount(byteObj); - Tcl_DecrRefCount(byteObj); - return NULL; - } - int compression = Tcl_GetByteArrayFromObj(byteObj, NULL)[0]; - Tcl_IncrRefCount(byteObj); - Tcl_DecrRefCount(byteObj); + CookfsLog2(printf("page #%d compression:%d sizeCompressed:%d" + " sizeUncompressed:%d decompress:%d", idx, compression, sizeCompressed, + sizeUncompressed, decompress)); - /* need to decrease size by 1 byte we just read */ - size = size - 1; + assert(sizeCompressed >= 0); + assert(sizeUncompressed >= 0); + assert(decompress == 0 || decompress == 1); - /* if specific compression was required, exit if did not match */ - if ((compressionType != COOKFS_COMPRESSION_ANY) && (compressionType != compression)) { - return NULL; - } + if 
(!decompress) { + compression = COOKFS_COMPRESSION_NONE; + } - if (!decompress) { - compression = COOKFS_COMPRESSION_NONE; - } + if (sizeUncompressed == 0) { + /* if page was empty, no need to read anything */ + return Cookfs_PageObjAlloc(0); + } - CookfsLog(printf("Cookfs_ReadPage I=%d S=%d C=%d", idx, size, compression)) - - /* handle reading based on compression algorithm */ - switch (compression) { - case COOKFS_COMPRESSION_NONE: { - /* simply read raw data */ - Cookfs_PageObj data = Cookfs_PageObjAlloc(size); - int count = Tcl_Read(p->fileChannel, (char *)data, size); - if (count != size) { - CookfsLog(printf("Unable to read - %d != %d", count, size)) - Cookfs_PageObjIncrRefCount(data); - Cookfs_PageObjDecrRefCount(data); - return NULL; - } - if (!decompress) { - CookfsLog(printf("Cookfs_ReadPage retrieved chunk %d", idx)); - } - return data; - } - /* run proper reading functions */ - case COOKFS_COMPRESSION_ZLIB: - return CookfsReadPageZlib(p, size, err); - case COOKFS_COMPRESSION_CUSTOM: - return CookfsReadPageCustom(p, size, err); - case COOKFS_COMPRESSION_BZ2: + // Alloc memory for compressed data + Cookfs_PageObj dataCompressed = Cookfs_PageObjAlloc(sizeCompressed); + if (dataCompressed == NULL) { + CookfsLog2(printf("ERROR: unable to alloc %d bytes for page #%d", + sizeCompressed, idx)); + if (err != NULL) { + *err = Tcl_ObjPrintf("Cookfs_ReadPage(): unable to alloc %d bytes" + " for page #%d", sizeCompressed, idx); + } + return NULL; + } + Cookfs_PageObjIncrRefCount(dataCompressed); + + CookfsLog2(printf("read data...")); + if (idx >= 0) { + Cookfs_SeekToPage(p, idx); + } + + Tcl_Size read = Tcl_Read(p->fileChannel, (char *)dataCompressed, + sizeCompressed); + if (read != sizeCompressed) { + CookfsLog2(printf("ERROR: got only %" TCL_SIZE_MODIFIER "d bytes" + " from channel", read)); + if (err != NULL) { + *err = Tcl_ObjPrintf("Cookfs_ReadPage(): error while reading" + " compressed data from page#%d. 
Expected data size %d bytes," + // cppcheck-suppress unknownMacro + " got %" TCL_SIZE_MODIFIER "d bytes", idx, sizeCompressed, + read); + } + Cookfs_PageObjDecrRefCount(dataCompressed); + return NULL; + } + + if (compression == COOKFS_COMPRESSION_NONE) { + CookfsLog2(printf("return: ok (raw data)")); + return dataCompressed; + } + + // Alloc memory for decompressed data + Cookfs_PageObj dataUncompressed = Cookfs_PageObjAlloc(sizeUncompressed); + if (dataUncompressed == NULL) { + CookfsLog2(printf("ERROR: unable to alloc %d bytes for page #%d", + sizeUncompressed, idx)); + if (err != NULL) { + *err = Tcl_ObjPrintf("Cookfs_ReadPage(): unable to alloc %d bytes" + " for page #%d", sizeUncompressed, idx); + } + Cookfs_PageObjDecrRefCount(dataCompressed); + return NULL; + } + Cookfs_PageObjIncrRefCount(dataUncompressed); + + CookfsLog2(printf("uncompress data...")); + + int rc = TCL_ERROR; + + switch (compression) { + case COOKFS_COMPRESSION_ZLIB: + rc = CookfsReadPageZlib(p, dataCompressed, sizeCompressed, + dataUncompressed, sizeUncompressed, err); + break; + case COOKFS_COMPRESSION_CUSTOM: + rc = CookfsReadPageCustom(p, dataCompressed, sizeCompressed, + dataUncompressed, sizeUncompressed, err); + break; #ifdef COOKFS_USEBZ2 - return CookfsReadPageBz2(p, size, err); -#else - return NULL; + case COOKFS_COMPRESSION_BZ2: + rc = CookfsReadPageBz2(p, dataCompressed, sizeCompressed, + dataUncompressed, sizeUncompressed, err); + break; #endif /* COOKFS_USEBZ2 */ - case COOKFS_COMPRESSION_LZMA: #ifdef COOKFS_USELZMA - return CookfsReadPageLzma(p, size, err); -#else - return NULL; + case COOKFS_COMPRESSION_LZMA: + rc = CookfsReadPageLzma(p, dataCompressed, sizeCompressed, + dataUncompressed, sizeUncompressed, err); + break; #endif /* COOKFS_USELZMA */ - case COOKFS_COMPRESSION_ZSTD: #ifdef COOKFS_USEZSTD - return CookfsReadPageZstd(p, size, err); -#else - return NULL; + case COOKFS_COMPRESSION_ZSTD: + rc = CookfsReadPageZstd(p, dataCompressed, sizeCompressed, + 
dataUncompressed, sizeUncompressed, err); + break; #endif /* COOKFS_USEZSTD */ - case COOKFS_COMPRESSION_BROTLI: #ifdef COOKFS_USEBROTLI - return CookfsReadPageBrotli(p, size, err); -#else - return NULL; + case COOKFS_COMPRESSION_BROTLI: + rc = CookfsReadPageBrotli(p, dataCompressed, sizeCompressed, + dataUncompressed, sizeUncompressed, err); + break; #endif /* COOKFS_USEBROTLI */ - } } - return NULL; + + Cookfs_PageObjDecrRefCount(dataCompressed); + + if (rc != TCL_OK) { + Cookfs_PageObjDecrRefCount(dataUncompressed); + CookfsLog2(printf("return: ERROR")); + return NULL; + } + + // TODO: Add md5 hash check here + + CookfsLog2(printf("return: ok")); + return dataUncompressed; } @@ -586,7 +619,8 @@ Cookfs_PageObj Cookfs_ReadPage(Cookfs_Pages *p, int idx, int size, int decompres void Cookfs_SeekToPage(Cookfs_Pages *p, int idx) { Tcl_WideInt offset = Cookfs_PagesGetPageOffset(p, idx); Tcl_Seek(p->fileChannel, offset, SEEK_SET); - CookfsLog(printf("Seeking to EOF -> %d",(int) offset)) + CookfsLog2(printf("seek to page %d offset %" TCL_LL_MODIFIER "d", idx, + offset)) } @@ -595,24 +629,26 @@ void Cookfs_SeekToPage(Cookfs_Pages *p, int idx) { * * Cookfs_WritePage -- * - * Optionally compress and write page data + * Optionally compress and write page data * - * If compressedData is specified, the page is written as - * compressed or uncompressed, depending on size + * If pgCompressed is specified, the page is written as + * compressed or uncompressed, depending on size * * Results: - * Size of page after compression + * Size of page after compression * * Side effects: - * Data might be freed if its reference counter was originally 0 + * pgCompressed will be released if its refcount is zero * *---------------------------------------------------------------------- */ -int Cookfs_WritePage(Cookfs_Pages *p, int idx, unsigned char *bytes, int origSize, Tcl_Obj *compressedData) { - Tcl_Size size = -1; +Tcl_Size Cookfs_WritePage(Cookfs_Pages *p, int idx, unsigned char *bytes, 
+ Tcl_Size sizeUncompressed, Cookfs_PageObj pgCompressed) +{ - CookfsLog2(printf("page index #%d, original size: %d", idx, origSize)); + CookfsLog2(printf("page index #%d, original size: %" TCL_SIZE_MODIFIER "d", + idx, sizeUncompressed)); // Add initial stamp if needed Cookfs_PageAddStamp(p, 0); @@ -621,477 +657,115 @@ int Cookfs_WritePage(Cookfs_Pages *p, int idx, unsigned char *bytes, int origSiz /* if last operation was not write, we need to seek * to make sure we're at location where we should be writing */ if ((idx >= 0) && (p->fileLastOp != COOKFS_LASTOP_WRITE)) { - p->fileLastOp = COOKFS_LASTOP_WRITE; - Cookfs_SeekToPage(p, idx); + p->fileLastOp = COOKFS_LASTOP_WRITE; + Cookfs_SeekToPage(p, idx); } - if (origSize > 0) { - if (compressedData != NULL) { - Tcl_GetByteArrayFromObj(compressedData, &size); - - if (SHOULD_COMPRESS(p, origSize, size)) { - CookfsWriteCompression(p, p->fileCompression); - Tcl_WriteObj(p->fileChannel, compressedData); - size += 1; - } else { - CookfsWriteCompression(p, COOKFS_COMPRESSION_NONE); - Tcl_Write(p->fileChannel, (const char *)bytes, origSize); - size = origSize + 1; - } - } else { - /* try to write compressed if compression was enabled */ - switch (p->fileCompression) { - case COOKFS_COMPRESSION_ZLIB: - size = CookfsWritePageZlib(p, bytes, origSize); - break; - case COOKFS_COMPRESSION_CUSTOM: - size = CookfsWritePageCustom(p, bytes, origSize); - break; - case COOKFS_COMPRESSION_BZ2: + int resultCompression = p->currentCompression; + int resultCompressionLevel = p->currentCompressionLevel; + Tcl_Size resultSize; + + if (sizeUncompressed <= 0) { + CookfsLog2(printf("data size is zero, skip compression")); + resultSize = 0; + goto done; + } + + if (pgCompressed != NULL) { + CookfsLog2(printf("compression data is specified, skip compression")); + goto skipCompression; + } + + switch (resultCompression) { + case COOKFS_COMPRESSION_ZLIB: + pgCompressed = CookfsWritePageZlib(p, bytes, sizeUncompressed); + break; + case 
COOKFS_COMPRESSION_CUSTOM: + pgCompressed = CookfsWritePageCustom(p, bytes, sizeUncompressed); + break; #ifdef COOKFS_USEBZ2 - size = CookfsWritePageBz2(p, bytes, origSize); -#else - size = -1; + case COOKFS_COMPRESSION_BZ2: + pgCompressed = CookfsWritePageBz2(p, bytes, sizeUncompressed); + break; #endif /* COOKFS_USEBZ2 */ - break; - case COOKFS_COMPRESSION_LZMA: #ifdef COOKFS_USELZMA - size = CookfsWritePageLzma(p, bytes, origSize); -#else - size = -1; + case COOKFS_COMPRESSION_LZMA: + pgCompressed = CookfsWritePageLzma(p, bytes, sizeUncompressed); + break; #endif /* COOKFS_USELZMA */ - break; - case COOKFS_COMPRESSION_ZSTD: #ifdef COOKFS_USEZSTD - size = CookfsWritePageZstd(p, bytes, origSize); -#else - size = -1; + case COOKFS_COMPRESSION_ZSTD: + pgCompressed = CookfsWritePageZstd(p, bytes, sizeUncompressed); + break; #endif /* COOKFS_USEZSTD */ - break; - case COOKFS_COMPRESSION_BROTLI: #ifdef COOKFS_USEBROTLI - size = CookfsWritePageBrotli(p, bytes, origSize); -#else - size = -1; + case COOKFS_COMPRESSION_BROTLI: + pgCompressed = CookfsWritePageBrotli(p, bytes, sizeUncompressed); + break; #endif /* COOKFS_USEZSTD */ - break; - } - - /* if compression was not enabled or compressor decided it should - * be written as raw data, write it uncompressed */ - if (size == -1) { - CookfsWriteCompression(p, COOKFS_COMPRESSION_NONE); - Tcl_Write(p->fileChannel, (const char *)bytes, origSize); - size = origSize + 1; - } else { - /* otherwise add 1 byte for compression */ - size += 1; - } - } - } else { - size = 0; - } - return size; -} + }; +skipCompression: -int Cookfs_WritePageObj(Cookfs_Pages *p, int idx, Cookfs_PageObj data, Tcl_Obj *compressedData) { - CookfsLog(printf("Cookfs_WritePageObj: data: %p", (void *)data)); - return Cookfs_WritePage(p, idx, data, Cookfs_PageObjSize(data), compressedData); -} - -int Cookfs_WriteTclObj(Cookfs_Pages *p, int idx, Tcl_Obj *data, Tcl_Obj *compressedData) { - Tcl_Size size; - unsigned char *bytes = Tcl_GetByteArrayFromObj(data, 
&size); - return Cookfs_WritePage(p, idx, bytes, size, compressedData); -} - -/* - *---------------------------------------------------------------------- - * - * Cookfs_AsyncPageGet -- - * - * Check if page is currently processed as async compression page - * or is pending async decompression and return it if it is. - * - * Results: - * Page contents if found; NULL if not found; - * The page contents' ref counter is increased before returning - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -Cookfs_PageObj Cookfs_AsyncPageGet(Cookfs_Pages *p, int idx) { - if ((p->fileCompression == COOKFS_COMPRESSION_CUSTOM) && (p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { - int i; - for (i = 0 ; i < p->asyncPageSize ; i++) { - if (p->asyncPage[i].pageIdx == idx) { - return Cookfs_PageObjNewFromByteArray(p->asyncPage[i].pageContents); - } - } - } - if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { - int i; - for (i = 0 ; i < p->asyncDecompressQueue ; i++) { - if (p->asyncDecompressIdx[i] == idx) { - while (p->asyncDecompressIdx[i] == idx) { - Cookfs_AsyncDecompressWait(p, idx, 1); - } - /* don't modify here cache entry weight, it will be set by Cookfs_PageGet */ - return Cookfs_PageCacheGet(p, idx, 0, 0); - } - } - } - return NULL; -} - - -/* - *---------------------------------------------------------------------- - * - * Cookfs_AsyncPageAdd -- - * - * Add page to be asynchronously processed if enabled. 
- * - * Results: - * Whether async compression is enabled or not - * - * Side effects: - * Checks if other pages have been processed and may remove - * other pages from the processing queue - * - *---------------------------------------------------------------------- - */ - -int Cookfs_AsyncPageAdd(Cookfs_Pages *p, int idx, unsigned char *bytes, int dataSize) { - if ((p->fileCompression == COOKFS_COMPRESSION_CUSTOM) && (p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { - Tcl_Obj *newObjObj; - int asyncIdx; - // retrieve any already processed queues - while (Cookfs_AsyncCompressWait(p, 0)) {} - while (p->asyncPageSize >= COOKFS_PAGES_MAX_ASYNC) { - Cookfs_AsyncCompressWait(p, 0); - } - asyncIdx = p->asyncPageSize++; - newObjObj = Tcl_NewByteArrayObj(bytes, dataSize); - // copy the object to avoid increased memory usage - Tcl_IncrRefCount(newObjObj); - p->asyncPage[asyncIdx].pageIdx = idx; - p->asyncPage[asyncIdx].pageContents = newObjObj; - CookfsRunAsyncCompressCommand(p, p->asyncCommandProcess, idx, newObjObj); - return 1; - } else { - return 0; - } -} - - -/* - *---------------------------------------------------------------------- - * - * Cookfs_AsyncCompressWait -- - * - * Wait / check whether an asynchronous page has already been - * processed; if require is set, this indicates cookfs is - * finalizing and data is required - * - * Results: - * Whether further clal to this API is needed or not - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -int Cookfs_AsyncCompressWait(Cookfs_Pages *p, int require) { - // TODO: properly throw errors here? 
- if ((p->fileCompression == COOKFS_COMPRESSION_CUSTOM) && (p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { - Tcl_Obj *result, *resObj; - Tcl_Size resultLength; - int i = 0; - int idx = -1; - - if (p->asyncPageSize == 0) { - if (!require) { - return 0; - } - } else { - idx = p->asyncPage[0].pageIdx; - } - - result = CookfsRunAsyncCompressCommand(p, p->asyncCommandWait, idx, Tcl_NewIntObj(require)); - if (result == NULL) { - resultLength = 0; - } else if (Tcl_ListObjLength(NULL, result, &resultLength) != TCL_OK) { - Tcl_DecrRefCount(result); - resultLength = 0; - } - - if (resultLength > 0) { - if (Tcl_ListObjIndex(NULL, result, 0, &resObj) != TCL_OK) { - Tcl_DecrRefCount(result); - return 0; - } - - if (Tcl_GetIntFromObj(NULL, resObj, &i) != TCL_OK) { - Tcl_DecrRefCount(result); - return 0; - } - - // TODO: throw error? - if (i != idx) { - Tcl_DecrRefCount(result); - return 1; - } - - if (Tcl_ListObjIndex(NULL, result, 1, &resObj) != TCL_OK) { - Tcl_DecrRefCount(result); - return 0; - } - - Tcl_IncrRefCount(resObj); - int size = Cookfs_WriteTclObj(p, idx, p->asyncPage[0].pageContents, resObj); - p->dataPagesSize[idx] = size; - Tcl_DecrRefCount(resObj); - Tcl_DecrRefCount(result); - - (p->asyncPageSize)--; - Tcl_DecrRefCount(p->asyncPage[0].pageContents); - for (i = 0 ; i < p->asyncPageSize ; i++) { - p->asyncPage[i].pageIdx = p->asyncPage[i + 1].pageIdx; - p->asyncPage[i].pageContents = p->asyncPage[i + 1].pageContents; - } - - return (p->asyncPageSize > 0); - } else { - if (p->asyncPageSize > 0) { - return require; - } else { - return 0; - } - } - } else { - return 0; + if (pgCompressed != NULL) { + CookfsLog2(printf("got %" TCL_SIZE_MODIFIER "d bytes from compression" + " engine", Cookfs_PageObjSize(pgCompressed))); + if (!SHOULD_COMPRESS(p, sizeUncompressed, + Cookfs_PageObjSize(pgCompressed))) + { + CookfsLog2(printf("compression is inefficient, store as" + " uncompressed")); + Cookfs_PageObjBounceRefCount(pgCompressed); + 
pgCompressed = NULL; + } } -} - -/* - *---------------------------------------------------------------------- - * - * Cookfs_AsyncCompressFinalize -- - * - * Call code to finalize async compression - * - * Results: - * None - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - - -void Cookfs_AsyncCompressFinalize(Cookfs_Pages *p) { - if ((p->asyncCompressCommandPtr != NULL) && (p->asyncCompressCommandLen > 3)) { - CookfsRunAsyncCompressCommand(p, p->asyncCommandFinalize, -1, Tcl_NewIntObj(1)); + if (pgCompressed == NULL) { + CookfsLog2(printf("there is no compressed data, create output buffer" + " from original data")); + pgCompressed = Cookfs_PageObjNewFromString(bytes, sizeUncompressed); + if (pgCompressed == NULL) { + Tcl_Panic("Cookfs_WritePage(): failed to alloc"); + // Just to make cppcheck happy. It doesn't realize that Tcl_Panic + // closes the application. + return 0; + } + resultCompression = COOKFS_COMPRESSION_NONE; + resultCompressionLevel = 0; } -} - -// TODO: document -int Cookfs_AsyncPagePreload(Cookfs_Pages *p, int idx) { - CookfsLog(printf("Cookfs_AsyncPagePreload: index [%d]", idx)) - if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { - Tcl_Obj *dataObj; - int i; + resultSize = Cookfs_PageObjSize(pgCompressed); - for (i = 0 ; i < p->asyncDecompressQueue ; i++) { - if (p->asyncDecompressIdx[i] == idx) { - CookfsLog(printf("Cookfs_AsyncPagePreload: return 1 - Page %d already in async decompress queue", i)) - return 1; - } - } - - /* don't modify page weight in cache, as here we are just checking if it is already loaded */ - if (Cookfs_PageCacheGet(p, idx, 0, 0) != NULL) { - // page already in cache and we just moved it to top; do nothing - CookfsLog(printf("Cookfs_AsyncPagePreload: return 1 - Page already in cache and we just moved it to top")) - return 1; - } - - // if queue is full, do not preload - if 
(p->asyncDecompressQueue >= p->asyncDecompressQueueSize) { - CookfsLog(printf("Cookfs_AsyncPagePreload: return 0 - Queue is full, do not preload")) - return 0; - } - - CookfsLog(printf("Cookfs_AsyncPagePreload: Reading page %d for async decompress", idx)) - // TODO: do something with possible error message - Cookfs_PageObj dataPageObj = Cookfs_ReadPage(p, idx, -1, 0, COOKFS_COMPRESSION_CUSTOM, NULL); - if (dataPageObj == NULL) { - CookfsLog(printf("Cookfs_AsyncPagePreload: ERROR: Cookfs_ReadPage returned NULL, return 1")); - return 1; - } + Tcl_Size written = Tcl_Write(p->fileChannel, (const char *)pgCompressed, + resultSize); - Cookfs_PageObjIncrRefCount(dataPageObj); - dataObj = Cookfs_PageObjCopyAsByteArray(dataPageObj); - Cookfs_PageObjDecrRefCount(dataPageObj); - - if (dataObj == NULL) { - CookfsLog(printf("Cookfs_AsyncPagePreload: ERROR: failed to convert Tcl_Obj->PageObj, return 1")); - return 1; - } + CookfsLog2(printf("wrote %" TCL_SIZE_MODIFIER "d bytes", written)); - Tcl_IncrRefCount(dataObj); - p->asyncDecompressIdx[p->asyncDecompressQueue] = idx; - p->asyncDecompressQueue++; - CookfsLog(printf("Adding page %d for async decompress", idx)) - CookfsRunAsyncDecompressCommand(p, p->asyncCommandProcess, idx, dataObj); - Tcl_DecrRefCount(dataObj); - - CookfsLog(printf("Cookfs_AsyncPagePreload: return 1")) - return 1; - } - CookfsLog(printf("Cookfs_AsyncPagePreload: return 0")) - return 0; -} - - -// TODO: document -void Cookfs_AsyncDecompressWaitIfLoading(Cookfs_Pages *p, int idx) { - if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { - int i; - - for (i = 0 ; i < p->asyncDecompressQueue ; i++) { - if (p->asyncDecompressIdx[i] == idx) { - Cookfs_AsyncDecompressWait(p, idx, 1); - break; - } - } + if (written != resultSize) { + Tcl_Panic("Cookfs_WritePage(): failed to write"); } -} - -// TODO: document -int Cookfs_AsyncDecompressWait(Cookfs_Pages *p, int idx, int require) { - if 
((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { - Tcl_Obj *result, *resObj; - Tcl_Size resultLength; - int i = 0; + Cookfs_PageObjBounceRefCount(pgCompressed); - if (p->asyncDecompressQueue == 0) { - if (!require) { - return 0; - } - } - - CookfsLog(printf("Cookfs_AsyncDecompressWait: calling callback")) - - result = CookfsRunAsyncDecompressCommand(p, p->asyncCommandWait, idx, Tcl_NewIntObj(require)); - if ((result == NULL) || (Tcl_ListObjLength(NULL, result, &resultLength) != TCL_OK)) { - resultLength = 0; - } - - if (resultLength >= 2) { - int j, k; - if (Tcl_ListObjIndex(NULL, result, 0, &resObj) != TCL_OK) { - return 0; - } - - if (Tcl_GetIntFromObj(NULL, resObj, &i) != TCL_OK) { - return 0; - } - - if (Tcl_ListObjIndex(NULL, result, 1, &resObj) != TCL_OK) { - return 0; - } - - CookfsLog(printf("Cookfs_AsyncDecompressWait: callback returned data for %d", i)) - Tcl_IncrRefCount(resObj); - Cookfs_PageObj pageObj = Cookfs_PageObjNewFromByteArray(resObj); - Tcl_DecrRefCount(resObj); - if (pageObj != NULL) { - Cookfs_PageObjIncrRefCount(pageObj); - /* - Set the page weight to 1000 because it should be cached and used further. - If it will be displaced by other weighty pages, then preloading makes no sense. 
- Real page weight will be set by Cookfs_PageGet - */ - Cookfs_PageCacheSet(p, i, pageObj, 1000); - Cookfs_PageObjDecrRefCount(pageObj); - } - - Tcl_DecrRefCount(result); - - CookfsLog(printf("Cookfs_AsyncDecompressWait: cleaning up decompression queue")) - for (j = 0 ; j < p->asyncDecompressQueue ; j++) { - if (p->asyncDecompressIdx[j] == i) { - for (k = j ; k < (p->asyncDecompressQueue - 1) ; k++) { - p->asyncDecompressIdx[k] = p->asyncDecompressIdx[k + 1]; - } - (p->asyncDecompressQueue)--; - // needed to properly detect it in Cookfs_AsyncPageGet - p->asyncDecompressIdx[p->asyncDecompressQueue] = -1; - break; - } - } - - CookfsLog(printf("Cookfs_AsyncDecompressWait: cleaning up decompression queue done")) - - return (p->asyncDecompressQueue > 0); - } else { - if (result != NULL) { - Tcl_DecrRefCount(result); - } - if (p->asyncDecompressQueue > 0) { - return require; - } else { - return 0; - } - } - } else { - return 0; - } +done: + Cookfs_PgIndexSetCompression(p->pagesIndex, idx, resultCompression, + resultCompressionLevel); + Cookfs_PgIndexSetSizeCompressed(p->pagesIndex, idx, resultSize); + return resultSize; } -// TODO: document -void Cookfs_AsyncDecompressFinalize(Cookfs_Pages *p) { - if ((p->asyncDecompressQueueSize > 0) && (p->asyncDecompressCommandPtr != NULL) && (p->asyncDecompressCommandLen > 3)) { - CookfsRunAsyncDecompressCommand(p, p->asyncCommandFinalize, -1, Tcl_NewIntObj(1)); - } +int Cookfs_WritePageObj(Cookfs_Pages *p, int idx, Cookfs_PageObj data) { + CookfsLog2(printf("data: %p", (void *)data)); + return Cookfs_WritePage(p, idx, data, Cookfs_PageObjSize(data), NULL); } -/* - *---------------------------------------------------------------------- - * - * CookfsWriteCompression -- - * - * Write compression id to file as 1 byte - * - * Results: - * None - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -void CookfsWriteCompression(Cookfs_Pages *p, int compression) { - Tcl_Obj 
*byteObj; - unsigned char byte[4]; - byte[0] = (unsigned char) compression; - byteObj = Tcl_NewByteArrayObj(byte, 1); - Tcl_IncrRefCount(byteObj); - Tcl_WriteObj(p->fileChannel, byteObj); - Tcl_DecrRefCount(byteObj); +int Cookfs_WriteTclObj(Cookfs_Pages *p, int idx, Tcl_Obj *data, Tcl_Obj *compressedData) { + Tcl_Size size; + unsigned char *bytes = Tcl_GetByteArrayFromObj(data, &size); + CookfsLog2(printf("data: %p", (void *)bytes)); + return Cookfs_WritePage(p, idx, bytes, size, + Cookfs_PageObjNewFromByteArray(compressedData)); } /* definitions of static and/or internal functions */ @@ -1134,124 +808,6 @@ static Tcl_Obj **CookfsCreateCompressionCommand(Tcl_Interp *interp, Tcl_Obj *cmd return rc; } -/* - *---------------------------------------------------------------------- - * - * CookfsReadPageCustom -- - * - * Read page stored using custom compression - * - * Results: - * Binary data as Tcl_Obj - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -static Cookfs_PageObj CookfsReadPageCustom(Cookfs_Pages *p, int size, Tcl_Obj **err) { - /* use vfs::zip command for decompression */ - Tcl_Obj *prevResult; - Tcl_Obj *compressed; - Tcl_Obj *data; - int count; - - if (p->decompressCommandPtr == NULL) { - SET_ERROR_STR("No decompresscommand specified"); - return NULL; - } - - compressed = Tcl_NewObj(); - Tcl_IncrRefCount(compressed); - count = Tcl_ReadChars(p->fileChannel, compressed, size, 0); - - CookfsLog(printf("Reading - %d vs %d", count, size)) - if (count != size) { - CookfsLog(printf("Unable to read - %d != %d", count, size)) - Tcl_DecrRefCount(compressed); - return NULL; - } - p->decompressCommandPtr[p->decompressCommandLen - 1] = compressed; - prevResult = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(prevResult); - if (Tcl_EvalObjv(p->interp, p->decompressCommandLen, p->decompressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { - p->decompressCommandPtr[p->decompressCommandLen - 1] 
= NULL; - CookfsLog(printf("Unable to decompress")) - Tcl_DecrRefCount(compressed); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(prevResult); - return NULL; - } - p->decompressCommandPtr[p->decompressCommandLen - 1] = NULL; - Tcl_DecrRefCount(compressed); - data = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(data); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(prevResult); - Cookfs_PageObj rc = Cookfs_PageObjNewFromByteArray(data); - Tcl_DecrRefCount(data); - return rc; -} - - -/* - *---------------------------------------------------------------------- - * - * CookfsWritePageCustom -- - * - * Write page using custom compression - * - * Results: - * Number of bytes written; -1 in case compression failed or - * compressing was not efficient enough (see SHOULD_COMPRESS macro) - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -static int CookfsWritePageCustom(Cookfs_Pages *p, unsigned char *bytes, int origSize) { - Tcl_Size size = origSize; - /* use vfs::zip command for compression */ - Tcl_Obj *prevResult; - Tcl_Obj *compressed; - - if (p->compressCommandPtr == NULL) { - return -1; - } - - Tcl_Obj *data = Tcl_NewByteArrayObj(bytes, origSize); - Tcl_IncrRefCount(data); - p->compressCommandPtr[p->compressCommandLen - 1] = data; - prevResult = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(prevResult); - if (Tcl_EvalObjv(p->interp, p->compressCommandLen, p->compressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { - CookfsLog(printf("Unable to compress: %s", Tcl_GetString(Tcl_GetObjResult(p->interp)))); - p->compressCommandPtr[p->compressCommandLen - 1] = NULL; - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(data); - return -1; - } - p->compressCommandPtr[p->compressCommandLen - 1] = NULL; - Tcl_DecrRefCount(data); - compressed = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(compressed); - Tcl_SetObjResult(p->interp, prevResult); - 
Tcl_DecrRefCount(prevResult); - Tcl_GetByteArrayFromObj(compressed, &size); - - if (SHOULD_COMPRESS(p, origSize, size)) { - CookfsWriteCompression(p, COOKFS_COMPRESSION_CUSTOM); - Tcl_WriteObj(p->fileChannel, compressed); - } else { - size = -1; - } - Tcl_DecrRefCount(compressed); - return size; -} - #ifdef USE_VFS_COMMANDS_FOR_ZIP /* @@ -1283,95 +839,3 @@ static int CookfsCheckCommandExists(Tcl_Interp *interp, const char *commandName) #endif -/* - *---------------------------------------------------------------------- - * - * CookfsRunAsyncCompressCommand -- - * - * Helper to run the async compress command with specified - * arguments in interp from Cookfs_Pages object - * - * Reverts Tcl interpreter's result to one before - * this function was called. - * - * Results: - * result from command invocation or NULL in case of failure - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -static Tcl_Obj *CookfsRunAsyncCompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg) { - Tcl_Obj *prevResult, *data; - prevResult = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(prevResult); - p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 3] = cmd; - p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2] = Tcl_NewIntObj(idx); - p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1] = arg; - Tcl_IncrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2]); - Tcl_IncrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1]); - if (Tcl_EvalObjv(p->interp, p->asyncCompressCommandLen, p->asyncCompressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { - Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2]); - Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1]); - return NULL; - } - Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2]); - 
Tcl_DecrRefCount(p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1]); - p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 3] = NULL; - p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 2] = NULL; - p->asyncCompressCommandPtr[p->asyncCompressCommandLen - 1] = NULL; - data = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(data); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(prevResult); - return data; -} - - -/* - *---------------------------------------------------------------------- - * - * CookfsRunAsyncDecompressCommand -- - * - * Helper to run the async decompress command with specified - * arguments in interp from Cookfs_Pages object - * - * Reverts Tcl interpreter's result to one before - * this function was called. - * - * Results: - * result from command invocation or NULL in case of failure - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -static Tcl_Obj *CookfsRunAsyncDecompressCommand(Cookfs_Pages *p, Tcl_Obj *cmd, int idx, Tcl_Obj *arg) { - Tcl_Obj *prevResult, *data; - prevResult = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(prevResult); - p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 3] = cmd; - p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2] = Tcl_NewIntObj(idx); - p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1] = arg; - Tcl_IncrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2]); - Tcl_IncrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1]); - if (Tcl_EvalObjv(p->interp, p->asyncDecompressCommandLen, p->asyncDecompressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { - Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2]); - Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1]); - return NULL; - } - Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2]); - 
Tcl_DecrRefCount(p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1]); - p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 3] = NULL; - p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 2] = NULL; - p->asyncDecompressCommandPtr[p->asyncDecompressCommandLen - 1] = NULL; - data = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(data); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(prevResult); - return data; -} diff --git a/generic/pagesCompr.h b/generic/pagesCompr.h index e20bd35..4ab55a7 100644 --- a/generic/pagesCompr.h +++ b/generic/pagesCompr.h @@ -20,28 +20,27 @@ const char *Cookfs_CompressionGetName(int compression); -void CookfsWriteCompression(Cookfs_Pages *p, int compression); - int Cookfs_CompressionFromObj(Tcl_Interp *interp, Tcl_Obj *obj, int *compressionPtr, int *compressionLevelPtr); void Cookfs_PagesInitCompr(Cookfs_Pages *rc); void Cookfs_PagesFiniCompr(Cookfs_Pages *rc); -int Cookfs_SetCompressCommands(Cookfs_Pages *p, Tcl_Obj *compressCommand, Tcl_Obj *decompressCommand, Tcl_Obj *asyncCompressCommand, Tcl_Obj *asyncDecompressCommand); +int Cookfs_SetCompressCommands(Cookfs_Pages *p, + Tcl_Obj *compressCommand, + Tcl_Obj *decompressCommand, + Tcl_Obj *asyncCompressCommand, + Tcl_Obj *asyncDecompressCommand); void Cookfs_SeekToPage(Cookfs_Pages *p, int idx); -int Cookfs_WritePage(Cookfs_Pages *p, int idx, unsigned char *bytes, int origSize, Tcl_Obj *compressedData); -int Cookfs_WritePageObj(Cookfs_Pages *p, int idx, Cookfs_PageObj data, Tcl_Obj *compressedData); + +Tcl_Size Cookfs_WritePage(Cookfs_Pages *p, int idx, unsigned char *bytes, + Tcl_Size sizeUncompressed, Cookfs_PageObj pgCompressed); +int Cookfs_WritePageObj(Cookfs_Pages *p, int idx, Cookfs_PageObj data); int Cookfs_WriteTclObj(Cookfs_Pages *p, int idx, Tcl_Obj *data, Tcl_Obj *compressedData); -Cookfs_PageObj Cookfs_ReadPage(Cookfs_Pages *p, int idx, int size, int decompress, int compressionType, Tcl_Obj **err); -Cookfs_PageObj 
Cookfs_AsyncPageGet(Cookfs_Pages *p, int idx); -int Cookfs_AsyncPageAdd(Cookfs_Pages *p, int idx, unsigned char *bytes, int dataSize); -void Cookfs_AsyncDecompressWaitIfLoading(Cookfs_Pages *p, int idx); -int Cookfs_AsyncCompressWait(Cookfs_Pages *p, int require); -void Cookfs_AsyncCompressFinalize(Cookfs_Pages *p); -int Cookfs_AsyncPagePreload(Cookfs_Pages *p, int idx); -int Cookfs_AsyncDecompressWait(Cookfs_Pages *p, int idx, int require); -void Cookfs_AsyncDecompressFinalize(Cookfs_Pages *p); + +Cookfs_PageObj Cookfs_ReadPage(Cookfs_Pages *p, int idx, int compression, + int sizeCompressed, int sizeUncompressed, unsigned char *md5hash, + int decompress, Tcl_Obj **err); #endif /* COOKFS_PAGESCOMPR_H */ diff --git a/generic/pagesComprBrotli.c b/generic/pagesComprBrotli.c index 1d8bf44..44d0cae 100644 --- a/generic/pagesComprBrotli.c +++ b/generic/pagesComprBrotli.c @@ -15,26 +15,25 @@ #include "pagesCompr.h" #include "pagesComprBrotli.h" -int CookfsWritePageBrotli(Cookfs_Pages *p, unsigned char *bytes, int origSize) { +Cookfs_PageObj CookfsWritePageBrotli(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize) +{ - CookfsLog(printf("CookfsWritePageBrotli: want to compress %d bytes", + CookfsLog2(printf("want to compress %" TCL_SIZE_MODIFIER "d bytes", origSize)); - Tcl_Obj *destObj = Tcl_NewByteArrayObj(NULL, 0); - Tcl_IncrRefCount(destObj); - size_t resultSize = BrotliEncoderMaxCompressedSize((size_t) origSize); if (!resultSize) { - CookfsLog(printf("CookfsWritePageBrotli:" - " BrotliEncoderMaxCompressedSize failed")); - resultSize = 0; - goto done; + CookfsLog2(printf("ERROR: BrotliEncoderMaxCompressedSize failed")); + return NULL; + } + Cookfs_PageObj rc = Cookfs_PageObjAlloc(resultSize); + if (rc == NULL) { + CookfsLog2(printf("ERROR: could not alloc output buffer")); + return NULL; } - // Allocate additional 4 bytes for uncompressed page size - unsigned char *dest = Tcl_SetByteArrayLength(destObj, resultSize + 4); - - int level = p->fileCompressionLevel; + 
int level = p->currentCompressionLevel; if (level < 0) { level = 0; @@ -42,106 +41,58 @@ int CookfsWritePageBrotli(Cookfs_Pages *p, unsigned char *bytes, int origSize) { level = 11; } - CookfsLog(printf("CookfsWritePageBrotli: call BrotliEncoderCompress()" - " level %d ...", level)); + CookfsLog2(printf("call BrotliEncoderCompress() level %d ...", level)); // Leave 4 bytes in the buffer for uncompressed page size BROTLI_BOOL res = BrotliEncoderCompress(level, BROTLI_DEFAULT_WINDOW, - BROTLI_DEFAULT_MODE, (size_t)origSize, bytes, &resultSize, dest + 4); + BROTLI_DEFAULT_MODE, (size_t)origSize, bytes, &resultSize, rc); if (res != BROTLI_TRUE) { - CookfsLog(printf("CookfsWritePageBrotli: call got ERROR")); - resultSize = 0; - goto done; - } - - // Add 4 bytes to resultSize, which is the size of the uncompressed page - resultSize += 4; - CookfsLog(printf("CookfsWritePageBrotli: got encoded size: %zu", - resultSize)); - if (SHOULD_COMPRESS(p, (unsigned int)origSize, resultSize)) { - CookfsLog(printf("CookfsWritePageBrotli: write page")); - Tcl_SetByteArrayLength(destObj, resultSize); - // Write the original size to the beginning of the buffer - Cookfs_Int2Binary(&origSize, dest, 1); - CookfsWriteCompression(p, COOKFS_COMPRESSION_BROTLI); - Tcl_WriteObj(p->fileChannel, destObj); - } else { - CookfsLog(printf("CookfsWritePageBrotli: compression is inefficient")); - resultSize = 0; - } - -done: - Tcl_DecrRefCount(destObj); - if (resultSize) { - return resultSize; - } else { - return -1; + CookfsLog2(printf("call got ERROR")); + Cookfs_PageObjBounceRefCount(rc); + return NULL; } -} -Cookfs_PageObj CookfsReadPageBrotli(Cookfs_Pages *p, int size, Tcl_Obj **err) { - UNUSED(err); + CookfsLog2(printf("got encoded size: %zu", resultSize)); + Cookfs_PageObjSetSize(rc, resultSize); - CookfsLog(printf("CookfsReadPageBrotli: start. 
Want to read %d bytes.", size)); + return rc; - Tcl_Obj *data = Tcl_NewObj(); - Tcl_IncrRefCount(data); - int count = Tcl_ReadChars(p->fileChannel, data, size, 0); +} - if (count != size) { - CookfsLog(printf("CookfsReadPageBrotli: failed to read, got only %d" - " bytes", count)); - Tcl_DecrRefCount(data); - return NULL; - } +int CookfsReadPageBrotli(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err) +{ - unsigned char *source = Tcl_GetByteArrayFromObj(data, NULL); - if (source == NULL) { - CookfsLog(printf("CookfsReadPageBrotli: Tcl_GetByteArrayFromObj failed")); - Tcl_DecrRefCount(data); - return NULL; - } + UNUSED(err); + UNUSED(p); - int destSize; - Cookfs_Binary2Int(source, &destSize, 1); + CookfsLog2(printf("input buffer %p (%" TCL_SIZE_MODIFIER "d bytes) ->" + " output buffer %p (%" TCL_SIZE_MODIFIER "d bytes)", + (void *)dataCompressed, sizeCompressed, + (void *)dataUncompressed, sizeUncompressed)); - Cookfs_PageObj destObj = Cookfs_PageObjAlloc(destSize); - if (destObj == NULL) { - CookfsLog(printf("CookfsReadPageBrotli: ERROR: failed to alloc")); - Tcl_DecrRefCount(data); - return NULL; - } + size_t resultSize = (size_t)sizeUncompressed; - CookfsLog(printf("CookfsReadPageBrotli: uncompressed size=%d from %d", - destSize, size)); - - CookfsLog(printf("CookfsReadPageBrotli: call BrotliDecoderDecompress()" - " ...")); - size_t resultSize = destSize; - BrotliDecoderResult res = BrotliDecoderDecompress(size - 4, source + 4, - &resultSize, destObj); - Tcl_DecrRefCount(data); + CookfsLog2(printf("call BrotliDecoderDecompress() ...")); + BrotliDecoderResult res = BrotliDecoderDecompress(sizeCompressed, + dataCompressed, &resultSize, dataUncompressed); if (res != BROTLI_DECODER_RESULT_SUCCESS) { - CookfsLog(printf("CookfsReadPageBrotli: call got ERROR")); - goto unpackError; + CookfsLog(printf("result: ERROR")); + return TCL_ERROR; } - 
CookfsLog(printf("CookfsReadPageBrotli: got %zu bytes", resultSize)); + CookfsLog2(printf("got %zu bytes", resultSize)); - if (resultSize != (unsigned int)destSize) { - CookfsLog(printf("CookfsReadPageBrotli: ERROR: result size doesn't" - " match original size")); - goto unpackError; + if (resultSize != (size_t)sizeUncompressed) { + CookfsLog2(printf("ERROR: result size doesn't match original size")); + return TCL_ERROR; } - return destObj; - -unpackError: + CookfsLog2(printf("return: ok")); + return TCL_OK; - Cookfs_PageObjIncrRefCount(destObj); - Cookfs_PageObjDecrRefCount(destObj); - return NULL; } diff --git a/generic/pagesComprBrotli.h b/generic/pagesComprBrotli.h index b206aff..d5baded 100644 --- a/generic/pagesComprBrotli.h +++ b/generic/pagesComprBrotli.h @@ -9,8 +9,12 @@ #ifndef COOKFS_PAGESCOMPRBROTLI_H #define COOKFS_PAGESCOMPRBROTLI_H 1 -Cookfs_PageObj CookfsReadPageBrotli(Cookfs_Pages *p, int size, Tcl_Obj **err); -int CookfsWritePageBrotli(Cookfs_Pages *p, unsigned char *bytes, int origSize); +int CookfsReadPageBrotli(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err); + +Cookfs_PageObj CookfsWritePageBrotli(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize); #define COOKFS_DEFAULT_COMPRESSION_LEVEL_BROTLI 6 diff --git a/generic/pagesComprBz2.c b/generic/pagesComprBz2.c index 52f2eae..22ecd2c 100644 --- a/generic/pagesComprBz2.c +++ b/generic/pagesComprBz2.c @@ -3,8 +3,6 @@ * * Provides bzip2 functions for pages compression * - * (c) 2010-2011 Wojciech Kocjan, Pawel Salawa - * (c) 2011-2014 Wojciech Kocjan * (c) 2024 Konstantin Kushnir */ @@ -15,123 +13,77 @@ #include "pagesCompr.h" #include "pagesComprBz2.h" -/* - *---------------------------------------------------------------------- - * - * CookfsReadPageBz2 -- - * - * Read bzip2 compressed page - * - * Results: - * Binary data as Tcl_Obj - * - * Side effects: - * None - * - 
*---------------------------------------------------------------------- - */ +int CookfsReadPageBz2(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err) +{ -Cookfs_PageObj CookfsReadPageBz2(Cookfs_Pages *p, int size, Tcl_Obj **err) { UNUSED(err); - int destSize; - int count; - unsigned char *source; - Tcl_Obj *data; - - data = Tcl_NewObj(); - Tcl_IncrRefCount(data); - count = Tcl_ReadChars(p->fileChannel, data, size, 0); - - if (count != size) { - Tcl_DecrRefCount(data); - return NULL; - } + UNUSED(p); - source = Tcl_GetByteArrayFromObj(data, NULL); - if (source == NULL) { - Tcl_DecrRefCount(data); - CookfsLog(printf("CookfsReadPageBz2: Tcl_GetByteArrayFromObj failed")) - return NULL; - } + CookfsLog2(printf("input buffer %p (%" TCL_SIZE_MODIFIER "d bytes) ->" + " output buffer %p (%" TCL_SIZE_MODIFIER "d bytes)", + (void *)dataCompressed, sizeCompressed, + (void *)dataUncompressed, sizeUncompressed)); + + unsigned int resultSize = (unsigned int)sizeUncompressed; + + CookfsLog2(printf("call BZ2_bzBuffToBuffDecompress() ...")); + int res = BZ2_bzBuffToBuffDecompress((char *)dataUncompressed, &resultSize, + (char *)dataCompressed, (unsigned int)sizeCompressed, 0, 0); - Cookfs_Binary2Int(source, &destSize, 1); - Cookfs_PageObj destObj = Cookfs_PageObjAlloc(destSize); - if (destObj == NULL) { - Tcl_DecrRefCount(data); - CookfsLog(printf("CookfsReadPageBz2: failed to alloc")) - return NULL; + if (res != BZ_OK) { + CookfsLog(printf("result: ERROR")); + return TCL_ERROR; } - CookfsLog(printf("CookfsReadPageBz2: uncompressed size=%d from %d", destSize, size)) + CookfsLog2(printf("got %u bytes", resultSize)); - if (BZ2_bzBuffToBuffDecompress((char *) destObj, (unsigned int *) &destSize, (char *) source + 4, (unsigned int) size - 4, 0, 0) != BZ_OK) { - Tcl_DecrRefCount(data); - Cookfs_PageObjIncrRefCount(destObj); - Cookfs_PageObjDecrRefCount(destObj); - 
CookfsLog(printf("CookfsReadPageBz2: BZ2_bzBuffToBuffDecompress failed")) - return NULL; + if (resultSize != (unsigned int)sizeUncompressed) { + CookfsLog2(printf("ERROR: result size doesn't match original size")); + return TCL_ERROR; } - Tcl_DecrRefCount(data); + CookfsLog2(printf("return: ok")); + return TCL_OK; - return destObj; } -/* - *---------------------------------------------------------------------- - * - * CookfsWritePageBz2 -- - * - * Write page using bzip2 compression - * - * Results: - * Number of bytes written; -1 in case compression failed or - * compressing was not efficient enough (see SHOULD_COMPRESS macro) - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ +Cookfs_PageObj CookfsWritePageBz2(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize) +{ -int CookfsWritePageBz2(Cookfs_Pages *p, unsigned char *bytes, int origSize) { - int size; - unsigned char *source; - unsigned char *dest; - Tcl_Obj *destObj; + CookfsLog2(printf("want to compress %" TCL_SIZE_MODIFIER "d bytes", + origSize)); - int level = p->fileCompressionLevel; + unsigned int resultSize = (unsigned int)origSize * 2 + 1024; + Cookfs_PageObj rc = Cookfs_PageObjAlloc(resultSize); + if (rc == NULL) { + CookfsLog2(printf("ERROR: could not alloc output buffer")); + return NULL; + } + + int level = p->currentCompressionLevel; if (level < 1) { level = 1; } else if (level >= 255) { level = 9; } - source = bytes; - destObj = Tcl_NewByteArrayObj(NULL, 0); - size = origSize * 2 + 1024; - Tcl_SetByteArrayLength(destObj, size + 4); - dest = Tcl_GetByteArrayFromObj(destObj, NULL); + CookfsLog2(printf("call BZ2_bzBuffToBuffCompress() level %d ...", level)); + int res = BZ2_bzBuffToBuffCompress((char *)rc, &resultSize, (char *)bytes, + (unsigned int)origSize, level, 0, 0); - Cookfs_Int2Binary(&origSize, (unsigned char *) dest, 1); - if (BZ2_bzBuffToBuffCompress((char *) (dest + 4), (unsigned int *) &size, (char *) source, (unsigned 
int) origSize, level, 0, 0) != BZ_OK) { - CookfsLog(printf("Cookfs_WritePage: BZ2_bzBuffToBuffCompress failed")) - return -1; + if (res != BZ_OK) { + CookfsLog2(printf("call got ERROR")); + Cookfs_PageObjBounceRefCount(rc); + return NULL; } - CookfsLog(printf("Cookfs_WritePage: size=%d (to %d)", origSize, size)) - size += 4; - Tcl_SetByteArrayLength(destObj, size); + CookfsLog2(printf("got encoded size: %u", resultSize)); + Cookfs_PageObjSetSize(rc, resultSize); - Tcl_IncrRefCount(destObj); - if (SHOULD_COMPRESS(p, origSize, size)) { - CookfsWriteCompression(p, COOKFS_COMPRESSION_BZ2); - Tcl_WriteObj(p->fileChannel, destObj); - } else { - size = -1; - } - Tcl_DecrRefCount(destObj); + return rc; - return size; } diff --git a/generic/pagesComprBz2.h b/generic/pagesComprBz2.h index cf914e3..67f0688 100644 --- a/generic/pagesComprBz2.h +++ b/generic/pagesComprBz2.h @@ -13,7 +13,11 @@ #define COOKFS_DEFAULT_COMPRESSION_LEVEL_BZ2 9 -Cookfs_PageObj CookfsReadPageBz2(Cookfs_Pages *p, int size, Tcl_Obj **err); -int CookfsWritePageBz2(Cookfs_Pages *p, unsigned char *bytes, int origSize); +int CookfsReadPageBz2(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err); + +Cookfs_PageObj CookfsWritePageBz2(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize); #endif /* COOKFS_PAGESCOMPRBZ2_H */ diff --git a/generic/pagesComprCustom.c b/generic/pagesComprCustom.c new file mode 100644 index 0000000..8d3ff0b --- /dev/null +++ b/generic/pagesComprCustom.c @@ -0,0 +1,120 @@ +/* + * pagesComprCustom.c + * + * Provides functions for custom pages compression + * + * (c) 2024 Konstantin Kushnir + */ + +#include "cookfs.h" +#include "pages.h" +#include "pagesInt.h" +#include "pagesCompr.h" +#include "pagesComprCustom.h" + +int CookfsReadPageCustom(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, 
Tcl_Obj **err) +{ + + CookfsLog2(printf("input buffer %p (%" TCL_SIZE_MODIFIER "d bytes) ->" + " output buffer %p (%" TCL_SIZE_MODIFIER "d bytes)", + (void *)dataCompressed, sizeCompressed, + (void *)dataUncompressed, sizeUncompressed)); + + if (p->decompressCommandPtr == NULL) { + CookfsLog2(printf("ERROR: No decompresscommand specified")); + SET_ERROR_STR("No decompresscommand specified"); + return TCL_ERROR; + } + + int res; + + Tcl_Obj *sourceObj = Tcl_NewByteArrayObj(dataCompressed, sizeCompressed); + Tcl_IncrRefCount(sourceObj); + + CookfsLog2(printf("p = %p", (void *)p)); + p->decompressCommandPtr[p->decompressCommandLen - 1] = sourceObj; + + CookfsLog2(printf("call custom decompression command ...")); + res = Tcl_EvalObjv(p->interp, p->decompressCommandLen, + p->decompressCommandPtr, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL); + + p->decompressCommandPtr[p->decompressCommandLen - 1] = NULL; + Tcl_DecrRefCount(sourceObj); + + if (res != TCL_OK) { + CookfsLog2(printf("return: ERROR")); + return TCL_ERROR; + } + + Tcl_Obj *destObj = Tcl_GetObjResult(p->interp); + Tcl_IncrRefCount(destObj); + Tcl_ResetResult(p->interp); + + Tcl_Size destObjSize; + unsigned char *destStr = Tcl_GetByteArrayFromObj(destObj, &destObjSize); + + if (destObjSize != sizeUncompressed) { + CookfsLog2(printf("ERROR: result size doesn't match original size")); + Tcl_DecrRefCount(destObj); + return TCL_ERROR; + } + + CookfsLog2(printf("copy data to the output buffer")); + memcpy(dataUncompressed, destStr, destObjSize); + + Tcl_DecrRefCount(destObj); + + CookfsLog2(printf("return: ok")); + return TCL_OK; + +} + +Cookfs_PageObj CookfsWritePageCustom(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize) +{ + + CookfsLog2(printf("want to compress %" TCL_SIZE_MODIFIER "d bytes", + origSize)); + + if (p->compressCommandPtr == NULL) { + CookfsLog2(printf("ERROR: No compresscommand specified")); + return NULL; + } + + int res; + + Tcl_Obj *inputData = Tcl_NewByteArrayObj(bytes, origSize); + 
Tcl_IncrRefCount(inputData); + + p->compressCommandPtr[p->compressCommandLen - 1] = inputData; + CookfsLog2(printf("call custom compression command ...")); + res = Tcl_EvalObjv(p->interp, p->compressCommandLen, p->compressCommandPtr, + TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL); + + p->compressCommandPtr[p->compressCommandLen - 1] = NULL; + Tcl_DecrRefCount(inputData); + + if (res != TCL_OK) { + CookfsLog2(printf("return: ERROR")); + return NULL; + } + + Tcl_Obj *outputObj = Tcl_GetObjResult(p->interp); + Tcl_IncrRefCount(outputObj); + Tcl_ResetResult(p->interp); + + Cookfs_PageObj rc = Cookfs_PageObjNewFromByteArray(outputObj); + Tcl_DecrRefCount(outputObj); + if (rc == NULL) { + CookfsLog2(printf("return: ERROR (failed to alloc)")); + return NULL; + } + + CookfsLog2(printf("got encoded size: %" TCL_SIZE_MODIFIER "d", + Cookfs_PageObjSize(rc))); + + return rc; + +} diff --git a/generic/pagesComprCustom.h b/generic/pagesComprCustom.h new file mode 100644 index 0000000..daa94d3 --- /dev/null +++ b/generic/pagesComprCustom.h @@ -0,0 +1,21 @@ +/* + * pagesComprCustom.h + * + * Provides custom functions for pages compression + * + * (c) 2010-2011 Wojciech Kocjan, Pawel Salawa + * (c) 2011-2014 Wojciech Kocjan + * (c) 2024 Konstantin Kushnir + */ + +#ifndef COOKFS_PAGESCOMPRCUSTOM_H +#define COOKFS_PAGESCOMPRCUSTOM_H 1 + +int CookfsReadPageCustom(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err); + +Cookfs_PageObj CookfsWritePageCustom(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize); + +#endif /* COOKFS_PAGESCOMPRCUSTOM_H */ diff --git a/generic/pagesComprLzma.c b/generic/pagesComprLzma.c index 7664222..0073f97 100644 --- a/generic/pagesComprLzma.c +++ b/generic/pagesComprLzma.c @@ -30,41 +30,26 @@ const ISzAlloc g_CookfsLzmaAlloc = { CookfsLzmaFree }; -/* - *---------------------------------------------------------------------- - * - * CookfsWritePageLzma -- - * - 
* Write page using lzma compression - * - * Results: - * Number of bytes written; -1 in case compression failed. This function - * doesn't return -1 compressing was not efficient enough. - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -int CookfsWritePageLzma(Cookfs_Pages *p, unsigned char *bytes, int origSize) { +Cookfs_PageObj CookfsWritePageLzma(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize) +{ - CookfsLog(printf("CookfsWritePageLzma: want to compress %d bytes", + CookfsLog2(printf("want to compress %" TCL_SIZE_MODIFIER "d bytes", origSize)); - // Output size should be 4 (original size) + 5 (LZMA_PROPS_SIZE) - // + compressed data - // So, the minimum compressed size is 9 bytes + compressed data. - // Let's refuse to compress 16 bytes or less. - if (origSize <= 16) { - CookfsLog(printf("CookfsWritePageLzma: too few bytes for" - " compression")); - return -1; + // The destination buffer should contain lzma properties + compression + // data. + size_t resultSize = LZMA_PROPS_SIZE + (size_t)origSize + + (size_t)origSize / 1024 + 128; + Cookfs_PageObj rc = Cookfs_PageObjAlloc(resultSize); + if (rc == NULL) { + CookfsLog2(printf("ERROR: could not alloc output buffer")); + return NULL; } CLzmaEncProps props; LzmaEncProps_Init(&props); - props.level = p->fileCompressionLevel; + props.level = p->currentCompressionLevel; if (props.level < 0) { props.level = 0; } else if (props.level >= 255) { @@ -73,26 +58,15 @@ int CookfsWritePageLzma(Cookfs_Pages *p, unsigned char *bytes, int origSize) { props.reduceSize = origSize; LzmaEncProps_Normalize(&props); - Tcl_Obj *destObj = Tcl_NewByteArrayObj(NULL, 0); - Tcl_IncrRefCount(destObj); - - // Let's allocate a destination buffer of the same size as the source - // buffer. In case of buffer overflow, lzma should return - // SZ_ERROR_OUTPUT_EOF. Then we'll know that the compression was - // ineffective. 
- - unsigned char *dest = Tcl_SetByteArrayLength(destObj, origSize); - - // The destination buffer will also contain the original size + lzma - // properties. Calculate here what buffer size should be passed to - // LzmaEncode() - size_t destLen = origSize - 4 - LZMA_PROPS_SIZE; + SizeT propsSize = LZMA_PROPS_SIZE; + // Reduce the size of the destination buffer by the size of the lzma + // properties. + resultSize -= LZMA_PROPS_SIZE; CookfsLog(printf("CookfsWritePageLzma: call LzmaEncode() level %d ...", props.level)); - SizeT propsSize = LZMA_PROPS_SIZE; - SRes res = LzmaEncode(&dest[4 + LZMA_PROPS_SIZE], &destLen, bytes, - origSize, &props, &dest[4], &propsSize, 0, NULL, &g_CookfsLzmaAlloc, - &g_CookfsLzmaAlloc); + SRes res = LzmaEncode(&rc[LZMA_PROPS_SIZE], &resultSize, bytes, origSize, + &props, rc, &propsSize, 0, NULL, &g_CookfsLzmaAlloc, &g_CookfsLzmaAlloc); + CookfsLog(printf("CookfsWritePageLzma: got: %s", (res == SZ_OK ? "SZ_OK" : (res == SZ_ERROR_MEM ? "SZ_ERROR_MEM" : @@ -102,95 +76,47 @@ int CookfsWritePageLzma(Cookfs_Pages *p, unsigned char *bytes, int origSize) { (res == SZ_ERROR_PROGRESS ? "SZ_ERROR_PROGRESS" : (res == SZ_ERROR_THREAD ? 
"SZ_ERROR_THREAD" : "UNKNOWN"))))))))); - // Continue with the compression only when res == SZ_OK - if (res == SZ_OK) { - - // Increase output size by pre-defined bytes (origSize + lzma - // properties) - destLen += 4 + LZMA_PROPS_SIZE; - CookfsLog(printf("CookfsWritePageLzma: got encoded size: %zu", destLen)); - if (SHOULD_COMPRESS(p, (unsigned int)origSize, destLen)) { - // Write the original size to the beginning of the buffer - CookfsLog(printf("CookfsWritePageLzma: write page")); - Tcl_SetByteArrayLength(destObj, destLen); - Cookfs_Int2Binary(&origSize, dest, 1); - CookfsWriteCompression(p, COOKFS_COMPRESSION_LZMA); - Tcl_WriteObj(p->fileChannel, destObj); - } else { - res = SZ_ERROR_OUTPUT_EOF; - } - + if (res != SZ_OK) { + CookfsLog2(printf("return: ERROR")); + Cookfs_PageObjBounceRefCount(rc); + return NULL; } - Tcl_DecrRefCount(destObj); - if (res == SZ_OK) { - return destLen; - } else { - return -1; - } -} + // Increase the size of the destination buffer by the size of the lzma + // properties. + resultSize += LZMA_PROPS_SIZE; -/* - *---------------------------------------------------------------------- - * - * CookfsReadPageLzma -- - * - * Read lzma compressed page - * - * Results: - * Binary data as Tcl_Obj - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ - -Cookfs_PageObj CookfsReadPageLzma(Cookfs_Pages *p, int size, Tcl_Obj **err) { - UNUSED(err); + CookfsLog2(printf("got encoded size: %zu", resultSize)); + Cookfs_PageObjSetSize(rc, resultSize); - CookfsLog(printf("CookfsReadPageLzma: start. 
Want to read %d bytes.", size)); + return rc; - Tcl_Obj *data = Tcl_NewObj(); - Tcl_IncrRefCount(data); - int count = Tcl_ReadChars(p->fileChannel, data, size, 0); - - if (count != size) { - CookfsLog(printf("CookfsReadPageLzma: failed to read, got only %d" - " bytes", count)); - Tcl_DecrRefCount(data); - return NULL; - } - - unsigned char *source = Tcl_GetByteArrayFromObj(data, NULL); - if (source == NULL) { - CookfsLog(printf("CookfsReadPageLzma: Tcl_GetByteArrayFromObj failed")); - Tcl_DecrRefCount(data); - return NULL; - } +} - int destSize; - Cookfs_Binary2Int(source, &destSize, 1); +int CookfsReadPageLzma(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err) +{ - Cookfs_PageObj destObj = Cookfs_PageObjAlloc(destSize); - if (destObj == NULL) { - CookfsLog(printf("CookfsReadPageLzma: ERROR: failed to alloc")); - Tcl_DecrRefCount(data); - return NULL; - } + UNUSED(err); + UNUSED(p); - CookfsLog(printf("CookfsReadPageLzma: uncompressed size=%d from %d", - destSize, size)); + CookfsLog2(printf("input buffer %p (%" TCL_SIZE_MODIFIER "d bytes) ->" + " output buffer %p (%" TCL_SIZE_MODIFIER "d bytes)", + (void *)dataCompressed, sizeCompressed, + (void *)dataUncompressed, sizeUncompressed)); - CookfsLog(printf("CookfsReadPageLzma: call LzmaDecode() ...")); - SizeT destSizeResult = destSize; + SizeT destSizeResult = (SizeT)sizeUncompressed; ELzmaStatus status; // Source buffer also contains the original size and lzma properties - SizeT srcLen = size - 4 - LZMA_PROPS_SIZE; - SRes res = LzmaDecode(destObj, &destSizeResult, - &source[4 + LZMA_PROPS_SIZE], &srcLen, &source[4], LZMA_PROPS_SIZE, - LZMA_FINISH_END, &status, &g_CookfsLzmaAlloc); - CookfsLog(printf("CookfsReadPageLzma: result: %s; status: %s", + SizeT srcLen = (SizeT)sizeCompressed - LZMA_PROPS_SIZE; + + CookfsLog2(printf("call LzmaDecode() ...")); + SRes res = LzmaDecode(dataUncompressed, &destSizeResult, + 
&dataCompressed[LZMA_PROPS_SIZE], &srcLen, dataCompressed, + LZMA_PROPS_SIZE, LZMA_FINISH_END, &status, &g_CookfsLzmaAlloc); + + CookfsLog2(printf("result: %s; status: %s", (res == SZ_OK ? "SZ_OK" : (res == SZ_ERROR_DATA ? "SZ_ERROR_DATA" : (res == SZ_ERROR_MEM ? "SZ_ERROR_MEM" : @@ -202,22 +128,20 @@ Cookfs_PageObj CookfsReadPageLzma(Cookfs_Pages *p, int size, Tcl_Obj **err) { (status == LZMA_STATUS_NOT_FINISHED ? "NOT_FINISHED" : (status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK ? "MAYBE_FINISHED_WITHOUT_MARK" : "UNKNOWN-OK"))) : "UNKNOWN"))); - CookfsLog(printf("CookfsReadPageLzma: consumed bytes %zu got bytes %zu", - srcLen, destSizeResult)); - Tcl_DecrRefCount(data); + CookfsLog2(printf("consumed bytes %zu got bytes %zu", srcLen, destSizeResult)); - if ((res != SZ_OK) || (destSizeResult != (unsigned int)destSize) || - (srcLen != ((unsigned int)size - 4 - LZMA_PROPS_SIZE)) || + if ((res != SZ_OK) || (destSizeResult != (unsigned int)sizeUncompressed) || + (srcLen != ((unsigned int)sizeCompressed - LZMA_PROPS_SIZE)) || ((status != LZMA_STATUS_FINISHED_WITH_MARK) && (status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK))) { - CookfsLog(printf("CookfsReadPageLzma: failed")) - Cookfs_PageObjIncrRefCount(destObj); - Cookfs_PageObjDecrRefCount(destObj); - return NULL; + CookfsLog2(printf("return: ERROR")); + return TCL_ERROR; } - return destObj; + CookfsLog2(printf("return: ok")); + return TCL_OK; + } diff --git a/generic/pagesComprLzma.h b/generic/pagesComprLzma.h index ebbc5d0..e192857 100644 --- a/generic/pagesComprLzma.h +++ b/generic/pagesComprLzma.h @@ -11,7 +11,11 @@ #define COOKFS_DEFAULT_COMPRESSION_LEVEL_LZMA 5 -Cookfs_PageObj CookfsReadPageLzma(Cookfs_Pages *p, int size, Tcl_Obj **err); -int CookfsWritePageLzma(Cookfs_Pages *p, unsigned char *bytes, int origSize); +int CookfsReadPageLzma(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err); + +Cookfs_PageObj 
CookfsWritePageLzma(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize); #endif /* COOKFS_PAGESCOMPRBZ2_H */ diff --git a/generic/pagesComprZlib.c b/generic/pagesComprZlib.c index 13ece2b..0fa8d8d 100644 --- a/generic/pagesComprZlib.c +++ b/generic/pagesComprZlib.c @@ -3,8 +3,6 @@ * * Provides zlib functions for pages compression * - * (c) 2010-2011 Wojciech Kocjan, Pawel Salawa - * (c) 2011-2014 Wojciech Kocjan * (c) 2024 Konstantin Kushnir */ @@ -14,206 +12,147 @@ #include "pagesCompr.h" #include "pagesComprZlib.h" -/* - *---------------------------------------------------------------------- - * - * CookfsReadPageZlib -- - * - * Read zlib compressed page - * - * Results: - * Binary data as Tcl_Obj - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ +int CookfsReadPageZlib(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err) +{ -Cookfs_PageObj CookfsReadPageZlib(Cookfs_Pages *p, int size, Tcl_Obj **err) { UNUSED(err); + UNUSED(p); + + CookfsLog2(printf("input buffer %p (%" TCL_SIZE_MODIFIER "d bytes) ->" + " output buffer %p (%" TCL_SIZE_MODIFIER "d bytes)", + (void *)dataCompressed, sizeCompressed, + (void *)dataUncompressed, sizeUncompressed)); + #ifdef USE_ZLIB_TCL86 /* use Tcl 8.6 API for decompression */ - Tcl_Obj *data; - Tcl_Obj *cobj; + int res; Tcl_ZlibStream zshandle; - int count; - if (Tcl_ZlibStreamInit(NULL, TCL_ZLIB_STREAM_INFLATE, TCL_ZLIB_FORMAT_RAW, 9, NULL, &zshandle) != TCL_OK) { - CookfsLog(printf("Unable to initialize zlib")) - return NULL; - } - data = Tcl_NewObj(); - Tcl_IncrRefCount(data); - count = Tcl_ReadChars(p->fileChannel, data, size, 0); - - CookfsLog(printf("Reading - %d vs %d", count, size)) - if (count != size) { - CookfsLog(printf("Unable to read - %d != %d", count, size)) - Tcl_DecrRefCount(data); - return NULL; + CookfsLog2(printf("initialize zlib handle")) + 
res = Tcl_ZlibStreamInit(NULL, TCL_ZLIB_STREAM_INFLATE, + TCL_ZLIB_FORMAT_RAW, 9, NULL, &zshandle); + if (res != TCL_OK) { + CookfsLog2(printf("Unable to initialize zlib")); + return TCL_ERROR; } - CookfsLog(printf("Writing")) - /* write compressed information */ - if (Tcl_ZlibStreamPut(zshandle, data, TCL_ZLIB_FINALIZE) != TCL_OK) { - CookfsLog(printf("Unable to decompress - writing")) - Tcl_ZlibStreamClose(zshandle); - Tcl_DecrRefCount(data); - return NULL; + Tcl_Obj *sourceObj = Tcl_NewByteArrayObj(dataCompressed, sizeCompressed); + Tcl_IncrRefCount(sourceObj); + + CookfsLog2(printf("call Tcl_ZlibStreamPut() ...")); + res = Tcl_ZlibStreamPut(zshandle, sourceObj, TCL_ZLIB_FINALIZE); + Tcl_DecrRefCount(sourceObj); + + if (res != TCL_OK) { + CookfsLog2(printf("return: ERROR")); + Tcl_ZlibStreamClose(zshandle); + return TCL_ERROR; } - Tcl_DecrRefCount(data); - CookfsLog(printf("Reading")) - /* read resulting object */ - cobj = Tcl_NewObj(); - Tcl_IncrRefCount(cobj); + Tcl_Obj *destObj = Tcl_NewObj(); + Tcl_IncrRefCount(destObj); + + CookfsLog2(printf("reading from the handle...")); while (!Tcl_ZlibStreamEof(zshandle)) { - if (Tcl_ZlibStreamGet(zshandle, cobj, -1) != TCL_OK) { - Tcl_DecrRefCount(cobj); - Tcl_ZlibStreamClose(zshandle); - CookfsLog(printf("Unable to decompress - reading")) - return NULL; - } + if (Tcl_ZlibStreamGet(zshandle, destObj, -1) != TCL_OK) { + Tcl_DecrRefCount(destObj); + Tcl_ZlibStreamClose(zshandle); + CookfsLog2(printf("return: ERROR (while reading)")); + return TCL_ERROR; + } } - Cookfs_PageObj rc = Cookfs_PageObjNewFromByteArray(cobj); - Tcl_DecrRefCount(cobj); + Tcl_ZlibStreamClose(zshandle); - CookfsLog(printf("Returning = [%s]", rc == NULL ? 
"NULL" : "SET")) - return rc; -#else - /* use vfs::zip command for decompression */ - Tcl_Obj *prevResult; - Tcl_Obj *compressed; - Tcl_Obj *data; - int count; - - compressed = Tcl_NewObj(); - Tcl_IncrRefCount(compressed); - count = Tcl_ReadChars(p->fileChannel, compressed, size, 0); - - CookfsLog(printf("Reading - %d vs %d", count, size)) - if (count != size) { - CookfsLog(printf("Unable to read - %d != %d", count, size)) - Tcl_DecrRefCount(compressed); - return NULL; - } - p->zipCmdDecompress[p->zipCmdOffset] = compressed; - prevResult = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(prevResult); - if (Tcl_EvalObjv(p->interp, p->zipCmdLength, p->zipCmdDecompress, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { - CookfsLog(printf("Unable to decompress")) - Tcl_DecrRefCount(compressed); - return NULL; + + Tcl_Size destObjSize; + unsigned char *destStr = Tcl_GetByteArrayFromObj(destObj, &destObjSize); + + if (destObjSize != sizeUncompressed) { + CookfsLog2(printf("ERROR: result size doesn't match original size")); + Tcl_DecrRefCount(destObj); + return TCL_ERROR; } - Tcl_DecrRefCount(compressed); - data = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(data); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(prevResult); - Cookfs_PageObj rc = Cookfs_PageObjNewFromByteArray(data); - Tcl_DecrRefCount(data); - return rc; -#endif + + CookfsLog2(printf("copy data to the output buffer")); + memcpy(dataUncompressed, destStr, destObjSize); + + Tcl_DecrRefCount(destObj); + + CookfsLog2(printf("return: ok")); + return TCL_OK; + +#else +#error Only Tcl8.6 with zlib is supported. 
+#endif /* USE_ZLIB_TCL86 */ } -/* - *---------------------------------------------------------------------- - * - * CookfsWritePageZlib -- - * - * Write page using zlib compression - * - * Results: - * Number of bytes written; -1 in case compression failed or - * compressing was not efficient enough (see SHOULD_COMPRESS macro) - * - * Side effects: - * None - * - *---------------------------------------------------------------------- - */ +Cookfs_PageObj CookfsWritePageZlib(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize) +{ + + CookfsLog2(printf("want to compress %" TCL_SIZE_MODIFIER "d bytes", + origSize)); -int CookfsWritePageZlib(Cookfs_Pages *p, unsigned char *bytes, int origSize) { #ifdef USE_ZLIB_TCL86 - Tcl_Size size = origSize; /* use Tcl 8.6 API for zlib compression */ - Tcl_Obj *cobj; Tcl_ZlibStream zshandle; + int res; - int level = p->fileCompressionLevel; + int level = p->currentCompressionLevel; if (level < 1) { level = 1; } else if (level >= 255) { level = 9; } - if (Tcl_ZlibStreamInit(NULL, TCL_ZLIB_STREAM_DEFLATE, TCL_ZLIB_FORMAT_RAW, level, NULL, &zshandle) != TCL_OK) { - CookfsLog(printf("Cookfs_WritePage: Tcl_ZlibStreamInit failed!")) - return -1; + CookfsLog2(printf("initialize zlib handle")) + res = Tcl_ZlibStreamInit(NULL, TCL_ZLIB_STREAM_DEFLATE, + TCL_ZLIB_FORMAT_RAW, level, NULL, &zshandle); + if (res != TCL_OK) { + CookfsLog2(printf("ERROR: Tcl_ZlibStreamInit failed")); + return NULL; } - Tcl_Obj *data = Tcl_NewByteArrayObj(bytes, origSize); - Tcl_IncrRefCount(data); - if (Tcl_ZlibStreamPut(zshandle, data, TCL_ZLIB_FINALIZE) != TCL_OK) { - Tcl_DecrRefCount(data); - Tcl_ZlibStreamClose(zshandle); - CookfsLog(printf("Cookfs_WritePage: Tcl_ZlibStreamPut failed")) - return -1; - } - Tcl_DecrRefCount(data); - - cobj = Tcl_NewObj(); - if (Tcl_ZlibStreamGet(zshandle, cobj, -1) != TCL_OK) { - Tcl_IncrRefCount(cobj); - Tcl_DecrRefCount(cobj); - Tcl_ZlibStreamClose(zshandle); - CookfsLog(printf("Cookfs_WritePage: Tcl_ZlibStreamGet 
failed")) - return -1; + Tcl_Obj *inputData = Tcl_NewByteArrayObj(bytes, origSize); + Tcl_IncrRefCount(inputData); + + CookfsLog2(printf("call Tcl_ZlibStreamPut() ...")); + res = Tcl_ZlibStreamPut(zshandle, inputData, TCL_ZLIB_FINALIZE); + Tcl_DecrRefCount(inputData); + if (res != TCL_OK) { + CookfsLog2(printf("ERROR: failed")); + Tcl_ZlibStreamClose(zshandle); + return NULL; } + + Tcl_Obj *outputObj = Tcl_NewObj(); + Tcl_IncrRefCount(outputObj); + + CookfsLog2(printf("reading from the handle...")); + res = Tcl_ZlibStreamGet(zshandle, outputObj, -1); Tcl_ZlibStreamClose(zshandle); - Tcl_IncrRefCount(cobj); - Tcl_GetByteArrayFromObj(cobj, &size); - - if (SHOULD_COMPRESS(p, origSize, size)) { - CookfsWriteCompression(p, COOKFS_COMPRESSION_ZLIB); - Tcl_WriteObj(p->fileChannel, cobj); - } else { - size = -1; + if (res != TCL_OK) { + CookfsLog2(printf("return: ERROR (while reading)")); + Tcl_DecrRefCount(outputObj); + return NULL; } - Tcl_DecrRefCount(cobj); -#else - int size = origSize; - /* use vfs::zip command for compression */ - Tcl_Obj *prevResult; - Tcl_Obj *compressed; - - Tcl_Obj data = Tcl_NewByteArrayObj(bytes, origSize); - Tcl_IncrRefCount(data); - p->zipCmdCompress[p->zipCmdOffset] = data; - prevResult = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(prevResult); - if (Tcl_EvalObjv(p->interp, p->zipCmdLength, p->zipCmdCompress, TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL) != TCL_OK) { - CookfsLog(printf("Unable to compress: %s", Tcl_GetString(Tcl_GetObjResult(p->interp)))); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(data); - return -1; - } - Tcl_DecrRefCount(data); - compressed = Tcl_GetObjResult(p->interp); - Tcl_IncrRefCount(compressed); - Tcl_SetObjResult(p->interp, prevResult); - Tcl_DecrRefCount(prevResult); - Tcl_GetByteArrayFromObj(compressed, &size); - - if (SHOULD_COMPRESS(p, origSize, size)) { - CookfsWriteCompression(p, COOKFS_COMPRESSION_ZLIB); - Tcl_WriteObj(p->fileChannel, compressed); - } else { - size = -1; + + Cookfs_PageObj rc = 
Cookfs_PageObjNewFromByteArray(outputObj); + Tcl_DecrRefCount(outputObj); + if (rc == NULL) { + CookfsLog2(printf("return: ERROR (failed to alloc)")); + return NULL; } - Tcl_DecrRefCount(compressed); -#endif - return size; + + CookfsLog2(printf("got encoded size: %" TCL_SIZE_MODIFIER "d", + Cookfs_PageObjSize(rc))); + + return rc; + +#else +#error Only Tcl8.6 with zlib is supported. +#endif /* USE_ZLIB_TCL86 */ } diff --git a/generic/pagesComprZlib.h b/generic/pagesComprZlib.h index b2deb50..3b848c5 100644 --- a/generic/pagesComprZlib.h +++ b/generic/pagesComprZlib.h @@ -13,7 +13,11 @@ #define COOKFS_DEFAULT_COMPRESSION_LEVEL_ZLIB 6 -Cookfs_PageObj CookfsReadPageZlib(Cookfs_Pages *p, int size, Tcl_Obj **err); -int CookfsWritePageZlib(Cookfs_Pages *p, unsigned char *bytes, int origSize); +int CookfsReadPageZlib(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err); + +Cookfs_PageObj CookfsWritePageZlib(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize); #endif /* COOKFS_PAGESCOMPRZLIB_H */ diff --git a/generic/pagesComprZstd.c b/generic/pagesComprZstd.c index 229a14f..e2b9c41 100644 --- a/generic/pagesComprZstd.c +++ b/generic/pagesComprZstd.c @@ -13,133 +13,79 @@ #include "pagesCompr.h" #include "pagesComprZstd.h" -int CookfsWritePageZstd(Cookfs_Pages *p, unsigned char *bytes, int origSize) { +Cookfs_PageObj CookfsWritePageZstd(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize) +{ - CookfsLog(printf("CookfsWritePageZstd: want to compress %d bytes", + CookfsLog2(printf("want to compress %" TCL_SIZE_MODIFIER "d bytes", origSize)); - size_t resultSize; - - Tcl_Obj *destObj = Tcl_NewByteArrayObj(NULL, 0); - Tcl_IncrRefCount(destObj); - - size_t destSize = ZSTD_compressBound((size_t) origSize); - if (ZSTD_isError(destSize)) { - CookfsLog(printf("CookfsWritePageZstd: ZSTD_compressBound()" - " failed with: %s", ZSTD_getErrorName(destSize))); - resultSize = 
0; - goto done; + size_t resultSize = ZSTD_compressBound((size_t) origSize); + if (ZSTD_isError(resultSize)) { + CookfsLog2(printf("ZSTD_compressBound() failed with: %s", + ZSTD_getErrorName(resultSize))); + return NULL; + } + Cookfs_PageObj rc = Cookfs_PageObjAlloc(resultSize); + if (rc == NULL) { + CookfsLog2(printf("ERROR: could not alloc output buffer")); + return NULL; } - // Allocate additional 4 bytes for uncompressed page size - unsigned char *dest = Tcl_SetByteArrayLength(destObj, destSize + 4); - - int level = p->fileCompressionLevel; - + int level = p->currentCompressionLevel; if (level < 1) { level = 1; } else if (level > 22) { level = 22; } - CookfsLog(printf("CookfsWritePageZstd: call ZSTD_compress() level %d ...", - level)); - - // Leave 4 bytes in the buffer for uncompressed page size - resultSize = ZSTD_compress(dest + 4, destSize, bytes, origSize, level); + CookfsLog2(printf("call ZSTD_compress() level %d ...", level)); + resultSize = ZSTD_compress(rc, resultSize, bytes, origSize, level); if (ZSTD_isError(resultSize)) { - CookfsLog(printf("CookfsWritePageZstd: call got error: %s", - ZSTD_getErrorName(resultSize))); - resultSize = 0; - goto done; - } - - // Add 4 bytes to resultSize, which is the size of the uncompressed page - resultSize += 4; - CookfsLog(printf("CookfsWritePageZstd: got encoded size: %zu", resultSize)); - if (SHOULD_COMPRESS(p, (unsigned int)origSize, resultSize)) { - CookfsLog(printf("CookfsWritePageZstd: write page")); - Tcl_SetByteArrayLength(destObj, resultSize); - // Write the original size to the beginning of the buffer - Cookfs_Int2Binary(&origSize, dest, 1); - CookfsWriteCompression(p, COOKFS_COMPRESSION_ZSTD); - Tcl_WriteObj(p->fileChannel, destObj); - } else { - CookfsLog(printf("CookfsWritePageZstd: compression is inefficient")); - resultSize = 0; - } - -done: - Tcl_DecrRefCount(destObj); - if (resultSize) { - return resultSize; - } else { - return -1; + CookfsLog2(printf("got error: %s", 
ZSTD_getErrorName(resultSize))); + Cookfs_PageObjBounceRefCount(rc); + return NULL; } -} - -Cookfs_PageObj CookfsReadPageZstd(Cookfs_Pages *p, int size, Tcl_Obj **err) { - UNUSED(err); - CookfsLog(printf("CookfsReadPageZstd: start. Want to read %d bytes.", size)); + CookfsLog2(printf("got encoded size: %zu", resultSize)); + Cookfs_PageObjSetSize(rc, resultSize); - Tcl_Obj *data = Tcl_NewObj(); - Tcl_IncrRefCount(data); - int count = Tcl_ReadChars(p->fileChannel, data, size, 0); + return rc; - if (count != size) { - CookfsLog(printf("CookfsReadPageZstd: failed to read, got only %d" - " bytes", count)); - Tcl_DecrRefCount(data); - return NULL; - } - - unsigned char *source = Tcl_GetByteArrayFromObj(data, NULL); - if (source == NULL) { - CookfsLog(printf("CookfsReadPageZstd: Tcl_GetByteArrayFromObj failed")); - Tcl_DecrRefCount(data); - return NULL; - } +} - int destSize; - Cookfs_Binary2Int(source, &destSize, 1); +int CookfsReadPageZstd(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err) +{ - Cookfs_PageObj destObj = Cookfs_PageObjAlloc(destSize); - if (destObj == NULL) { - CookfsLog(printf("CookfsReadPageZstd: ERROR: failed to alloc")); - Tcl_DecrRefCount(data); - return NULL; - } + UNUSED(err); + UNUSED(p); - CookfsLog(printf("CookfsReadPageZstd: uncompressed size=%d from %d", - destSize, size)); + CookfsLog2(printf("input buffer %p (%" TCL_SIZE_MODIFIER "d bytes) ->" + " output buffer %p (%" TCL_SIZE_MODIFIER "d bytes)", + (void *)dataCompressed, sizeCompressed, + (void *)dataUncompressed, sizeUncompressed)); - CookfsLog(printf("CookfsReadPageZstd: call ZSTD_decompress() ...")); - size_t resultSize = ZSTD_decompress(destObj, destSize, source + 4, - size - 4); - Tcl_DecrRefCount(data); + CookfsLog2(printf("call ZSTD_decompress() ...")); + size_t resultSize = ZSTD_decompress(dataUncompressed, sizeUncompressed, + dataCompressed, sizeCompressed); if 
(ZSTD_isError(resultSize)) { - CookfsLog(printf("CookfsReadPageZstd: call got error: %s", - ZSTD_getErrorName(resultSize))); - goto unpackError; + CookfsLog2(printf("call got error: %s", ZSTD_getErrorName(resultSize))); + return TCL_ERROR; } - CookfsLog(printf("CookfsReadPageZstd: got %zu bytes", resultSize)); + CookfsLog2(printf("got %zu bytes", resultSize)); - if (resultSize != (unsigned int)destSize) { - CookfsLog(printf("CookfsReadPageZstd: ERROR: result size doesn't" - " match original size")); - goto unpackError; + if (resultSize != (size_t)sizeUncompressed) { + CookfsLog2(printf("ERROR: result size doesn't match original size")); + return TCL_ERROR; } - return destObj; - -unpackError: + CookfsLog2(printf("return: ok")); + return TCL_OK; - Cookfs_PageObjIncrRefCount(destObj); - Cookfs_PageObjDecrRefCount(destObj); - return NULL; } diff --git a/generic/pagesComprZstd.h b/generic/pagesComprZstd.h index 7bb1874..091c994 100644 --- a/generic/pagesComprZstd.h +++ b/generic/pagesComprZstd.h @@ -11,7 +11,11 @@ #define COOKFS_DEFAULT_COMPRESSION_LEVEL_ZSTD 3 -Cookfs_PageObj CookfsReadPageZstd(Cookfs_Pages *p, int size, Tcl_Obj **err); -int CookfsWritePageZstd(Cookfs_Pages *p, unsigned char *bytes, int origSize); +int CookfsReadPageZstd(Cookfs_Pages *p, unsigned char *dataCompressed, + Tcl_Size sizeCompressed, unsigned char *dataUncompressed, + Tcl_Size sizeUncompressed, Tcl_Obj **err); + +Cookfs_PageObj CookfsWritePageZstd(Cookfs_Pages *p, unsigned char *bytes, + Tcl_Size origSize); #endif /* COOKFS_PAGESCOMPRBZ2_H */ diff --git a/generic/pagesInt.h b/generic/pagesInt.h index 9f05b45..6cbb7e3 100644 --- a/generic/pagesInt.h +++ b/generic/pagesInt.h @@ -6,6 +6,8 @@ #ifndef COOKFS_PAGESINT_H #define COOKFS_PAGESINT_H 1 +#include "pgindex.h" + enum { COOKFS_LASTOP_UNKNOWN = 0, COOKFS_LASTOP_READ, @@ -17,6 +19,13 @@ enum { COOKFS_HASH_CRC32 }; +typedef enum { + COOKFS_ENCRYPT_NONE = 0, + COOKFS_ENCRYPT_FILE, + COOKFS_ENCRYPT_KEY, + COOKFS_ENCRYPT_KEY_INDEX +} 
Cookfs_EncryptType; + #define COOKFS_SIGNATURE_LENGTH 7 #define COOKFS_MAX_CACHE_PAGES 256 #define COOKFS_DEFAULT_CACHE_PAGES 4 @@ -65,8 +74,11 @@ struct _Cookfs_Pages { /* file */ int isAside; int fileReadOnly; - int fileCompression; - int fileCompressionLevel; + int baseCompression; + int baseCompressionLevel; + int currentCompression; + int currentCompressionLevel; + Cookfs_EncryptType encryption; unsigned char fileSignature[COOKFS_SIGNATURE_LENGTH]; int isFirstWrite; unsigned char fileStamp[COOKFS_SIGNATURE_LENGTH]; @@ -83,11 +95,7 @@ struct _Cookfs_Pages { /* pages */ Tcl_WideInt dataInitialOffset; - Tcl_WideInt dataAllPagesSize; - int dataNumPages; - int dataPagesDataSize; - int *dataPagesSize; - unsigned char *dataPagesMD5; + Cookfs_PgIndex *pagesIndex; Cookfs_PageObj dataIndex; int dataPagesIsAside; Cookfs_Pages *dataAsidePages; diff --git a/generic/pgindex.c b/generic/pgindex.c new file mode 100644 index 0000000..964d1de --- /dev/null +++ b/generic/pgindex.c @@ -0,0 +1,360 @@ +/* + * pgindex.c + * + * Provides functions creating and managing a Cookfs_PgIndex object + * + * (c) 2024 Konstantin Kushnir + */ + +#include "cookfs.h" +#include "pgindex.h" + +// How many entries are allocated at one time to reduce re-allocation +// of memory. 
+#define COOKFS_PGINDEX_ALLOC_SIZE 256
+
+// A single page record of the index.
+typedef struct Cookfs_PgIndexEntry {
+    int compression;            // compression id of the page
+    int compressionLevel;       // compression level of the page
+    int encryption;             // encryption marker of the page
+    unsigned char hashMD5[16];  // MD5 digest stored for the page
+    int sizeCompressed;         // stored size; a negative value means that
+                                // the size is not known yet
+    int sizeUncompressed;       // size of the page before compression
+    Tcl_WideInt offset;         // cached start offset; a negative value
+                                // means that it is not calculated yet
+} Cookfs_PgIndexEntry;
+
+// The index object: an array of page entries.
+struct _Cookfs_PgIndex {
+    int pagesCount;             // number of entries in use
+    int pagesAllocated;         // number of entries allocated in data
+    Cookfs_PgIndexEntry *data;
+};
+
+// Panics if the stored size of the given page entry is unknown. Offsets
+// of the following pages cannot be calculated without it.
+static void Cookfs_PgIndexPanicIfSizeUnknown(Cookfs_PgIndexEntry *pge,
+    int num)
+{
+    if (pge->sizeCompressed < 0) {
+        Tcl_Panic("could not calculate end offset of the page #%d because its"
+            " size is unknown", num);
+    }
+}
+
+// Returns a pointer to the 16-byte MD5 digest of the page #num. The
+// pointer refers to the storage owned by the index.
+unsigned char *Cookfs_PgIndexGetHashMD5(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num < pgi->pagesCount);
+    return pgi->data[num].hashMD5;
+}
+
+// Returns the compression id of the page #num.
+int Cookfs_PgIndexGetCompression(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num < pgi->pagesCount);
+    return pgi->data[num].compression;
+}
+
+// Returns the compression level of the page #num.
+int Cookfs_PgIndexGetCompressionLevel(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num < pgi->pagesCount);
+    return pgi->data[num].compressionLevel;
+}
+
+// Returns the stored (compressed) size of the page #num. The value is
+// returned as is, i.e. it may be negative if the size is not known yet.
+int Cookfs_PgIndexGetSizeCompressed(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num < pgi->pagesCount);
+    return pgi->data[num].sizeCompressed;
+}
+
+// Returns the uncompressed size of the page #num.
+int Cookfs_PgIndexGetSizeUncompressed(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num < pgi->pagesCount);
+    return pgi->data[num].sizeUncompressed;
+}
+
+// Returns the offset of the first byte after the page #num, i.e. its start
+// offset plus its stored size. Panics if the stored size of the page (or of
+// a preceding page that is needed to calculate the offset) is unknown.
+Tcl_WideInt Cookfs_PgIndexGetEndOffset(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num < pgi->pagesCount);
+    Cookfs_PgIndexEntry *pge = pgi->data + num;
+    // Check the size of this page first, so that the panic message names
+    // this page and not one of its predecessors.
+    Cookfs_PgIndexPanicIfSizeUnknown(pge, num);
+    return Cookfs_PgIndexGetStartOffset(pgi, num) + pge->sizeCompressed;
+}
+
+// Returns the start offset of the page #num. It is allowed to pass
+// num == number of pages; in this case the end offset of the last page is
+// returned (or 0 if there are no pages at all).
+//
+// Offsets that had to be calculated are cached in the entries of existing
+// pages. The calculation is iterative on purpose: its stack usage does
+// not depend on how many pages have no cached offset yet.
+Tcl_WideInt Cookfs_PgIndexGetStartOffset(Cookfs_PgIndex *pgi, int num) {
+    assert(num >= 0 && num <= pgi->pagesCount);
+
+    // Walk back to the nearest page with a known start offset. Page #0
+    // always starts at offset 0, so the walk stops there at the latest.
+    // This also covers the case when the total number of pages is zero and
+    // the offset of the 0th page is requested: no entry is touched then.
+    // Each page we step over must have a known size, otherwise it is not
+    // possible to calculate the offsets of the pages after it.
+    int base = num;
+    while (base > 0 &&
+        (base >= pgi->pagesCount || pgi->data[base].offset < 0))
+    {
+        Cookfs_PgIndexPanicIfSizeUnknown(pgi->data + (base - 1), base - 1);
+        base--;
+    }
+
+    Tcl_WideInt offset = (base == 0 ? 0 : pgi->data[base].offset);
+
+    // Walk forward again and accumulate the sizes. Store the calculated
+    // offsets in the entries of existing pages, so the next lookup is
+    // immediate. There is no entry to update for num == number of pages.
+    for (int i = base; i < num; i++) {
+        offset += pgi->data[i].sizeCompressed;
+        if ((i + 1) < pgi->pagesCount) {
+            pgi->data[i + 1].offset = offset;
+        }
+    }
+
+    return offset;
+}
+
+// Sets the compression id and the compression level of the page #num.
+void Cookfs_PgIndexSetCompression(Cookfs_PgIndex *pgi, int num,
+    int compression, int compressionLevel)
+{
+    // Validate the page number in the same way as the getters do. Without
+    // this check a wrong number results in a write outside of the entries
+    // that are in use.
+    assert(num >= 0 && num < pgi->pagesCount);
+    Cookfs_PgIndexEntry *pge = pgi->data + num;
+    CookfsLog2(printf("[page#%d] set compression %d, compression level %d",
+        num, compression, compressionLevel));
+    pge->compression = compression;
+    pge->compressionLevel = compressionLevel;
+}
+
+// Sets the stored (compressed) size of the page #num.
+// NOTE(review): start offsets that are already cached for the following
+// pages are not recalculated here - confirm that callers set the size
+// before these offsets are requested.
+void Cookfs_PgIndexSetSizeCompressed(Cookfs_PgIndex *pgi, int num,
+    int sizeCompressed)
+{
+    // See Cookfs_PgIndexSetCompression() about this check.
+    assert(num >= 0 && num < pgi->pagesCount);
+    Cookfs_PgIndexEntry *pge = pgi->data + num;
+    CookfsLog2(printf("[page#%d] set compressed size %d", num,
+        sizeCompressed));
+    pge->sizeCompressed = sizeCompressed;
+}
+
+// Searches for a page with the given uncompressed size and MD5 digest.
+// The search starts from the page number stored in *index and goes up to
+// the last page. If a page is found, its number is stored in *index and 1
+// is returned. Otherwise 0 is returned and *index is left unchanged.
+int Cookfs_PgIndexSearchByMD5(Cookfs_PgIndex *pgi, unsigned char *hashMD5,
+    int sizeUncompressed, int *index)
+{
+    int currentIndex = *index;
+    Cookfs_PgIndexEntry *pge = pgi->data + currentIndex;
+    while (currentIndex < pgi->pagesCount) {
+        // Compare the sizes first as it is cheaper than comparing
+        // the digests.
+        if (pge->sizeUncompressed == sizeUncompressed &&
+            memcmp(pge->hashMD5, hashMD5, 16) == 0)
+        {
+            *index = currentIndex;
+            return 1;
+        }
+        currentIndex++;
+        pge++;
+    }
+    return 0;
+}
+ +Cookfs_PgIndex *Cookfs_PgIndexInit(unsigned int initialPagesCount) { + + CookfsLog2(printf("enter, want to allocate %u page entries", + initialPagesCount)); + + unsigned int allocPagesCount = initialPagesCount; + + if (allocPagesCount < COOKFS_PGINDEX_ALLOC_SIZE) { + allocPagesCount = COOKFS_PGINDEX_ALLOC_SIZE; + CookfsLog2(printf("extend the requested page entries count to %u", + allocPagesCount)); + } + + Cookfs_PgIndex *pgi = (Cookfs_PgIndex *)ckalloc(sizeof(struct _Cookfs_PgIndex)); + if (pgi == NULL) { + CookfsLog2(printf("ERROR: failed to alloc Cookfs_PgIndex")); + return NULL; + } + + pgi->data = (Cookfs_PgIndexEntry *)ckalloc(sizeof(Cookfs_PgIndexEntry) + * allocPagesCount); + if (pgi->data == NULL) { + CookfsLog2(printf("ERROR: failed to alloc Cookfs_PgIndex->data")); + ckfree(pgi); + return NULL; + } + + pgi->pagesCount = initialPagesCount; + pgi->pagesAllocated = allocPagesCount; + + CookfsLog2(printf("return: ok [%p]", (void *)pgi)); + return pgi; + +} + +void Cookfs_PgIndexFini(Cookfs_PgIndex *pgi) { + CookfsLog2(printf("release [%p]", (void *)pgi)); + ckfree(pgi->data); + ckfree(pgi); +} + +#define PRINTF_MD5_FORMAT "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" +#define PRINTF_MD5_VAR(x) (x)[0] ,(x)[1], (x)[2], (x)[3], \ + (x)[4] ,(x)[5], (x)[6], (x)[7], \ + (x)[8] ,(x)[9], (x)[10],(x)[11], \ + (x)[12],(x)[13],(x)[14],(x)[15] + +int Cookfs_PgIndexAddPage(Cookfs_PgIndex *pgi, int compression, + int compressionLevel, int encryption, int sizeCompressed, + int sizeUncompressed, unsigned char *hashMD5) +{ + + CookfsLog2(printf("enter - compression: %d, level: %d, encryption: %d," + " sizeCompressed: %d, sizeUncompressed: %d, MD5[" PRINTF_MD5_FORMAT + "]", compression, compressionLevel, encryption, sizeCompressed, + sizeUncompressed, PRINTF_MD5_VAR(hashMD5))); + + if (pgi->pagesCount >= pgi->pagesAllocated) { + + CookfsLog2(printf("need to realloc from %d to %d", pgi->pagesAllocated, + pgi->pagesAllocated + 
COOKFS_PGINDEX_ALLOC_SIZE)); + + pgi->pagesAllocated += COOKFS_PGINDEX_ALLOC_SIZE; + + pgi->data = (Cookfs_PgIndexEntry *)ckrealloc(pgi->data, + sizeof(Cookfs_PgIndexEntry) * pgi->pagesAllocated); + + if (pgi->data == NULL) { + Tcl_Panic("Cookfs_PgIndexAddPage() failed to alloc"); + return -1; + } + + } + + Cookfs_PgIndexEntry *pge = pgi->data + pgi->pagesCount; + + pge->compression = compression; + pge->compressionLevel = compressionLevel; + pge->encryption = encryption; + pge->sizeCompressed = sizeCompressed; + pge->sizeUncompressed = sizeUncompressed; + + if (pgi->pagesCount == 0) { + pge->offset = 0; + } else { + // The actual offset will be calculated later when it is needed. + pge->offset = -1; + } + + memcpy(pge->hashMD5, hashMD5, 16); + + CookfsLog2(printf("return: ok - page#%d", pgi->pagesCount)); + + // Return the current value of pagesCount as the page index, and then + // increment the total number of pages. + return pgi->pagesCount++; + +} + +// 1 byte - compression +// 1 byte - compression level +// 1 byte - encryption +// 4 bytes - sizeCompressed +// 4 bytes - sizeUncompressed +// 16 bytes - hashMD5 +// Total: 27 bytes +#define COOKFS_PGINDEX_RECORD_SIZE 27 + +Cookfs_PgIndex *Cookfs_PgIndexImport(unsigned char *bytes, int size, + Tcl_Obj **err) +{ + + CookfsLog2(printf("import from buffer %p size %d", (void *)bytes, + size)); + + // We need at least 4 bytes in the buffer for number of page entries. + if (size < 4) { + CookfsLog2(printf("ERROR: the buffet size is less than 4 bytes")); + goto malformed; + } + + unsigned int pagesCount; + Cookfs_Binary2Int(bytes, (int *)&pagesCount, 1); + bytes += 4; + CookfsLog2(printf("total number of pages: %u", pagesCount)); + + // The buffer must have exactly 4 (pagesCount) + + // COOKFS_PGINDEX_RECORD_SIZE*pagesCount bytes. Otherwise, consider it + // malformed. 
+ if ((unsigned int)size != (4 + pagesCount * COOKFS_PGINDEX_RECORD_SIZE)) { + CookfsLog2(printf("ERROR: not expected amount of bytes in buffer," + " expected: 4 + number_of_pages * %d = %d", + (int)COOKFS_PGINDEX_RECORD_SIZE, + (int)(4 + pagesCount * COOKFS_PGINDEX_RECORD_SIZE))); + goto malformed; + } + + goto not_malformed; + +malformed: + if (err != NULL) { + *err = Tcl_NewStringObj("pages entry index is malformed", -1); + } + return NULL; + +not_malformed: ; // empty statement + + Cookfs_PgIndex *pgi = Cookfs_PgIndexInit(pagesCount); + if (pgi == NULL) { + if (err != NULL) { + *err = Tcl_ObjPrintf("failed to alloc pages index with %u entries", + pagesCount); + } + return NULL; + } + + Cookfs_PgIndexEntry *pge = pgi->data; + Tcl_WideInt offset = 0; + for (unsigned int i = 0; i < pagesCount; i++, pge++) { + + pge->compression = bytes[0 + i]; + pge->compressionLevel = bytes[(1 * pagesCount) + i]; + pge->encryption = bytes[(2 * pagesCount) + i]; + Cookfs_Binary2Int(&bytes[(3 * pagesCount) + (i * 4)], &pge->sizeCompressed, 1); + Cookfs_Binary2Int(&bytes[(7 * pagesCount) + (i * 4)], &pge->sizeUncompressed, 1); + memcpy(pge->hashMD5, &bytes[(11 * pagesCount) + (i * 16)], 16); + + pge->offset = offset; + offset += pge->sizeCompressed; + + CookfsLog2(printf("import entry #%u - compression: %d, level: %d," + " encryption: %d, sizeCompressed: %d, sizeUncompressed: %d," + " MD5[" PRINTF_MD5_FORMAT "]", i, pge->compression, + pge->compressionLevel, pge->encryption, pge->sizeCompressed, + pge->sizeUncompressed, PRINTF_MD5_VAR(pge->hashMD5))); + + } + + CookfsLog2(printf("return: ok")); + + return pgi; + +} + +Cookfs_PageObj Cookfs_PgIndexExport(Cookfs_PgIndex *pgi) { + + unsigned int pagesCount = pgi->pagesCount; + + CookfsLog2(printf("enter, export %u page entries", pagesCount)); + + Cookfs_PageObj pgo = Cookfs_PageObjAlloc(4 + pagesCount * + COOKFS_PGINDEX_RECORD_SIZE); + if (pgo == NULL) { + Tcl_Panic("Cookfs_PgIndexExport(): could not alloc page object"); + } + + 
Cookfs_Int2Binary((int *)&pagesCount, pgo, 1); + + Cookfs_PgIndexEntry *pge = pgi->data; + for (unsigned int i = 0; i < pagesCount; i++, pge++) { + // The first 4 bytes in the buffer are for specifying the total number + // of pages. Thus, we need to add 4 for each offset. + pgo[4 + 0 + i] = pge->compression; + pgo[4 + (1 * pagesCount) + i] = pge->compressionLevel; + pgo[4 + (2 * pagesCount) + i] = pge->encryption; + Cookfs_Int2Binary(&pge->sizeCompressed, &pgo[4 + (3 * pagesCount) + (i * 4)], 1); + Cookfs_Int2Binary(&pge->sizeUncompressed, &pgo[4 + (7 * pagesCount) + (i * 4)], 1); + memcpy(&pgo[4 + (11 * pagesCount) + (i * 16)], pge->hashMD5, 16); + + CookfsLog2(printf("export entry #%u - compression: %d, level: %d," + " encryption: %d, sizeCompressed: %d, sizeUncompressed: %d," + " MD5[" PRINTF_MD5_FORMAT "]", i, pge->compression, + pge->compressionLevel, pge->encryption, pge->sizeCompressed, + pge->sizeUncompressed, PRINTF_MD5_VAR(pge->hashMD5))); + } + + CookfsLog2(printf("return: ok")); + + return pgo; + +} + +int Cookfs_PgIndexGetLength(Cookfs_PgIndex *pgi) { + return pgi->pagesCount; +} diff --git a/generic/pgindex.h b/generic/pgindex.h new file mode 100644 index 0000000..2a2fbd5 --- /dev/null +++ b/generic/pgindex.h @@ -0,0 +1,43 @@ +/* + (c) 2024 Konstantin Kushnir +*/ + +#ifndef COOKFS_PGINDEX_H +#define COOKFS_PGINDEX_H 1 + +#include "pageObj.h" + +typedef struct _Cookfs_PgIndex Cookfs_PgIndex; + +Cookfs_PgIndex *Cookfs_PgIndexInit(unsigned int initialPagesCount); +void Cookfs_PgIndexFini(Cookfs_PgIndex *pgi); + +int Cookfs_PgIndexAddPage(Cookfs_PgIndex *pgi, int compression, + int compressionLevel, int encryption, int sizeCompressed, + int sizeUncompressed, unsigned char *hashMD5); + +int Cookfs_PgIndexGetLength(Cookfs_PgIndex *pgi); + +int Cookfs_PgIndexSearchByMD5(Cookfs_PgIndex *pgi, unsigned char *hashMD5, + int sizeUncompressed, int *index); + +Tcl_WideInt Cookfs_PgIndexGetEndOffset(Cookfs_PgIndex *pgi, int num); +Tcl_WideInt 
Cookfs_PgIndexGetStartOffset(Cookfs_PgIndex *pgi, int num); + +void Cookfs_PgIndexSetCompression(Cookfs_PgIndex *pgi, int num, + int compression, int compressionLevel); +void Cookfs_PgIndexSetSizeCompressed(Cookfs_PgIndex *pgi, int num, + int sizeCompressed); + +int Cookfs_PgIndexGetSizeUncompressed(Cookfs_PgIndex *pgi, int num); +int Cookfs_PgIndexGetSizeCompressed(Cookfs_PgIndex *pgi, int num); +int Cookfs_PgIndexGetCompressionLevel(Cookfs_PgIndex *pgi, int num); +int Cookfs_PgIndexGetCompression(Cookfs_PgIndex *pgi, int num); +unsigned char *Cookfs_PgIndexGetHashMD5(Cookfs_PgIndex *pgi, int num); + +Cookfs_PgIndex *Cookfs_PgIndexImport(unsigned char *bytes, int size, + Tcl_Obj **err); + +Cookfs_PageObj Cookfs_PgIndexExport(Cookfs_PgIndex *pgi); + +#endif /* COOKFS_PGINDEX_H */ diff --git a/generic/vfsCmd.c b/generic/vfsCmd.c index 228454c..bbc8910 100644 --- a/generic/vfsCmd.c +++ b/generic/vfsCmd.c @@ -440,20 +440,22 @@ int Cookfs_Mount(Tcl_Interp *interp, Tcl_Obj *archive, Tcl_Obj *local, #ifdef COOKFS_USETCLCMDS if (props->pagesobject == NULL) { #endif - int oCompression; - int oCompressionLevel; - if (Cookfs_CompressionFromObj(interp, props->compression, - &oCompression, &oCompressionLevel) != TCL_OK) - { - return TCL_ERROR; + int oCompression = -1; + int oCompressionLevel = -1; + if (props->compression != NULL) { + if (Cookfs_CompressionFromObj(interp, props->compression, + &oCompression, &oCompressionLevel) != TCL_OK) + { + return TCL_ERROR; + } } CookfsLog(printf("Cookfs_Mount: creating the pages object")); // TODO: pass a pointer to err variable instead of NULL and // handle the corresponding error message pages = Cookfs_PagesInit(interp, archiveActual, props->readonly, - oCompression, oCompressionLevel, NULL, - (props->endoffset == -1 ? 0 : 1), props->endoffset, 0, + oCompression, oCompressionLevel, oCompression, oCompressionLevel, + NULL, (props->endoffset == -1 ? 
0 : 1), props->endoffset, 0, props->asyncdecompressqueuesize, props->compresscommand, props->decompresscommand, props->asynccompresscommand, props->asyncdecompresscommand, diff --git a/scripts/pages.tcl b/scripts/pages.tcl index 45804d3..baabddc 100644 --- a/scripts/pages.tcl +++ b/scripts/pages.tcl @@ -24,7 +24,8 @@ proc cookfs::tcl::pages {args} { firstwrite 0 readonly 0 cachelist {} - compression zlib + compression {} + base_compression {} compresscommand "" asynccompresscommand "" asyncdecompresscommand "" @@ -32,8 +33,8 @@ proc cookfs::tcl::pages {args} { cachesize 8 indexdata "" endoffset "" - cfsname "CFS0002" - cfsstamp "CFSS002" + cfsname "CFS0003" + cfsstamp "CFSS003" lastop read hash crc32 alwayscompress 0 @@ -95,15 +96,19 @@ proc cookfs::tcl::pages {args} { } } - if {[catch { - set c(cid) [pages::compression2cid $c(compression)] - } err]} { - error $err $err - } if {[llength $args] != 1} { error "No filename provided" } + if { $c(compression) ne "" } { + if {[catch { + set c(cid) [pages::compression2cid $c(compression)] + set c(cid_base) $c(cid) + } err]} { + error $err $err + } + } + if {[catch { if {$c(readonly)} { set c(fh) [open [lindex $args 0] r] @@ -144,10 +149,11 @@ proc cookfs::tcl::pages {args} { seek $c(fh) $c(endoffset) start } else { set c(endoffset) [tell $c(fh)] + set msg "$pages::errorMessage: signature not found" } } - if {[pages::readIndex $name msg]} { + if {![info exists msg] && [pages::readIndex $name msg]} { set c(haschanged) 0 set c(indexChanged) 0 } else { @@ -168,6 +174,18 @@ proc cookfs::tcl::pages {args} { set c(indexChanged) 1 set c(idx.md5list) {} set c(idx.sizelist) {} + set c(idx.comprlist) {} + set c(idx.comprlevellist) {} + set c(idx.encryptionlist) {} + set c(idx.sizeoriglist) {} + } + + if { ![info exists c(cid_base)] } { + set c(cid_base) [pages::compression2cid "zlib"] + } + + if { ![info exists c(cid)] } { + set c(cid) $c(cid_base) } interp alias {} $name {} ::cookfs::tcl::pages::handle $name @@ -197,27 +215,32 @@ 
proc cookfs::tcl::pages::crc32 { v } { proc cookfs::tcl::pages::compress {name origdata} { upvar #0 $name c + + set cid $c(cid) + set level 0 + if {[string length $origdata] == 0} { - return "" + return [list $cid $level ""] } - if {$c(cid) == 1} { + + if {$cid == 1} { if {$c(_usezlib)} { - set data "\u0001[zlib deflate $origdata]" + set data [zlib deflate $origdata] } else { - set data "\u0001[vfs::zip -mode compress -nowrap 1 $origdata]" + set data [vfs::zip -mode compress -nowrap 1 $origdata] } - } elseif {$c(cid) == 2} { + } elseif {$cid == 2} { package require Trf - set data "\u0002[binary format I [string length $origdata]][bz2 -mode compress $origdata]" - } elseif {$c(cid) == 3} { + set data [bz2 -mode compress $origdata] + } elseif {$cid == 3} { error "Lzma compression is not supported by Tcl pages" - } elseif {$c(cid) == 4} { + } elseif {$cid == 4} { error "zstd compression is not supported by Tcl pages" - } elseif {$c(cid) == 5} { + } elseif {$cid == 5} { error "brotli compression is not supported by Tcl pages" - } elseif {$c(cid) == 255} { + } elseif {$cid == 255} { if {$c(compresscommand) != ""} { - set data "\u00ff[uplevel #0 [concat $c(compresscommand) [list $origdata]]]" + set data [uplevel #0 [concat $c(compresscommand) [list $origdata]]] } else { error "No compresscommand specified" } @@ -226,19 +249,17 @@ proc cookfs::tcl::pages::compress {name origdata} { # if compression algorithm was not matched or # we should not always compress and compressed data is not smaller, revert to uncompressed data if {(![info exists data]) || ((!$c(alwayscompress)) && ([string length $data] > [string length $origdata]))} { - set data "\u0000$origdata" + set data $origdata + set cid 0 } - return $data + return [list $cid $level $data] } -proc cookfs::tcl::pages::decompress {name data} { +proc cookfs::tcl::pages::decompress {name cid sizeUncompress data} { upvar #0 $name c if {[string length $data] == 0} { return "" } - if {[binary scan $data ca* cid data] != 2} { - 
error "Unable to decompress page" - } switch -- $cid { 0 { return $data @@ -252,7 +273,7 @@ proc cookfs::tcl::pages::decompress {name data} { } 2 { package require Trf - return [bz2 -mode decompress [string range $data 4 end]] + return [bz2 -mode decompress $data] } 255 - -1 { if {$c(decompresscommand) == ""} { @@ -334,57 +355,114 @@ proc cookfs::tcl::pages::cid2compression {name} { } } +proc cookfs::tcl::pages::unsign_list { list } { + set result [list] + foreach x $list { + lappend result [expr { $x & 0xff }] + } + return $result +} + proc cookfs::tcl::pages::readIndex {name msgVariable} { variable errorMessage upvar #0 $name c upvar 1 $msgVariable msg + set suffix_size 62 if {[catch { - seek $c(fh) [expr {$c(endoffset) - 17}] start - set fc [read $c(fh) 17] + seek $c(fh) [expr {$c(endoffset) - $suffix_size}] start + set fc [read $c(fh) $suffix_size] }]} { set msg "$errorMessage: index not found" return 0 } - if {[string length $fc] != 17} { + if {[string length $fc] != $suffix_size} { set msg "$errorMessage: unable to read index suffix" return 0 } - if {[string range $fc 10 16] != "$c(cfsname)"} { - set msg "$errorMessage: invalid file signature" + + binary scan $fc cccccH32IIccH32IIa* \ + base_compression \ + base_compressionLevel \ + encryption \ + pgindex_compression \ + pgindex_compressionLevel \ + pgindex_md5 \ + pgindex_sizeCompressed \ + pgindex_sizeUncompressed \ + fsindex_compression \ + fsindex_compressionLevel \ + fsindex_md5 \ + fsindex_sizeCompressed \ + fsindex_sizeUncompressed \ + signature + + set pgindex_md5 [string toupper $pgindex_md5] + set fsindex_md5 [string toupper $fsindex_md5] + + if {$signature ne $c(cfsname)} { + set msg "$errorMessage: signature not found" return 0 } - binary scan [string range $fc 0 7] II idxsize numpages - set idxoffset [expr {$c(endoffset) - (17 + $idxsize + ($numpages * 20))}] - set c(indexoffset) $idxoffset - - if {$idxoffset < 0} { - set msg "$errorMessage: page sizes not found" + if { ($pgindex_sizeCompressed + 
$fsindex_sizeCompressed + $suffix_size) > $c(endoffset) } { + set msg "$errorMessage: failed to read index" return 0 } - seek $c(fh) $idxoffset start - set md5data [read $c(fh) [expr {$numpages * 16}]] - set sizedata [read $c(fh) [expr {$numpages * 4}]] + if { $pgindex_sizeCompressed + $fsindex_sizeCompressed > 0 } { + seek $c(fh) [expr {$c(endoffset) - $suffix_size - $pgindex_sizeCompressed - $fsindex_sizeCompressed}] start + } + + set c(idx.md5list) {} + set c(idx.sizelist) {} + set c(idx.comprlist) {} + set c(idx.comprlevellist) {} + set c(idx.sizeoriglist) {} + set c(idx.encryptionlist) {} + + if { $pgindex_sizeCompressed > 0 } { + + set fc [read $c(fh) $pgindex_sizeCompressed] + set fc [decompress $name $pgindex_compression $pgindex_sizeUncompressed $fc] + + binary scan [string range $fc 0 3] I numpages - set idx [read $c(fh) $idxsize] + binary scan [string range $fc 4 end] "c${numpages}c${numpages}c${numpages}I${numpages}I${numpages}a*" \ + c(idx.comprlist) c(idx.comprlevellist) c(idx.encryptionlist) \ + c(idx.sizelist) c(idx.sizeoriglist) md5data - set c(indexdata) [decompress $name $idx] + set c(idx.comprlist) [unsign_list $c(idx.comprlist)] + set c(idx.comprlevellist) [unsign_list $c(idx.comprlevellist)] + set c(idx.encryptionlist) [unsign_list $c(idx.encryptionlist)] - set c(idx.md5list) [list] - set c(idx.sizelist) [list] + binary scan $md5data H* md5hex + set md5hex [string toupper $md5hex] + for {set i 0} {$i < $numpages} {incr i} { + lappend c(idx.md5list) [string range $md5hex 0 31] + set md5hex [string range $md5hex 32 end] + } - binary scan $md5data H* md5hex - set md5hex [string toupper $md5hex] - for {set i 0} {$i < $numpages} {incr i} { - lappend c(idx.md5list) [string range $md5hex 0 31] - set md5hex [string range $md5hex 32 end] } - binary scan $sizedata I* c(idx.sizelist) + + if { $fsindex_sizeCompressed > 0 } { + + set fc [read $c(fh) $fsindex_sizeCompressed] + set c(indexdata) [decompress $name $fsindex_compression $fsindex_sizeUncompressed 
$fc] + + } else { + set c(indexdata) {} + } + + set idxoffset [expr {$c(endoffset) - $suffix_size - $pgindex_sizeCompressed - $fsindex_sizeCompressed}] + set c(indexoffset) $idxoffset + foreach s $c(idx.sizelist) { incr idxoffset -$s } set c(startoffset) $idxoffset + + set c(cid_base) $base_compression + return 1 } @@ -431,9 +509,6 @@ proc cookfs::tcl::pages::addstamp {name size} { proc cookfs::tcl::pages::pagewrite {name contents} { upvar #0 $name c - - binary scan $contents H* hd - if {!$c(haschanged)} { seek $c(fh) $c(indexoffset) start } else { @@ -449,9 +524,8 @@ proc cookfs::tcl::pages::pagewrite {name contents} { set c(haschanged) 1 } -proc cookfs::tcl::pages::pageAdd {name contents} { +proc cookfs::tcl::pages::getHash {name contents} { upvar #0 $name c - if {$c(hash) == "crc32"} { set md5 [string toupper [format %08x%08x%08x%08x \ 0 0 [string length $contents] [crc32 $contents] \ @@ -459,6 +533,13 @@ proc cookfs::tcl::pages::pageAdd {name contents} { } else { set md5 [string toupper [md5::md5 -hex $contents]] } + return $md5 +} + +proc cookfs::tcl::pages::pageAdd {name contents} { + upvar #0 $name c + + set md5 [getHash $name $contents] set idx 0 foreach imd5 $c(idx.md5list) { @@ -472,6 +553,8 @@ proc cookfs::tcl::pages::pageAdd {name contents} { set idx [llength $c(idx.sizelist)] lappend c(idx.md5list) $md5 + lappend c(idx.sizeoriglist) [string length $contents] + lappend c(idx.encryptionlist) 0 if {($c(cid) == 255) && ([string length $c(asynccompresscommand)] != 0)} { lappend c(idx.sizelist) -1 @@ -479,8 +562,10 @@ proc cookfs::tcl::pages::pageAdd {name contents} { } else { # ensure no writes are in progress while {[asyncCompressWait $name true]} {} - set contents [compress $name $contents] + lassign [compress $name $contents] cid level contents lappend c(idx.sizelist) [string length $contents] + lappend c(idx.comprlist) $cid + lappend c(idx.comprlevellist) $level pagewrite $name $contents } @@ -501,13 +586,81 @@ proc cookfs::tcl::pages::cleanup {name} { 
incr offset $i } seek $c(fh) $offset start - # write MD5 indexes - puts -nonewline $c(fh) [binary format H* [join $c(idx.md5list) ""]] - # write size indexes - puts -nonewline $c(fh) [binary format I* $c(idx.sizelist)] - set idx [compress $name $c(indexdata)] - puts -nonewline $c(fh) $idx - puts -nonewline $c(fh) [binary format IIcca* [string length $idx] [llength $c(idx.sizelist)] [compression2cid $c(compression)] 255 $c(cfsname)] + + set base_compression $c(cid) + set base_compressionLevel 0 + set encryption 0 + + # Make sure we use base compression and compression level for + # pgindex/fsindex data + set c(cid) $c(cid_base) + + if { [llength $c(idx.sizelist)] } { + + set pgindex "" + append pgindex [binary format I [llength $c(idx.sizelist)]] + append pgindex [binary format c* $c(idx.comprlist)] + append pgindex [binary format c* $c(idx.comprlevellist)] + append pgindex [binary format c* $c(idx.encryptionlist)] + append pgindex [binary format I* $c(idx.sizelist)] + append pgindex [binary format I* $c(idx.sizeoriglist)] + append pgindex [binary format H* [join $c(idx.md5list) ""]] + + set pgindex_sizeUncompressed [string length $pgindex] + + set pgindex_md5 [getHash $name $pgindex] + + lassign [compress $name $pgindex] pgindex_compression pgindex_compressionLevel pgindex + + set pgindex_sizeCompressed [string length $pgindex] + + puts -nonewline $c(fh) $pgindex + + } else { + set pgindex_compression 0 + set pgindex_compressionLevel 0 + set pgindex_sizeUncompressed 0 + set pgindex_sizeCompressed 0 + set pgindex_md5 [string repeat "0" 32] + } + + set fsindex_sizeUncompressed [string length $c(indexdata)] + + if { $fsindex_sizeUncompressed } { + + set fsindex_md5 [getHash $name $c(indexdata)] + + lassign [compress $name $c(indexdata)] fsindex_compression fsindex_compressionLevel fsindex + + set fsindex_sizeCompressed [string length $fsindex] + + puts -nonewline $c(fh) $fsindex + + } else { + set fsindex_compression 0 + set fsindex_compressionLevel 0 + set 
fsindex_sizeCompressed 0 + set fsindex_md5 [string repeat "0" 32] + } + + set suffix [binary format cccccH32IIccH32IIa* \ + $base_compression \ + $base_compressionLevel \ + $encryption \ + $pgindex_compression \ + $pgindex_compressionLevel \ + $pgindex_md5 \ + $pgindex_sizeCompressed \ + $pgindex_sizeUncompressed \ + $fsindex_compression \ + $fsindex_compressionLevel \ + $fsindex_md5 \ + $fsindex_sizeCompressed \ + $fsindex_sizeUncompressed \ + $c(cfsname)] + + puts -nonewline $c(fh) $suffix + set eo [tell $c(fh)] if {$eo < $c(endoffset)} { catch {chan truncate $c(fh)} @@ -683,11 +836,12 @@ proc cookfs::tcl::pages::tickTock {name} { } proc cookfs::tcl::pages::pageGet {name idx weight} { + upvar #0 $name c asyncPreload $name [expr {$idx + 1}] if {![pageGetStored $name $idx $weight fc] && ![cacheGet $name $idx $weight fc]} { set fc [pageGetData $name $idx] - set fc [decompress $name $fc] + set fc [decompress $name [lindex $c(idx.comprlist) $idx] [lindex $c(idx.sizeoriglist) $idx] $fc] cacheAdd $name $idx $weight $fc } @@ -718,12 +872,15 @@ proc cookfs::tcl::pages::asyncCompressWait {name require} { error "asyncCompressWait returned $idx, expecting [lindex $c(asyncwrites) 0]" } set origContents $c(asyncwrites,$idx) + set cid $c(cid) + set level 0 if {((!$c(alwayscompress)) && ([string length $contents] > [string length $origContents]))} { - set contents \u0000${origContents} - } else { - set contents [binary format c $c(cid)]$contents + set contents $origContents + set cid 0 } lset c(idx.sizelist) $idx [string length $contents] + lset c(idx.comprlist) $idx $cid + lset c(idx.comprlevellist) $idx $level pagewrite $name $contents unset c(asyncwrites,$idx) set c(asyncwrites) [lrange $c(asyncwrites) 1 end] @@ -757,13 +914,14 @@ proc cookfs::tcl::pages::asyncPreload {name idx} { for {set i $idx} {$i < $maxIdx} {incr i} { if {![pageGetStored $name $i 1000 - false] && ([lsearch $c(asyncpreloadBusy) $i] < 0)} { set fc [pageGetData $name $i] - # validate compression type and 
remove it before passing for processing - if {[string index $fc 0] == "\u00ff"} { + # check the compression type of the page being preloaded (index $i, not the starting index $idx) + if {[lindex $c(idx.comprlist) $i] == 255} { #puts "PRELOAD $i" while {[asyncDecompressWait $name -1 false]} {} - set fc [string range $fc 1 end] uplevel #0 [concat $c(asyncdecompresscommand) [list process $i $fc]] lappend c(asyncpreloadBusy) $i + } else { + #puts "WARNING! not custom compression for page #$i - \"[lindex $c(idx.comprlist) $i]\"" } } } diff --git a/tests/compression.test b/tests/compression.test index 34a3381..f3ce37e 100644 --- a/tests/compression.test +++ b/tests/compression.test @@ -45,11 +45,11 @@ foreach { cid } { none zlib bz2 lzma zstd brotli } { set pg [cookfs::pages $file] lappend ok [$pg length] # the page '0' contains the 'test1' file only and it must be uncompressed - # i.e. its size should equal to 3 ( 1 (compression method) + 1 ("X") + 1 (newline) ) + # i.e. its size should equal to 2 ( 1 byte ("X") + 1 byte (newline) ) set page_size [expr { [$pg filesize] - [$pg dataoffset 0] }] $pg delete lappend ok $page_size - } -result {1 1 3} -cleanup { + } -result {1 1 2} -cleanup { catch { cookfs::Unmount $file } catch { $pg delete } file delete -force $file $dir @@ -80,11 +80,11 @@ foreach { cid } { none zlib bz2 lzma zstd brotli } { set pg [cookfs::pages $file] lappend ok [$pg length] # the page '0' contains the 'test1' file only and it must be uncompressed - # i.e. its size should equal to 129 ( 1 (compression method) + 128 (data) ) + # i.e.
its size should equal to 128 set page_size [expr { [$pg filesize] - [$pg dataoffset 0] }] $pg delete lappend ok $page_size - } -result {1 1 129} -cleanup { + } -result {1 1 128} -cleanup { catch { cookfs::Unmount $file } catch { $pg delete } file delete -force $file $dir @@ -106,13 +106,13 @@ foreach { cid } { none zlib bz2 lzma zstd brotli } { set pg [cookfs::pages $file] lappend ok [$pg length] # the page '0' contains the 'test1' file only and it must be compressed (if method is not "none") - # i.e. its size should be less than 1026 ( 1 (compression method) + 1024 ("XXX...") + 1 (newline) ) + # i.e. its size should be less than 1025 ( 1024 ("XXX...") + 1 (newline) ) set page_size [expr { [$pg filesize] - [$pg dataoffset 0] }] $pg delete if { $cid eq "none" } { - if { $page_size == 1026 } { unset page_size } + if { $page_size == 1025 } { unset page_size } } { - if { $page_size < 1026 } { unset page_size } + if { $page_size < 1025 } { unset page_size } } if { [info exists page_size] } { lappend ok "unexpected page size: $page_size" @@ -194,12 +194,12 @@ foreach { cid } { none zlib bz2 lzma zstd brotli } { } } set expected [dict get { - none {1310722 1310722 1310722 1310722 1310722} - zlib {335801 395825 365353 342150 330139} - zstd {330325 357958 330325 319779 305182} - brotli {298922 400110 336878 301264 295253} - bz2 {275045 293163 278827 280154 275046} - lzma {274791 311432 303553 274791 272878} + none {1310721 1310721 1310721 1310721 1310721} + zlib {335800 395824 365352 342149 330138} + zstd {330320 357953 330320 319774 305177} + brotli {298917 400105 336873 301259 295248} + bz2 {275040 293158 278822 280149 275041} + lzma {274786 311427 303548 274786 272873} } $cid] if { $expected eq $result } { set result ok @@ -235,12 +235,12 @@ foreach { cid } { none zlib bz2 lzma zstd brotli } { } } set expected [dict get { - none {1310722 1310722} - zlib {335801 342150} - zstd {330325 319779} - brotli {298922 301264} - bz2 {275045 280154} - lzma {274791 274791} + none 
{1310721 1310721} + zlib {335800 342149} + zstd {330320 319774} + brotli {298917 301259} + bz2 {275040 280149} + lzma {274786 274786} } $cid] if { $expected eq $result } { set result ok diff --git a/tests/crypto.test b/tests/crypto.test index 2f02d97..75c8150 100644 --- a/tests/crypto.test +++ b/tests/crypto.test @@ -28,7 +28,7 @@ tcltest::test cookfsCrypto-1.4 "Test crypto::rng, get 8 random bytes and ensure } set result } -cleanup { - unset -nocomplain rng + unset -nocomplain rng result } -result [list 8] tcltest::test cookfsCrypto-1.5 "Test crypto::rng, get 65 random bytes and ensure they are not 0" -constraints {cookfsCrypto enabledTclCmds} -body { @@ -40,7 +40,7 @@ tcltest::test cookfsCrypto-1.5 "Test crypto::rng, get 65 random bytes and ensure } set result } -cleanup { - unset -nocomplain rng + unset -nocomplain rng result } -result [list 65] tcltest::test cookfsCrypto-2.1.1 "Test crypto::pbkdf2_hmac without arguments" -constraints {cookfsCrypto enabledTclCmds} -body { @@ -335,7 +335,7 @@ tcltest::test cookfsCrypto-5.3 "Test crypto::aes_* with known test vectors #3" - lappend result $count join $result \n } -cleanup { - unset -nocomplain result count t_num t_key t_data t_cipher + unset -nocomplain result count t_num t_iv t_key t_data t_cipher } -result 4 # Tests from: diff --git a/tests/pages.test b/tests/pages.test index ba9e771..5657b8c 100644 --- a/tests/pages.test +++ b/tests/pages.test @@ -72,7 +72,7 @@ tcltest::test cookfsPages-1.5 "Check dataoffset command reporting properly for e $pg delete file delete -force $file unset -nocomplain file pg fh -} -returnCodes {ok} -result 15,15,20 +} -returnCodes {ok} -result 15,15,19 tcltest::test cookfsPages-1.6 "Check dataoffset command reporting properly for files with prefix" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -92,7 +92,7 @@ tcltest::test cookfsPages-1.6 "Check dataoffset command reporting properly for f $pg delete file delete -force $file unset -nocomplain file pg 
fh -} -returnCodes {ok} -result 4111,4111,4116 +} -returnCodes {ok} -result 4111,4111,4115 tcltest::test cookfsPages-1.7 "Check filesize command reporting properly for empty archives" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -108,7 +108,7 @@ tcltest::test cookfsPages-1.7 "Check filesize command reporting properly for emp file delete -force $file unset -nocomplain file pg fh # -result is 4108 - 4096+1 byte for page prefix+15 bytes for stamp -} -returnCodes {ok} -result 4112 +} -returnCodes {ok} -result 4111 tcltest::test cookfsPages-1.8 "Check filesize command reporting properly for files with prefix" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -125,8 +125,8 @@ tcltest::test cookfsPages-1.8 "Check filesize command reporting properly for fil $pg delete file delete -force $file unset -nocomplain file pg fh - # -result is 8208 - 4096+4096+1 byte for page prefix+15 bytes for stamp -} -returnCodes {ok} -result 8208 + # -result is 8207 - 4096+4096+15 bytes for stamp +} -returnCodes {ok} -result 8207 tcltest::test cookfsPages-2.1 "Test that different pages get different indexes" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -919,6 +919,9 @@ tcltest::test cookfsPages-12.1 "Test changing page compression" -constraints {en $pg compression none + $pg add [string repeat "FOOBAR" 1024] + + # index should be stored with initial compression, i.e. 
zlib $pg index [string repeat "INDX" 1024] $pg delete @@ -927,12 +930,19 @@ tcltest::test cookfsPages-12.1 "Test changing page compression" -constraints {en set fc [read $fh] close $fh + # ensure we don't have "TEST" with compression "none" if {[string first [string repeat "TEST" 1024] $fc] >= 0} { - error "Pages not compressed properly" + error "TEST pages not compressed properly" + } + + # ensure we don't have index with compression "none" + if {[string first [string repeat "INDX" 1024] $fc] >= 0} { + error "Index not compressed properly" } - if {[string first [string repeat "INDX" 1024] $fc] < 0} { - error "Index compressed incorrectly" + # ensure the "FOOBAR" page, added after switching to "none", is stored uncompressed + if {[string first [string repeat "FOOBAR" 1024] $fc] < 0} { + error "FOOBAR pages compressed incorrectly" } set pg [cookfs::pages $file] @@ -977,7 +987,7 @@ tcltest::test cookfsPages-13.1 "Error message when index not found" -constraints catch {$pg delete} file delete -force $file unset -nocomplain file pg -} -returnCodes {error} -match glob -result {Unable to create Cookfs object: index not found} +} -returnCodes {error} -match glob -result {Unable to create Cookfs object: signature not found} tcltest::test cookfsPages-13.2.1 "Error message when invalid signature found" -constraints {enabledTclCmds} -setup { set file [tcltest::makeBinFile {} pages.cfs] @@ -992,7 +1002,7 @@ tcltest::test cookfsPages-13.2.1 "Error message when invalid signature found" -c set fh [open $file w] fconfigure $fh -translation binary - puts -nonewline $fh [string map [list CFS0002 CFSTEST] $fc] + puts -nonewline $fh [string map [list CFS0003 CFSTEST] $fc] close $fh } -body { set pg [cookfs::pages -readonly $file] @@ -1000,7 +1010,7 @@ tcltest::test cookfsPages-13.2.1 "Error message when invalid signature found" -c catch {$pg delete} file delete -force $file unset -nocomplain file pg fh fc -} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated.
Expected archive size is 181 bytes or larger.} +} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. Expected archive size is 236 bytes or larger.} tcltest::test cookfsPages-13.2.2 "Error message for truncated file with 100 bytes before archive" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -1021,7 +1031,7 @@ tcltest::test cookfsPages-13.2.2 "Error message for truncated file with 100 byte catch {$pg delete} file delete -force $file unset -nocomplain file pg fh fp -} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. Expected archive size is 1177 bytes or larger.} +} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. Expected archive size is 1232 bytes or larger.} tcltest::test cookfsPages-13.2.3 "Error message for truncated file with 9MB bytes before archive" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -1042,7 +1052,7 @@ tcltest::test cookfsPages-13.2.3 "Error message for truncated file with 9MB byte catch {$pg delete} file delete -force $file unset -nocomplain file pg fp fh -} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. Expected archive size is 9438261 bytes or larger.} +} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. Expected archive size is 9438316 bytes or larger.} # We are looking for no more than 10 mb, so this test should not find the stamp # and give a generic "invalid file signature" error message. 
@@ -1065,7 +1075,7 @@ tcltest::test cookfsPages-13.2.4 "Error message for truncated file with 10MB byt catch {$pg delete} file delete -force $file unset -nocomplain file pg fp fh -} -returnCodes {error} -match glob -result {Unable to create Cookfs object: invalid file signature} +} -returnCodes {error} -match glob -result {Unable to create Cookfs object: signature not found} tcltest::test cookfsPages-13.2.5 "Error message for truncated file with 1024 bytes before archive, without pages and updated fsindex" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -1086,7 +1096,7 @@ tcltest::test cookfsPages-13.2.5 "Error message for truncated file with 1024 byt catch {$pg delete} file delete -force $file unset -nocomplain file pg fp fh -} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. Expected archive size is 2081 bytes or larger.} +} -returnCodes {error} -match glob -result {The archive "*" appears to be corrupted or truncated. 
Expected archive size is 2125 bytes or larger.} tcltest::test cookfsPages-13.3 "Error message when unable to seek to pages" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] @@ -1114,7 +1124,7 @@ tcltest::test cookfsPages-13.3 "Error message when unable to seek to pages" -con catch {$pg delete} file delete -force $file unset -nocomplain file pg size fh fc -} -returnCodes {error} -match glob -result {Unable to create Cookfs object: page sizes not found} +} -returnCodes {error} -match glob -result {Unable to create Cookfs object: failed to read index} tcltest::test cookfsPages-13.4 "Error message when specifying invalid -endoffset" -constraints {enabledTclCmds} -setup { set file [tcltest::makeFile {} pages.cfs] diff --git a/tests/pagesAsync.test b/tests/pagesAsync.test index 5b651d8..15f8fb7 100644 --- a/tests/pagesAsync.test +++ b/tests/pagesAsync.test @@ -91,7 +91,7 @@ tcltest::test cookfsPagesAsync-1.6 "Verify multiple async pages can be read" -co -asynccompresscommand testasynccompress \ -compresscommand testcompress -decompresscommand testdecompress $file] set pages {} - for {set i 0} {$i <1024} {incr i} { + for {set i 0} {$i < 1024} {incr i} { set c [string repeat [format "TESTDATA%04d" $i] 4096] lappend pages [$pg add $c] $c } diff --git a/tests/vfs.test b/tests/vfs.test index c7c4cd9..63bb337 100644 --- a/tests/vfs.test +++ b/tests/vfs.test @@ -36,7 +36,7 @@ tcltest::test cookfsVfs-1.3 "Mount non-existant file as read-only" -setup { catch {cookfs::Unmount $file} tcltest::removeFile $file unset -nocomplain file -} -returnCodes {error} -result {Unable to create Cookfs object: index not found} +} -returnCodes {error} -result {Unable to create Cookfs object: signature not found} tcltest::test cookfsVfs-1.4 "Remounting archive" -setup { set file [tcltest::makeFile {} cookfs.cfs] @@ -536,7 +536,7 @@ tcltest::test cookfsVfs-7.1 "Test filesize command" -setup { catch {cookfs::Unmount $file} tcltest::removeFile $file unset -nocomplain 
file fh fsid -} -returnCodes {ok} -result 69649 +} -returnCodes {ok} -result 69647 tcltest::test cookfsVfs-7.2 "Test smallfilebuffersize command" -setup { set file [tcltest::makeFile {} pages.cfs] @@ -701,8 +701,10 @@ tcltest::test cookfsVfs-10.1 "Test changing compression without remounting" -con set fh [open $file w] close $fh } -body { - set fsid [cookfs::Mount $file $file -compression zlib -smallfilesize 32768 \ + set fsid [cookfs::Mount $file $file -compression custom -smallfilesize 32768 \ -compresscommand testcompressraw -decompresscommand testdecompressraw -alwayscompress] + + $fsid compression zlib if {[$fsid compression] != "zlib"} { error "Getting compression does not return \"zlib\"" } @@ -1515,13 +1517,19 @@ tcltest::test cookfsVfs-19.6.2 "Check unmount in child interp after interp died" # This test is related to cookfsVfs-20.1.2. If something needs to be changed here, # the appropriate changes should be added to cookfsVfs-20.1.2. tcltest::test cookfsVfs-20.1.1 "Don't crash when stamp is not present" -setup { - set data "" - set index "\u0000CFS2.200" - # 0 file - append index [binary format I 0] - append data $index - # + index size + page count (0) + compression id + compression level + signature - append data [binary format IIcca* [string length $index] 0 0 0 CFS0002] + # base compression + base compression level + encryption + set data [binary format ccc 0 0 0] + # pgindex + # compression + compression level + 16 bytes hash + compressed size + uncompressed size + append data [binary format ccc16II 0 0 \ + {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \ + 0 0] + # fsindex + # compression + compression level + 16 bytes hash + compressed size + uncompressed size + append data [binary format ccc16II 0 0 \ + {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \ + 0 0] + append data "CFS0003" set file [tcltest::makeBinFile $data pages.cfs] } -body { cookfs::Mount $file $file -readonly @@ -1536,13 +1544,19 @@ tcltest::test cookfsVfs-20.1.1 "Don't crash when stamp is not present" 
-setup { # This test differs from the previous one in that there is no byte representing # the compression level in the tail of the archive tcltest::test cookfsVfs-20.1.2 "Don't crash when stamp is not present and archive suffix is malformed" -setup { - set data "" - set index "\u0000CFS2.200" - # 0 file - append index [binary format I 0] - append data $index - # + index size + page count (0) + compression id + compression level + signature - append data [binary format IIca* [string length $index] 0 0 CFS0002] + # base compression + encryption + set data [binary format cc 0 0] + # pgindex + # compression + compression level + 16 bytes hash + compressed size + uncompressed size + append data [binary format ccc16II 0 0 \ + {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \ + 0 0] + # fsindex + # compression + compression level + 16 bytes hash + compressed size + uncompressed size + append data [binary format ccc16II 0 0 \ + {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \ + 0 0] + append data "CFS0003" set file [tcltest::makeBinFile $data pages.cfs] } -body { cookfs::Mount $file $file -readonly @@ -1552,5 +1566,5 @@ tcltest::test cookfsVfs-20.1.2 "Don't crash when stamp is not present and archiv catch { cookfs::Unmount $file } tcltest::removeFile $file unset -nocomplain data index file ok -} -returnCodes error -result {Unable to create Cookfs object: page sizes not found} +} -returnCodes error -result {Unable to create Cookfs object: index not found}