Skip to content

Commit

Permalink
Refactor storage format (#43)
Browse files Browse the repository at this point in the history
  • Loading branch information
chpock authored Jul 20, 2024
1 parent f535dc3 commit ce56e9e
Show file tree
Hide file tree
Showing 37 changed files with 2,451 additions and 1,831 deletions.
3 changes: 3 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2024-07-20 Konstantin Kushnir <[email protected]>
* Refactor storage format

2024-07-08 Konstantin Kushnir <[email protected]>
* Add procedures for AES encryption/decryption
* Rename crypt->crypto
Expand Down
16 changes: 16 additions & 0 deletions TODO.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,18 @@
* Add support for encrypted/password protected files
* Consider mapping archive to memory to improve performance on readonly VFS
* Add ability to disable custom compression to reduce size footprint when it is not needed
* Add ability to disable async compression to reduce size footprint when it is not needed
* Add MD5 hash validation on load for pgindex data
* Add MD5 hash validation on load for fsindex data
* Add MD5 hash validation on load for all pages data
* Update bzip2 and use it as a submodule. Perhaps this will get rid of its compile-time warnings.

== Examples

* Add a simple example of tclsh with cookfs. Compare with tclsh9+zipfs (size and
loading speed). This example will cover cookfs usage from the C side.

* Add an example of a Tcl archiver application that uses the cookfs format.
It will be like any other archiver and will support file
compression/decompression. This example will cover cookfs usage from Tcl side.

2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -8754,7 +8754,7 @@ if test ${USECPAGES} = yes; then
COOKFS_PKGCONFIG_USECPAGES=1
COOKFS_PKGCONFIG_FEATURE_ASIDE=1

vars="pageObj.c pages.c pagesCompr.c pagesComprZlib.c pagesCmd.c"
vars="pgindex.c pageObj.c pages.c pagesCompr.c pagesAsync.c pagesComprZlib.c pagesComprCustom.c pagesCmd.c"
for i in $vars; do
case $i in
\$*)
Expand Down
2 changes: 1 addition & 1 deletion configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ if test ${USECPAGES} = yes; then
AC_DEFINE(COOKFS_USECPAGES)
COOKFS_PKGCONFIG_USECPAGES=1
COOKFS_PKGCONFIG_FEATURE_ASIDE=1
TEA_ADD_SOURCES([pageObj.c pages.c pagesCompr.c pagesComprZlib.c pagesCmd.c])
TEA_ADD_SOURCES([pgindex.c pageObj.c pages.c pagesCompr.c pagesAsync.c pagesComprZlib.c pagesComprCustom.c pagesCmd.c])

# enable bz2 files only if pages are handled using C
if test ${USEBZ2} = yes; then
Expand Down
65 changes: 52 additions & 13 deletions cookfswriter/cookfswriter.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,35 @@

namespace eval cookfs {}

# cookfs::createArchivePageIndex --
#
#   Builds the binary page index for the pages in 'pagelist'.
#
#   Layout of the returned value:
#     - page count as a 32-bit big-endian integer
#     - one zero byte per page (compression)
#     - one zero byte per page (compression level)
#     - one zero byte per page (encryption)
#     - per page: [string length $page] as a 32-bit big-endian integer
#       (compressed size)
#     - per page: the same length again (uncompressed size)
#     - per page: 16 zero bytes (hash placeholder)
#
#   Each field is stored as a separate column: all values of one field for
#   every page come before the first value of the next field.
#
# Arguments:
#   pagelist - list of page contents
#
# Results:
#   The serialized page index.
proc cookfs::createArchivePageIndex {pagelist} {
    # These encodings are identical for every page, so build them once.
    set zeroByte [binary format c 0]
    set zeroHash [binary format c16 {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0}]

    set colCompression ""
    set colCompressionLevel ""
    set colEncryption ""
    set colSizeCompressed ""
    set colSizeUncompressed ""
    set colHash ""

    foreach page $pagelist {
        # Pages are written as-is, so both sizes are the page length.
        set sizeField [binary format I [string length $page]]

        append colCompression $zeroByte
        append colCompressionLevel $zeroByte
        append colEncryption $zeroByte
        append colSizeCompressed $sizeField
        append colSizeUncompressed $sizeField
        append colHash $zeroHash
    }

    # Page count first, then the columns in their fixed order.
    set index [binary format I [llength $pagelist]]
    append index $colCompression $colCompressionLevel $colEncryption
    append index $colSizeCompressed $colSizeUncompressed $colHash

    return $index
}


proc cookfs::createArchiveFileIndex {filelist} {
set rc ""

Expand Down Expand Up @@ -150,24 +179,34 @@ proc cookfs::createArchive {archivefile filelist {bootstrap ""}} {
}
fconfigure $fh -translation binary
foreach page $pagelist {
puts -nonewline $fh \u0000$page
puts -nonewline $fh $page
}

# add fake md5 indexes
foreach page $pagelist {
puts -nonewline $fh [binary format IIII 0 0 0 0]
}
set pgindexdata [createArchivePageIndex $pagelist]
puts -nonewline $fh $pgindexdata

# add page indexes
foreach page $pagelist {
puts -nonewline $fh [binary format I [expr {[string length $page] + 1}]]
}
set fsindexdata "CFS2.200[createArchiveFileIndex $fileindex]"
puts -nonewline $fh $fsindexdata

# write archive footer
# base compression type + base compression level + encryption
puts -nonewline $fh [binary format ccc 0 0 0]

# write pgindex info:
# compression + compression level + hash(16 bytes) + size compressed + size uncompressed
puts -nonewline $fh [binary format ccc16II \
0 0 {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \
[string length $pgindexdata] [string length $pgindexdata]]

# write fsindex info:
# compression + compression level + hash(16 bytes) + size compressed + size uncompressed
puts -nonewline $fh [binary format ccc16II \
0 0 {0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0} \
[string length $fsindexdata] [string length $fsindexdata]]

# TODO: add index
set indexdata "\u0000CFS2.200[createArchiveFileIndex $fileindex]"
# write signature
puts -nonewline $fh "CFS0003"

puts -nonewline $fh $indexdata
puts -nonewline $fh [binary format IIcca* [string length $indexdata] [llength $pagelist] 0 0 CFS0002]
close $fh
}

15 changes: 15 additions & 0 deletions generic/cookfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,28 @@
#include <assert.h>

#ifdef COOKFS_INTERNAL_DEBUG

// Define __FUNCTION_NAME__ as a single spelling for "name of the enclosing
// function" unless something has already defined it. _WIN32 builds map it to
// __FUNCTION__; every other build maps it to __func__.
#ifndef __FUNCTION_NAME__
#ifdef _WIN32 // WINDOWS
#define __FUNCTION_NAME__ __FUNCTION__
#else // GCC
#define __FUNCTION_NAME__ __func__
#endif
#endif

// This is an experiment to print debug messages indented according to
// the current stack depth. The -funwind-tables compiler flag must be used
// for the backtrace() function to work.
//
// This feature is not currently used, but may be used in the future.
//
// #include <execinfo.h>
// static inline int ___get_stack_depth() {
// void *buffer[200];
// return backtrace(buffer, 200);
// }
// #define CookfsLog(a) {printf("%d ", ___get_stack_depth()); a; printf("\n"); fflush(stdout);}

// #define CookfsLog(a) {printf("[%p] ", (void *)Tcl_GetCurrentThread()); a; printf("\n"); fflush(stdout);}
// #define CookfsLog2(a) {printf("[%p] ", (void *)Tcl_GetCurrentThread()); printf("%s: ", __FUNCTION_NAME__); a; printf("\n"); fflush(stdout);}
#define CookfsLog(a) {a; printf("\n"); fflush(stdout);}
Expand Down
33 changes: 21 additions & 12 deletions generic/pageObj.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@
void Cookfs_PageObjIncrRefCount(Cookfs_PageObj pg) {
Cookfs_PageObjStruct *ps = (Cookfs_PageObjStruct *)(pg -
sizeof(Cookfs_PageObjStruct));
// CookfsLog(printf("Cookfs_PageObjIncrRefCount: %p", (void *)pg));
// CookfsLog2(printf("%p (allocated at %p)", (void *)pg, (void *)ps));
#ifdef TCL_THREADS
Tcl_MutexLock(&ps->mx);
#endif /* TCL_THREADS */
ps->refCount++;
// CookfsLog(printf("Cookfs_PageObjIncrRefCount: %p - count:%d",
// (void *)pg, ps->refCount));
// CookfsLog2(printf("%p - count:%d", (void *)pg, ps->refCount));
#ifdef TCL_THREADS
Tcl_MutexUnlock(&ps->mx);
#endif /* TCL_THREADS */
Expand All @@ -27,21 +26,24 @@ void Cookfs_PageObjIncrRefCount(Cookfs_PageObj pg) {
// Cookfs_PageObjDecrRefCount --
//
//   Drops one reference from the page object whose data pointer is pg.
//   When the count reaches zero the object's mutex is finalized (threaded
//   builds) and the allocation is released with ckfree().
//
//   Every call must be paired with an earlier Cookfs_PageObjIncrRefCount();
//   this is enforced with an assert().
void Cookfs_PageObjDecrRefCount(Cookfs_PageObj pg) {
    // The bookkeeping header sits immediately before the data pointer.
    Cookfs_PageObjStruct *ps = (Cookfs_PageObjStruct *)(pg -
        sizeof(Cookfs_PageObjStruct));
    int isLastRef;
    // CookfsLog2(printf("%p (allocated at %p)", (void *)pg, (void *)ps));
#ifdef TCL_THREADS
    Tcl_MutexLock(&ps->mx);
#endif /* TCL_THREADS */
    // There should not be Cookfs_PageObjDecrRefCount() without
    // a corresponding Cookfs_PageObjIncrRefCount() that was called before it.
    // Throw an error if refcount is less than or equal to zero.
    assert(ps->refCount > 0);
    ps->refCount--;
    // Decide whether this call released the last reference while the lock is
    // still held. Re-reading ps->refCount after unlocking would let two
    // threads that drop the final two references both observe zero and both
    // free the object.
    isLastRef = (ps->refCount == 0);
    // CookfsLog2(printf("%p - count:%d", (void *)pg, ps->refCount));
#ifdef TCL_THREADS
    Tcl_MutexUnlock(&ps->mx);
#endif /* TCL_THREADS */
    if (isLastRef) {
#ifdef TCL_THREADS
        Tcl_MutexFinalize(&ps->mx);
#endif /* TCL_THREADS */
        // CookfsLog2(printf("release %p", (void *)pg));
        ckfree(ps);
    }
}
Expand All @@ -59,7 +61,7 @@ static Tcl_Size Cookfs_PageObjCalculateSize(Tcl_Size size) {
}

Cookfs_PageObj Cookfs_PageObjAlloc(Tcl_Size size) {
CookfsLog2(printf("enter..."));
// CookfsLog2(printf("enter..."));
Tcl_Size bufferSize = Cookfs_PageObjCalculateSize(size);
Cookfs_PageObj p = ckalloc(bufferSize + sizeof(Cookfs_PageObjStruct));
if (p != NULL) {
Expand All @@ -75,20 +77,27 @@ Cookfs_PageObj Cookfs_PageObjAlloc(Tcl_Size size) {
#endif /* COOKFS_USECCRYPTO */
p += sizeof(Cookfs_PageObjStruct);
}
CookfsLog(printf("Cookfs_PageObjAlloc: return %p", (void *)p));
CookfsLog(printf("Cookfs_PageObjAlloc: return %p (allocated at %p)",
(void *)p, (void *)(p - sizeof(Cookfs_PageObjStruct))));
return p;
}

Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj) {
Tcl_Size size;
unsigned char *bytes = Tcl_GetByteArrayFromObj(obj, &size);
// Cookfs_PageObjNewFromString --
//
//   Allocates a page object with Cookfs_PageObjAlloc(size) and copies 'size'
//   bytes from 'bytes' into it.
//
//   Returns the new page object, or NULL when the allocation returned NULL
//   (nothing is copied in that case).
Cookfs_PageObj Cookfs_PageObjNewFromString(const unsigned char *bytes,
    Tcl_Size size)
{
    Cookfs_PageObj pageObj = Cookfs_PageObjAlloc(size);
    if (pageObj == NULL) {
        return NULL;
    }
    memcpy(pageObj, bytes, size);
    return pageObj;
}

// Cookfs_PageObjNewFromByteArray --
//
//   Creates a page object holding a copy of the byte-array content of obj.
//
//   Returns the new page object, or NULL when the bytes could not be
//   obtained from obj or when Cookfs_PageObjNewFromString() returned NULL.
Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj) {
    Tcl_Size size = 0;
    const unsigned char *bytes = Tcl_GetByteArrayFromObj(obj, &size);
    // With Tcl 9 headers Tcl_GetByteArrayFromObj() can return NULL when the
    // value is not a valid byte array; size is not meaningful in that case.
    // Report failure instead of copying from a NULL pointer.
    if (bytes == NULL) {
        return NULL;
    }
    return Cookfs_PageObjNewFromString(bytes, size);
}

#ifdef COOKFS_USECCRYPTO

Cookfs_PageObj Cookfs_PageObjNewFromByteArrayIV(Tcl_Obj *obj) {
Expand Down
6 changes: 6 additions & 0 deletions generic/pageObj.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,17 @@ void Cookfs_PageObjDecrRefCount(Cookfs_PageObj pg);
(((Cookfs_PageObjStruct *)((Cookfs_PageObj)(p) - \
sizeof(Cookfs_PageObjStruct)))->effectiveSize)

/*
 * Cookfs_PageObjSetSize(p,n) - stores n in the effectiveSize field of the
 * Cookfs_PageObjStruct header located sizeof(Cookfs_PageObjStruct) bytes
 * before the page data pointer p.
 *
 * The whole expansion is parenthesized so that the macro is a single
 * assignment expression; without the outer parentheses, surrounding
 * operators would bind to (n) instead of to the result of the assignment.
 */
#define Cookfs_PageObjSetSize(p,n) \
    ((((Cookfs_PageObjStruct *)((Cookfs_PageObj)(p) - \
        sizeof(Cookfs_PageObjStruct)))->effectiveSize) = (n))

/*
 * Cookfs_PageObjCopyAsByteArray(p) - calls Tcl_NewByteArrayObj() with the
 * page data pointer p and Cookfs_PageObjSize(p) as the length, yielding a
 * new Tcl byte-array object. Note that p is evaluated twice.
 */
#define Cookfs_PageObjCopyAsByteArray(p) \
Tcl_NewByteArrayObj(p, Cookfs_PageObjSize(p))

Cookfs_PageObj Cookfs_PageObjAlloc(Tcl_Size size);
Cookfs_PageObj Cookfs_PageObjNewFromByteArray(Tcl_Obj *obj);
Cookfs_PageObj Cookfs_PageObjNewFromString(const unsigned char *bytes,
Tcl_Size size);

#ifdef COOKFS_USECCRYPTO

Expand Down
Loading

0 comments on commit ce56e9e

Please sign in to comment.