Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to detect truncated archives #6

Merged
merged 1 commit into from
May 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
2024-05-26 Konstantin Kushnir <[email protected]>
* Add tests for writer
* Fix build without c-pages, but with c-fsindex
* Add the ability to detect truncated archives

2024-05-25 Konstantin Kushnir <[email protected]>
* Add support for writer in C
Expand Down
231 changes: 226 additions & 5 deletions generic/pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@

#define COOKFS_SUFFIX_BYTES 16

// read by 512kb chunks
#define COOKFS_SEARCH_STAMP_CHUNK 524288
// max read 10mb
#define COOKFS_SEARCH_STAMP_MAX_READ 10485760

#define COOKFS_PAGES_ERRORMSG "Unable to create Cookfs object"

/* declarations of static and/or internal functions */
Expand All @@ -19,6 +24,7 @@ static void CookfsPagesPageCacheMoveToTop(Cookfs_Pages *p, int index);
static int CookfsReadIndex(Tcl_Interp *interp, Cookfs_Pages *p);
static void CookfsPagesPageExtendIfNeeded(Cookfs_Pages *p, int count);
static void CookfsTruncateFileIfNeeded(Cookfs_Pages *p, Tcl_WideInt targetOffset);
static Tcl_WideInt Cookfs_PageSearchStamp(Cookfs_Pages *p);

static const char *const pagehashNames[] = { "md5", "crc32", NULL };

Expand Down Expand Up @@ -232,16 +238,24 @@ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, int fileRe
}

/* initialize structure */
rc->isFirstWrite = 0;
rc->useFoffset = useFoffset;
rc->foffset = foffset;
rc->fileReadOnly = fileReadOnly;
rc->fileCompression = fileCompression;
rc->alwaysCompress = 0;
if (fileSignature != NULL)
memcpy(rc->fileSignature, fileSignature, 7);
else
memcpy(rc->fileSignature, "CFS0002", 7);

if (fileSignature != NULL) {
memcpy(rc->fileSignature, fileSignature, 7);
} else {
// Split the signature into 2 strings so we don't find that whole string
// when searching for the signature
memcpy(rc->fileSignature, "CFS", 3);
memcpy(rc->fileSignature + 3, "0002", 4);
}
// Split the stamp into 2 strings so we don't find that whole string
// when searching for the stamp
memcpy(rc->fileStamp, "CFS", 3);
memcpy(rc->fileStamp + 3, "S002", 4);

/* initialize parameters */
rc->fileLastOp = COOKFS_LASTOP_UNKNOWN;
Expand Down Expand Up @@ -346,12 +360,24 @@ Cookfs_Pages *Cookfs_PagesInit(Tcl_Interp *interp, Tcl_Obj *fileName, int fileRe
/* read index or fail */
if (!CookfsReadIndex(interp, rc)) {
if (rc->fileReadOnly) {
// Detecting a corrupted file only if no endoffset is specified and we
// tried to automatically detect the contents of the archive.
if (!useFoffset) {
Tcl_WideInt expectedSize = Cookfs_PageSearchStamp(rc);
if (expectedSize != -1) {
Tcl_SetObjResult(interp, Tcl_ObjPrintf("The archive \"%s\""
" appears to be corrupted or truncated. Expected"
" archive size is %" TCL_LL_MODIFIER "d bytes or"
" larger.", Tcl_GetString(fileName), expectedSize));
}
}
rc->pagesUptodate = 1;
rc->indexChanged = 0;
rc->shouldTruncate = 0;
Cookfs_PagesFini(rc);
return NULL;
} else {
rc->isFirstWrite = 1;
rc->dataInitialOffset = Tcl_Seek(rc->fileChannel, 0, SEEK_END);
rc->dataAllPagesSize = 0;
rc->dataNumPages = 0;
Expand Down Expand Up @@ -416,6 +442,9 @@ Tcl_WideInt Cookfs_PagesClose(Cookfs_Pages *p) {
Cookfs_AsyncCompressFinalize(p);
Cookfs_AsyncDecompressFinalize(p);

// Add initial stamp if needed
Cookfs_PageAddStamp(p, 0);

/* seek to proper position */
Cookfs_SeekToPage(p, p->dataNumPages);

Expand Down Expand Up @@ -461,6 +490,9 @@ Tcl_WideInt Cookfs_PagesClose(Cookfs_Pages *p) {

CookfsTruncateFileIfNeeded(p, p->foffset);

// Add final stamp if needed
Cookfs_PageAddStamp(p, p->foffset);

}

/* close file channel */
Expand Down Expand Up @@ -561,6 +593,195 @@ void Cookfs_PagesFini(Cookfs_Pages *p) {
Tcl_Free((void *) p);
}

/*
*----------------------------------------------------------------------
*
* Cookfs_PageSearchStamp --
*
* Trying to find the cookfs stamp that should be located in front of the archive
*
* Results:
* Expected file size if the stamp is found, or -1 otherwise
*
* Side effects:
* Changing the current offset in the channel
*
*----------------------------------------------------------------------
*/

static Tcl_WideInt Cookfs_PageSearchStamp(Cookfs_Pages *p) {

CookfsLog(printf("Cookfs_PageSearchStamp: enter"));

unsigned char *buf = NULL;
Tcl_WideInt size = -1;

buf = ckalloc(COOKFS_SEARCH_STAMP_CHUNK);
if (buf == NULL) {
CookfsLog(printf("Cookfs_PageSearchStamp: failed to alloc"));
goto error;
}

if (Tcl_Seek(p->fileChannel, 0, SEEK_SET) == -1) {
CookfsLog(printf("Cookfs_PageSearchStamp: failed to seek"));
goto error;
}

Tcl_WideInt read = 0;
int bufSize = 0;
int i;

while (!Tcl_Eof(p->fileChannel) && (read < COOKFS_SEARCH_STAMP_MAX_READ)) {

int wantToRead = COOKFS_SEARCH_STAMP_CHUNK - bufSize;

if ((wantToRead + read) > COOKFS_SEARCH_STAMP_MAX_READ) {
wantToRead = COOKFS_SEARCH_STAMP_MAX_READ - read;
}

CookfsLog(printf("Cookfs_PageSearchStamp: try to read %d bytes",
wantToRead));

int readCount = Tcl_Read(p->fileChannel, (char *)buf + bufSize,
wantToRead);

if (!readCount) {
CookfsLog(printf("Cookfs_PageSearchStamp: got zero bytes,"
" continue"));
continue;
}

// A negative value of bytes read indicates an error. Stop processing
// in this case.
if (readCount < 0) {
goto error;
}

CookfsLog(printf("Cookfs_PageSearchStamp: got %d bytes", readCount));

read += readCount;
bufSize += readCount;

// Do not look for the last 20 bytes, as a situation may arise where
// the stamp byte is at the very end of the buffer and the WideInt
// that should come after the stamp is not read.
int bytesToLookup = bufSize - 20;

for (i = 0; i < bytesToLookup; i++) {
if (buf[i] != p->fileStamp[0]) {
continue;
}
if (memcmp(&buf[i], p->fileStamp, COOKFS_SIGNATURE_LENGTH) != 0) {
continue;
}
goto found;
}

CookfsLog(printf("Cookfs_PageSearchStamp: stamp is not found yet"));

// Leave the last 20 bytes in the buffer
if (bufSize > 20) {
memcpy(buf, &buf[bufSize - 20], 20);
bufSize = 20;
}

}

CookfsLog(printf("Cookfs_PageSearchStamp: read total %ld bytes and"
" could not find the stamp", read));

goto error;

found:


Cookfs_Binary2WideInt(&buf[i + COOKFS_SIGNATURE_LENGTH], &size, 1);
CookfsLog(printf("Cookfs_PageSearchStamp: return the size: %ld", size));

error:

if (buf != NULL) {
ckfree(buf);
}

return size;

}

/*
*----------------------------------------------------------------------
*
* Cookfs_PageAddStamp --
*
* Adds a stamp before archive
*
* Results:
* TCL_OK on success or TCL_ERROR on failure
*
* Side effects:
* Sets the current offset in the channel immediately following the stamp
*
*----------------------------------------------------------------------
*/

int Cookfs_PageAddStamp(Cookfs_Pages *p, Tcl_WideInt size) {

CookfsLog(printf("Cookfs_PageAddStamp: enter, size: %ld", size));

unsigned char sizeBin[8]; // 64-bit WideInt
Cookfs_WideInt2Binary(&size, sizeBin, 1);

if (size == 0) {
if (!p->isFirstWrite) {
CookfsLog(printf("Cookfs_PageAddStamp: return:"
" is not the first write"));
return TCL_OK;
}
CookfsLog(printf("Cookfs_PageAddStamp: write initial stamp"));
if (Tcl_Seek(p->fileChannel, 0, SEEK_END) == -1) {
CookfsLog(printf("Cookfs_PageAddStamp: return error,"
" failed to seek"));
return TCL_ERROR;
}
if (Tcl_Write(p->fileChannel, p->fileStamp, COOKFS_SIGNATURE_LENGTH)
!= COOKFS_SIGNATURE_LENGTH)
{
CookfsLog(printf("Cookfs_PageAddStamp: return error,"
" failed to write signature"));
return TCL_ERROR;
}
if (Tcl_Write(p->fileChannel, (const char *)sizeBin, 8) != 8)
{
CookfsLog(printf("Cookfs_PageAddStamp: return error,"
" failed to write size"));
return TCL_ERROR;
}
p->dataInitialOffset += COOKFS_SIGNATURE_LENGTH + 8; // 7+8 = 15
p->isFirstWrite = 0;
// We're already in position for the next file write
p->fileLastOp = COOKFS_LASTOP_WRITE;
} else {
CookfsLog(printf("Cookfs_PageAddStamp: write final stamp"));
if (Tcl_Seek(p->fileChannel, p->dataInitialOffset - 8, SEEK_SET)
== -1)
{
CookfsLog(printf("Cookfs_PageAddStamp: return error,"
" failed to seek"));
return TCL_ERROR;
}
if (Tcl_Write(p->fileChannel, (const char *)sizeBin, 8) != 8)
{
CookfsLog(printf("Cookfs_PageAddStamp: return error,"
" failed to write size"));
return TCL_ERROR;
}
}

CookfsLog(printf("Cookfs_PageAddStamp: ok"));
return TCL_OK;

}

/*
*----------------------------------------------------------------------
*
Expand Down
4 changes: 4 additions & 0 deletions generic/pages.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ typedef struct Cookfs_Pages {
int fileCompression;
int fileCompressionLevel;
char fileSignature[COOKFS_SIGNATURE_LENGTH];
int isFirstWrite;
char fileStamp[COOKFS_SIGNATURE_LENGTH];
Tcl_Channel fileChannel;
int fileLastOp;
int useFoffset;
Expand Down Expand Up @@ -176,6 +178,8 @@ Tcl_Obj *Cookfs_PagesGetHashAsObj(Cookfs_Pages *p);
int Cookfs_PagesSetHashByObj(Cookfs_Pages *p, Tcl_Obj *pagehash,
Tcl_Interp *interp);

int Cookfs_PageAddStamp(Cookfs_Pages *p, Tcl_WideInt size);

#endif /* COOKFS_USECPAGES */

#endif /* COOKFS_PAGES_H */
3 changes: 3 additions & 0 deletions generic/pagesCompr.c
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,9 @@ void Cookfs_SeekToPage(Cookfs_Pages *p, int idx) {
int Cookfs_WritePage(Cookfs_Pages *p, int idx, unsigned char *bytes, int origSize, Tcl_Obj *compressedData) {
int size = -1;

// Add initial stamp if needed
Cookfs_PageAddStamp(p, 0);

/* if last operation was not write, we need to seek
* to make sure we're at location where we should be writing */
if ((idx >= 0) && (p->fileLastOp != COOKFS_LASTOP_WRITE)) {
Expand Down
Loading