Skip to content

Commit

Permalink
Add tests for writer
Browse files Browse the repository at this point in the history
  • Loading branch information
chpock committed May 26, 2024
1 parent 15db495 commit 921c8d8
Show file tree
Hide file tree
Showing 10 changed files with 595 additions and 11 deletions.
3 changes: 3 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2024-05-26 Konstantin Kushnir <[email protected]>
* Add tests for writer

2024-05-25 Konstantin Kushnir <[email protected]>
* Add support for writer in C

Expand Down
6 changes: 6 additions & 0 deletions generic/vfsDriver.c
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,12 @@ static int CookfsDeleteFile(Tcl_Obj *pathPtr) {
return TCL_ERROR;
}

if (Cookfs_FsindexEntryIsPending(entry)) {
CookfsLog(printf("CookfsDeleteFile: the entry is pending,"
" remove it from small file buffer"));
Cookfs_WriterRemoveFile(vfs->writer, entry);
}

int result = Cookfs_FsindexUnset(index, internalRep->relativePathObj);

// Check to see if anything's wrong
Expand Down
71 changes: 65 additions & 6 deletions generic/writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ int Cookfs_WriterAddBufferToSmallFiles(Cookfs_Writer *w, Tcl_Obj *pathObj,
if (wb->entry == NULL) {
CookfsLog(printf("Cookfs_WriterAddBufferToSmallFiles: failed to create"
" the entry"));
Cookfs_WriterSetLastError(w, "unable to create entry");
Cookfs_WriterSetLastError(w, "Unable to create entry");
Cookfs_WriterWriterBufferFree(wb);
return TCL_ERROR;
}
Expand Down Expand Up @@ -241,6 +241,43 @@ static Tcl_WideInt Cookfs_WriterReadChannel(char *buffer,

}

/*
 * Cookfs_WriterRemoveFile --
 *
 *      Drops the small file buffer that belongs to the given fsindex entry
 *      from the writer's linked list of buffers.
 *
 *      The list is searched from w->bufferFirst for the first buffer whose
 *      entry pointer equals `entry`. That buffer is unlinked, the writer's
 *      bufferCount and bufferSize are reduced accordingly and the buffer is
 *      released with Cookfs_WriterWriterBufferFree(). Every buffer that
 *      followed the removed one then has the first element of its entry's
 *      fileBlockOffsetSize incremented by one, so that those entries keep
 *      referring to their own (now shifted) position in the list.
 *
 * Results:
 *      1 if a matching buffer was found and removed, 0 otherwise.
 *
 * NOTE(review): only w->bufferFirst is maintained here. If the writer also
 * keeps a pointer to the tail of the list, it is not updated when the last
 * buffer is removed - confirm against the Cookfs_Writer definition.
 */
int Cookfs_WriterRemoveFile(Cookfs_Writer *w, Cookfs_FsindexEntry *entry) {
    CookfsLog(printf("Cookfs_WriterRemoveFile: enter"));

    // Walk the list through the address of each "next" link, so that the
    // head of the list needs no special handling when unlinking.
    Cookfs_WriterBuffer **link = &w->bufferFirst;
    while (*link != NULL && (*link)->entry != entry) {
        link = &(*link)->next;
    }

    Cookfs_WriterBuffer *victim = *link;
    if (victim == NULL) {
        CookfsLog(printf("Cookfs_WriterRemoveFile: could not find the buffer"
            " to remove"));
        return 0;
    }

    CookfsLog(printf("Cookfs_WriterRemoveFile: found the buffer"
        " to remove [%p]", (void *)victim));

    // Remember the successor before the buffer is released
    Cookfs_WriterBuffer *follower = victim->next;
    *link = follower;

    w->bufferCount--;
    w->bufferSize -= victim->bufferSize;
    Cookfs_WriterWriterBufferFree(victim);

    // Shift block number for the following files and their entries
    for (; follower != NULL; follower = follower->next) {
        CookfsLog(printf("Cookfs_WriterRemoveFile: shift buffer number"
            " for buffer [%p]", (void *)follower));
        follower->entry->data.fileInfo.fileBlockOffsetSize[0]++;
    }

    return 1;
}

#define DATA_FILE (Tcl_Obj *)data
#define DATA_CHANNEL (Tcl_Channel)data
#define DATA_OBJECT (Tcl_Obj *)data
Expand Down Expand Up @@ -269,6 +306,24 @@ int Cookfs_WriterAddFile(Cookfs_Writer *w, Tcl_Obj *pathObj,
Tcl_DString chanTranslation, chanEncoding;
Cookfs_FsindexEntry *entry = NULL;

// Check if we have the file in the small file buffer. We will try to get
// the fsindex entry for this file and see if it is a pending file.
entry = Cookfs_FsindexGet(w->index, pathObj);
if (entry != NULL) {
CookfsLog(printf("Cookfs_WriterAddFile: an existing entry for the file"
" was found"));
if (Cookfs_FsindexEntryIsPending(entry)) {
CookfsLog(printf("Cookfs_WriterAddFile: the entry is pending,"
" remove it from small file buffer"));
Cookfs_WriterRemoveFile(w, entry);
} else {
CookfsLog(printf("Cookfs_WriterAddFile: the entry is"
" not pending"));
}
entry = NULL;
}


switch (dataType) {

case COOKFS_WRITER_SOURCE_BUFFER:
Expand Down Expand Up @@ -349,7 +404,7 @@ int Cookfs_WriterAddFile(Cookfs_Writer *w, Tcl_Obj *pathObj,
case COOKFS_WRITER_SOURCE_OBJECT: ; // an empty statement

int length;
data = (void *)Tcl_GetStringFromObj(DATA_OBJECT, &length);
data = (void *)Tcl_GetByteArrayFromObj(DATA_OBJECT, &length);

if (dataSize < 0) {
CookfsLog(printf("Cookfs_WriterAddFile: get datasize from"
Expand Down Expand Up @@ -386,7 +441,7 @@ int Cookfs_WriterAddFile(Cookfs_Writer *w, Tcl_Obj *pathObj,
if (entry == NULL) {
CookfsLog(printf("Cookfs_WriterAddFile: failed to create"
" the entry"));
Cookfs_WriterSetLastError(w, "unable to create entry");
Cookfs_WriterSetLastError(w, "Unable to create entry");
goto error;
}
// Set entry block information
Expand Down Expand Up @@ -474,7 +529,10 @@ int Cookfs_WriterAddFile(Cookfs_Writer *w, Tcl_Obj *pathObj,
}

// Calculate number of blocks
int numBlocks = (dataSize / w->pageSize) + 1;
int numBlocks = dataSize / w->pageSize;
if (dataSize % w->pageSize) {
numBlocks++;
}

// Create an entry
CookfsLog(printf("Cookfs_WriterAddFile: create an entry"
Expand All @@ -483,7 +541,7 @@ int Cookfs_WriterAddFile(Cookfs_Writer *w, Tcl_Obj *pathObj,
if (entry == NULL) {
CookfsLog(printf("Cookfs_WriterAddFile: failed to create"
" the entry"));
Cookfs_WriterSetLastError(w, "unable to create entry");
Cookfs_WriterSetLastError(w, "Unable to create entry");
goto error;
}
Cookfs_FsindexUpdateEntryFileSize(entry, dataSize);
Expand Down Expand Up @@ -640,7 +698,8 @@ int Cookfs_WriterPurge(Cookfs_Writer *w) {
// To solve this problem, we will check the buffers and if they are
// identical, then we will use the same sort key for those buffers.

CookfsLog(printf("Cookfs_WriterPurge: sort %d entries", w->bufferCount));
CookfsLog(printf("Cookfs_WriterPurge: have total %d entries",
w->bufferCount));
// Create array for all our entries
sortedWB = ckalloc(w->bufferCount * sizeof(Cookfs_WriterBuffer *));
if (sortedWB == NULL) {
Expand Down
2 changes: 2 additions & 0 deletions generic/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ const void *Cookfs_WriterGetBuffer(Cookfs_Writer *w, int blockNumber,
int Cookfs_WriterAddFile(Cookfs_Writer *w, Tcl_Obj *pathObj,
Cookfs_WriterDataSource dataType, void *data, Tcl_WideInt dataSize);

int Cookfs_WriterRemoveFile(Cookfs_Writer *w, Cookfs_FsindexEntry *entry);

int Cookfs_WriterGetWritetomemory(Cookfs_Writer *w);
void Cookfs_WriterSetWritetomemory(Cookfs_Writer *w, int status);

Expand Down
4 changes: 3 additions & 1 deletion scripts/vfs.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,9 @@ proc cookfs::tcl::vfs::delete {fsid root relative actualpath type recursive} {
} else {
vfs::filesystem posixerror $::cookfs::posix(ENOTDIR)
}
} elseif {$type == "directory" && $recursive} {
} elseif {$type eq "file"} {
$fs(writer) deleteFile $relative
} elseif {$type == "directory" && $recursive} {
if {[catch {
foreach ch [$fs(index) list $relative] {
# check type and delete appropriately
Expand Down
59 changes: 56 additions & 3 deletions scripts/writer.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ proc cookfs::tcl::writer::handle { name cmd args } {
write {
tailcall write $name {*}$args
}
deleteFile {
tailcall deleteFile $name {*}$args
}
writetomemory {
if { [llength $args] > 1 } {
error "wrong # args: should be \"$name\
Expand Down Expand Up @@ -210,12 +213,46 @@ proc cookfs::tcl::writer::purge {wrid} {
}
}

# cookfs::tcl::writer::deleteFile --
#
#       Removes pending files from the small file buffer of a writer.
#
# Arguments:
#       wrid - name of the global array that holds the writer state
#       args - zero or more paths to remove from the small file buffer
#
# For each path, the first matching record in c(smallfilepaths) is dropped
# together with its data in c(smallfilebuf), and c(smallfilebufsize) is
# reduced by the recorded size. Files buffered after the removed one move
# one position down in the buffer, so the first element of their chunk list
# in the index is incremented by one to keep pointing at the right buffer.
# Paths that are not in the buffer are silently ignored.
proc cookfs::tcl::writer::deleteFile {wrid args} {
    upvar #0 $wrid c
    # args - list of paths to delete
    foreach path_to_delete $args {
        set idx 0
        set deleted 0
        set newsmallfilepaths [list]
        foreach { path size clk } $c(smallfilepaths) {
            if { $deleted } {
                # shift buffer number for other files
                set entry [$c(index) get $path]
                set chunklist [lindex $entry 2]
                set chunklist [lreplace $chunklist 0 0 [expr { [lindex $chunklist 0] + 1 }]]
                $c(index) set $path [lindex $entry 0] $chunklist
                lappend newsmallfilepaths $path $size $clk
            } elseif { $path eq $path_to_delete } {
                # drop the data and its size; the path record is simply
                # not copied to the new list
                set c(smallfilebuf) [lreplace $c(smallfilebuf) $idx $idx]
                set c(smallfilebufsize) [expr { $c(smallfilebufsize) - $size }]
                set deleted 1
            } else {
                lappend newsmallfilepaths $path $size $clk
                incr idx
            }
        }
        # leave the writer state untouched when nothing was removed
        if { $deleted } {
            set c(smallfilepaths) $newsmallfilepaths
        }
    }
}

proc cookfs::tcl::writer::write {wrid args} {
upvar #0 $wrid c
# args - path type data size

# iterate over arguments
foreach {path datatype data size} $args {
deleteFile $wrid $path
if {$size == ""} {
# read actual size, from file or channel
switch -- $datatype {
Expand Down Expand Up @@ -249,6 +286,8 @@ proc cookfs::tcl::writer::write {wrid args} {
channel {
set clk [clock seconds]
set chan $data
set translation [fconfigure $chan -translation]
set encoding [fconfigure $chan -encoding]
fconfigure $chan -translation binary
set doclose 0
set rawdata 0
Expand All @@ -274,12 +313,22 @@ proc cookfs::tcl::writer::write {wrid args} {
}

set sfidx [llength $c(smallfilebuf)]

if { [catch {
$c(index) set $path $clk [list [expr {-$sfidx - 1}] 0 $size]
} res opts] } {
if { $doclose } {
close $chan
} elseif { $datatype eq "channel" } {
fconfigure $chan -translation $translation -encoding $encoding
}
return -options $opts "unable to add \"$path\": $res"
}

lappend c(smallfilebuf) $fc
lappend c(smallfilepaths) $path $size $clk
incr c(smallfilebufsize) [string length $fc]

$c(index) set $path $clk [list [expr {-$sfidx - 1}] 0 $size]

# if current buffer exceeds maximum, write small files to clean it
# but only if not writing to memory
if {(!$c(writetomemory)) && ($c(smallfilebufsize) >= $c(smallfilebuffersize))} {
Expand Down Expand Up @@ -316,7 +365,11 @@ proc cookfs::tcl::writer::write {wrid args} {

$c(index) set $path $clk $chunklist
}
if {$doclose} {close $chan}
if { $doclose } {
close $chan
} elseif { $datatype eq "channel" } {
fconfigure $chan -translation $translation -encoding $encoding
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion tests/all.tcl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package require tcltest
source [file join [file dirname [info script]] tcltestex.tcl]

#lappend auto_path [pwd]

Expand Down
104 changes: 104 additions & 0 deletions tests/dedup.test
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,107 @@ tcltest::test cookfsDedup-1.2 "Small file deduplication for same resulting pages
tcltest::removeFile $file
} -result 1

# Test cookfsDedup-1.3: writes five 2048-byte small files into an uncompressed
# archive with 4096-byte pages. file1 and file4 carry identical content, so
# file4 is expected to reuse the region already stored for file1 rather than
# occupy new space. The archive must therefore hold exactly two pages.
tcltest::test cookfsDedup-1.3 "Small file deduplication for different resulting pages" -setup {
set file [tcltest::makeFile {} pages.cfs]
} -body {
# randomDatas is a helper defined outside this chunk; presumably it returns
# a list of 4 distinct random payloads of 2048 bytes each - TODO confirm
set datas [randomDatas 4 2048]
set c [vfs::cookfs::Mount $file $file -compression none \
-pagesize 4096 -smallfilesize 4096 -smallfilebuffer 16384]
# datas is 4 files of 2048 bytes each
# page has 4096 bytes
# file1 contains datas#0 comes to page#0[0]
# file2 contains datas#1 comes to page#0[1] - flush page now
# file3 contains datas#2 comes to page#1[0]
# file4 contains datas#0 uses page#0[0]
# file5 contains datas#3 comes to page#1[1] - flush page now
set files [list \
file1 [lindex $datas 0] \
file2 [lindex $datas 1] \
file3 [lindex $datas 2] \
file4 [lindex $datas 0] \
file5 [lindex $datas 3] \
]
# write every payload through the mounted archive in binary mode
foreach { f d } $files {
set fh [open $file/$f w]
fconfigure $fh -translation binary
puts -nonewline $fh $d
close $fh
}
# unmounting finalizes the archive so its size on disk can be checked
vfs::unmount $file
set fs [file size $file]
set result [list]
# the file size should be (2 pages + internal data) and less than (3 pages)
lappend result [expr {($fs > (4096 * 2)) && ($fs < (4096 * 3))}]
# remount read-only and record, for each file, its index entry followed by
# a flag telling whether the content read back equals what was written
set c [vfs::cookfs::Mount -readonly $file $file]
foreach { f d } $files {
lappend result "$f [[$c getindex] get $f] [string equal $d [read [set fh [open $file/$f rb]]][close $fh]]"
}
join $result \n
} -cleanup {
catch {vfs::unmount $file}
tcltest::removeFile $file
} -match glob -result {1
file1 * 2048 {0 0 2048} 1
file2 * 2048 {0 2048 2048} 1
file3 * 2048 {1 0 2048} 1
file4 * 2048 {0 0 2048} 1
file5 * 2048 {1 2048 2048} 1}

# Test cookfsDedup-1.4: writes eight 2048-byte small files built from only
# three distinct payloads into an uncompressed archive with 4096-byte pages
# and an 8192-byte small file buffer. Files with repeated content are expected
# to point at the region stored for the first occurrence, so the archive ends
# up with one full page plus one half-filled page.
tcltest::test cookfsDedup-1.4 "Small file deduplication for different resulting pages" -setup {
set file [tcltest::makeFile {} pages.cfs]
} -body {
# randomDatas is a helper defined outside this chunk; presumably it returns
# a list of 3 distinct random payloads of 2048 bytes each - TODO confirm
set datas [randomDatas 3 2048]
set c [vfs::cookfs::Mount $file $file -compression none \
-pagesize 4096 -smallfilesize 4096 -smallfilebuffer 8192]
# datas is 3 files of 2048 bytes each
# page has 4096 bytes
# file1 contains datas#0 comes to page#0[0]
# file2 contains datas#1 comes to page#0[1] - flush page now
# file3 contains datas#1 uses page#0[1]
# file4 contains datas#1 uses page#0[1] - the buffer size of the small file
# has reached the limit and should be flushed.
# But only 1 page should be used for all 4 files.
# file5 contains datas#0
# file6 contains datas#1 - flush page now, but it appears the same page as #0,
# so no new pages need to be created.
# file7 contains datas#1 uses page#0[1]
# file8 contains datas#2 uses page#1[0] - unmount now and flush page#1, even if it's not filled
set files [list \
file1 [lindex $datas 0] \
file2 [lindex $datas 1] \
file3 [lindex $datas 1] \
file4 [lindex $datas 1] \
file5 [lindex $datas 0] \
file6 [lindex $datas 1] \
file7 [lindex $datas 1] \
file8 [lindex $datas 2] \
]
# write every payload through the mounted archive in binary mode
foreach { f d } $files {
set fh [open $file/$f w]
fconfigure $fh -translation binary
puts -nonewline $fh $d
close $fh
}
# unmounting finalizes the archive so its size on disk can be checked
vfs::unmount $file
set fs [file size $file]
set result [list]
# the file size should be (1.5 pages + internal data) and less than (2 pages)
lappend result [expr {($fs > (1.5 * 4096)) && ($fs < (4096 * 2))}]
# remount read-only and record, for each file, its index entry followed by
# a flag telling whether the content read back equals what was written
set c [vfs::cookfs::Mount -readonly $file $file]
foreach { f d } $files {
lappend result "$f [[$c getindex] get $f] [string equal $d [read [set fh [open $file/$f rb]]][close $fh]]"
}
join $result \n
} -cleanup {
catch {vfs::unmount $file}
tcltest::removeFile $file
} -match glob -result {1
file1 * 2048 {0 0 2048} 1
file2 * 2048 {0 2048 2048} 1
file3 * 2048 {0 2048 2048} 1
file4 * 2048 {0 2048 2048} 1
file5 * 2048 {0 0 2048} 1
file6 * 2048 {0 2048 2048} 1
file7 * 2048 {0 2048 2048} 1
file8 * 2048 {1 0 2048} 1}

Loading

0 comments on commit 921c8d8

Please sign in to comment.