diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 9b4a9a98..d67480c7 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -28,20 +28,24 @@ strategy: Linux amd64 GCC: VM_IMAGE: 'ubuntu-20.04' APT_PACKAGES: ninja-build g++-8 + C_COMPILER: gcc-8 CXX_COMPILER: g++-8 Linux i686 GCC: VM_IMAGE: 'ubuntu-20.04' APT_PACKAGES: ninja-build g++-i686-linux-gnu + C_COMPILER: i686-linux-gnu-gcc CXX_COMPILER: i686-linux-gnu-g++ COMPILER_FLAGS: -mfpmath=sse -msse Linux arm64 GCC: VM_IMAGE: 'ubuntu-20.04' APT_PACKAGES: ninja-build g++-aarch64-linux-gnu qemu-user + C_COMPILER: aarch64-linux-gnu-gcc CXX_COMPILER: aarch64-linux-gnu-g++ CRUNCH_EXE_RUNNER: qemu-aarch64 -L /usr/aarch64-linux-gnu Linux armhf GCC: VM_IMAGE: 'ubuntu-20.04' APT_PACKAGES: ninja-build g++-arm-linux-gnueabihf qemu-user + C_COMPILER: arm-linux-gnueabihf-gcc CXX_COMPILER: arm-linux-gnueabihf-g++ CRUNCH_EXE_RUNNER: qemu-arm -L /usr/arm-linux-gnueabihf # There is a qemu-arm IO bug in ubuntu-20.04 and ubuntu-22.04, @@ -50,6 +54,7 @@ strategy: Linux amd64 Clang: VM_IMAGE: 'ubuntu-20.04' APT_PACKAGES: ninja-build + C_COMPILER: clang CXX_COMPILER: clang++ Windows amd64 MinGW: VM_IMAGE: 'ubuntu-22.04' @@ -116,11 +121,14 @@ steps: if [ -n "${TOOLCHAIN_FILE:-}" ]; then cmake_args+=(-DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}") fi + if [ -n "${C_COMPILER:-}" ]; then + cmake_args+=(-DCMAKE_C_COMPILER="${C_COMPILER}") + fi if [ -n "${CXX_COMPILER:-}" ]; then cmake_args+=(-DCMAKE_CXX_COMPILER="${CXX_COMPILER}") fi if [ -n "${COMPILER_FLAGS:-}" ]; then - cmake_args+=(-DCMAKE_CXX_FLAGS="${COMPILER_FLAGS}") + cmake_args+=(-DCMAKE_C_FLAGS="${COMPILER_FLAGS}" -DCMAKE_CXX_FLAGS="${COMPILER_FLAGS}") fi if [ -z "${SOURCE_DIR:-}" ]; then cmake_args+=(-DBUILD_CRUNCH=ON -DBUILD_EXAMPLES=ON -DBUILD_SHARED_LIBS=ON) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ed79c99..2a61ad55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ set(CRUNCH_PROJECT_NAME crunch) set(CRUNCH_LIBRARY_NAME crn) set(CRUNCH_EXE_NAME crunch) -project(${CRUNCH_PROJECT_NAME} LANGUAGES CXX) +project(${CRUNCH_PROJECT_NAME} LANGUAGES C CXX) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) @@ -30,12 +30,14 @@ if (Git_FOUND) endif() endif() -macro(set_cxx_flag FLAG) - if (${ARGC} GREATER 1) - set(CMAKE_CXX_FLAGS_${ARGV1} "${CMAKE_CXX_FLAGS_${ARGV1}} ${FLAG}") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}") - endif() +macro(set_c_cxx_flag FLAG) + foreach(lang C CXX) + if (${ARGC} GREATER 1) + set(CMAKE_${lang}_FLAGS_${ARGV1} "${CMAKE_${lang}_FLAGS_${ARGV1}} ${FLAG}") + else() + set(CMAKE_${lang}_FLAGS "${CMAKE_${lang}_FLAGS} ${FLAG}") + endif() + endforeach() endmacro() macro(set_linker_flag FLAG) @@ -73,7 +75,7 @@ option(USE_FAST_MATH "Enable fast math (generated images are less likely to be r if (MSVC) # Enable MSVC parallel compilation. - set_cxx_flag("/MP") + set_c_cxx_flag("/MP") # MSVC doesn't implement strict aliasing so there is nothing else to do. @@ -84,50 +86,50 @@ if (MSVC) # See https://learn.microsoft.com/en-us/cpp/build/reference/fp-specify-floating-point-behavior # and https://devblogs.microsoft.com/cppblog/the-fpcontract-flag-and-changes-to-fp-modes-in-vs2022/ # By default, MSVC doesn't enable the /fp:fast option. - set_cxx_flag("/fp:fast") + set_c_cxx_flag("/fp:fast") endif() if (USE_LTO) - set_cxx_flag("/GL" RELEASE) - set_cxx_flag("/GL" RELWITHDEBINFO) - set_cxx_flag("/GL" MINSIZEREL) + set_c_cxx_flag("/GL" RELEASE) + set_c_cxx_flag("/GL" RELWITHDEBINFO) + set_c_cxx_flag("/GL" MINSIZEREL) set_linker_flag("/LTCG" RELEASE) set_linker_flag("/LTCG" RELWITHDEBINFO) set_linker_flag("/LTCG" MINSIZEREL) endif() else() # As written in crnlib.h and stb_image.h, strict aliasing should always be disabled. - set_cxx_flag("-fno-strict-aliasing") + set_c_cxx_flag("-fno-strict-aliasing") # Generate maxmimum amount of debug information, including macro definitions. - set_cxx_flag("-g3" DEBUG) - set_cxx_flag("-g3" RELWITHDEBINFO) + set_c_cxx_flag("-g3" DEBUG) + set_c_cxx_flag("-g3" RELWITHDEBINFO) - set_cxx_flag("-pthread") + set_c_cxx_flag("-pthread") set_linker_flag("-pthread") if (USE_EXTRA_OPTIMIZATION) # CMake already sets the -O3 flag on Release build and -O[1-3s] already sets the -fomit-frame-pointer flag. - set_cxx_flag("-Og" DEBUG) - set_cxx_flag("-O3" RELWITHDEBINFO) + set_c_cxx_flag("-Og" DEBUG) + set_c_cxx_flag("-O3" RELWITHDEBINFO) endif() if (USE_FAST_MATH) # By default, GCC uses -ffp-contract=fast with -std=gnu* and uses -ffp-contract=off with -std=c*. # See https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html # By default, GCC doesn't enable the -ffast-math option. - set_cxx_flag("-ffast-math -fno-math-errno -ffp-contract=fast") + set_c_cxx_flag("-ffast-math -fno-math-errno -ffp-contract=fast") else() # By default, GCC uses -std=gnu* and then enables -ffp-contract=fast even if -ffast-math is not enabled. - set_cxx_flag("-ffp-contract=off") + set_c_cxx_flag("-ffp-contract=off") endif() # It should be done at the very end because it copies all compiler flags # to the linker flags. if (USE_LTO) - set_cxx_flag("-flto" RELEASE) - set_cxx_flag("-flto" RELWITHDEBINFO) - set_cxx_flag("-flto" MINSIZEREL) + set_c_cxx_flag("-flto" RELEASE) + set_c_cxx_flag("-flto" RELWITHDEBINFO) + set_c_cxx_flag("-flto" MINSIZEREL) set_linker_flag("${CMAKE_CXX_FLAGS}" RELEASE) set_linker_flag("${CMAKE_CXX_FLAGS}" RELWITHDEBINFO) set_linker_flag("${CMAKE_CXX_FLAGS}" MINSIZEREL) diff --git a/crnlib/CMakeLists.txt b/crnlib/CMakeLists.txt index 7f6292f2..ce0c974a 100644 --- a/crnlib/CMakeLists.txt +++ b/crnlib/CMakeLists.txt @@ -142,49 +142,107 @@ set(CRNLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/crn_vector2d.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_winhdr.h ${CMAKE_CURRENT_SOURCE_DIR}/crnlib.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zBuf.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zBuf.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zBuf2.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zCrc.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zFile.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zFile.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zStream.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zVersion.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Alloc.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Alloc.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bcj2.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bcj2.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bra.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bra.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bra86.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_BraIA64.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_CpuArch.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFind.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFind.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzHash.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaDec.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaDec.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaEnc.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaEnc.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaLib.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaLib.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_MyVersion.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Types.h ${CMAKE_CURRENT_SOURCE_DIR}/stb_image.h ${CMAKE_CURRENT_SOURCE_DIR}/stb_image_write.h ${CMAKE_CURRENT_SOURCE_DIR}/../inc/crn_decomp.h ${CMAKE_CURRENT_SOURCE_DIR}/../inc/crnlib.h ${CMAKE_CURRENT_SOURCE_DIR}/../inc/dds_defs.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zAlloc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zAlloc.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zArcIn.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zBuf2.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zBuf.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zBuf.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zCrc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zCrc.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zCrcOpt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zDec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zFile.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zFile.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7z.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zStream.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zTypes.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zVersion.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/7zWindows.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Aes.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Aes.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/AesOpt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Alloc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Alloc.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Bcj2.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Bcj2Enc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Bcj2.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Bra86.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Bra.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Bra.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/BraIA64.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Compiler.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/CpuArch.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/CpuArch.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Delta.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Delta.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/DllSecur.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/DllSecur.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFind.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFind.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFindMt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFindMt.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFindOpt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzHash.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma2Dec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma2Dec.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma2DecMt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma2DecMt.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma2Enc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma2Enc.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma86Dec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma86Enc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Lzma86.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzmaDec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzmaDec.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzmaEnc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzmaEnc.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzmaLib.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzmaLib.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/MtCoder.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/MtCoder.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/MtDec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/MtDec.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Ppmd7.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Ppmd7Dec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Ppmd7Enc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Ppmd7.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Ppmd.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Precomp.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/RotateDefs.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Sha256.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Sha256.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Sha256Opt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Sort.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Sort.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/SwapBytes.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/SwapBytes.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Threads.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Threads.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Xz.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzCrc64.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzCrc64.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzCrc64Opt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzDec.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzEnc.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzEnc.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Xz.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/XzIn.c ) if(WIN32) set(CRNLIB_THREAD_SRCS - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFindMt.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFindMt.h - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Threads.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Threads.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_win32.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_win32.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFindMt.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/LzFindMt.h + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Threads.c + ${CMAKE_CURRENT_SOURCE_DIR}/lzma/Threads.h ) else() set(CRNLIB_THREAD_SRCS diff --git a/crnlib/crn_lzma_codec.cpp b/crnlib/crn_lzma_codec.cpp index 0e270bd5..cbcd6414 100644 --- a/crnlib/crn_lzma_codec.cpp +++ b/crnlib/crn_lzma_codec.cpp @@ -4,7 +4,7 @@ #include "crn_lzma_codec.h" #include "crn_strutils.h" #include "crn_checksum.h" -#include "lzma_LzmaLib.h" +#include "lzma/LzmaLib.h" #include "crn_threading.h" namespace crnlib { diff --git a/crnlib/lzma/7z.h b/crnlib/lzma/7z.h new file mode 100644 index 00000000..b42405cc --- /dev/null +++ b/crnlib/lzma/7z.h @@ -0,0 +1,204 @@ +/* 7z.h -- 7z interface +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_H +#define ZIP7_INC_7Z_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define k7zStartHeaderSize 0x20 +#define k7zSignatureSize 6 + +extern const Byte k7zSignature[k7zSignatureSize]; + +typedef struct +{ + const Byte *Data; + size_t Size; +} CSzData; + +/* CSzCoderInfo & CSzFolder support only default methods */ + +typedef struct +{ + size_t PropsOffset; + UInt32 MethodID; + Byte NumStreams; + Byte PropsSize; +} CSzCoderInfo; + +typedef struct +{ + UInt32 InIndex; + UInt32 OutIndex; +} CSzBond; + +#define SZ_NUM_CODERS_IN_FOLDER_MAX 4 +#define SZ_NUM_BONDS_IN_FOLDER_MAX 3 +#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4 + +typedef struct +{ + UInt32 NumCoders; + UInt32 NumBonds; + UInt32 NumPackStreams; + UInt32 UnpackStream; + UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX]; + CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX]; + CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX]; +} CSzFolder; + + +SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd); + +typedef struct +{ + UInt32 Low; + UInt32 High; +} CNtfsFileTime; + +typedef struct +{ + Byte *Defs; /* MSB 0 bit numbering */ + UInt32 *Vals; +} CSzBitUi32s; + +typedef struct +{ + Byte *Defs; /* MSB 0 bit numbering */ + // UInt64 *Vals; + CNtfsFileTime *Vals; +} CSzBitUi64s; + +#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) + +#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0) + +typedef struct +{ + UInt32 NumPackStreams; + UInt32 NumFolders; + + UInt64 *PackPositions; // NumPackStreams + 1 + CSzBitUi32s FolderCRCs; // NumFolders + + size_t *FoCodersOffsets; // NumFolders + 1 + UInt32 *FoStartPackStreamIndex; // NumFolders + 1 + UInt32 *FoToCoderUnpackSizes; // NumFolders + 1 + Byte *FoToMainUnpackSizeIndex; // NumFolders + UInt64 *CoderUnpackSizes; // for all coders in all folders + + Byte *CodersData; + + UInt64 RangeLimit; +} CSzAr; + +UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); + +SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, + ILookInStreamPtr stream, UInt64 startPos, + Byte *outBuffer, size_t outSize, + ISzAllocPtr allocMain); + +typedef struct +{ + CSzAr db; + + UInt64 startPosAfterHeader; + UInt64 dataPos; + + UInt32 NumFiles; + + UInt64 *UnpackPositions; // NumFiles + 1 + // Byte *IsEmptyFiles; + Byte *IsDirs; + CSzBitUi32s CRCs; + + CSzBitUi32s Attribs; + // CSzBitUi32s Parents; + CSzBitUi64s MTime; + CSzBitUi64s CTime; + + UInt32 *FolderToFile; // NumFolders + 1 + UInt32 *FileToFolder; // NumFiles + + size_t *FileNameOffsets; /* in 2-byte steps */ + Byte *FileNames; /* UTF-16-LE */ +} CSzArEx; + +#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i)) + +#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i]) + +void SzArEx_Init(CSzArEx *p); +void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc); +UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder); +int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize); + +/* +if dest == NULL, the return value specifies the required size of the buffer, + in 16-bit characters, including the null-terminating character. +if dest != NULL, the return value specifies the number of 16-bit characters that + are written to the dest, including the null-terminating character. */ + +size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest); + +/* +size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex); +UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest); +*/ + + + +/* + SzArEx_Extract extracts file from archive + + *outBuffer must be 0 before first call for each new archive. + + Extracting cache: + If you need to decompress more than one file, you can send + these values from previous call: + *blockIndex, + *outBuffer, + *outBufferSize + You can consider "*outBuffer" as cache of solid block. If your archive is solid, + it will increase decompression speed. + + If you use external function, you can declare these 3 cache variables + (blockIndex, outBuffer, outBufferSize) as static in that external function. + + Free *outBuffer and set *outBuffer to 0, if you want to flush cache. +*/ + +SRes SzArEx_Extract( + const CSzArEx *db, + ILookInStreamPtr inStream, + UInt32 fileIndex, /* index of file */ + UInt32 *blockIndex, /* index of solid block */ + Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ + size_t *outBufferSize, /* buffer size for output buffer */ + size_t *offset, /* offset of stream for required file in *outBuffer */ + size_t *outSizeProcessed, /* size of file in *outBuffer */ + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp); + + +/* +SzArEx_Open Errors: +SZ_ERROR_NO_ARCHIVE +SZ_ERROR_ARCHIVE +SZ_ERROR_UNSUPPORTED +SZ_ERROR_MEM +SZ_ERROR_CRC +SZ_ERROR_INPUT_EOF +SZ_ERROR_FAIL +*/ + +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, + ISzAllocPtr allocMain, ISzAllocPtr allocTemp); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/7zAlloc.c b/crnlib/lzma/7zAlloc.c new file mode 100644 index 00000000..3a8e2cbf --- /dev/null +++ b/crnlib/lzma/7zAlloc.c @@ -0,0 +1,89 @@ +/* 7zAlloc.c -- Allocation functions for 7z processing +2023-03-04 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zAlloc.h" + +/* #define SZ_ALLOC_DEBUG */ +/* use SZ_ALLOC_DEBUG to debug alloc/free operations */ + +#ifdef SZ_ALLOC_DEBUG + +/* +#ifdef _WIN32 +#include "7zWindows.h" +#endif +*/ + +#include +static int g_allocCount = 0; +static int g_allocCountTemp = 0; + +static void Print_Alloc(const char *s, size_t size, int *counter) +{ + const unsigned size2 = (unsigned)size; + fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, size2); + (*counter)++; +} +static void Print_Free(const char *s, int *counter) +{ + (*counter)--; + fprintf(stderr, "\n%s count = %10d", s, *counter); +} +#endif + +void *SzAlloc(ISzAllocPtr p, size_t size) +{ + UNUSED_VAR(p) + if (size == 0) + return 0; + #ifdef SZ_ALLOC_DEBUG + Print_Alloc("Alloc", size, &g_allocCount); + #endif + return malloc(size); +} + +void SzFree(ISzAllocPtr p, void *address) +{ + UNUSED_VAR(p) + #ifdef SZ_ALLOC_DEBUG + if (address) + Print_Free("Free ", &g_allocCount); + #endif + free(address); +} + +void *SzAllocTemp(ISzAllocPtr p, size_t size) +{ + UNUSED_VAR(p) + if (size == 0) + return 0; + #ifdef SZ_ALLOC_DEBUG + Print_Alloc("Alloc_temp", size, &g_allocCountTemp); + /* + #ifdef _WIN32 + return HeapAlloc(GetProcessHeap(), 0, size); + #endif + */ + #endif + return malloc(size); +} + +void SzFreeTemp(ISzAllocPtr p, void *address) +{ + UNUSED_VAR(p) + #ifdef SZ_ALLOC_DEBUG + if (address) + Print_Free("Free_temp ", &g_allocCountTemp); + /* + #ifdef _WIN32 + HeapFree(GetProcessHeap(), 0, address); + return; + #endif + */ + #endif + free(address); +} diff --git a/crnlib/lzma/7zAlloc.h b/crnlib/lzma/7zAlloc.h new file mode 100644 index 00000000..bedd125c --- /dev/null +++ b/crnlib/lzma/7zAlloc.h @@ -0,0 +1,19 @@ +/* 7zAlloc.h -- Allocation functions +2023-03-04 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_ALLOC_H +#define ZIP7_INC_7Z_ALLOC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void *SzAlloc(ISzAllocPtr p, size_t size); +void SzFree(ISzAllocPtr p, void *address); + +void *SzAllocTemp(ISzAllocPtr p, size_t size); +void SzFreeTemp(ISzAllocPtr p, void *address); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/7zArcIn.c b/crnlib/lzma/7zArcIn.c new file mode 100644 index 00000000..84a1c08d --- /dev/null +++ b/crnlib/lzma/7zArcIn.c @@ -0,0 +1,1786 @@ +/* 7zArcIn.c -- 7z Input functions +2023-09-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7z.h" +#include "7zBuf.h" +#include "7zCrc.h" +#include "CpuArch.h" + +#define MY_ALLOC(T, p, size, alloc) \ + { if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; } + +#define MY_ALLOC_ZE(T, p, size, alloc) \ + { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) } + +#define MY_ALLOC_AND_CPY(to, size, from, alloc) \ + { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); } + +#define MY_ALLOC_ZE_AND_CPY(to, size, from, alloc) \ + { if ((size) == 0) to = NULL; else { MY_ALLOC_AND_CPY(to, size, from, alloc) } } + +#define k7zMajorVersion 0 + +enum EIdEnum +{ + k7zIdEnd, + k7zIdHeader, + k7zIdArchiveProperties, + k7zIdAdditionalStreamsInfo, + k7zIdMainStreamsInfo, + k7zIdFilesInfo, + k7zIdPackInfo, + k7zIdUnpackInfo, + k7zIdSubStreamsInfo, + k7zIdSize, + k7zIdCRC, + k7zIdFolder, + k7zIdCodersUnpackSize, + k7zIdNumUnpackStream, + k7zIdEmptyStream, + k7zIdEmptyFile, + k7zIdAnti, + k7zIdName, + k7zIdCTime, + k7zIdATime, + k7zIdMTime, + k7zIdWinAttrib, + k7zIdComment, + k7zIdEncodedHeader, + k7zIdStartPos, + k7zIdDummy + // k7zNtSecure, + // k7zParent, + // k7zIsReal +}; + +const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}; + +#define SzBitUi32s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } + +static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) +{ + if (num == 0) + { + p->Defs = NULL; + p->Vals = NULL; + } + else + { + MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc) + MY_ALLOC(UInt32, p->Vals, num, alloc) + } + return SZ_OK; +} + +static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; + ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; +} + +#define SzBitUi64s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; } + +static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; + ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; +} + + +static void SzAr_Init(CSzAr *p) +{ + p->NumPackStreams = 0; + p->NumFolders = 0; + + p->PackPositions = NULL; + SzBitUi32s_INIT(&p->FolderCRCs) + + p->FoCodersOffsets = NULL; + p->FoStartPackStreamIndex = NULL; + p->FoToCoderUnpackSizes = NULL; + p->FoToMainUnpackSizeIndex = NULL; + p->CoderUnpackSizes = NULL; + + p->CodersData = NULL; + + p->RangeLimit = 0; +} + +static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->PackPositions); + SzBitUi32s_Free(&p->FolderCRCs, alloc); + + ISzAlloc_Free(alloc, p->FoCodersOffsets); + ISzAlloc_Free(alloc, p->FoStartPackStreamIndex); + ISzAlloc_Free(alloc, p->FoToCoderUnpackSizes); + ISzAlloc_Free(alloc, p->FoToMainUnpackSizeIndex); + ISzAlloc_Free(alloc, p->CoderUnpackSizes); + + ISzAlloc_Free(alloc, p->CodersData); + + SzAr_Init(p); +} + + +void SzArEx_Init(CSzArEx *p) +{ + SzAr_Init(&p->db); + + p->NumFiles = 0; + p->dataPos = 0; + + p->UnpackPositions = NULL; + p->IsDirs = NULL; + + p->FolderToFile = NULL; + p->FileToFolder = NULL; + + p->FileNameOffsets = NULL; + p->FileNames = NULL; + + SzBitUi32s_INIT(&p->CRCs) + SzBitUi32s_INIT(&p->Attribs) + // SzBitUi32s_INIT(&p->Parents) + SzBitUi64s_INIT(&p->MTime) + SzBitUi64s_INIT(&p->CTime) +} + +void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->UnpackPositions); + ISzAlloc_Free(alloc, p->IsDirs); + + ISzAlloc_Free(alloc, p->FolderToFile); + ISzAlloc_Free(alloc, p->FileToFolder); + + ISzAlloc_Free(alloc, p->FileNameOffsets); + ISzAlloc_Free(alloc, p->FileNames); + + SzBitUi32s_Free(&p->CRCs, alloc); + SzBitUi32s_Free(&p->Attribs, alloc); + // SzBitUi32s_Free(&p->Parents, alloc); + SzBitUi64s_Free(&p->MTime, alloc); + SzBitUi64s_Free(&p->CTime, alloc); + + SzAr_Free(&p->db, alloc); + SzArEx_Init(p); +} + + +static int TestSignatureCandidate(const Byte *testBytes) +{ + unsigned i; + for (i = 0; i < k7zSignatureSize; i++) + if (testBytes[i] != k7zSignature[i]) + return 0; + return 1; +} + +#define SzData_CLEAR(p) { (p)->Data = NULL; (p)->Size = 0; } + +#define SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) \ + (_sd_)->Size--; dest = *(_sd_)->Data++; + +#define SZ_READ_BYTE_SD(_sd_, dest) \ + if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; \ + SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) + +#define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest) + +#define SZ_READ_BYTE_2(dest) \ + if (sd.Size == 0) return SZ_ERROR_ARCHIVE; \ + sd.Size--; dest = *sd.Data++; + +#define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); } +#define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); } + +#define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \ + dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4); + +static Z7_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value) +{ + Byte firstByte, mask; + unsigned i; + UInt32 v; + + SZ_READ_BYTE(firstByte) + if ((firstByte & 0x80) == 0) + { + *value = firstByte; + return SZ_OK; + } + SZ_READ_BYTE(v) + if ((firstByte & 0x40) == 0) + { + *value = (((UInt32)firstByte & 0x3F) << 8) | v; + return SZ_OK; + } + SZ_READ_BYTE(mask) + *value = v | ((UInt32)mask << 8); + mask = 0x20; + for (i = 2; i < 8; i++) + { + Byte b; + if ((firstByte & mask) == 0) + { + const UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1); + *value |= (highPart << (8 * i)); + return SZ_OK; + } + SZ_READ_BYTE(b) + *value |= ((UInt64)b << (8 * i)); + mask >>= 1; + } + return SZ_OK; +} + + +static Z7_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value) +{ + Byte firstByte; + UInt64 value64; + if (sd->Size == 0) + return SZ_ERROR_ARCHIVE; + firstByte = *sd->Data; + if ((firstByte & 0x80) == 0) + { + *value = firstByte; + sd->Data++; + sd->Size--; + return SZ_OK; + } + RINOK(ReadNumber(sd, &value64)) + if (value64 >= (UInt32)0x80000000 - 1) + return SZ_ERROR_UNSUPPORTED; + if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4))) + return SZ_ERROR_UNSUPPORTED; + *value = (UInt32)value64; + return SZ_OK; +} + +#define ReadID(sd, value) ReadNumber(sd, value) + +static SRes SkipData(CSzData *sd) +{ + UInt64 size; + RINOK(ReadNumber(sd, &size)) + if (size > sd->Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA(sd, size) + return SZ_OK; +} + +static SRes WaitId(CSzData *sd, UInt32 id) +{ + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == id) + return SZ_OK; + if (type == k7zIdEnd) + return SZ_ERROR_ARCHIVE; + RINOK(SkipData(sd)) + } +} + +static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v) +{ + const UInt32 numBytes = (numItems + 7) >> 3; + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + *v = sd->Data; + SKIP_DATA(sd, numBytes) + return SZ_OK; +} + +static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems) +{ + unsigned b = 0; + unsigned m = 0; + UInt32 sum = 0; + for (; numItems != 0; numItems--) + { + if (m == 0) + { + b = *bits++; + m = 8; + } + m--; + sum += (UInt32)((b >> m) & 1); + } + return sum; +} + +static Z7_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc) +{ + Byte allAreDefined; + Byte *v2; + const UInt32 numBytes = (numItems + 7) >> 3; + *v = NULL; + SZ_READ_BYTE(allAreDefined) + if (numBytes == 0) + return SZ_OK; + if (allAreDefined == 0) + { + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc) + SKIP_DATA(sd, numBytes) + return SZ_OK; + } + MY_ALLOC(Byte, *v, numBytes, alloc) + v2 = *v; + memset(v2, 0xFF, (size_t)numBytes); + { + const unsigned numBits = (unsigned)numItems & 7; + if (numBits != 0) + v2[(size_t)numBytes - 1] = (Byte)((((UInt32)1 << numBits) - 1) << (8 - numBits)); + } + return SZ_OK; +} + +static Z7_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +{ + UInt32 i; + CSzData sd; + UInt32 *vals; + const Byte *defs; + MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc) + sd = *sd2; + defs = crcs->Defs; + vals = crcs->Vals; + for (i = 0; i < numItems; i++) + if (SzBitArray_Check(defs, i)) + { + SZ_READ_32(vals[i]) + } + else + vals[i] = 0; + *sd2 = sd; + return SZ_OK; +} + +static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc) +{ + SzBitUi32s_Free(crcs, alloc); + RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc)) + return ReadUi32s(sd, numItems, crcs, alloc); +} + +static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems) +{ + Byte allAreDefined; + UInt32 numDefined = numItems; + SZ_READ_BYTE(allAreDefined) + if (!allAreDefined) + { + const size_t numBytes = (numItems + 7) >> 3; + if (numBytes > sd->Size) + return SZ_ERROR_ARCHIVE; + numDefined = CountDefinedBits(sd->Data, numItems); + SKIP_DATA(sd, numBytes) + } + if (numDefined > (sd->Size >> 2)) + return SZ_ERROR_ARCHIVE; + SKIP_DATA(sd, (size_t)numDefined * 4) + return SZ_OK; +} + +static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc) +{ + RINOK(SzReadNumber32(sd, &p->NumPackStreams)) + + RINOK(WaitId(sd, k7zIdSize)) + MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc) + { + UInt64 sum = 0; + UInt32 i; + const UInt32 numPackStreams = p->NumPackStreams; + for (i = 0; i < numPackStreams; i++) + { + UInt64 packSize; + p->PackPositions[i] = sum; + RINOK(ReadNumber(sd, &packSize)) + sum += packSize; + if (sum < packSize) + return SZ_ERROR_ARCHIVE; + } + p->PackPositions[i] = sum; + } + + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + return SZ_OK; + if (type == k7zIdCRC) + { + /* CRC of packed streams is unused now */ + RINOK(SkipBitUi32s(sd, p->NumPackStreams)) + continue; + } + RINOK(SkipData(sd)) + } +} + +/* +static SRes SzReadSwitch(CSzData *sd) +{ + Byte external; + RINOK(SzReadByte(sd, &external)); + return (external == 0) ? SZ_OK: SZ_ERROR_UNSUPPORTED; +} +*/ + +#define k_NumCodersStreams_in_Folder_MAX (SZ_NUM_BONDS_IN_FOLDER_MAX + SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX) + +SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) +{ + UInt32 numCoders, i; + UInt32 numInStreams = 0; + const Byte *dataStart = sd->Data; + + f->NumCoders = 0; + f->NumBonds = 0; + f->NumPackStreams = 0; + f->UnpackStream = 0; + + RINOK(SzReadNumber32(sd, &numCoders)) + if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (i = 0; i < numCoders; i++) + { + Byte mainByte; + CSzCoderInfo *coder = f->Coders + i; + unsigned idSize, j; + UInt64 id; + + SZ_READ_BYTE(mainByte) + if ((mainByte & 0xC0) != 0) + return SZ_ERROR_UNSUPPORTED; + + idSize = (unsigned)(mainByte & 0xF); + if (idSize > sizeof(id)) + return SZ_ERROR_UNSUPPORTED; + if (idSize > sd->Size) + return SZ_ERROR_ARCHIVE; + id = 0; + for (j = 0; j < idSize; j++) + { + id = ((id << 8) | *sd->Data); + sd->Data++; + sd->Size--; + } + if (id > (UInt32)0xFFFFFFFF) + return SZ_ERROR_UNSUPPORTED; + coder->MethodID = (UInt32)id; + + coder->NumStreams = 1; + coder->PropsOffset = 0; + coder->PropsSize = 0; + + if ((mainByte & 0x10) != 0) + { + UInt32 numStreams; + + RINOK(SzReadNumber32(sd, &numStreams)) + if (numStreams > k_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + coder->NumStreams = (Byte)numStreams; + + RINOK(SzReadNumber32(sd, &numStreams)) + if (numStreams != 1) + return SZ_ERROR_UNSUPPORTED; + } + + numInStreams += coder->NumStreams; + + if (numInStreams > k_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + + if ((mainByte & 0x20) != 0) + { + UInt32 propsSize = 0; + RINOK(SzReadNumber32(sd, &propsSize)) + if (propsSize > sd->Size) + return SZ_ERROR_ARCHIVE; + if (propsSize >= 0x80) + return SZ_ERROR_UNSUPPORTED; + coder->PropsOffset = (size_t)(sd->Data - dataStart); + coder->PropsSize = (Byte)propsSize; + sd->Data += (size_t)propsSize; + sd->Size -= (size_t)propsSize; + } + } + + /* + if (numInStreams == 1 && numCoders == 1) + { + f->NumPackStreams = 1; + f->PackStreams[0] = 0; + } + else + */ + { + Byte streamUsed[k_NumCodersStreams_in_Folder_MAX]; + UInt32 numBonds, numPackStreams; + + numBonds = numCoders - 1; + if (numInStreams < numBonds) + return SZ_ERROR_ARCHIVE; + if (numBonds > SZ_NUM_BONDS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + f->NumBonds = numBonds; + + numPackStreams = numInStreams - numBonds; + if (numPackStreams > SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX) + return SZ_ERROR_UNSUPPORTED; + f->NumPackStreams = numPackStreams; + + for (i = 0; i < numInStreams; i++) + streamUsed[i] = False; + + if (numBonds != 0) + { + Byte coderUsed[SZ_NUM_CODERS_IN_FOLDER_MAX]; + + for (i = 0; i < numCoders; i++) + coderUsed[i] = False; + + for (i = 0; i < numBonds; i++) + { + CSzBond *bp = f->Bonds + i; + + RINOK(SzReadNumber32(sd, &bp->InIndex)) + if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex]) + return SZ_ERROR_ARCHIVE; + streamUsed[bp->InIndex] = True; + + RINOK(SzReadNumber32(sd, &bp->OutIndex)) + if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex]) + return SZ_ERROR_ARCHIVE; + coderUsed[bp->OutIndex] = True; + } + + for (i = 0; i < numCoders; i++) + if (!coderUsed[i]) + { + f->UnpackStream = i; + break; + } + + if (i == numCoders) + return SZ_ERROR_ARCHIVE; + } + + if (numPackStreams == 1) + { + for (i = 0; i < numInStreams; i++) + if (!streamUsed[i]) + break; + if (i == numInStreams) + return SZ_ERROR_ARCHIVE; + f->PackStreams[0] = i; + } + else + for (i = 0; i < numPackStreams; i++) + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + f->PackStreams[i] = index; + } + } + + f->NumCoders = numCoders; + + return SZ_OK; +} + + +static Z7_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num) +{ + CSzData sd; + sd = *sd2; + for (; num != 0; num--) + { + Byte firstByte, mask; + unsigned i; + SZ_READ_BYTE_2(firstByte) + if ((firstByte & 0x80) == 0) + continue; + if ((firstByte & 0x40) == 0) + { + if (sd.Size == 0) + return SZ_ERROR_ARCHIVE; + sd.Size--; + sd.Data++; + continue; + } + mask = 0x20; + for (i = 2; i < 8 && (firstByte & mask) != 0; i++) + mask >>= 1; + if (i > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, i) + } + *sd2 = sd; + return SZ_OK; +} + + +#define k_Scan_NumCoders_MAX 64 +#define k_Scan_NumCodersStreams_in_Folder_MAX 64 + + +static SRes ReadUnpackInfo(CSzAr *p, + CSzData *sd2, + UInt32 numFoldersMax, + const CBuf *tempBufs, UInt32 numTempBufs, + ISzAllocPtr alloc) +{ + CSzData sd; + + UInt32 fo, numFolders, numCodersOutStreams, packStreamIndex; + const Byte *startBufPtr; + Byte external; + + RINOK(WaitId(sd2, k7zIdFolder)) + + RINOK(SzReadNumber32(sd2, &numFolders)) + if (numFolders > numFoldersMax) + return SZ_ERROR_UNSUPPORTED; + p->NumFolders = numFolders; + + SZ_READ_BYTE_SD(sd2, external) + if (external == 0) + sd = *sd2; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd2, &index)) + if (index >= numTempBufs) + return SZ_ERROR_ARCHIVE; + sd.Data = tempBufs[index].data; + sd.Size = tempBufs[index].size; + } + + MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc) + MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc) + MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc) + + startBufPtr = sd.Data; + + packStreamIndex = 0; + numCodersOutStreams = 0; + + for (fo = 0; fo < numFolders; fo++) + { + UInt32 numCoders, ci, numInStreams = 0; + + p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); + + RINOK(SzReadNumber32(&sd, &numCoders)) + if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (ci = 0; ci < numCoders; ci++) + { + Byte mainByte; + unsigned idSize; + UInt32 coderInStreams; + + SZ_READ_BYTE_2(mainByte) + if ((mainByte & 0xC0) != 0) + return SZ_ERROR_UNSUPPORTED; + idSize = (mainByte & 0xF); + if (idSize > 8) + return SZ_ERROR_UNSUPPORTED; + if (idSize > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, idSize) + + coderInStreams = 1; + + if ((mainByte & 0x10) != 0) + { + UInt32 coderOutStreams; + RINOK(SzReadNumber32(&sd, &coderInStreams)) + RINOK(SzReadNumber32(&sd, &coderOutStreams)) + if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1) + return SZ_ERROR_UNSUPPORTED; + } + + numInStreams += coderInStreams; + + if ((mainByte & 0x20) != 0) + { + UInt32 propsSize; + RINOK(SzReadNumber32(&sd, &propsSize)) + if (propsSize > sd.Size) + return SZ_ERROR_ARCHIVE; + SKIP_DATA2(sd, propsSize) + } + } + + { + UInt32 indexOfMainStream = 0; + UInt32 numPackStreams = 1; + + if (numCoders != 1 || numInStreams != 1) + { + Byte streamUsed[k_Scan_NumCodersStreams_in_Folder_MAX]; + Byte coderUsed[k_Scan_NumCoders_MAX]; + + UInt32 i; + const UInt32 numBonds = numCoders - 1; + if (numInStreams < numBonds) + return SZ_ERROR_ARCHIVE; + + if (numInStreams > k_Scan_NumCodersStreams_in_Folder_MAX) + return SZ_ERROR_UNSUPPORTED; + + for (i = 0; i < numInStreams; i++) + streamUsed[i] = False; + for (i = 0; i < numCoders; i++) + coderUsed[i] = False; + + for (i = 0; i < numBonds; i++) + { + UInt32 index; + + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numCoders || coderUsed[index]) + return SZ_ERROR_ARCHIVE; + coderUsed[index] = True; + } + + numPackStreams = numInStreams - numBonds; + + if (numPackStreams != 1) + for (i = 0; i < numPackStreams; i++) + { + UInt32 index; + RINOK(SzReadNumber32(&sd, &index)) + if (index >= numInStreams || streamUsed[index]) + return SZ_ERROR_ARCHIVE; + streamUsed[index] = True; + } + + for (i = 0; i < numCoders; i++) + if (!coderUsed[i]) + { + indexOfMainStream = i; + break; + } + + if (i == numCoders) + return SZ_ERROR_ARCHIVE; + } + + p->FoStartPackStreamIndex[fo] = packStreamIndex; + p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; + p->FoToMainUnpackSizeIndex[fo] = (Byte)indexOfMainStream; + numCodersOutStreams += numCoders; + if (numCodersOutStreams < numCoders) + return SZ_ERROR_UNSUPPORTED; + if (numPackStreams > p->NumPackStreams - packStreamIndex) + return SZ_ERROR_ARCHIVE; + packStreamIndex += numPackStreams; + } + } + + p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; + + { + const size_t dataSize = (size_t)(sd.Data - startBufPtr); + p->FoStartPackStreamIndex[fo] = packStreamIndex; + p->FoCodersOffsets[fo] = dataSize; + MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc) + } + + if (external != 0) + { + if (sd.Size != 0) + return SZ_ERROR_ARCHIVE; + sd = *sd2; + } + + RINOK(WaitId(&sd, k7zIdCodersUnpackSize)) + + MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc) + { + UInt32 i; + for (i = 0; i < numCodersOutStreams; i++) + { + RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i)) + } + } + + for (;;) + { + UInt64 type; + RINOK(ReadID(&sd, &type)) + if (type == k7zIdEnd) + { + *sd2 = sd; + return SZ_OK; + } + if (type == k7zIdCRC) + { + RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc)) + continue; + } + RINOK(SkipData(&sd)) + } +} + + +UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex) +{ + return p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex] + p->FoToMainUnpackSizeIndex[folderIndex]]; +} + + +typedef struct +{ + UInt32 NumTotalSubStreams; + UInt32 NumSubDigests; + CSzData sdNumSubStreams; + CSzData sdSizes; + CSzData sdCRCs; +} CSubStreamInfo; + + +static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) +{ + UInt64 type = 0; + UInt32 numSubDigests = 0; + const UInt32 numFolders = p->NumFolders; + UInt32 numUnpackStreams = numFolders; + UInt32 numUnpackSizesInData = 0; + + for (;;) + { + RINOK(ReadID(sd, &type)) + if (type == k7zIdNumUnpackStream) + { + UInt32 i; + ssi->sdNumSubStreams.Data = sd->Data; + numUnpackStreams = 0; + numSubDigests = 0; + for (i = 0; i < numFolders; i++) + { + UInt32 numStreams; + RINOK(SzReadNumber32(sd, &numStreams)) + if (numUnpackStreams > numUnpackStreams + numStreams) + return SZ_ERROR_UNSUPPORTED; + numUnpackStreams += numStreams; + if (numStreams != 0) + numUnpackSizesInData += (numStreams - 1); + if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) + numSubDigests += numStreams; + } + ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data); + continue; + } + if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + + if (!ssi->sdNumSubStreams.Data) + { + numSubDigests = numFolders; + if (p->FolderCRCs.Defs) + numSubDigests = numFolders - CountDefinedBits(p->FolderCRCs.Defs, numFolders); + } + + ssi->NumTotalSubStreams = numUnpackStreams; + ssi->NumSubDigests = numSubDigests; + + if (type == k7zIdSize) + { + ssi->sdSizes.Data = sd->Data; + RINOK(SkipNumbers(sd, numUnpackSizesInData)) + ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); + RINOK(ReadID(sd, &type)) + } + + for (;;) + { + if (type == k7zIdEnd) + return SZ_OK; + if (type == k7zIdCRC) + { + ssi->sdCRCs.Data = sd->Data; + RINOK(SkipBitUi32s(sd, numSubDigests)) + ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data); + } + else + { + RINOK(SkipData(sd)) + } + RINOK(ReadID(sd, &type)) + } +} + +static SRes SzReadStreamsInfo(CSzAr *p, + CSzData *sd, + UInt32 numFoldersMax, const CBuf *tempBufs, UInt32 numTempBufs, + UInt64 *dataOffset, + CSubStreamInfo *ssi, + ISzAllocPtr alloc) +{ + UInt64 type; + + SzData_CLEAR(&ssi->sdSizes) + SzData_CLEAR(&ssi->sdCRCs) + SzData_CLEAR(&ssi->sdNumSubStreams) + + *dataOffset = 0; + RINOK(ReadID(sd, &type)) + if (type == k7zIdPackInfo) + { + RINOK(ReadNumber(sd, dataOffset)) + if (*dataOffset > p->RangeLimit) + return SZ_ERROR_ARCHIVE; + RINOK(ReadPackInfo(p, sd, alloc)) + if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) + return SZ_ERROR_ARCHIVE; + RINOK(ReadID(sd, &type)) + } + if (type == k7zIdUnpackInfo) + { + RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc)) + RINOK(ReadID(sd, &type)) + } + if (type == k7zIdSubStreamsInfo) + { + RINOK(ReadSubStreamsInfo(p, sd, ssi)) + RINOK(ReadID(sd, &type)) + } + else + { + ssi->NumTotalSubStreams = p->NumFolders; + // ssi->NumSubDigests = 0; + } + + return (type == k7zIdEnd ? SZ_OK : SZ_ERROR_UNSUPPORTED); +} + +static SRes SzReadAndDecodePackedStreams( + ILookInStreamPtr inStream, + CSzData *sd, + CBuf *tempBufs, + UInt32 numFoldersMax, + UInt64 baseOffset, + CSzAr *p, + ISzAllocPtr allocTemp) +{ + UInt64 dataStartPos; + UInt32 fo; + CSubStreamInfo ssi; + + RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp)) + + dataStartPos += baseOffset; + if (p->NumFolders == 0) + return SZ_ERROR_ARCHIVE; + + for (fo = 0; fo < p->NumFolders; fo++) + Buf_Init(tempBufs + fo); + + for (fo = 0; fo < p->NumFolders; fo++) + { + CBuf *tempBuf = tempBufs + fo; + const UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo); + if ((size_t)unpackSize != unpackSize) + return SZ_ERROR_MEM; + if (!Buf_Create(tempBuf, (size_t)unpackSize, allocTemp)) + return SZ_ERROR_MEM; + } + + for (fo = 0; fo < p->NumFolders; fo++) + { + const CBuf *tempBuf = tempBufs + fo; + RINOK(LookInStream_SeekTo(inStream, dataStartPos)) + RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp)) + } + + return SZ_OK; +} + +static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size_t *offsets) +{ + size_t pos = 0; + *offsets++ = 0; + if (numFiles == 0) + return (size == 0) ? SZ_OK : SZ_ERROR_ARCHIVE; + if (size < 2) + return SZ_ERROR_ARCHIVE; + if (data[size - 2] != 0 || data[size - 1] != 0) + return SZ_ERROR_ARCHIVE; + do + { + const Byte *p; + if (pos == size) + return SZ_ERROR_ARCHIVE; + for (p = data + pos; + #ifdef _WIN32 + *(const UInt16 *)(const void *)p != 0 + #else + p[0] != 0 || p[1] != 0 + #endif + ; p += 2); + pos = (size_t)(p - data) + 2; + *offsets++ = (pos >> 1); + } + while (--numFiles); + return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; +} + +static Z7_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num, + CSzData *sd2, + const CBuf *tempBufs, UInt32 numTempBufs, + ISzAllocPtr alloc) +{ + CSzData sd; + UInt32 i; + CNtfsFileTime *vals; + Byte *defs; + Byte external; + + RINOK(ReadBitVector(sd2, num, &p->Defs, alloc)) + + SZ_READ_BYTE_SD(sd2, external) + if (external == 0) + sd = *sd2; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd2, &index)) + if (index >= numTempBufs) + return SZ_ERROR_ARCHIVE; + sd.Data = tempBufs[index].data; + sd.Size = tempBufs[index].size; + } + + MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc) + vals = p->Vals; + defs = p->Defs; + for (i = 0; i < num; i++) + if (SzBitArray_Check(defs, i)) + { + if (sd.Size < 8) + return SZ_ERROR_ARCHIVE; + vals[i].Low = GetUi32(sd.Data); + vals[i].High = GetUi32(sd.Data + 4); + SKIP_DATA2(sd, 8) + } + else + vals[i].High = vals[i].Low = 0; + + if (external == 0) + *sd2 = sd; + + return SZ_OK; +} + + +#define NUM_ADDITIONAL_STREAMS_MAX 8 + + +static SRes SzReadHeader2( + CSzArEx *p, /* allocMain */ + CSzData *sd, + ILookInStreamPtr inStream, + CBuf *tempBufs, UInt32 *numTempBufs, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp + ) +{ + CSubStreamInfo ssi; + +{ + UInt64 type; + + SzData_CLEAR(&ssi.sdSizes) + SzData_CLEAR(&ssi.sdCRCs) + SzData_CLEAR(&ssi.sdNumSubStreams) + + ssi.NumSubDigests = 0; + ssi.NumTotalSubStreams = 0; + + RINOK(ReadID(sd, &type)) + + if (type == k7zIdArchiveProperties) + { + for (;;) + { + UInt64 type2; + RINOK(ReadID(sd, &type2)) + if (type2 == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdAdditionalStreamsInfo) + { + CSzAr tempAr; + SRes res; + + SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + + res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX, + p->startPosAfterHeader, &tempAr, allocTemp); + *numTempBufs = tempAr.NumFolders; + SzAr_Free(&tempAr, allocTemp); + + if (res != SZ_OK) + return res; + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdMainStreamsInfo) + { + RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs, + &p->dataPos, &ssi, allocMain)) + p->dataPos += p->startPosAfterHeader; + RINOK(ReadID(sd, &type)) + } + + if (type == k7zIdEnd) + { + return SZ_OK; + } + + if (type != k7zIdFilesInfo) + return SZ_ERROR_ARCHIVE; +} + +{ + UInt32 numFiles = 0; + UInt32 numEmptyStreams = 0; + const Byte *emptyStreams = NULL; + const Byte *emptyFiles = NULL; + + RINOK(SzReadNumber32(sd, &numFiles)) + p->NumFiles = numFiles; + + for (;;) + { + UInt64 type; + UInt64 size; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + break; + RINOK(ReadNumber(sd, &size)) + if (size > sd->Size) + return SZ_ERROR_ARCHIVE; + + if (type >= ((UInt32)1 << 8)) + { + SKIP_DATA(sd, size) + } + else switch ((unsigned)type) + { + case k7zIdName: + { + size_t namesSize; + const Byte *namesData; + Byte external; + + SZ_READ_BYTE(external) + if (external == 0) + { + namesSize = (size_t)size - 1; + namesData = sd->Data; + } + else + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= *numTempBufs) + return SZ_ERROR_ARCHIVE; + namesData = (tempBufs)[index].data; + namesSize = (tempBufs)[index].size; + } + + if ((namesSize & 1) != 0) + return SZ_ERROR_ARCHIVE; + MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain) + MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain) + RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets)) + if (external == 0) + { + SKIP_DATA(sd, namesSize) + } + break; + } + case k7zIdEmptyStream: + { + RINOK(RememberBitVector(sd, numFiles, &emptyStreams)) + numEmptyStreams = CountDefinedBits(emptyStreams, numFiles); + emptyFiles = NULL; + break; + } + case k7zIdEmptyFile: + { + RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles)) + break; + } + case k7zIdWinAttrib: + { + Byte external; + CSzData sdSwitch; + CSzData *sdPtr; + SzBitUi32s_Free(&p->Attribs, allocMain); + RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain)) + + SZ_READ_BYTE(external) + if (external == 0) + sdPtr = sd; + else + { + UInt32 index; + RINOK(SzReadNumber32(sd, &index)) + if (index >= *numTempBufs) + return SZ_ERROR_ARCHIVE; + sdSwitch.Data = (tempBufs)[index].data; + sdSwitch.Size = (tempBufs)[index].size; + sdPtr = &sdSwitch; + } + RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain)) + break; + } + /* + case k7zParent: + { + SzBitUi32s_Free(&p->Parents, allocMain); + RINOK(ReadBitVector(sd, numFiles, &p->Parents.Defs, allocMain)); + RINOK(SzReadSwitch(sd)); + RINOK(ReadUi32s(sd, numFiles, &p->Parents, allocMain)); + break; + } + */ + case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break; + default: + { + SKIP_DATA(sd, size) + } + } + } + + if (numFiles - numEmptyStreams != ssi.NumTotalSubStreams) + return SZ_ERROR_ARCHIVE; + + for (;;) + { + UInt64 type; + RINOK(ReadID(sd, &type)) + if (type == k7zIdEnd) + break; + RINOK(SkipData(sd)) + } + + { + UInt32 i; + UInt32 emptyFileIndex = 0; + UInt32 folderIndex = 0; + UInt32 remSubStreams = 0; + UInt32 numSubStreams = 0; + UInt64 unpackPos = 0; + const Byte *digestsDefs = NULL; + const Byte *digestsVals = NULL; + UInt32 digestIndex = 0; + Byte isDirMask = 0; + Byte crcMask = 0; + Byte mask = 0x80; + + MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain) + MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain) + MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain) + MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain) + + RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain)) + + if (ssi.sdCRCs.Size != 0) + { + Byte allDigestsDefined = 0; + SZ_READ_BYTE_SD_NOCHECK(&ssi.sdCRCs, allDigestsDefined) + if (allDigestsDefined) + digestsVals = ssi.sdCRCs.Data; + else + { + const size_t numBytes = (ssi.NumSubDigests + 7) >> 3; + digestsDefs = ssi.sdCRCs.Data; + digestsVals = digestsDefs + numBytes; + } + } + + for (i = 0; i < numFiles; i++, mask >>= 1) + { + if (mask == 0) + { + const UInt32 byteIndex = (i - 1) >> 3; + p->IsDirs[byteIndex] = isDirMask; + p->CRCs.Defs[byteIndex] = crcMask; + isDirMask = 0; + crcMask = 0; + mask = 0x80; + } + + p->UnpackPositions[i] = unpackPos; + p->CRCs.Vals[i] = 0; + + if (emptyStreams && SzBitArray_Check(emptyStreams, i)) + { + if (emptyFiles) + { + if (!SzBitArray_Check(emptyFiles, emptyFileIndex)) + isDirMask |= mask; + emptyFileIndex++; + } + else + isDirMask |= mask; + if (remSubStreams == 0) + { + p->FileToFolder[i] = (UInt32)-1; + continue; + } + } + + if (remSubStreams == 0) + { + for (;;) + { + if (folderIndex >= p->db.NumFolders) + return SZ_ERROR_ARCHIVE; + p->FolderToFile[folderIndex] = i; + numSubStreams = 1; + if (ssi.sdNumSubStreams.Data) + { + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) + } + remSubStreams = numSubStreams; + if (numSubStreams != 0) + break; + { + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + unpackPos += folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + } + folderIndex++; + } + } + + p->FileToFolder[i] = folderIndex; + + if (emptyStreams && SzBitArray_Check(emptyStreams, i)) + continue; + + if (--remSubStreams == 0) + { + const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + const UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]]; + if (folderUnpackSize < unpackPos - startFolderUnpackPos) + return SZ_ERROR_ARCHIVE; + unpackPos = startFolderUnpackPos + folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + + if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, folderIndex)) + { + p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex]; + crcMask |= mask; + } + folderIndex++; + } + else + { + UInt64 v; + RINOK(ReadNumber(&ssi.sdSizes, &v)) + unpackPos += v; + if (unpackPos < v) + return SZ_ERROR_ARCHIVE; + } + if ((crcMask & mask) == 0 && digestsVals) + { + if (!digestsDefs || SzBitArray_Check(digestsDefs, digestIndex)) + { + p->CRCs.Vals[i] = GetUi32(digestsVals); + digestsVals += 4; + crcMask |= mask; + } + digestIndex++; + } + } + + if (mask != 0x80) + { + const UInt32 byteIndex = (i - 1) >> 3; + p->IsDirs[byteIndex] = isDirMask; + p->CRCs.Defs[byteIndex] = crcMask; + } + + p->UnpackPositions[i] = unpackPos; + + if (remSubStreams != 0) + return SZ_ERROR_ARCHIVE; + + for (;;) + { + p->FolderToFile[folderIndex] = i; + if (folderIndex >= p->db.NumFolders) + break; + if (!ssi.sdNumSubStreams.Data) + return SZ_ERROR_ARCHIVE; + RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams)) + if (numSubStreams != 0) + return SZ_ERROR_ARCHIVE; + /* + { + UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + unpackPos += folderUnpackSize; + if (unpackPos < folderUnpackSize) + return SZ_ERROR_ARCHIVE; + } + */ + folderIndex++; + } + + if (ssi.sdNumSubStreams.Data && ssi.sdNumSubStreams.Size != 0) + return SZ_ERROR_ARCHIVE; + } +} + return SZ_OK; +} + + +static SRes SzReadHeader( + CSzArEx *p, + CSzData *sd, + ILookInStreamPtr inStream, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + UInt32 i; + UInt32 numTempBufs = 0; + SRes res; + CBuf tempBufs[NUM_ADDITIONAL_STREAMS_MAX]; + + for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) + Buf_Init(tempBufs + i); + + res = SzReadHeader2(p, sd, inStream, + tempBufs, &numTempBufs, + allocMain, allocTemp); + + for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++) + Buf_Free(tempBufs + i, allocTemp); + + RINOK(res) + + if (sd->Size != 0) + return SZ_ERROR_FAIL; + + return res; +} + +static SRes SzArEx_Open2( + CSzArEx *p, + ILookInStreamPtr inStream, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + Byte header[k7zStartHeaderSize]; + Int64 startArcPos; + UInt64 nextHeaderOffset, nextHeaderSize; + size_t nextHeaderSizeT; + UInt32 nextHeaderCRC; + CBuf buf; + SRes res; + + startArcPos = 0; + RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR)) + + RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE)) + + if (!TestSignatureCandidate(header)) + return SZ_ERROR_NO_ARCHIVE; + if (header[6] != k7zMajorVersion) + return SZ_ERROR_UNSUPPORTED; + + nextHeaderOffset = GetUi64(header + 12); + nextHeaderSize = GetUi64(header + 20); + nextHeaderCRC = GetUi32(header + 28); + + p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize; + + if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) + return SZ_ERROR_CRC; + + p->db.RangeLimit = nextHeaderOffset; + + nextHeaderSizeT = (size_t)nextHeaderSize; + if (nextHeaderSizeT != nextHeaderSize) + return SZ_ERROR_MEM; + if (nextHeaderSizeT == 0) + return SZ_OK; + if (nextHeaderOffset > nextHeaderOffset + nextHeaderSize || + nextHeaderOffset > nextHeaderOffset + nextHeaderSize + k7zStartHeaderSize) + return SZ_ERROR_NO_ARCHIVE; + + { + Int64 pos = 0; + RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)) + if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) + return SZ_ERROR_INPUT_EOF; + } + + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)) + + if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) + return SZ_ERROR_MEM; + + res = LookInStream_Read(inStream, buf.data, nextHeaderSizeT); + + if (res == SZ_OK) + { + res = SZ_ERROR_ARCHIVE; + if (CrcCalc(buf.data, nextHeaderSizeT) == nextHeaderCRC) + { + CSzData sd; + UInt64 type; + sd.Data = buf.data; + sd.Size = buf.size; + + res = ReadID(&sd, &type); + + if (res == SZ_OK && type == k7zIdEncodedHeader) + { + CSzAr tempAr; + CBuf tempBuf; + Buf_Init(&tempBuf); + + SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + + res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp); + SzAr_Free(&tempAr, allocTemp); + + if (res != SZ_OK) + { + Buf_Free(&tempBuf, allocTemp); + } + else + { + Buf_Free(&buf, allocTemp); + buf.data = tempBuf.data; + buf.size = tempBuf.size; + sd.Data = buf.data; + sd.Size = buf.size; + res = ReadID(&sd, &type); + } + } + + if (res == SZ_OK) + { + if (type == k7zIdHeader) + { + /* + CSzData sd2; + unsigned ttt; + for (ttt = 0; ttt < 40000; ttt++) + { + SzArEx_Free(p, allocMain); + sd2 = sd; + res = SzReadHeader(p, &sd2, inStream, allocMain, allocTemp); + if (res != SZ_OK) + break; + } + */ + res = SzReadHeader(p, &sd, inStream, allocMain, allocTemp); + } + else + res = SZ_ERROR_UNSUPPORTED; + } + } + } + + Buf_Free(&buf, allocTemp); + return res; +} + + +SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream, + ISzAllocPtr allocMain, ISzAllocPtr allocTemp) +{ + const SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp); + if (res != SZ_OK) + SzArEx_Free(p, allocMain); + return res; +} + + +SRes SzArEx_Extract( + const CSzArEx *p, + ILookInStreamPtr inStream, + UInt32 fileIndex, + UInt32 *blockIndex, + Byte **tempBuf, + size_t *outBufferSize, + size_t *offset, + size_t *outSizeProcessed, + ISzAllocPtr allocMain, + ISzAllocPtr allocTemp) +{ + const UInt32 folderIndex = p->FileToFolder[fileIndex]; + SRes res = SZ_OK; + + *offset = 0; + *outSizeProcessed = 0; + + if (folderIndex == (UInt32)-1) + { + ISzAlloc_Free(allocMain, *tempBuf); + *blockIndex = folderIndex; + *tempBuf = NULL; + *outBufferSize = 0; + return SZ_OK; + } + + if (*tempBuf == NULL || *blockIndex != folderIndex) + { + const UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex); + /* + UInt64 unpackSizeSpec = + p->UnpackPositions[p->FolderToFile[(size_t)folderIndex + 1]] - + p->UnpackPositions[p->FolderToFile[folderIndex]]; + */ + const size_t unpackSize = (size_t)unpackSizeSpec; + + if (unpackSize != unpackSizeSpec) + return SZ_ERROR_MEM; + *blockIndex = folderIndex; + ISzAlloc_Free(allocMain, *tempBuf); + *tempBuf = NULL; + + if (res == SZ_OK) + { + *outBufferSize = unpackSize; + if (unpackSize != 0) + { + *tempBuf = (Byte *)ISzAlloc_Alloc(allocMain, unpackSize); + if (*tempBuf == NULL) + res = SZ_ERROR_MEM; + } + + if (res == SZ_OK) + { + res = SzAr_DecodeFolder(&p->db, folderIndex, + inStream, p->dataPos, *tempBuf, unpackSize, allocTemp); + } + } + } + + if (res == SZ_OK) + { + const UInt64 unpackPos = p->UnpackPositions[fileIndex]; + *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]); + *outSizeProcessed = (size_t)(p->UnpackPositions[(size_t)fileIndex + 1] - unpackPos); + if (*offset + *outSizeProcessed > *outBufferSize) + return SZ_ERROR_FAIL; + if (SzBitWithVals_Check(&p->CRCs, fileIndex)) + if (CrcCalc(*tempBuf + *offset, *outSizeProcessed) != p->CRCs.Vals[fileIndex]) + res = SZ_ERROR_CRC; + } + + return res; +} + + +size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest) +{ + const size_t offs = p->FileNameOffsets[fileIndex]; + const size_t len = p->FileNameOffsets[fileIndex + 1] - offs; + if (dest != 0) + { + size_t i; + const Byte *src = p->FileNames + offs * 2; + for (i = 0; i < len; i++) + dest[i] = GetUi16(src + i * 2); + } + return len; +} + +/* +size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex) +{ + size_t len; + if (!p->FileNameOffsets) + return 1; + len = 0; + for (;;) + { + UInt32 parent = (UInt32)(Int32)-1; + len += p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; + if SzBitWithVals_Check(&p->Parents, fileIndex) + parent = p->Parents.Vals[fileIndex]; + if (parent == (UInt32)(Int32)-1) + return len; + fileIndex = parent; + } +} + +UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest) +{ + BoolInt needSlash; + if (!p->FileNameOffsets) + { + *(--dest) = 0; + return dest; + } + needSlash = False; + for (;;) + { + UInt32 parent = (UInt32)(Int32)-1; + size_t curLen = p->FileNameOffsets[fileIndex + 1] - p->FileNameOffsets[fileIndex]; + SzArEx_GetFileNameUtf16(p, fileIndex, dest - curLen); + if (needSlash) + *(dest - 1) = '/'; + needSlash = True; + dest -= curLen; + + if SzBitWithVals_Check(&p->Parents, fileIndex) + parent = p->Parents.Vals[fileIndex]; + if (parent == (UInt32)(Int32)-1) + return dest; + fileIndex = parent; + } +} +*/ diff --git a/crnlib/lzma/7zBuf.c b/crnlib/lzma/7zBuf.c new file mode 100644 index 00000000..438bba68 --- /dev/null +++ b/crnlib/lzma/7zBuf.c @@ -0,0 +1,36 @@ +/* 7zBuf.c -- Byte Buffer +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zBuf.h" + +void Buf_Init(CBuf *p) +{ + p->data = 0; + p->size = 0; +} + +int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc) +{ + p->size = 0; + if (size == 0) + { + p->data = 0; + return 1; + } + p->data = (Byte *)ISzAlloc_Alloc(alloc, size); + if (p->data) + { + p->size = size; + return 1; + } + return 0; +} + +void Buf_Free(CBuf *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->data); + p->data = 0; + p->size = 0; +} diff --git a/crnlib/lzma/7zBuf.h b/crnlib/lzma/7zBuf.h new file mode 100644 index 00000000..ca34c190 --- /dev/null +++ b/crnlib/lzma/7zBuf.h @@ -0,0 +1,35 @@ +/* 7zBuf.h -- Byte Buffer +2023-03-04 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_BUF_H +#define ZIP7_INC_7Z_BUF_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef struct +{ + Byte *data; + size_t size; +} CBuf; + +void Buf_Init(CBuf *p); +int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc); +void Buf_Free(CBuf *p, ISzAllocPtr alloc); + +typedef struct +{ + Byte *data; + size_t size; + size_t pos; +} CDynBuf; + +void DynBuf_Construct(CDynBuf *p); +void DynBuf_SeekToBeg(CDynBuf *p); +int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc); +void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/7zBuf2.c b/crnlib/lzma/7zBuf2.c new file mode 100644 index 00000000..49b4343b --- /dev/null +++ b/crnlib/lzma/7zBuf2.c @@ -0,0 +1,52 @@ +/* 7zBuf2.c -- Byte Buffer +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zBuf.h" + +void DynBuf_Construct(CDynBuf *p) +{ + p->data = 0; + p->size = 0; + p->pos = 0; +} + +void DynBuf_SeekToBeg(CDynBuf *p) +{ + p->pos = 0; +} + +int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc) +{ + if (size > p->size - p->pos) + { + size_t newSize = p->pos + size; + Byte *data; + newSize += newSize / 4; + data = (Byte *)ISzAlloc_Alloc(alloc, newSize); + if (!data) + return 0; + p->size = newSize; + if (p->pos != 0) + memcpy(data, p->data, p->pos); + ISzAlloc_Free(alloc, p->data); + p->data = data; + } + if (size != 0) + { + memcpy(p->data + p->pos, buf, size); + p->pos += size; + } + return 1; +} + +void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->data); + p->data = 0; + p->size = 0; + p->pos = 0; +} diff --git a/crnlib/lzma/7zCrc.c b/crnlib/lzma/7zCrc.c new file mode 100644 index 00000000..9be9a75a --- /dev/null +++ b/crnlib/lzma/7zCrc.c @@ -0,0 +1,420 @@ +/* 7zCrc.c -- CRC32 calculation and init +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zCrc.h" +#include "CpuArch.h" + +// for debug: +// #define __ARM_FEATURE_CRC32 1 + +#ifdef __ARM_FEATURE_CRC32 +// #pragma message("__ARM_FEATURE_CRC32") +#define Z7_CRC_HW_FORCE +#endif + +// #define Z7_CRC_DEBUG_BE +#ifdef Z7_CRC_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +#ifdef Z7_CRC_HW_FORCE + #define Z7_CRC_NUM_TABLES_USE 1 +#else +#ifdef Z7_CRC_NUM_TABLES + #define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES +#else + #define Z7_CRC_NUM_TABLES_USE 12 +#endif +#endif + +#if Z7_CRC_NUM_TABLES_USE < 1 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + +#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1) + #define Z7_CRC_NUM_TABLES_TOTAL Z7_CRC_NUM_TABLES_USE +#else + #define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1) +#endif + +#ifndef Z7_CRC_HW_FORCE + +#if Z7_CRC_NUM_TABLES_USE == 1 \ + || (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) +#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +#define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1 +static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size) +{ + const UInt32 *table = g_CrcTable; + const Byte *p = (const Byte *)data; + const Byte *lim = p + size; + for (; p != lim; p++) + v = CRC_UPDATE_BYTE_2(v, *p); + return v; +} +#endif + + +#if Z7_CRC_NUM_TABLES_USE != 1 +#ifndef MY_CPU_BE + #define FUNC_NAME_LE_2(s) CrcUpdateT ## s + #define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s) + #define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE) + UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table); +#endif +#ifndef MY_CPU_LE + #define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s + #define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s) + #define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE) + UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table); +#endif +#endif + +#endif // Z7_CRC_HW_FORCE + +/* ---------- hardware CRC ---------- */ + +#ifdef MY_CPU_LE + +#if defined(MY_CPU_ARM_OR_ARM64) +// #pragma message("ARM*") + + #if (defined(__clang__) && (__clang_major__ >= 3)) \ + || defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #if !defined(__ARM_FEATURE_CRC32) +// #pragma message("!defined(__ARM_FEATURE_CRC32)") +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define __ARM_FEATURE_CRC32 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRC32_WAS_SET + #if defined(__clang__) + #if defined(MY_CPU_ARM64) + #define ATTRIB_CRC __attribute__((__target__("crc"))) + #else + #define ATTRIB_CRC __attribute__((__target__("armv8-a,crc"))) + #endif + #else + #if defined(MY_CPU_ARM64) +#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000) + #define ATTRIB_CRC __attribute__((__target__("+crc"))) +#endif + #else +#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8) +#if defined(__ARM_FP) && __GNUC__ >= 8 +// for -mfloat-abi=hard: similar to + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd"))) +#else + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) +#endif +#endif + #endif + #endif + #endif + #if defined(__ARM_FEATURE_CRC32) + // #pragma message("") +/* +arm_acle.h (GGC): + before Nov 17, 2017: +#ifdef __ARM_FEATURE_CRC32 + + Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked" +#if __ARM_ARCH >= 8 +#pragma GCC target ("arch=armv8-a+crc") + + Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1: +#ifdef __ARM_FEATURE_CRC32 +#ifdef __ARM_FP +#pragma GCC target ("arch=armv8-a+crc+simd") +#else +#pragma GCC target ("arch=armv8-a+crc") +#endif +*/ +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 +#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") +#undef __ARM_ARCH +#define __ARM_ARCH 8 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif + #define Z7_CRC_HW_USE + #include + #endif + #elif defined(_MSC_VER) + #if defined(MY_CPU_ARM64) + #if (_MSC_VER >= 1910) + #ifdef __clang__ + // #define Z7_CRC_HW_USE + // #include + #else + #define Z7_CRC_HW_USE + #include + #endif + #endif + #endif + #endif + +#else // non-ARM* + +// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code +#ifdef Z7_CRC_HW_USE +#include "7zCrcEmu.h" +#endif + +#endif // non-ARM* + + + +#if defined(Z7_CRC_HW_USE) + +// #pragma message("USE ARM HW CRC") + +#ifdef MY_CPU_64BIT + #define CRC_HW_WORD_TYPE UInt64 + #define CRC_HW_WORD_FUNC __crc32d +#else + #define CRC_HW_WORD_TYPE UInt32 + #define CRC_HW_WORD_FUNC __crc32w +#endif + +#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4) + +#ifdef ATTRIB_CRC + ATTRIB_CRC +#endif +Z7_NO_INLINE +#ifdef Z7_CRC_HW_FORCE + UInt32 Z7_FASTCALL CrcUpdate +#else + static UInt32 Z7_FASTCALL CrcUpdate_HW +#endif + (UInt32 v, const void *data, size_t size) +{ + const Byte *p = (const Byte *)data; + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + if (size >= CRC_HW_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= CRC_HW_UNROLL_BYTES - 1; + lim -= size; + do + { + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p)); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE))); + p += 2 * sizeof(CRC_HW_WORD_TYPE); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p)); + v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE))); + p += 2 * sizeof(CRC_HW_WORD_TYPE); + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#undef __ARM_FEATURE_CRC32 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#undef Z7_ARM_FEATURE_CRC32_WAS_SET +#endif + +#endif // defined(Z7_CRC_HW_USE) +#endif // MY_CPU_LE + + + +#ifndef Z7_CRC_HW_FORCE + +#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) +/* +typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC) + (UInt32 v, const void *data, size_t size, const UInt32 *table); +Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate; +*/ +static unsigned g_Crc_Algo; +#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) +static unsigned g_Crc_Be; +#endif +#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) + + + +Z7_NO_INLINE +#ifdef Z7_CRC_HW_USE + static UInt32 Z7_FASTCALL CrcUpdate_Base +#else + UInt32 Z7_FASTCALL CrcUpdate +#endif + (UInt32 crc, const void *data, size_t size) +{ +#if Z7_CRC_NUM_TABLES_USE == 1 + return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size); +#else // Z7_CRC_NUM_TABLES_USE != 1 +#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME + if (g_Crc_Algo == 1) + return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size); +#endif + +#ifdef MY_CPU_LE + return FUNC_NAME_LE(crc, data, size, g_CrcTable); +#elif defined(MY_CPU_BE) + return FUNC_NAME_BE(crc, data, size, g_CrcTable); +#else + if (g_Crc_Be) + return FUNC_NAME_BE(crc, data, size, g_CrcTable); + else + return FUNC_NAME_LE(crc, data, size, g_CrcTable); +#endif +#endif // Z7_CRC_NUM_TABLES_USE != 1 +} + + +#ifdef Z7_CRC_HW_USE +Z7_NO_INLINE +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size) +{ + if (g_Crc_Algo == 0) + return CrcUpdate_HW(crc, data, size); + return CrcUpdate_Base(crc, data, size); +} +#endif + +#endif // !defined(Z7_CRC_HW_FORCE) + + + +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size) +{ + return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL; +} + + +MY_ALIGN(64) +UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL]; + + +void Z7_FASTCALL CrcGenerateTable(void) +{ + UInt32 i; + for (i = 0; i < 256; i++) + { +#if defined(Z7_CRC_HW_FORCE) + g_CrcTable[i] = __crc32b(i, 0); +#else + #define kCrcPoly 0xEDB88320 + UInt32 r = i; + unsigned j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); + g_CrcTable[i] = r; +#endif + } + for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++) + { + const UInt32 r = g_CrcTable[(size_t)i - 256]; + g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); + } + +#if !defined(Z7_CRC_HW_FORCE) && \ + (defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE)) + +#if Z7_CRC_NUM_TABLES_USE <= 1 + g_Crc_Algo = 1; +#else // Z7_CRC_NUM_TABLES_USE <= 1 + +#if defined(MY_CPU_LE) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; +#else // !defined(MY_CPU_LE) + { +#ifndef MY_CPU_BE + UInt32 k = 0x01020304; + const Byte *p = (const Byte *)&k; + if (p[0] == 4 && p[1] == 3) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; + else if (p[0] != 1 || p[1] != 2) + g_Crc_Algo = 1; + else +#endif // MY_CPU_BE + { + for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--) + { + const UInt32 x = g_CrcTable[(size_t)i - 256]; + g_CrcTable[i] = Z7_BSWAP32(x); + } +#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME) + g_Crc_Algo = Z7_CRC_NUM_TABLES_USE; +#endif +#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE)) + g_Crc_Be = 1; +#endif + } + } +#endif // !defined(MY_CPU_LE) + +#ifdef MY_CPU_LE +#ifdef Z7_CRC_HW_USE + if (CPU_IsSupported_CRC32()) + g_Crc_Algo = 0; +#endif // Z7_CRC_HW_USE +#endif // MY_CPU_LE + +#endif // Z7_CRC_NUM_TABLES_USE <= 1 +#endif // g_Crc_Algo was declared +} + +Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo) +{ + if (algo == 0) + return &CrcUpdate; + +#if defined(Z7_CRC_HW_USE) + if (algo == sizeof(CRC_HW_WORD_TYPE) * 8) + { +#ifdef Z7_CRC_HW_FORCE + return &CrcUpdate; +#else + if (g_Crc_Algo == 0) + return &CrcUpdate_HW; +#endif + } +#endif + +#ifndef Z7_CRC_HW_FORCE + if (algo == Z7_CRC_NUM_TABLES_USE) + return + #ifdef Z7_CRC_HW_USE + &CrcUpdate_Base; + #else + &CrcUpdate; + #endif +#endif + + return NULL; +} + +#undef kCrcPoly +#undef Z7_CRC_NUM_TABLES_USE +#undef Z7_CRC_NUM_TABLES_TOTAL +#undef CRC_UPDATE_BYTE_2 +#undef FUNC_NAME_LE_2 +#undef FUNC_NAME_LE_1 +#undef FUNC_NAME_LE +#undef FUNC_NAME_BE_2 +#undef FUNC_NAME_BE_1 +#undef FUNC_NAME_BE + +#undef CRC_HW_UNROLL_BYTES +#undef CRC_HW_WORD_FUNC +#undef CRC_HW_WORD_TYPE diff --git a/crnlib/lzma/7zCrc.h b/crnlib/lzma/7zCrc.h new file mode 100644 index 00000000..ed042358 --- /dev/null +++ b/crnlib/lzma/7zCrc.h @@ -0,0 +1,28 @@ +/* 7zCrc.h -- CRC32 calculation +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_CRC_H +#define ZIP7_INC_7Z_CRC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +extern UInt32 g_CrcTable[]; + +/* Call CrcGenerateTable one time before other CRC functions */ +void Z7_FASTCALL CrcGenerateTable(void); + +#define CRC_INIT_VAL 0xFFFFFFFF +#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL) +#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size); +UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size); + +typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size); +Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/7zCrcOpt.c b/crnlib/lzma/7zCrcOpt.c new file mode 100644 index 00000000..bbb61f54 --- /dev/null +++ b/crnlib/lzma/7zCrcOpt.c @@ -0,0 +1,199 @@ +/* 7zCrcOpt.c -- CRC32 calculation (optimized functions) +2023-12-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1 + +// for debug only : define Z7_CRC_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC_DEBUG_BE +#ifdef Z7_CRC_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +// the value Z7_CRC_NUM_TABLES_USE must be defined to same value as in 7zCrc.c +#ifdef Z7_CRC_NUM_TABLES +#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES +#else +#define Z7_CRC_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC_NUM_TABLES_USE % 4 || \ + Z7_CRC_NUM_TABLES_USE < 4 * 1 || \ + Z7_CRC_NUM_TABLES_USE > 4 * 6 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + + +#ifndef MY_CPU_BE + +#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +#define Q(n, d) \ + ( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) + +#define R(a) *((const UInt32 *)(const void *)p + (a)) + +#define CRC_FUNC_PRE_LE2(step) \ +UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) + +#define CRC_FUNC_PRE_LE(step) \ + CRC_FUNC_PRE_LE2(step); \ + CRC_FUNC_PRE_LE2(step) + +CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE) +{ + const Byte *p = (const Byte *)data; + const Byte *lim; + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) + v = CRC_UPDATE_BYTE_2(v, *p); + lim = p + size; + if (size >= Z7_CRC_NUM_TABLES_USE) + { + lim -= Z7_CRC_NUM_TABLES_USE; + do + { + v ^= R(0); + { +#if Z7_CRC_NUM_TABLES_USE == 1 * 4 + v = Q(0, v); +#else +#define U2(r, op) \ + { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } + UInt32 d, x; + U2(1, =) +#if Z7_CRC_NUM_TABLES_USE >= 3 * 4 +#define U(r) U2(r, ^=) + U(2) +#if Z7_CRC_NUM_TABLES_USE >= 4 * 4 + U(3) +#if Z7_CRC_NUM_TABLES_USE >= 5 * 4 + U(4) +#if Z7_CRC_NUM_TABLES_USE >= 6 * 4 + U(5) +#if Z7_CRC_NUM_TABLES_USE >= 7 * 4 +#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif +#endif +#endif +#endif +#endif +#undef U +#undef U2 + v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); +#endif + } + p += Z7_CRC_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC_NUM_TABLES_USE; + } + for (; p < lim; p++) + v = CRC_UPDATE_BYTE_2(v, *p); + return v; +} + +#undef CRC_UPDATE_BYTE_2 +#undef R +#undef Q +#undef CRC_FUNC_PRE_LE +#undef CRC_FUNC_PRE_LE2 + +#endif + + + + +#ifndef MY_CPU_LE + +#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8)) + +#define Q(n, d) \ + ( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) + +#ifdef Z7_CRC_DEBUG_BE + #define R(a) GetBe32a((const UInt32 *)(const void *)p + (a)) +#else + #define R(a) *((const UInt32 *)(const void *)p + (a)) +#endif + + +#define CRC_FUNC_PRE_BE2(step) \ +UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table) + +#define CRC_FUNC_PRE_BE(step) \ + CRC_FUNC_PRE_BE2(step); \ + CRC_FUNC_PRE_BE2(step) + +CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE) +{ + const Byte *p = (const Byte *)data; + const Byte *lim; + table += 0x100; + v = Z7_BSWAP32(v); + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++) + v = CRC_UPDATE_BYTE_2_BE(v, *p); + lim = p + size; + if (size >= Z7_CRC_NUM_TABLES_USE) + { + lim -= Z7_CRC_NUM_TABLES_USE; + do + { + v ^= R(0); + { +#if Z7_CRC_NUM_TABLES_USE == 1 * 4 + v = Q(0, v); +#else +#define U2(r, op) \ + { d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); } + UInt32 d, x; + U2(1, =) +#if Z7_CRC_NUM_TABLES_USE >= 3 * 4 +#define U(r) U2(r, ^=) + U(2) +#if Z7_CRC_NUM_TABLES_USE >= 4 * 4 + U(3) +#if Z7_CRC_NUM_TABLES_USE >= 5 * 4 + U(4) +#if Z7_CRC_NUM_TABLES_USE >= 6 * 4 + U(5) +#if Z7_CRC_NUM_TABLES_USE >= 7 * 4 +#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif +#endif +#endif +#endif +#endif +#undef U +#undef U2 + v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v); +#endif + } + p += Z7_CRC_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC_NUM_TABLES_USE; + } + for (; p < lim; p++) + v = CRC_UPDATE_BYTE_2_BE(v, *p); + return Z7_BSWAP32(v); +} + +#undef CRC_UPDATE_BYTE_2_BE +#undef R +#undef Q +#undef CRC_FUNC_PRE_BE +#undef CRC_FUNC_PRE_BE2 + +#endif +#undef Z7_CRC_NUM_TABLES_USE +#endif diff --git a/crnlib/lzma/7zDec.c b/crnlib/lzma/7zDec.c new file mode 100644 index 00000000..01afc330 --- /dev/null +++ b/crnlib/lzma/7zDec.c @@ -0,0 +1,673 @@ +/* 7zDec.c -- Decoding from 7z folder +: Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define Z7_PPMD_SUPPORT */ + +#include "7z.h" +#include "7zCrc.h" + +#include "Bcj2.h" +#include "Bra.h" +#include "CpuArch.h" +#include "Delta.h" +#include "LzmaDec.h" +#include "Lzma2Dec.h" +#ifdef Z7_PPMD_SUPPORT +#include "Ppmd7.h" +#endif + +#define k_Copy 0 +#ifndef Z7_NO_METHOD_LZMA2 +#define k_LZMA2 0x21 +#endif +#define k_LZMA 0x30101 +#define k_BCJ2 0x303011B + +#if !defined(Z7_NO_METHODS_FILTERS) +#define Z7_USE_BRANCH_FILTER +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64) +#define Z7_USE_FILTER_ARM64 +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARM64 0xa +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || \ + defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT) +#define Z7_USE_FILTER_ARMT +#ifndef Z7_USE_BRANCH_FILTER +#define Z7_USE_BRANCH_FILTER +#endif +#define k_ARMT 0x3030701 +#endif + +#ifndef Z7_NO_METHODS_FILTERS +#define k_Delta 3 +#define k_RISCV 0xb +#define k_BCJ 0x3030103 +#define k_PPC 0x3030205 +#define k_IA64 0x3030401 +#define k_ARM 0x3030501 +#define k_SPARC 0x3030805 +#endif + +#ifdef Z7_PPMD_SUPPORT + +#define k_PPMD 0x30401 + +typedef struct +{ + IByteIn vt; + const Byte *cur; + const Byte *end; + const Byte *begin; + UInt64 processed; + BoolInt extra; + SRes res; + ILookInStreamPtr inStream; +} CByteInToLook; + +static Byte ReadByte(IByteInPtr pp) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook) + if (p->cur != p->end) + return *p->cur++; + if (p->res == SZ_OK) + { + size_t size = (size_t)(p->cur - p->begin); + p->processed += size; + p->res = ILookInStream_Skip(p->inStream, size); + size = (1 << 25); + p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size); + p->cur = p->begin; + p->end = p->begin + size; + if (size != 0) + return *p->cur++; + } + p->extra = True; + return 0; +} + +static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CPpmd7 ppmd; + CByteInToLook s; + SRes res = SZ_OK; + + s.vt.Read = ReadByte; + s.inStream = inStream; + s.begin = s.end = s.cur = NULL; + s.extra = False; + s.res = SZ_OK; + s.processed = 0; + + if (propsSize != 5) + return SZ_ERROR_UNSUPPORTED; + + { + unsigned order = props[0]; + UInt32 memSize = GetUi32(props + 1); + if (order < PPMD7_MIN_ORDER || + order > PPMD7_MAX_ORDER || + memSize < PPMD7_MIN_MEM_SIZE || + memSize > PPMD7_MAX_MEM_SIZE) + return SZ_ERROR_UNSUPPORTED; + Ppmd7_Construct(&ppmd); + if (!Ppmd7_Alloc(&ppmd, memSize, allocMain)) + return SZ_ERROR_MEM; + Ppmd7_Init(&ppmd, order); + } + { + ppmd.rc.dec.Stream = &s.vt; + if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec)) + res = SZ_ERROR_DATA; + else if (!s.extra) + { + Byte *buf = outBuffer; + const Byte *lim = buf + outSize; + for (; buf != lim; buf++) + { + int sym = Ppmd7z_DecodeSymbol(&ppmd); + if (s.extra || sym < 0) + break; + *buf = (Byte)sym; + } + if (buf != lim) + res = SZ_ERROR_DATA; + else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) + { + /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */ + res = SZ_ERROR_DATA; + } + } + if (s.extra) + res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); + else if (s.processed + (size_t)(s.cur - s.begin) != inSize) + res = SZ_ERROR_DATA; + } + Ppmd7_Free(&ppmd, allocMain); + return res; +} + +#endif + + +static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CLzmaDec state; + SRes res = SZ_OK; + + LzmaDec_CONSTRUCT(&state) + RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain)) + state.dic = outBuffer; + state.dicBufSize = outSize; + LzmaDec_Init(&state); + + for (;;) + { + const void *inBuf = NULL; + size_t lookahead = (1 << 18); + if (lookahead > inSize) + lookahead = (size_t)inSize; + res = ILookInStream_Look(inStream, &inBuf, &lookahead); + if (res != SZ_OK) + break; + + { + SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos; + ELzmaStatus status; + res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status); + lookahead -= inProcessed; + inSize -= inProcessed; + if (res != SZ_OK) + break; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (outSize != state.dicPos || inSize != 0) + res = SZ_ERROR_DATA; + break; + } + + if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) + break; + + if (inProcessed == 0 && dicPos == state.dicPos) + { + res = SZ_ERROR_DATA; + break; + } + + res = ILookInStream_Skip(inStream, inProcessed); + if (res != SZ_OK) + break; + } + } + + LzmaDec_FreeProbs(&state, allocMain); + return res; +} + + +#ifndef Z7_NO_METHOD_LZMA2 + +static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) +{ + CLzma2Dec state; + SRes res = SZ_OK; + + Lzma2Dec_CONSTRUCT(&state) + if (propsSize != 1) + return SZ_ERROR_DATA; + RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain)) + state.decoder.dic = outBuffer; + state.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&state); + + for (;;) + { + const void *inBuf = NULL; + size_t lookahead = (1 << 18); + if (lookahead > inSize) + lookahead = (size_t)inSize; + res = ILookInStream_Look(inStream, &inBuf, &lookahead); + if (res != SZ_OK) + break; + + { + SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos; + ELzmaStatus status; + res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status); + lookahead -= inProcessed; + inSize -= inProcessed; + if (res != SZ_OK) + break; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (outSize != state.decoder.dicPos || inSize != 0) + res = SZ_ERROR_DATA; + break; + } + + if (inProcessed == 0 && dicPos == state.decoder.dicPos) + { + res = SZ_ERROR_DATA; + break; + } + + res = ILookInStream_Skip(inStream, inProcessed); + if (res != SZ_OK) + break; + } + } + + Lzma2Dec_FreeProbs(&state, allocMain); + return res; +} + +#endif + + +static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer) +{ + while (inSize > 0) + { + const void *inBuf; + size_t curSize = (1 << 18); + if (curSize > inSize) + curSize = (size_t)inSize; + RINOK(ILookInStream_Look(inStream, &inBuf, &curSize)) + if (curSize == 0) + return SZ_ERROR_INPUT_EOF; + memcpy(outBuffer, inBuf, curSize); + outBuffer += curSize; + inSize -= curSize; + RINOK(ILookInStream_Skip(inStream, curSize)) + } + return SZ_OK; +} + +static BoolInt IS_MAIN_METHOD(UInt32 m) +{ + switch (m) + { + case k_Copy: + case k_LZMA: + #ifndef Z7_NO_METHOD_LZMA2 + case k_LZMA2: + #endif + #ifdef Z7_PPMD_SUPPORT + case k_PPMD: + #endif + return True; + default: + return False; + } +} + +static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c) +{ + return + c->NumStreams == 1 + /* && c->MethodID <= (UInt32)0xFFFFFFFF */ + && IS_MAIN_METHOD((UInt32)c->MethodID); +} + +#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4) + +static SRes CheckSupportedFolder(const CSzFolder *f) +{ + if (f->NumCoders < 1 || f->NumCoders > 4) + return SZ_ERROR_UNSUPPORTED; + if (!IS_SUPPORTED_CODER(&f->Coders[0])) + return SZ_ERROR_UNSUPPORTED; + if (f->NumCoders == 1) + { + if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBonds != 0) + return SZ_ERROR_UNSUPPORTED; + return SZ_OK; + } + + + #if defined(Z7_USE_BRANCH_FILTER) + + if (f->NumCoders == 2) + { + const CSzCoderInfo *c = &f->Coders[1]; + if ( + /* c->MethodID > (UInt32)0xFFFFFFFF || */ + c->NumStreams != 1 + || f->NumPackStreams != 1 + || f->PackStreams[0] != 0 + || f->NumBonds != 1 + || f->Bonds[0].InIndex != 1 + || f->Bonds[0].OutIndex != 0) + return SZ_ERROR_UNSUPPORTED; + switch ((UInt32)c->MethodID) + { + #if !defined(Z7_NO_METHODS_FILTERS) + case k_Delta: + case k_BCJ: + case k_PPC: + case k_IA64: + case k_SPARC: + case k_ARM: + case k_RISCV: + #endif + #ifdef Z7_USE_FILTER_ARM64 + case k_ARM64: + #endif + #ifdef Z7_USE_FILTER_ARMT + case k_ARMT: + #endif + break; + default: + return SZ_ERROR_UNSUPPORTED; + } + return SZ_OK; + } + + #endif + + + if (f->NumCoders == 4) + { + if (!IS_SUPPORTED_CODER(&f->Coders[1]) + || !IS_SUPPORTED_CODER(&f->Coders[2]) + || !IS_BCJ2(&f->Coders[3])) + return SZ_ERROR_UNSUPPORTED; + if (f->NumPackStreams != 4 + || f->PackStreams[0] != 2 + || f->PackStreams[1] != 6 + || f->PackStreams[2] != 1 + || f->PackStreams[3] != 0 + || f->NumBonds != 3 + || f->Bonds[0].InIndex != 5 || f->Bonds[0].OutIndex != 0 + || f->Bonds[1].InIndex != 4 || f->Bonds[1].OutIndex != 1 + || f->Bonds[2].InIndex != 3 || f->Bonds[2].OutIndex != 2) + return SZ_ERROR_UNSUPPORTED; + return SZ_OK; + } + + return SZ_ERROR_UNSUPPORTED; +} + + + + + + +static SRes SzFolder_Decode2(const CSzFolder *folder, + const Byte *propsData, + const UInt64 *unpackSizes, + const UInt64 *packPositions, + ILookInStreamPtr inStream, UInt64 startPos, + Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain, + Byte *tempBuf[]) +{ + UInt32 ci; + SizeT tempSizes[3] = { 0, 0, 0}; + SizeT tempSize3 = 0; + Byte *tempBuf3 = 0; + + RINOK(CheckSupportedFolder(folder)) + + for (ci = 0; ci < folder->NumCoders; ci++) + { + const CSzCoderInfo *coder = &folder->Coders[ci]; + + if (IS_MAIN_METHOD((UInt32)coder->MethodID)) + { + UInt32 si = 0; + UInt64 offset; + UInt64 inSize; + Byte *outBufCur = outBuffer; + SizeT outSizeCur = outSize; + if (folder->NumCoders == 4) + { + const UInt32 indices[] = { 3, 2, 0 }; + const UInt64 unpackSize = unpackSizes[ci]; + si = indices[ci]; + if (ci < 2) + { + Byte *temp; + outSizeCur = (SizeT)unpackSize; + if (outSizeCur != unpackSize) + return SZ_ERROR_MEM; + temp = (Byte *)ISzAlloc_Alloc(allocMain, outSizeCur); + if (!temp && outSizeCur != 0) + return SZ_ERROR_MEM; + outBufCur = tempBuf[1 - ci] = temp; + tempSizes[1 - ci] = outSizeCur; + } + else if (ci == 2) + { + if (unpackSize > outSize) /* check it */ + return SZ_ERROR_PARAM; + tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize); + tempSize3 = outSizeCur = (SizeT)unpackSize; + } + else + return SZ_ERROR_UNSUPPORTED; + } + offset = packPositions[si]; + inSize = packPositions[(size_t)si + 1] - offset; + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + + if (coder->MethodID == k_Copy) + { + if (inSize != outSizeCur) /* check it */ + return SZ_ERROR_DATA; + RINOK(SzDecodeCopy(inSize, inStream, outBufCur)) + } + else if (coder->MethodID == k_LZMA) + { + RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #ifndef Z7_NO_METHOD_LZMA2 + else if (coder->MethodID == k_LZMA2) + { + RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #endif + #ifdef Z7_PPMD_SUPPORT + else if (coder->MethodID == k_PPMD) + { + RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)) + } + #endif + else + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->MethodID == k_BCJ2) + { + const UInt64 offset = packPositions[1]; + const UInt64 s3Size = packPositions[2] - offset; + + if (ci != 3) + return SZ_ERROR_UNSUPPORTED; + + tempSizes[2] = (SizeT)s3Size; + if (tempSizes[2] != s3Size) + return SZ_ERROR_MEM; + tempBuf[2] = (Byte *)ISzAlloc_Alloc(allocMain, tempSizes[2]); + if (!tempBuf[2] && tempSizes[2] != 0) + return SZ_ERROR_MEM; + + RINOK(LookInStream_SeekTo(inStream, startPos + offset)) + RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2])) + + if ((tempSizes[0] & 3) != 0 || + (tempSizes[1] & 3) != 0 || + tempSize3 + tempSizes[0] + tempSizes[1] != outSize) + return SZ_ERROR_DATA; + + { + CBcj2Dec p; + + p.bufs[0] = tempBuf3; p.lims[0] = tempBuf3 + tempSize3; + p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0]; + p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1]; + p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2]; + + p.dest = outBuffer; + p.destLim = outBuffer + outSize; + + Bcj2Dec_Init(&p); + RINOK(Bcj2Dec_Decode(&p)) + + { + unsigned i; + for (i = 0; i < 4; i++) + if (p.bufs[i] != p.lims[i]) + return SZ_ERROR_DATA; + if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p)) + return SZ_ERROR_DATA; + } + } + } +#if defined(Z7_USE_BRANCH_FILTER) + else if (ci == 1) + { +#if !defined(Z7_NO_METHODS_FILTERS) + if (coder->MethodID == k_Delta) + { + if (coder->PropsSize != 1) + return SZ_ERROR_UNSUPPORTED; + { + Byte state[DELTA_STATE_SIZE]; + Delta_Init(state); + Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize); + } + continue; + } +#endif + +#ifdef Z7_USE_FILTER_ARM64 + if (coder->MethodID == k_ARM64) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + { + pc = GetUi32(propsData + coder->PropsOffset); + if (pc & 3) + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc); + continue; + } +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) + if (coder->MethodID == k_RISCV) + { + UInt32 pc = 0; + if (coder->PropsSize == 4) + { + pc = GetUi32(propsData + coder->PropsOffset); + if (pc & 1) + return SZ_ERROR_UNSUPPORTED; + } + else if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc); + continue; + } +#endif + +#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) + { + if (coder->PropsSize != 0) + return SZ_ERROR_UNSUPPORTED; + #define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0; + switch (coder->MethodID) + { + #if !defined(Z7_NO_METHODS_FILTERS) + case k_BCJ: + { + UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0 + break; + } + case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0; + // CASE_BRA_CONV(PPC) + CASE_BRA_CONV(IA64) + CASE_BRA_CONV(SPARC) + CASE_BRA_CONV(ARM) + #endif + #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT) + CASE_BRA_CONV(ARMT) + #endif + default: + return SZ_ERROR_UNSUPPORTED; + } + continue; + } +#endif + } // (c == 1) +#endif // Z7_USE_BRANCH_FILTER + else + return SZ_ERROR_UNSUPPORTED; + } + + return SZ_OK; +} + + +SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, + ILookInStreamPtr inStream, UInt64 startPos, + Byte *outBuffer, size_t outSize, + ISzAllocPtr allocMain) +{ + SRes res; + CSzFolder folder; + CSzData sd; + + const Byte *data = p->CodersData + p->FoCodersOffsets[folderIndex]; + sd.Data = data; + sd.Size = p->FoCodersOffsets[(size_t)folderIndex + 1] - p->FoCodersOffsets[folderIndex]; + + res = SzGetNextFolderItem(&folder, &sd); + + if (res != SZ_OK) + return res; + + if (sd.Size != 0 + || folder.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex] + || outSize != SzAr_GetFolderUnpackSize(p, folderIndex)) + return SZ_ERROR_FAIL; + { + unsigned i; + Byte *tempBuf[3] = { 0, 0, 0}; + + res = SzFolder_Decode2(&folder, data, + &p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]], + p->PackPositions + p->FoStartPackStreamIndex[folderIndex], + inStream, startPos, + outBuffer, (SizeT)outSize, allocMain, tempBuf); + + for (i = 0; i < 3; i++) + ISzAlloc_Free(allocMain, tempBuf[i]); + + if (res == SZ_OK) + if (SzBitWithVals_Check(&p->FolderCRCs, folderIndex)) + if (CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex]) + res = SZ_ERROR_CRC; + + return res; + } +} diff --git a/crnlib/lzma/7zFile.c b/crnlib/lzma/7zFile.c new file mode 100644 index 00000000..96d4edd2 --- /dev/null +++ b/crnlib/lzma/7zFile.c @@ -0,0 +1,443 @@ +/* 7zFile.c -- File IO +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zFile.h" + +#ifndef USE_WINDOWS_FILE + + #include + + #ifndef USE_FOPEN + #include + #include + #ifdef _WIN32 + #include + typedef int ssize_t; + typedef int off_t; + #else + #include + #endif + #endif + +#else + +/* + ReadFile and WriteFile functions in Windows have BUG: + If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) + from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES + (Insufficient system resources exist to complete the requested service). + Probably in some version of Windows there are problems with other sizes: + for 32 MB (maybe also for 16 MB). + And message can be "Network connection was lost" +*/ + +#endif + +#define kChunkSizeMax (1 << 22) + +void File_Construct(CSzFile *p) +{ + #ifdef USE_WINDOWS_FILE + p->handle = INVALID_HANDLE_VALUE; + #elif defined(USE_FOPEN) + p->file = NULL; + #else + p->fd = -1; + #endif +} + +#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE) + +static WRes File_Open(CSzFile *p, const char *name, int writeMode) +{ + #ifdef USE_WINDOWS_FILE + + p->handle = CreateFileA(name, + writeMode ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, NULL, + writeMode ? CREATE_ALWAYS : OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); + + #elif defined(USE_FOPEN) + + p->file = fopen(name, writeMode ? "wb+" : "rb"); + return (p->file != 0) ? 0 : + #ifdef UNDER_CE + 2; /* ENOENT */ + #else + errno; + #endif + + #else + + int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY); + #ifdef O_BINARY + flags |= O_BINARY; + #endif + p->fd = open(name, flags, 0666); + return (p->fd != -1) ? 0 : errno; + + #endif +} + +WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); } + +WRes OutFile_Open(CSzFile *p, const char *name) +{ + #if defined(USE_WINDOWS_FILE) || defined(USE_FOPEN) + return File_Open(p, name, 1); + #else + p->fd = creat(name, 0666); + return (p->fd != -1) ? 0 : errno; + #endif +} + +#endif + + +#ifdef USE_WINDOWS_FILE +static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode) +{ + p->handle = CreateFileW(name, + writeMode ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, NULL, + writeMode ? CREATE_ALWAYS : OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); +} +WRes InFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 0); } +WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1); } +#endif + +WRes File_Close(CSzFile *p) +{ + #ifdef USE_WINDOWS_FILE + + if (p->handle != INVALID_HANDLE_VALUE) + { + if (!CloseHandle(p->handle)) + return GetLastError(); + p->handle = INVALID_HANDLE_VALUE; + } + + #elif defined(USE_FOPEN) + + if (p->file != NULL) + { + int res = fclose(p->file); + if (res != 0) + { + if (res == EOF) + return errno; + return res; + } + p->file = NULL; + } + + #else + + if (p->fd != -1) + { + if (close(p->fd) != 0) + return errno; + p->fd = -1; + } + + #endif + + return 0; +} + + +WRes File_Read(CSzFile *p, void *data, size_t *size) +{ + size_t originalSize = *size; + *size = 0; + if (originalSize == 0) + return 0; + + #ifdef USE_WINDOWS_FILE + + do + { + const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + DWORD processed = 0; + const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); + data = (void *)((Byte *)data + processed); + originalSize -= processed; + *size += processed; + if (!res) + return GetLastError(); + // debug : we can break here for partial reading mode + if (processed == 0) + break; + } + while (originalSize > 0); + + #elif defined(USE_FOPEN) + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const size_t processed = fread(data, 1, curSize, p->file); + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= processed; + *size += processed; + if (processed != curSize) + return ferror(p->file); + // debug : we can break here for partial reading mode + if (processed == 0) + break; + } + while (originalSize > 0); + + #else + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const ssize_t processed = read(p->fd, data, curSize); + if (processed == -1) + return errno; + if (processed == 0) + break; + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= (size_t)processed; + *size += (size_t)processed; + // debug : we can break here for partial reading mode + // break; + } + while (originalSize > 0); + + #endif + + return 0; +} + + +WRes File_Write(CSzFile *p, const void *data, size_t *size) +{ + size_t originalSize = *size; + *size = 0; + if (originalSize == 0) + return 0; + + #ifdef USE_WINDOWS_FILE + + do + { + const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + DWORD processed = 0; + const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); + data = (const void *)((const Byte *)data + processed); + originalSize -= processed; + *size += processed; + if (!res) + return GetLastError(); + if (processed == 0) + break; + } + while (originalSize > 0); + + #elif defined(USE_FOPEN) + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const size_t processed = fwrite(data, 1, curSize, p->file); + data = (void *)((Byte *)data + (size_t)processed); + originalSize -= processed; + *size += processed; + if (processed != curSize) + return ferror(p->file); + if (processed == 0) + break; + } + while (originalSize > 0); + + #else + + do + { + const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize; + const ssize_t processed = write(p->fd, data, curSize); + if (processed == -1) + return errno; + if (processed == 0) + break; + data = (const void *)((const Byte *)data + (size_t)processed); + originalSize -= (size_t)processed; + *size += (size_t)processed; + } + while (originalSize > 0); + + #endif + + return 0; +} + + +WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) +{ + #ifdef USE_WINDOWS_FILE + + DWORD moveMethod; + UInt32 low = (UInt32)*pos; + LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ + // (int) to eliminate clang warning + switch ((int)origin) + { + case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; + case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break; + case SZ_SEEK_END: moveMethod = FILE_END; break; + default: return ERROR_INVALID_PARAMETER; + } + low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod); + if (low == (UInt32)0xFFFFFFFF) + { + WRes res = GetLastError(); + if (res != NO_ERROR) + return res; + } + *pos = ((Int64)high << 32) | low; + return 0; + + #else + + int moveMethod; // = origin; + + switch ((int)origin) + { + case SZ_SEEK_SET: moveMethod = SEEK_SET; break; + case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; + case SZ_SEEK_END: moveMethod = SEEK_END; break; + default: return EINVAL; + } + + #if defined(USE_FOPEN) + { + int res = fseek(p->file, (long)*pos, moveMethod); + if (res == -1) + return errno; + *pos = ftell(p->file); + if (*pos == -1) + return errno; + return 0; + } + #else + { + off_t res = lseek(p->fd, (off_t)*pos, moveMethod); + if (res == -1) + return errno; + *pos = res; + return 0; + } + + #endif // USE_FOPEN + #endif // USE_WINDOWS_FILE +} + + +WRes File_GetLength(CSzFile *p, UInt64 *length) +{ + #ifdef USE_WINDOWS_FILE + + DWORD sizeHigh; + DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); + if (sizeLow == 0xFFFFFFFF) + { + DWORD res = GetLastError(); + if (res != NO_ERROR) + return res; + } + *length = (((UInt64)sizeHigh) << 32) + sizeLow; + return 0; + + #elif defined(USE_FOPEN) + + long pos = ftell(p->file); + int res = fseek(p->file, 0, SEEK_END); + *length = ftell(p->file); + fseek(p->file, pos, SEEK_SET); + return res; + + #else + + off_t pos; + *length = 0; + pos = lseek(p->fd, 0, SEEK_CUR); + if (pos != -1) + { + const off_t len2 = lseek(p->fd, 0, SEEK_END); + const off_t res2 = lseek(p->fd, pos, SEEK_SET); + if (len2 != -1) + { + *length = (UInt64)len2; + if (res2 != -1) + return 0; + } + } + return errno; + + #endif +} + + +/* ---------- FileSeqInStream ---------- */ + +static SRes FileSeqInStream_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileSeqInStream) + const WRes wres = File_Read(&p->file, buf, size); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; +} + +void FileSeqInStream_CreateVTable(CFileSeqInStream *p) +{ + p->vt.Read = FileSeqInStream_Read; +} + + +/* ---------- FileInStream ---------- */ + +static SRes FileInStream_Read(ISeekInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream) + const WRes wres = File_Read(&p->file, buf, size); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; +} + +static SRes FileInStream_Seek(ISeekInStreamPtr pp, Int64 *pos, ESzSeek origin) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream) + const WRes wres = File_Seek(&p->file, pos, origin); + p->wres = wres; + return (wres == 0) ? SZ_OK : SZ_ERROR_READ; +} + +void FileInStream_CreateVTable(CFileInStream *p) +{ + p->vt.Read = FileInStream_Read; + p->vt.Seek = FileInStream_Seek; +} + + +/* ---------- FileOutStream ---------- */ + +static size_t FileOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileOutStream) + const WRes wres = File_Write(&p->file, data, &size); + p->wres = wres; + return size; +} + +void FileOutStream_CreateVTable(CFileOutStream *p) +{ + p->vt.Write = FileOutStream_Write; +} diff --git a/crnlib/lzma/7zFile.h b/crnlib/lzma/7zFile.h new file mode 100644 index 00000000..d7c871a7 --- /dev/null +++ b/crnlib/lzma/7zFile.h @@ -0,0 +1,92 @@ +/* 7zFile.h -- File IO +2023-03-05 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_FILE_H +#define ZIP7_INC_FILE_H + +#ifdef _WIN32 +#define USE_WINDOWS_FILE +// #include +#endif + +#ifdef USE_WINDOWS_FILE +#include "7zWindows.h" + +#else +// note: USE_FOPEN mode is limited to 32-bit file size +// #define USE_FOPEN +// #include +#endif + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* ---------- File ---------- */ + +typedef struct +{ + #ifdef USE_WINDOWS_FILE + HANDLE handle; + #elif defined(USE_FOPEN) + FILE *file; + #else + int fd; + #endif +} CSzFile; + +void File_Construct(CSzFile *p); +#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE) +WRes InFile_Open(CSzFile *p, const char *name); +WRes OutFile_Open(CSzFile *p, const char *name); +#endif +#ifdef USE_WINDOWS_FILE +WRes InFile_OpenW(CSzFile *p, const WCHAR *name); +WRes OutFile_OpenW(CSzFile *p, const WCHAR *name); +#endif +WRes File_Close(CSzFile *p); + +/* reads max(*size, remain file's size) bytes */ +WRes File_Read(CSzFile *p, void *data, size_t *size); + +/* writes *size bytes */ +WRes File_Write(CSzFile *p, const void *data, size_t *size); + +WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin); +WRes File_GetLength(CSzFile *p, UInt64 *length); + + +/* ---------- FileInStream ---------- */ + +typedef struct +{ + ISeqInStream vt; + CSzFile file; + WRes wres; +} CFileSeqInStream; + +void FileSeqInStream_CreateVTable(CFileSeqInStream *p); + + +typedef struct +{ + ISeekInStream vt; + CSzFile file; + WRes wres; +} CFileInStream; + +void FileInStream_CreateVTable(CFileInStream *p); + + +typedef struct +{ + ISeqOutStream vt; + CSzFile file; + WRes wres; +} CFileOutStream; + +void FileOutStream_CreateVTable(CFileOutStream *p); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/7zStream.c b/crnlib/lzma/7zStream.c new file mode 100644 index 00000000..92f821e2 --- /dev/null +++ b/crnlib/lzma/7zStream.c @@ -0,0 +1,199 @@ +/* 7zStream.c -- 7z Stream functions +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zTypes.h" + + +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize) +{ + size_t size = *processedSize; + *processedSize = 0; + while (size != 0) + { + size_t cur = size; + const SRes res = ISeqInStream_Read(stream, buf, &cur); + *processedSize += cur; + buf = (void *)((Byte *)buf + cur); + size -= cur; + if (res != SZ_OK) + return res; + if (cur == 0) + return SZ_OK; + } + return SZ_OK; +} + +/* +SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ISeqInStream_Read(stream, buf, &processed)) + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size) +{ + return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} +*/ + + +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf) +{ + size_t processed = 1; + RINOK(ISeqInStream_Read(stream, buf, &processed)) + return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; +} + + + +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset) +{ + Int64 t = (Int64)offset; + return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); +} + +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size) +{ + const void *lookBuf; + if (*size == 0) + return SZ_OK; + RINOK(ILookInStream_Look(stream, &lookBuf, size)) + memcpy(buf, lookBuf, *size); + return ILookInStream_Skip(stream, *size); +} + +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(ILookInStream_Read(stream, buf, &processed)) + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size) +{ + return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + + + +#define GET_LookToRead2 Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2) + +static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + size2 = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, &size2); + p->size = size2; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size) +{ + SRes res = SZ_OK; + GET_LookToRead2 + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size != 0) + { + p->pos = 0; + p->size = 0; + if (*size > p->bufSize) + *size = p->bufSize; + res = ISeekInStream_Read(p->realStream, p->buf, size); + size2 = p->size = *size; + } + if (*size > size2) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset) +{ + GET_LookToRead2 + p->pos += offset; + return SZ_OK; +} + +static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size) +{ + GET_LookToRead2 + size_t rem = p->size - p->pos; + if (rem == 0) + return ISeekInStream_Read(p->realStream, buf, size); + if (rem > *size) + rem = *size; + memcpy(buf, p->buf + p->pos, rem); + p->pos += rem; + *size = rem; + return SZ_OK; +} + +static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin) +{ + GET_LookToRead2 + p->pos = p->size = 0; + return ISeekInStream_Seek(p->realStream, pos, origin); +} + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead) +{ + p->vt.Look = lookahead ? + LookToRead2_Look_Lookahead : + LookToRead2_Look_Exact; + p->vt.Skip = LookToRead2_Skip; + p->vt.Read = LookToRead2_Read; + p->vt.Seek = LookToRead2_Seek; +} + + + +static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook) + return LookInStream_LookRead(p->realStream, buf, size); +} + +void SecToLook_CreateVTable(CSecToLook *p) +{ + p->vt.Read = SecToLook_Read; +} + +static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead) + return ILookInStream_Read(p->realStream, buf, size); +} + +void SecToRead_CreateVTable(CSecToRead *p) +{ + p->vt.Read = SecToRead_Read; +} diff --git a/crnlib/lzma/7zTypes.h b/crnlib/lzma/7zTypes.h new file mode 100644 index 00000000..5d3c7483 --- /dev/null +++ b/crnlib/lzma/7zTypes.h @@ -0,0 +1,597 @@ +/* 7zTypes.h -- Basic types +2024-01-24 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_7Z_TYPES_H +#define ZIP7_7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#else +#include +#endif + +#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + /* + // C11/C++11: + #include + #define MY_ALIGN(n) alignas(n) + */ + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) + +#else // _WIN32 + +// #define ENV_HAVE_LSTAT +typedef int WRes; + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY_FACILITY_ERRNO 0x800 +#define MY_FACILITY_WIN32 7 +#define MY_FACILITY_WRes MY_FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY_FACILITY_WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_PARAMETER EINVAL +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. +#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY_FACILITY_WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +#endif + + +#ifndef RINOK +#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; } +#endif + +#ifndef RINOK_WRes +#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef Z7_DECL_Int32_AS_long +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + +#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL) + + +#ifdef Z7_DECL_Int64_AS_long + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#else +#if defined(__clang__) || defined(__GNUC__) +#include +typedef int64_t Int64; +typedef uint64_t UInt64; +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +// #define UINT64_CONST(n) n ## ULL +#endif +#endif + +#endif + +#define UINT64_CONST(n) n + + +#ifdef Z7_DECL_SizeT_AS_unsigned_int +typedef unsigned int SizeT; +#else +typedef size_t SizeT; +#endif + +/* +#if (defined(_MSC_VER) && _MSC_VER <= 1200) +typedef size_t MY_uintptr_t; +#else +#include +typedef uintptr_t MY_uintptr_t; +#endif +*/ + +typedef int BoolInt; +/* typedef BoolInt Bool; */ +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define Z7_STDCALL __stdcall +#else +#define Z7_STDCALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define Z7_NO_INLINE __declspec(noinline) +#else +#define Z7_NO_INLINE +#endif + +#define Z7_FORCE_INLINE __forceinline + +#define Z7_CDECL __cdecl +#define Z7_FASTCALL __fastcall + +#else // _MSC_VER + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) +#define Z7_NO_INLINE __attribute__((noinline)) +#define Z7_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define Z7_NO_INLINE +#define Z7_FORCE_INLINE +#endif + +#define Z7_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define Z7_FASTCALL __attribute__((fastcall)) +// #define Z7_FASTCALL __attribute__((cdecl)) +#define Z7_FASTCALL +#elif defined(MY_CPU_AMD64) +// #define Z7_FASTCALL __attribute__((ms_abi)) +#define Z7_FASTCALL +#else +#define Z7_FASTCALL +#endif + +#endif // _MSC_VER + + +/* The following interfaces use first parameter as pointer to structure */ + +// #define Z7_C_IFACE_CONST_QUAL +#define Z7_C_IFACE_CONST_QUAL const + +#define Z7_C_IFACE_DECL(a) \ + struct a ## _; \ + typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \ + typedef struct a ## _ a; \ + struct a ## _ + + +Z7_C_IFACE_DECL (IByteIn) +{ + Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +Z7_C_IFACE_DECL (IByteOut) +{ + void (*Write)(IByteOutPtr p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +Z7_C_IFACE_DECL (ISeqInStream) +{ + SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* try to read as much as avail in stream and limited by (*processedSize) */ +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize); +/* it can return SZ_ERROR_INPUT_EOF */ +// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size); +// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf); + + +Z7_C_IFACE_DECL (ISeqOutStream) +{ + size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +Z7_C_IFACE_DECL (ISeekInStream) +{ + SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +Z7_C_IFACE_DECL (ILookInStream) +{ + SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(ILookInStreamPtr p, size_t offset); + /* offset must be <= output(*size) of Look */ + SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size); + + +typedef struct +{ + ILookInStream vt; + ISeekInStreamPtr realStream; + + size_t pos; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + ILookInStreamPtr realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + ILookInStreamPtr realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +Z7_C_IFACE_DECL (ICompressProgress) +{ + SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. */ +}; + +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef Z7_container_of + +/* +#define Z7_container_of(ptr, type, m) container_of(ptr, type, m) +#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define Z7_container_of(ptr, type, m) \ + ((type *)(void *)((char *)(void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +#define Z7_container_of_CONST(ptr, type, m) \ + ((const type *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +/* +#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \ + ((type *)(void *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) +*/ + +#endif + +#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) + +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) +*/ +#if defined (__clang__) || defined(__GNUC__) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL +#endif + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ + Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ + type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ + Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) + + +// #define ZIP7_DECLARE_HANDLE(name) typedef void *name; +#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name; + + +#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifndef Z7_ARRAY_SIZE +#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +#define k_PropVar_TimePrec_0 0 +#define k_PropVar_TimePrec_Unix 1 +#define k_PropVar_TimePrec_DOS 2 +#define k_PropVar_TimePrec_HighPrec 3 +#define k_PropVar_TimePrec_Base 16 +#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7) +#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9) + +EXTERN_C_END + +#endif + +/* +#ifndef Z7_ST +#ifdef _7ZIP_ST +#define Z7_ST +#endif +#endif +*/ diff --git a/crnlib/lzma/7zVersion.h b/crnlib/lzma/7zVersion.h new file mode 100644 index 00000000..2416c7da --- /dev/null +++ b/crnlib/lzma/7zVersion.h @@ -0,0 +1,27 @@ +#define MY_VER_MAJOR 24 +#define MY_VER_MINOR 9 +#define MY_VER_BUILD 0 +#define MY_VERSION_NUMBERS "24.09" +#define MY_VERSION MY_VERSION_NUMBERS + +#ifdef MY_CPU_NAME + #define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")" +#else + #define MY_VERSION_CPU MY_VERSION +#endif + +#define MY_DATE "2024-11-29" +#undef MY_COPYRIGHT +#undef MY_VERSION_COPYRIGHT_DATE +#define MY_AUTHOR_NAME "Igor Pavlov" +#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov" + +#ifdef USE_COPYRIGHT_CR + #define MY_COPYRIGHT MY_COPYRIGHT_CR +#else + #define MY_COPYRIGHT MY_COPYRIGHT_PD +#endif + +#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE +#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE diff --git a/crnlib/lzma/7zWindows.h b/crnlib/lzma/7zWindows.h new file mode 100644 index 00000000..0967a799 --- /dev/null +++ b/crnlib/lzma/7zWindows.h @@ -0,0 +1,101 @@ +/* 7zWindows.h -- StdAfx +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_7Z_WINDOWS_H +#define ZIP7_INC_7Z_WINDOWS_H + +#ifdef _WIN32 + +#if defined(__clang__) +# pragma clang diagnostic push +#endif + +#if defined(_MSC_VER) + +#pragma warning(push) +#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' + +#if _MSC_VER == 1900 +// for old kit10 versions +// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext': +#endif +// win10 Windows Kit: +#endif // _MSC_VER + +#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64) +// for msvc6 without sdk2003 +#define RPC_NO_WINDOWS_H +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +// #if defined(__GNUC__) && !defined(__clang__) +#include +#else +#include +#endif +// #include +// #include + +// but if precompiled with clang-cl then we need +// #include +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(__clang__) +# pragma clang diagnostic pop +#endif + +#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64) +#ifndef _W64 + +typedef long LONG_PTR, *PLONG_PTR; +typedef unsigned long ULONG_PTR, *PULONG_PTR; +typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; + +#define Z7_OLD_WIN_SDK +#endif // _W64 +#endif // _MSC_VER == 1200 + +#ifdef Z7_OLD_WIN_SDK + +#ifndef INVALID_FILE_ATTRIBUTES +#define INVALID_FILE_ATTRIBUTES ((DWORD)-1) +#endif +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif +#ifndef FILE_SPECIAL_ACCESS +#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS) +#endif + +// ShlObj.h: +// #define BIF_NEWDIALOGSTYLE 0x0040 + +#pragma warning(disable : 4201) +// #pragma warning(disable : 4115) + +#undef VARIANT_TRUE +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#endif + +#endif // Z7_OLD_WIN_SDK + +#ifdef UNDER_CE +#undef VARIANT_TRUE +#define VARIANT_TRUE ((VARIANT_BOOL)-1) +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER >= 1400 && _MSC_VER <= 1600 + // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed + // string.h + // #pragma warning(disable : 4514) +#endif +#endif + + +/* #include "7zTypes.h" */ + +#endif diff --git a/crnlib/lzma/Aes.c b/crnlib/lzma/Aes.c new file mode 100644 index 00000000..fc2c492d --- /dev/null +++ b/crnlib/lzma/Aes.c @@ -0,0 +1,429 @@ +/* Aes.c -- AES encryption / decryption +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" +#include "Aes.h" + +AES_CODE_FUNC g_AesCbc_Decode; +#ifndef Z7_SFX +AES_CODE_FUNC g_AesCbc_Encode; +AES_CODE_FUNC g_AesCtr_Code; +UInt32 g_Aes_SupportedFunctions_Flags; +#endif + +MY_ALIGN(64) +static UInt32 T[256 * 4]; +MY_ALIGN(64) +static const Byte Sbox[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; + + +MY_ALIGN(64) +static UInt32 D[256 * 4]; +MY_ALIGN(64) +static Byte InvS[256]; + +#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF) + +#define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24)) + +#define gb0(x) ( (x) & 0xFF) +#define gb1(x) (((x) >> ( 8)) & 0xFF) +#define gb2(x) (((x) >> (16)) & 0xFF) +#define gb3(x) (((x) >> (24))) + +#define gb(n, x) gb ## n(x) + +#define TT(x) (T + (x << 8)) +#define DD(x) (D + (x << 8)) + + +// #define Z7_SHOW_AES_STATUS + +#ifdef MY_CPU_X86_OR_AMD64 + + #if defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) + #define USE_HW_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_HW_VAES + #endif + #endif + #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400) + #define USE_HW_AES + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_HW_VAES + #endif + #elif defined(_MSC_VER) + #define USE_HW_AES + #define USE_HW_VAES + #endif + +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_AES) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_AES + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) + #define USE_HW_AES + #endif + #endif + #endif + #endif +#endif + +#ifdef USE_HW_AES +// #pragma message("=== Aes.c USE_HW_AES === ") +#ifdef Z7_SHOW_AES_STATUS +#include +#define PRF(x) x +#else +#define PRF(x) +#endif +#endif + + +void AesGenTables(void) +{ + unsigned i; + for (i = 0; i < 256; i++) + InvS[Sbox[i]] = (Byte)i; + + for (i = 0; i < 256; i++) + { + { + const UInt32 a1 = Sbox[i]; + const UInt32 a2 = xtime(a1); + const UInt32 a3 = a2 ^ a1; + TT(0)[i] = Ui32(a2, a1, a1, a3); + TT(1)[i] = Ui32(a3, a2, a1, a1); + TT(2)[i] = Ui32(a1, a3, a2, a1); + TT(3)[i] = Ui32(a1, a1, a3, a2); + } + { + const UInt32 a1 = InvS[i]; + const UInt32 a2 = xtime(a1); + const UInt32 a4 = xtime(a2); + const UInt32 a8 = xtime(a4); + const UInt32 a9 = a8 ^ a1; + const UInt32 aB = a8 ^ a2 ^ a1; + const UInt32 aD = a8 ^ a4 ^ a1; + const UInt32 aE = a8 ^ a4 ^ a2; + DD(0)[i] = Ui32(aE, a9, aD, aB); + DD(1)[i] = Ui32(aB, aE, a9, aD); + DD(2)[i] = Ui32(aD, aB, aE, a9); + DD(3)[i] = Ui32(a9, aD, aB, aE); + } + } + + { + AES_CODE_FUNC d = AesCbc_Decode; + #ifndef Z7_SFX + AES_CODE_FUNC e = AesCbc_Encode; + AES_CODE_FUNC c = AesCtr_Code; + UInt32 flags = 0; + #endif + + #ifdef USE_HW_AES + if (CPU_IsSupported_AES()) + { + // #pragma message ("AES HW") + PRF(printf("\n===AES HW\n")); + d = AesCbc_Decode_HW; + + #ifndef Z7_SFX + e = AesCbc_Encode_HW; + c = AesCtr_Code_HW; + flags = k_Aes_SupportedFunctions_HW; + #endif + + #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_HW_VAES + if (CPU_IsSupported_VAES_AVX2()) + { + PRF(printf("\n===vaes avx2\n")); + d = AesCbc_Decode_HW_256; + #ifndef Z7_SFX + c = AesCtr_Code_HW_256; + flags |= k_Aes_SupportedFunctions_HW_256; + #endif + } + #endif + #endif + } + #endif + + g_AesCbc_Decode = d; + #ifndef Z7_SFX + g_AesCbc_Encode = e; + g_AesCtr_Code = c; + g_Aes_SupportedFunctions_Flags = flags; + #endif + } +} + + +#define HT(i, x, s) TT(x)[gb(x, s[(i + x) & 3])] + +#define HT4(m, i, s, p) m[i] = \ + HT(i, 0, s) ^ \ + HT(i, 1, s) ^ \ + HT(i, 2, s) ^ \ + HT(i, 3, s) ^ w[p + i] + +#define HT16(m, s, p) \ + HT4(m, 0, s, p); \ + HT4(m, 1, s, p); \ + HT4(m, 2, s, p); \ + HT4(m, 3, s, p); \ + +#define FT(i, x) Sbox[gb(x, m[(i + x) & 3])] +#define FT4(i) dest[i] = Ui32(FT(i, 0), FT(i, 1), FT(i, 2), FT(i, 3)) ^ w[i]; + + +#define HD(i, x, s) DD(x)[gb(x, s[(i - x) & 3])] + +#define HD4(m, i, s, p) m[i] = \ + HD(i, 0, s) ^ \ + HD(i, 1, s) ^ \ + HD(i, 2, s) ^ \ + HD(i, 3, s) ^ w[p + i]; + +#define HD16(m, s, p) \ + HD4(m, 0, s, p); \ + HD4(m, 1, s, p); \ + HD4(m, 2, s, p); \ + HD4(m, 3, s, p); \ + +#define FD(i, x) InvS[gb(x, m[(i - x) & 3])] +#define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i]; + +void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) +{ + unsigned i, m; + const UInt32 *wLim; + UInt32 t; + UInt32 rcon = 1; + + keySize /= 4; + w[0] = ((UInt32)keySize / 2) + 3; + w += 4; + + for (i = 0; i < keySize; i++, key += 4) + w[i] = GetUi32(key); + + t = w[(size_t)keySize - 1]; + wLim = w + (size_t)keySize * 3 + 28; + m = 0; + do + { + if (m == 0) + { + t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]); + rcon <<= 1; + if (rcon & 0x100) + rcon = 0x1b; + m = keySize; + } + else if (m == 4 && keySize > 6) + t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]); + m--; + t ^= w[0]; + w[keySize] = t; + } + while (++w != wLim); +} + +void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) +{ + unsigned i, num; + Aes_SetKey_Enc(w, key, keySize); + num = keySize + 20; + w += 8; + for (i = 0; i < num; i++) + { + UInt32 r = w[i]; + w[i] = + DD(0)[Sbox[gb0(r)]] ^ + DD(1)[Sbox[gb1(r)]] ^ + DD(2)[Sbox[gb2(r)]] ^ + DD(3)[Sbox[gb3(r)]]; + } +} + +/* Aes_Encode and Aes_Decode functions work with little-endian words. + src and dest are pointers to 4 UInt32 words. + src and dest can point to same block */ + +// Z7_FORCE_INLINE +static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) +{ + UInt32 s[4]; + UInt32 m[4]; + UInt32 numRounds2 = w[0]; + w += 4; + s[0] = src[0] ^ w[0]; + s[1] = src[1] ^ w[1]; + s[2] = src[2] ^ w[2]; + s[3] = src[3] ^ w[3]; + w += 4; + for (;;) + { + HT16(m, s, 0) + if (--numRounds2 == 0) + break; + HT16(s, m, 4) + w += 8; + } + w += 4; + FT4(0) + FT4(1) + FT4(2) + FT4(3) +} + +Z7_FORCE_INLINE +static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src) +{ + UInt32 s[4]; + UInt32 m[4]; + UInt32 numRounds2 = w[0]; + w += 4 + numRounds2 * 8; + s[0] = src[0] ^ w[0]; + s[1] = src[1] ^ w[1]; + s[2] = src[2] ^ w[2]; + s[3] = src[3] ^ w[3]; + for (;;) + { + w -= 8; + HD16(m, s, 4) + if (--numRounds2 == 0) + break; + HD16(s, m, 0) + } + FD4(0) + FD4(1) + FD4(2) + FD4(3) +} + +void AesCbc_Init(UInt32 *p, const Byte *iv) +{ + unsigned i; + for (i = 0; i < 4; i++) + p[i] = GetUi32(iv + i * 4); +} + +void Z7_FASTCALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks) +{ + for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE) + { + p[0] ^= GetUi32(data); + p[1] ^= GetUi32(data + 4); + p[2] ^= GetUi32(data + 8); + p[3] ^= GetUi32(data + 12); + + Aes_Encode(p + 4, p, p); + + SetUi32(data, p[0]) + SetUi32(data + 4, p[1]) + SetUi32(data + 8, p[2]) + SetUi32(data + 12, p[3]) + } +} + +void Z7_FASTCALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks) +{ + UInt32 in[4], out[4]; + for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE) + { + in[0] = GetUi32(data); + in[1] = GetUi32(data + 4); + in[2] = GetUi32(data + 8); + in[3] = GetUi32(data + 12); + + Aes_Decode(p + 4, out, in); + + SetUi32(data, p[0] ^ out[0]) + SetUi32(data + 4, p[1] ^ out[1]) + SetUi32(data + 8, p[2] ^ out[2]) + SetUi32(data + 12, p[3] ^ out[3]) + + p[0] = in[0]; + p[1] = in[1]; + p[2] = in[2]; + p[3] = in[3]; + } +} + +void Z7_FASTCALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) +{ + for (; numBlocks != 0; numBlocks--) + { + UInt32 temp[4]; + unsigned i; + + if (++p[0] == 0) + p[1]++; + + Aes_Encode(p + 4, temp, p); + + for (i = 0; i < 4; i++, data += 4) + { + const UInt32 t = temp[i]; + + #ifdef MY_CPU_LE_UNALIGN + *((UInt32 *)(void *)data) ^= t; + #else + data[0] = (Byte)(data[0] ^ (t & 0xFF)); + data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF)); + data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF)); + data[3] = (Byte)(data[3] ^ ((t >> 24))); + #endif + } + } +} + +#undef xtime +#undef Ui32 +#undef gb0 +#undef gb1 +#undef gb2 +#undef gb3 +#undef gb +#undef TT +#undef DD +#undef USE_HW_AES +#undef PRF diff --git a/crnlib/lzma/Aes.h b/crnlib/lzma/Aes.h new file mode 100644 index 00000000..2373afb9 --- /dev/null +++ b/crnlib/lzma/Aes.h @@ -0,0 +1,60 @@ +/* Aes.h -- AES encryption / decryption +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_AES_H +#define ZIP7_INC_AES_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define AES_BLOCK_SIZE 16 + +/* Call AesGenTables one time before other AES functions */ +void AesGenTables(void); + +/* UInt32 pointers must be 16-byte aligned */ + +/* 16-byte (4 * 32-bit words) blocks: 1 (IV) + 1 (keyMode) + 15 (AES-256 roundKeys) */ +#define AES_NUM_IVMRK_WORDS ((1 + 1 + 15) * 4) + +/* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */ +/* keySize = 16 or 24 or 32 (bytes) */ +typedef void (Z7_FASTCALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize); +void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize); +void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize); + +/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */ +void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */ + +/* data - 16-byte aligned pointer to data */ +/* numBlocks - the number of 16-byte blocks in data array */ +typedef void (Z7_FASTCALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks); + +extern AES_CODE_FUNC g_AesCbc_Decode; +#ifndef Z7_SFX +extern AES_CODE_FUNC g_AesCbc_Encode; +extern AES_CODE_FUNC g_AesCtr_Code; +#define k_Aes_SupportedFunctions_HW (1 << 2) +#define k_Aes_SupportedFunctions_HW_256 (1 << 3) +extern UInt32 g_Aes_SupportedFunctions_Flags; +#endif + + +#define Z7_DECLARE_AES_CODE_FUNC(funcName) \ + void Z7_FASTCALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks); + +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode) +Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code) + +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode_HW) +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW) +Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW) + +Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW_256) +Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW_256) + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/AesOpt.c b/crnlib/lzma/AesOpt.c new file mode 100644 index 00000000..99fc507a --- /dev/null +++ b/crnlib/lzma/AesOpt.c @@ -0,0 +1,1002 @@ +/* AesOpt.c -- AES optimized code for x86 AES hardware instructions +Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Aes.h" +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + + #if defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) + #define USE_INTEL_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_INTEL_VAES + #endif + #endif + #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400) + #define USE_INTEL_AES + #if !defined(__AES__) + #define ATTRIB_AES __attribute__((__target__("aes"))) + #endif + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_INTEL_VAES + #if !defined(__AES__) || !defined(__VAES__) || !defined(__AVX__) || !defined(__AVX2__) + #define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx,avx2"))) + #endif + #endif + #elif defined(_MSC_VER) + #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) + #define USE_INTEL_AES + #if (_MSC_VER >= 1910) + #define USE_INTEL_VAES + #endif + #endif + #ifndef USE_INTEL_AES + #define Z7_USE_AES_HW_STUB + #endif + #ifndef USE_INTEL_VAES + #define Z7_USE_VAES_HW_STUB + #endif + #endif + + #ifndef USE_INTEL_AES + // #define Z7_USE_AES_HW_STUB // for debug + #endif + #ifndef USE_INTEL_VAES + // #define Z7_USE_VAES_HW_STUB // for debug + #endif + + +#ifdef USE_INTEL_AES + +#include + +#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB) +#define AES_TYPE_keys UInt32 +#define AES_TYPE_data Byte +// #define AES_TYPE_keys __m128i +// #define AES_TYPE_data __m128i +#endif + +#ifndef ATTRIB_AES + #define ATTRIB_AES +#endif + +#define AES_FUNC_START(name) \ + void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks) + // void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks) + +#define AES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_AES \ +AES_FUNC_START (name) + +#define MM_OP(op, dest, src) dest = op(dest, src); +#define MM_OP_m(op, src) MM_OP(op, m, src) + +#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src) + +#if 1 +// use aligned SSE load/store for data. +// It is required for our Aes functions, that data is aligned for 16-bytes. +// So we can use this branch of code. +// and compiler can use fused load-op SSE instructions: +// xorps xmm0, XMMWORD PTR [rdx] +#define LOAD_128(pp) (*(__m128i *)(void *)(pp)) +#define STORE_128(pp, _v) *(__m128i *)(void *)(pp) = _v +// use aligned SSE load/store for data. Alternative code with direct access +// #define LOAD_128(pp) _mm_load_si128(pp) +// #define STORE_128(pp, _v) _mm_store_si128(pp, _v) +#else +// use unaligned load/store for data: movdqu XMMWORD PTR [rdx] +#define LOAD_128(pp) _mm_loadu_si128(pp) +#define STORE_128(pp, _v) _mm_storeu_si128(pp, _v) +#endif + +AES_FUNC_START2 (AesCbc_Encode_HW) +{ + if (numBlocks == 0) + return; + { + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; + __m128i m = *p; + const __m128i k0 = p[2]; + const __m128i k1 = p[3]; + const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + do + { + UInt32 r = numRounds2; + const __m128i *w = p + 4; + __m128i temp = LOAD_128(data); + MM_XOR (temp, k0) + MM_XOR (m, temp) + MM_OP_m (_mm_aesenc_si128, k1) + do + { + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenc_si128, w[1]) + w += 2; + } + while (--r); + MM_OP_m (_mm_aesenclast_si128, w[0]) + STORE_128(data, m); + data++; + } + while (--numBlocks); + *p = m; + } +} + + +#define WOP_1(op) +#define WOP_2(op) WOP_1 (op) op (m1, 1) +#define WOP_3(op) WOP_2 (op) op (m2, 2) +#define WOP_4(op) WOP_3 (op) op (m3, 3) +#ifdef MY_CPU_AMD64 +#define WOP_5(op) WOP_4 (op) op (m4, 4) +#define WOP_6(op) WOP_5 (op) op (m5, 5) +#define WOP_7(op) WOP_6 (op) op (m6, 6) +#define WOP_8(op) WOP_7 (op) op (m7, 7) +#endif +/* +#define WOP_9(op) WOP_8 (op) op (m8, 8); +#define WOP_10(op) WOP_9 (op) op (m9, 9); +#define WOP_11(op) WOP_10(op) op (m10, 10); +#define WOP_12(op) WOP_11(op) op (m11, 11); +#define WOP_13(op) WOP_12(op) op (m12, 12); +#define WOP_14(op) WOP_13(op) op (m13, 13); +*/ + +#ifdef MY_CPU_AMD64 + #define NUM_WAYS 8 + #define WOP_M1 WOP_8 +#else + #define NUM_WAYS 4 + #define WOP_M1 WOP_4 +#endif + +#define WOP(op) op (m0, 0) WOP_M1(op) + +#define DECLARE_VAR(reg, ii) __m128i reg; +#define LOAD_data_ii(ii) LOAD_128(data + (ii)) +#define LOAD_data( reg, ii) reg = LOAD_data_ii(ii); +#define STORE_data( reg, ii) STORE_128(data + (ii), reg); +#if (NUM_WAYS > 1) +#define XOR_data_M1(reg, ii) MM_XOR (reg, LOAD_128(data + (ii- 1))) +#endif + +#define MM_OP_key(op, reg) MM_OP(op, reg, key); + +#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg) +#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg) +#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg) +#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg) +#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) + +#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr; +#define CTR_END( reg, ii) STORE_128(data + (ii), _mm_xor_si128(reg, \ + LOAD_128 (data + (ii)))); +#define WOP_KEY(op, n) { \ + const __m128i key = w[n]; \ + WOP(op) } + +#define WIDE_LOOP_START \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS) \ + { dataEnd -= NUM_WAYS; do { \ + +#define WIDE_LOOP_END \ + data += NUM_WAYS; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS; } \ + +#define SINGLE_LOOP \ + for (; data < dataEnd; data++) + + + +#ifdef USE_INTEL_VAES + +#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src) +#define AVX_DECLARE_VAR(reg, ii) __m256i reg; + +#if 1 +// use unaligned AVX load/store for data. +// It is required for our Aes functions, that data is aligned for 16-bytes. +// But we need 32-bytes reading. +// So we use intrinsics for unaligned AVX load/store. +// notes for _mm256_storeu_si256: +// msvc2022: uses vmovdqu and keeps the order of instruction sequence. +// new gcc11 uses vmovdqu +// old gcc9 could use pair of instructions: +// vmovups %xmm7, -224(%rax) +// vextracti128 $0x1, %ymm7, -208(%rax) +#define AVX_LOAD(p) _mm256_loadu_si256((const __m256i *)(const void *)(p)) +#define AVX_STORE(p, _v) _mm256_storeu_si256((__m256i *)(void *)(p), _v); +#else +// use aligned AVX load/store for data. +// for debug: we can use this branch, if we are sure that data is aligned for 32-bytes. +// msvc2022 uses vmovdqu still +// gcc uses vmovdqa (that requires 32-bytes alignment) +#define AVX_LOAD(p) (*(const __m256i *)(const void *)(p)) +#define AVX_STORE(p, _v) (*(__m256i *)(void *)(p)) = _v; +#endif + +#define AVX_LOAD_data( reg, ii) reg = AVX_LOAD((const __m256i *)(const void *)data + (ii)); +#define AVX_STORE_data( reg, ii) AVX_STORE((__m256i *)(void *)data + (ii), reg) +/* +AVX_XOR_data_M1() needs unaligned memory load, even if (data) +is aligned for 256-bits, because we read 32-bytes chunk that +crosses (data) position: from (data - 16bytes) to (data + 16bytes). +*/ +#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, _mm256_loadu_si256((const __m256i *)(const void *)(data - 1) + (ii))) + +#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) +#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) +#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) +#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) +#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) +#define AVX_CTR_START(reg, ii) \ + MM_OP (_mm256_add_epi64, ctr2, two) \ + reg = _mm256_xor_si256(ctr2, key); + +#define AVX_CTR_END(reg, ii) \ + AVX_STORE((__m256i *)(void *)data + (ii), _mm256_xor_si256(reg, \ + AVX_LOAD ((__m256i *)(void *)data + (ii)))); + +#define AVX_WOP_KEY(op, n) { \ + const __m256i key = w[n]; \ + WOP(op) } + +#define NUM_AES_KEYS_MAX 15 + +#define WIDE_LOOP_START_AVX(OP) \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS * 2) \ + { __m256i keys[NUM_AES_KEYS_MAX]; \ + OP \ + { UInt32 ii; for (ii = 0; ii < numRounds; ii++) \ + keys[ii] = _mm256_broadcastsi128_si256(p[ii]); } \ + dataEnd -= NUM_WAYS * 2; \ + do { \ + +#define WIDE_LOOP_END_AVX(OP) \ + data += NUM_WAYS * 2; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS * 2; \ + OP \ + _mm256_zeroupper(); \ + } \ + +/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified, + MSVC still can insert vzeroupper instruction. */ + +#endif + + + +AES_FUNC_START2 (AesCbc_Decode_HW) +{ + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; + __m128i iv = *p; + const __m128i * const wStart = p + (size_t)*(const UInt32 *)(p + 1) * 2 + 2 - 1; + const __m128i *dataEnd; + p += 2; + + WIDE_LOOP_START + { + const __m128i *w = wStart; + WOP (DECLARE_VAR) + WOP (LOAD_data) + WOP_KEY (AES_XOR, 1) + do + { + WOP_KEY (AES_DEC, 0) + + w--; + } + while (w != p); + WOP_KEY (AES_DEC_LAST, 0) + + MM_XOR (m0, iv) + WOP_M1 (XOR_data_M1) + LOAD_data(iv, NUM_WAYS - 1) + WOP (STORE_data) + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const __m128i *w = wStart - 1; + __m128i m = _mm_xor_si128 (w[2], LOAD_data_ii(0)); + + do + { + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdec_si128, w[0]) + w -= 2; + } + while (w != p); + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdeclast_si128, w[0]) + MM_XOR (m, iv) + LOAD_data(iv, 0) + STORE_data(m, 0) + } + + p[-2] = iv; +} + + +AES_FUNC_START2 (AesCtr_Code_HW) +{ + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; + __m128i ctr = *p; + const UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1; + const __m128i *dataEnd; + const __m128i one = _mm_cvtsi32_si128(1); + + p += 2; + + WIDE_LOOP_START + { + const __m128i *w = p; + UInt32 r = numRoundsMinus2; + WOP (DECLARE_VAR) + WOP (CTR_START) + WOP_KEY (AES_XOR, 0) + w += 1; + do + { + WOP_KEY (AES_ENC, 0) + w += 1; + } + while (--r); + WOP_KEY (AES_ENC_LAST, 0) + WOP (CTR_END) + } + WIDE_LOOP_END + + SINGLE_LOOP + { + UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; + const __m128i *w = p; + __m128i m; + MM_OP (_mm_add_epi64, ctr, one) + m = _mm_xor_si128 (ctr, p[0]); + w += 1; + do + { + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenc_si128, w[1]) + w += 2; + } + while (--numRounds2); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenclast_si128, w[1]) + CTR_END (m, 0) + } + + p[-2] = ctr; +} + + + +#ifdef USE_INTEL_VAES + +/* +GCC before 2013-Jun: + : + #ifdef __AVX__ + #include + #endif +GCC after 2013-Jun: + : + #include +CLANG 3.8+: +{ + : + #if !defined(_MSC_VER) || defined(__AVX__) + #include + #endif + + if (the compiler is clang for Windows and if global arch is not set for __AVX__) + [ if (defined(_MSC_VER) && !defined(__AVX__)) ] + { + doesn't include + and we have 2 ways to fix it: + 1) we can define required __AVX__ before + or + 2) we can include after + } +} + +If we include manually for GCC/CLANG, it's +required that must be included before . +*/ + +/* +#if defined(__clang__) && defined(_MSC_VER) +#define __AVX__ +#define __AVX2__ +#define __VAES__ +#endif +*/ + +#include +#if defined(__clang__) && defined(_MSC_VER) + #if !defined(__AVX__) + #include + #endif + #if !defined(__AVX2__) + #include + #endif + #if !defined(__VAES__) + #include + #endif +#endif // __clang__ && _MSC_VER + +#ifndef ATTRIB_VAES + #define ATTRIB_VAES +#endif + +#define VAES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_VAES \ +AES_FUNC_START (name) + +VAES_FUNC_START2 (AesCbc_Decode_HW_256) +{ + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; + __m128i iv = *p; + const __m128i *dataEnd; + const UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; + p += 2; + + WIDE_LOOP_START_AVX(;) + { + const __m256i *w = keys + numRounds - 2; + + WOP (AVX_DECLARE_VAR) + WOP (AVX_LOAD_data) + AVX_WOP_KEY (AVX_AES_XOR, 1) + + do + { + AVX_WOP_KEY (AVX_AES_DEC, 0) + w--; + } + while (w != keys); + AVX_WOP_KEY (AVX_AES_DEC_LAST, 0) + + AVX_XOR (m0, _mm256_setr_m128i(iv, LOAD_data_ii(0))) + WOP_M1 (AVX_XOR_data_M1) + LOAD_data (iv, NUM_WAYS * 2 - 1) + WOP (AVX_STORE_data) + } + WIDE_LOOP_END_AVX(;) + + SINGLE_LOOP + { + const __m128i *w = p - 2 + (size_t)*(const UInt32 *)(p + 1 - 2) * 2; + __m128i m = _mm_xor_si128 (w[2], LOAD_data_ii(0)); + do + { + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdec_si128, w[0]) + w -= 2; + } + while (w != p); + MM_OP_m (_mm_aesdec_si128, w[1]) + MM_OP_m (_mm_aesdeclast_si128, w[0]) + + MM_XOR (m, iv) + LOAD_data(iv, 0) + STORE_data(m, 0) + } + + p[-2] = iv; +} + + +/* +SSE2: _mm_cvtsi32_si128 : movd +AVX: _mm256_setr_m128i : vinsertf128 +AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm + _mm256_extracti128_si256 : vextracti128 + _mm256_broadcastsi128_si256 : vbroadcasti128 +*/ + +#define AVX_CTR_LOOP_START \ + ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \ + two = _mm256_setr_m128i(one, one); \ + two = _mm256_add_epi64(two, two); \ + +// two = _mm256_setr_epi64x(2, 0, 2, 0); + +#define AVX_CTR_LOOP_ENC \ + ctr = _mm256_extracti128_si256 (ctr2, 1); \ + +VAES_FUNC_START2 (AesCtr_Code_HW_256) +{ + __m128i *p = (__m128i *)(void *)ivAes; + __m128i *data = (__m128i *)(void *)data8; + __m128i ctr = *p; + const UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; + const __m128i *dataEnd; + const __m128i one = _mm_cvtsi32_si128(1); + __m256i ctr2, two; + p += 2; + + WIDE_LOOP_START_AVX (AVX_CTR_LOOP_START) + { + const __m256i *w = keys; + UInt32 r = numRounds - 2; + WOP (AVX_DECLARE_VAR) + AVX_WOP_KEY (AVX_CTR_START, 0) + + w += 1; + do + { + AVX_WOP_KEY (AVX_AES_ENC, 0) + w += 1; + } + while (--r); + AVX_WOP_KEY (AVX_AES_ENC_LAST, 0) + + WOP (AVX_CTR_END) + } + WIDE_LOOP_END_AVX (AVX_CTR_LOOP_ENC) + + SINGLE_LOOP + { + UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; + const __m128i *w = p; + __m128i m; + MM_OP (_mm_add_epi64, ctr, one) + m = _mm_xor_si128 (ctr, p[0]); + w += 1; + do + { + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenc_si128, w[1]) + w += 2; + } + while (--numRounds2); + MM_OP_m (_mm_aesenc_si128, w[0]) + MM_OP_m (_mm_aesenclast_si128, w[1]) + CTR_END (m, 0) + } + + p[-2] = ctr; +} + +#endif // USE_INTEL_VAES + +#else // USE_INTEL_AES + +/* no USE_INTEL_AES */ + +#if defined(Z7_USE_AES_HW_STUB) +// We can compile this file with another C compiler, +// or we can compile asm version. +// So we can generate real code instead of this stub function. +// #if defined(_MSC_VER) +#pragma message("AES HW_SW stub was used") +// #endif + +#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB) +#define AES_TYPE_keys UInt32 +#define AES_TYPE_data Byte +#endif + +#define AES_FUNC_START(name) \ + void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \ + +#define AES_COMPAT_STUB(name) \ + AES_FUNC_START(name); \ + AES_FUNC_START(name ## _HW) \ + { name(p, data, numBlocks); } + +AES_COMPAT_STUB (AesCbc_Encode) +AES_COMPAT_STUB (AesCbc_Decode) +AES_COMPAT_STUB (AesCtr_Code) +#endif // Z7_USE_AES_HW_STUB + +#endif // USE_INTEL_AES + + +#ifndef USE_INTEL_VAES +#if defined(Z7_USE_VAES_HW_STUB) +// #if defined(_MSC_VER) +#pragma message("VAES HW_SW stub was used") +// #endif + +#define VAES_COMPAT_STUB(name) \ + void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ + void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \ + { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); } + +VAES_COMPAT_STUB (AesCbc_Decode_HW) +VAES_COMPAT_STUB (AesCtr_Code_HW) +#endif +#endif // ! USE_INTEL_VAES + + + + +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_AES) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_AES + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) + #define USE_HW_AES + #endif + #endif + #endif + #endif + +#ifdef USE_HW_AES + +// #pragma message("=== AES HW === ") +// __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_AES + +#if defined(__clang__) || defined(__GNUC__) +#if !defined(__ARM_FEATURE_AES) && \ + !defined(__ARM_FEATURE_CRYPTO) + #ifdef MY_CPU_ARM64 +#if defined(__clang__) + #define ATTRIB_AES __attribute__((__target__("crypto"))) +#else + #define ATTRIB_AES __attribute__((__target__("+crypto"))) +#endif + #else +#if defined(__clang__) + #define ATTRIB_AES __attribute__((__target__("armv8-a,aes"))) +#else + #define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) +#endif + #endif +#endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#ifndef ATTRIB_AES + #define ATTRIB_AES +#endif + +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) +#include +#else +/* + clang-17.0.1: error : Cannot select: intrinsic %llvm.arm.neon.aese + clang + 3.8.1 : __ARM_NEON : defined(__ARM_FEATURE_CRYPTO) + 7.0.1 : __ARM_NEON : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO) + 11.?.0 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO) + 13.0.1 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES) + 16 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 +*/ +#if defined(__clang__) && __clang_major__ < 16 +#if !defined(__ARM_FEATURE_AES) && \ + !defined(__ARM_FEATURE_CRYPTO) +// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 +// #if defined(__clang__) && __clang_major__ < 13 + #define __ARM_FEATURE_CRYPTO 1 +// #else + #define __ARM_FEATURE_AES 1 +// #endif + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif // clang + +#if defined(__clang__) + +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") + #undef __ARM_ARCH + #define __ARM_ARCH 8 + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // clang + +#include + +#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ + defined(__ARM_FEATURE_CRYPTO) && \ + defined(__ARM_FEATURE_AES) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #undef __ARM_FEATURE_CRYPTO + #undef __ARM_FEATURE_AES + #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") +#endif + +#endif // Z7_MSC_VER_ORIGINAL + +typedef uint8x16_t v128; + +#define AES_FUNC_START(name) \ + void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks) + // void Z7_FASTCALL name(v128 *p, v128 *data, size_t numBlocks) + +#define AES_FUNC_START2(name) \ +AES_FUNC_START (name); \ +ATTRIB_AES \ +AES_FUNC_START (name) + +#define MM_OP(op, dest, src) dest = op(dest, src); +#define MM_OP_m(op, src) MM_OP(op, m, src) +#define MM_OP1_m(op) m = op(m); + +#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src) +#define MM_XOR_m( src) MM_XOR(m, src) + +#define AES_E_m(k) MM_OP_m (vaeseq_u8, k) +#define AES_E_MC_m(k) AES_E_m (k) MM_OP1_m(vaesmcq_u8) + + +AES_FUNC_START2 (AesCbc_Encode_HW) +{ + if (numBlocks == 0) + return; + { + v128 * const p = (v128 *)(void *)ivAes; + v128 *data = (v128 *)(void *)data8; + v128 m = *p; + const UInt32 numRounds2 = *(const UInt32 *)(p + 1); + const v128 *w = p + (size_t)numRounds2 * 2; + const v128 k0 = p[2]; + const v128 k1 = p[3]; + const v128 k2 = p[4]; + const v128 k3 = p[5]; + const v128 k4 = p[6]; + const v128 k5 = p[7]; + const v128 k6 = p[8]; + const v128 k7 = p[9]; + const v128 k8 = p[10]; + const v128 k9 = p[11]; + const v128 k_z4 = w[-2]; + const v128 k_z3 = w[-1]; + const v128 k_z2 = w[0]; + const v128 k_z1 = w[1]; + const v128 k_z0 = w[2]; + // we don't use optimization veorq_u8(*data, k_z0) that can reduce one cycle, + // because gcc/clang compilers are not good for that optimization. + do + { + MM_XOR_m (*data) + AES_E_MC_m (k0) + AES_E_MC_m (k1) + AES_E_MC_m (k2) + AES_E_MC_m (k3) + AES_E_MC_m (k4) + AES_E_MC_m (k5) + if (numRounds2 >= 6) + { + AES_E_MC_m (k6) + AES_E_MC_m (k7) + if (numRounds2 != 6) + { + AES_E_MC_m (k8) + AES_E_MC_m (k9) + } + } + AES_E_MC_m (k_z4) + AES_E_MC_m (k_z3) + AES_E_MC_m (k_z2) + AES_E_m (k_z1) + MM_XOR_m (k_z0) + *data++ = m; + } + while (--numBlocks); + *p = m; + } +} + + +#define WOP_1(op) +#define WOP_2(op) WOP_1 (op) op (m1, 1) +#define WOP_3(op) WOP_2 (op) op (m2, 2) +#define WOP_4(op) WOP_3 (op) op (m3, 3) +#define WOP_5(op) WOP_4 (op) op (m4, 4) +#define WOP_6(op) WOP_5 (op) op (m5, 5) +#define WOP_7(op) WOP_6 (op) op (m6, 6) +#define WOP_8(op) WOP_7 (op) op (m7, 7) + + #define NUM_WAYS 8 + #define WOP_M1 WOP_8 + +#define WOP(op) op (m0, 0) WOP_M1(op) + +#define DECLARE_VAR(reg, ii) v128 reg; +#define LOAD_data( reg, ii) reg = data[ii]; +#define STORE_data( reg, ii) data[ii] = reg; +#if (NUM_WAYS > 1) +#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]) +#endif + +#define MM_OP_key(op, reg) MM_OP (op, reg, key) + +#define AES_D_m(k) MM_OP_m (vaesdq_u8, k) +#define AES_D_IMC_m(k) AES_D_m (k) MM_OP1_m (vaesimcq_u8) + +#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg) +#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg) +#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg) + +#define AES_D_IMC( reg, ii) AES_D (reg, ii) reg = vaesimcq_u8(reg); +#define AES_E_MC( reg, ii) AES_E (reg, ii) reg = vaesmcq_u8(reg); + +#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one) reg = vreinterpretq_u8_u64(ctr); +#define CTR_END( reg, ii) MM_XOR (data[ii], reg) + +#define WOP_KEY(op, n) { \ + const v128 key = w[n]; \ + WOP(op) } + +#define WIDE_LOOP_START \ + dataEnd = data + numBlocks; \ + if (numBlocks >= NUM_WAYS) \ + { dataEnd -= NUM_WAYS; do { \ + +#define WIDE_LOOP_END \ + data += NUM_WAYS; \ + } while (data <= dataEnd); \ + dataEnd += NUM_WAYS; } \ + +#define SINGLE_LOOP \ + for (; data < dataEnd; data++) + + +AES_FUNC_START2 (AesCbc_Decode_HW) +{ + v128 *p = (v128 *)(void *)ivAes; + v128 *data = (v128 *)(void *)data8; + v128 iv = *p; + const v128 * const wStart = p + (size_t)*(const UInt32 *)(p + 1) * 2; + const v128 *dataEnd; + p += 2; + + WIDE_LOOP_START + { + const v128 *w = wStart; + WOP (DECLARE_VAR) + WOP (LOAD_data) + WOP_KEY (AES_D_IMC, 2) + do + { + WOP_KEY (AES_D_IMC, 1) + WOP_KEY (AES_D_IMC, 0) + w -= 2; + } + while (w != p); + WOP_KEY (AES_D, 1) + WOP_KEY (AES_XOR, 0) + MM_XOR (m0, iv) + WOP_M1 (XOR_data_M1) + LOAD_data(iv, NUM_WAYS - 1) + WOP (STORE_data) + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const v128 *w = wStart; + v128 m; LOAD_data(m, 0) + AES_D_IMC_m (w[2]) + do + { + AES_D_IMC_m (w[1]) + AES_D_IMC_m (w[0]) + w -= 2; + } + while (w != p); + AES_D_m (w[1]) + MM_XOR_m (w[0]) + MM_XOR_m (iv) + LOAD_data(iv, 0) + STORE_data(m, 0) + } + + p[-2] = iv; +} + + +AES_FUNC_START2 (AesCtr_Code_HW) +{ + v128 *p = (v128 *)(void *)ivAes; + v128 *data = (v128 *)(void *)data8; + uint64x2_t ctr = vreinterpretq_u64_u8(*p); + const v128 * const wEnd = p + (size_t)*(const UInt32 *)(p + 1) * 2; + const v128 *dataEnd; +// the bug in clang: +// __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2); +#if defined(__clang__) && (__clang_major__ <= 9) +#pragma GCC diagnostic ignored "-Wvector-conversion" +#endif + const uint64x2_t one = vsetq_lane_u64(1, vdupq_n_u64(0), 0); + p += 2; + + WIDE_LOOP_START + { + const v128 *w = p; + WOP (DECLARE_VAR) + WOP (CTR_START) + do + { + WOP_KEY (AES_E_MC, 0) + WOP_KEY (AES_E_MC, 1) + w += 2; + } + while (w != wEnd); + WOP_KEY (AES_E_MC, 0) + WOP_KEY (AES_E, 1) + WOP_KEY (AES_XOR, 2) + WOP (CTR_END) + } + WIDE_LOOP_END + + SINGLE_LOOP + { + const v128 *w = p; + v128 m; + CTR_START (m, 0) + do + { + AES_E_MC_m (w[0]) + AES_E_MC_m (w[1]) + w += 2; + } + while (w != wEnd); + AES_E_MC_m (w[0]) + AES_E_m (w[1]) + MM_XOR_m (w[2]) + CTR_END (m, 0) + } + + p[-2] = vreinterpretq_u8_u64(ctr); +} + +#endif // USE_HW_AES + +#endif // MY_CPU_ARM_OR_ARM64 + +#undef NUM_WAYS +#undef WOP_M1 +#undef WOP +#undef DECLARE_VAR +#undef LOAD_data +#undef STORE_data +#undef USE_INTEL_AES +#undef USE_HW_AES diff --git a/crnlib/lzma/Alloc.c b/crnlib/lzma/Alloc.c new file mode 100644 index 00000000..55251503 --- /dev/null +++ b/crnlib/lzma/Alloc.c @@ -0,0 +1,605 @@ +/* Alloc.c -- Memory allocation functions +2024-02-18 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#ifdef _WIN32 +#include "7zWindows.h" +#endif +#include + +#include "Alloc.h" + +#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \ + (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64) + #define Z7_USE_DYN_GetLargePageMinimum +#endif + +// for debug: +#if 0 +#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ") +#define Z7_ALLOC_NO_OFFSET_ALLOCATOR +#endif +#endif + +// #define SZ_ALLOC_DEBUG +/* #define SZ_ALLOC_DEBUG */ + +/* use SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef SZ_ALLOC_DEBUG + +#include +#include +static int g_allocCount = 0; +#ifdef _WIN32 +static int g_allocCountMid = 0; +static int g_allocCountBig = 0; +#endif + + +#define CONVERT_INT_TO_STR(charType, tempSize) \ + char temp[tempSize]; unsigned i = 0; \ + while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \ + *s++ = (charType)('0' + (unsigned)val); \ + while (i != 0) { i--; *s++ = temp[i]; } \ + *s = 0; + +static void ConvertUInt64ToString(UInt64 val, char *s) +{ + CONVERT_INT_TO_STR(char, 24) +} + +#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) + +static void ConvertUInt64ToHex(UInt64 val, char *s) +{ + UInt64 v = val; + unsigned i; + for (i = 1;; i++) + { + v >>= 4; + if (v == 0) + break; + } + s[i] = 0; + do + { + unsigned t = (unsigned)(val & 0xF); + val >>= 4; + s[--i] = GET_HEX_CHAR(t); + } + while (i); +} + +#define DEBUG_OUT_STREAM stderr + +static void Print(const char *s) +{ + fputs(s, DEBUG_OUT_STREAM); +} + +static void PrintAligned(const char *s, size_t align) +{ + size_t len = strlen(s); + for(;;) + { + fputc(' ', DEBUG_OUT_STREAM); + if (len >= align) + break; + ++len; + } + Print(s); +} + +static void PrintLn(void) +{ + Print("\n"); +} + +static void PrintHex(UInt64 v, size_t align) +{ + char s[32]; + ConvertUInt64ToHex(v, s); + PrintAligned(s, align); +} + +static void PrintDec(int v, size_t align) +{ + char s[32]; + ConvertUInt64ToString((unsigned)v, s); + PrintAligned(s, align); +} + +static void PrintAddr(void *p) +{ + PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12); +} + + +#define PRINT_REALLOC(name, cnt, size, ptr) { \ + Print(name " "); \ + if (!ptr) PrintDec(cnt++, 10); \ + PrintHex(size, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#define PRINT_ALLOC(name, cnt, size, ptr) { \ + Print(name " "); \ + PrintDec(cnt++, 10); \ + PrintHex(size, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#define PRINT_FREE(name, cnt, ptr) if (ptr) { \ + Print(name " "); \ + PrintDec(--cnt, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#else + +#ifdef _WIN32 +#define PRINT_ALLOC(name, cnt, size, ptr) +#endif +#define PRINT_FREE(name, cnt, ptr) +#define Print(s) +#define PrintLn() +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#define PrintHex(v, align) +#endif +#define PrintAddr(p) + +#endif + + +/* +by specification: + malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free() + realloc(NULL, size) : the call is equivalent to malloc(size) + realloc(non_NULL, 0) : the call is equivalent to free(ptr) + +in main compilers: + malloc(0) : returns non_NULL + realloc(NULL, 0) : returns non_NULL + realloc(non_NULL, 0) : returns NULL +*/ + + +void *MyAlloc(size_t size) +{ + if (size == 0) + return NULL; + // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL) + #ifdef SZ_ALLOC_DEBUG + { + void *p = malloc(size); + if (p) + { + PRINT_ALLOC("Alloc ", g_allocCount, size, p) + } + return p; + } + #else + return malloc(size); + #endif +} + +void MyFree(void *address) +{ + PRINT_FREE("Free ", g_allocCount, address) + + free(address); +} + +void *MyRealloc(void *address, size_t size) +{ + if (size == 0) + { + MyFree(address); + return NULL; + } + // PRINT_REALLOC("Realloc ", g_allocCount, size, address) + #ifdef SZ_ALLOC_DEBUG + { + void *p = realloc(address, size); + if (p) + { + PRINT_REALLOC("Realloc ", g_allocCount, size, address) + } + return p; + } + #else + return realloc(address, size); + #endif +} + + +#ifdef _WIN32 + +void *MidAlloc(size_t size) +{ + if (size == 0) + return NULL; + #ifdef SZ_ALLOC_DEBUG + { + void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + if (p) + { + PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p) + } + return p; + } + #else + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + #endif +} + +void MidFree(void *address) +{ + PRINT_FREE("Free-Mid", g_allocCountMid, address) + + if (!address) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#ifdef Z7_LARGE_PAGES + +#ifdef MEM_LARGE_PAGES + #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES +#else + #define MY_MEM_LARGE_PAGES 0x20000000 +#endif + +extern +SIZE_T g_LargePageSize; +SIZE_T g_LargePageSize = 0; +typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); + +void SetLargePageSize(void) +{ + SIZE_T size; +#ifdef Z7_USE_DYN_GetLargePageMinimum +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + + const + Func_GetLargePageMinimum fn = + (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetLargePageMinimum"); + if (!fn) + return; + size = fn(); +#else + size = GetLargePageMinimum(); +#endif + if (size == 0 || (size & (size - 1)) != 0) + return; + g_LargePageSize = size; +} + +#endif // Z7_LARGE_PAGES + +void *BigAlloc(size_t size) +{ + if (size == 0) + return NULL; + + PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL) + + #ifdef Z7_LARGE_PAGES + { + SIZE_T ps = g_LargePageSize; + if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) + { + size_t size2; + ps--; + size2 = (size + ps) & ~ps; + if (size2 >= size) + { + void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE); + if (p) + { + PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) + return p; + } + } + } + } + #endif + + return MidAlloc(size); +} + +void BigFree(void *address) +{ + PRINT_FREE("Free-Big", g_allocCountBig, address) + MidFree(address); +} + +#endif // _WIN32 + + +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); } +const ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +#ifdef _WIN32 +static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); } +static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); } +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); } +const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; +const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; +#endif + +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR + +#define ADJUST_ALLOC_SIZE 0 +/* +#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) +*/ +/* + Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if + MyAlloc() can return address that is NOT multiple of sizeof(void *). +*/ + +/* + uintptr_t : C99 (optional) + : unsupported in VS6 +*/ +typedef + #ifdef _WIN32 + UINT_PTR + #elif 1 + uintptr_t + #else + ptrdiff_t + #endif + MY_uintptr_t; + +#if 0 \ + || (defined(__CHERI__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8)) +// for 128-bit pointers (cheri): +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1)))) +#else +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1)))) +#endif + +#endif + +#if !defined(_WIN32) \ + && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \ + || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) + #define USE_posix_memalign +#endif + +#ifndef USE_posix_memalign +#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) +#endif + +/* + This posix_memalign() is for test purposes only. + We also need special Free() function instead of free(), + if this posix_memalign() is used. +*/ + +/* +static int posix_memalign(void **ptr, size_t align, size_t size) +{ + size_t newSize = size + align; + void *p; + void *pAligned; + *ptr = NULL; + if (newSize < size) + return 12; // ENOMEM + p = MyAlloc(newSize); + if (!p) + return 12; // ENOMEM + pAligned = MY_ALIGN_PTR_UP_PLUS(p, align); + ((void **)pAligned)[-1] = p; + *ptr = pAligned; + return 0; +} +*/ + +/* + ALLOC_ALIGN_SIZE >= sizeof(void *) + ALLOC_ALIGN_SIZE >= cache_line_size +*/ + +#define ALLOC_ALIGN_SIZE ((size_t)1 << 7) + +void *z7_AlignedAlloc(size_t size) +{ +#ifndef USE_posix_memalign + + void *p; + void *pAligned; + size_t newSize; + + /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned + block to prevent cache line sharing with another allocated blocks */ + + newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE; + if (newSize < size) + return NULL; + + p = MyAlloc(newSize); + + if (!p) + return NULL; + pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE); + + Print(" size="); PrintHex(size, 8); + Print(" a_size="); PrintHex(newSize, 8); + Print(" ptr="); PrintAddr(p); + Print(" a_ptr="); PrintAddr(pAligned); + PrintLn(); + + ((void **)pAligned)[-1] = p; + + return pAligned; + +#else + + void *p; + if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) + return NULL; + + Print(" posix_memalign="); PrintAddr(p); + PrintLn(); + + return p; + +#endif +} + + +void z7_AlignedFree(void *address) +{ +#ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); +#else + free(address); +#endif +} + + +static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +{ + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); +} + + +static void SzAlignedFree(ISzAllocPtr pp, void *address) +{ + UNUSED_VAR(pp) +#ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); +#else + free(address); +#endif +} + + +const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; + + + +/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#if 1 + #define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] +#else + // we can use this simplified code, + // if (CAlignOffsetAlloc::offset == (k * sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1]) +#endif +#endif + + +#if 0 +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#include +static void PrintPtr(const char *s, const void *p) +{ + const Byte *p2 = (const Byte *)&p; + unsigned i; + printf("%s %p ", s, p); + for (i = sizeof(p); i != 0;) + { + i--; + printf("%02x", p2[i]); + } + printf("\n"); +} +#endif +#endif + + +static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) +{ +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); +#else + const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); + void *adr; + void *pAligned; + size_t newSize; + size_t extra; + size_t alignSize = (size_t)1 << p->numAlignBits; + + if (alignSize < sizeof(void *)) + alignSize = sizeof(void *); + + if (p->offset >= alignSize) + return NULL; + + /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned + block to prevent cache line sharing with another allocated blocks */ + extra = p->offset & (sizeof(void *) - 1); + newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE; + if (newSize < size) + return NULL; + + adr = ISzAlloc_Alloc(p->baseAlloc, newSize); + + if (!adr) + return NULL; + + pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + + alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; + +#if 0 + printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size); + PrintPtr("base", adr); + PrintPtr("alig", pAligned); +#endif + + PrintLn(); + Print("- Aligned: "); + Print(" size="); PrintHex(size, 8); + Print(" a_size="); PrintHex(newSize, 8); + Print(" ptr="); PrintAddr(adr); + Print(" a_ptr="); PrintAddr(pAligned); + PrintLn(); + + REAL_BLOCK_PTR_VAR(pAligned) = adr; + + return pAligned; +#endif +} + + +static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) +{ +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + z7_AlignedFree(address); +#else + if (address) + { + const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); + PrintLn(); + Print("- Aligned Free: "); + PrintLn(); + ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); + } +#endif +} + + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p) +{ + p->vt.Alloc = AlignOffsetAlloc_Alloc; + p->vt.Free = AlignOffsetAlloc_Free; +} diff --git a/crnlib/lzma/Alloc.h b/crnlib/lzma/Alloc.h new file mode 100644 index 00000000..4c684613 --- /dev/null +++ b/crnlib/lzma/Alloc.h @@ -0,0 +1,76 @@ +/* Alloc.h -- Memory allocation functions +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_ALLOC_H +#define ZIP7_INC_ALLOC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* + MyFree(NULL) : is allowed, as free(NULL) + MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL + MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL +MyRealloc() is similar to realloc() for the following cases: + MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr) + MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed + MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed +*/ + +void *MyAlloc(size_t size); +void MyFree(void *address); +void *MyRealloc(void *address, size_t size); + +void *z7_AlignedAlloc(size_t size); +void z7_AlignedFree(void *p); + +#ifdef _WIN32 + +#ifdef Z7_LARGE_PAGES +void SetLargePageSize(void); +#endif + +void *MidAlloc(size_t size); +void MidFree(void *address); +void *BigAlloc(size_t size); +void BigFree(void *address); + +/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */ + +#else + +#define MidAlloc(size) z7_AlignedAlloc(size) +#define MidFree(address) z7_AlignedFree(address) +#define BigAlloc(size) z7_AlignedAlloc(size) +#define BigFree(address) z7_AlignedFree(address) + +#endif + +extern const ISzAlloc g_Alloc; + +#ifdef _WIN32 +extern const ISzAlloc g_BigAlloc; +extern const ISzAlloc g_MidAlloc; +#else +#define g_BigAlloc g_AlignedAlloc +#define g_MidAlloc g_AlignedAlloc +#endif + +extern const ISzAlloc g_AlignedAlloc; + + +typedef struct +{ + ISzAlloc vt; + ISzAllocPtr baseAlloc; + unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */ + size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */ +} CAlignOffsetAlloc; + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p); + + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Bcj2.c b/crnlib/lzma/Bcj2.c new file mode 100644 index 00000000..c562e47c --- /dev/null +++ b/crnlib/lzma/Bcj2.c @@ -0,0 +1,290 @@ +/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) +2023-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bcj2.h" +#include "CpuArch.h" + +#define kTopValue ((UInt32)1 << 24) +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +// UInt32 bcj2_stats[256 + 2][2]; + +void Bcj2Dec_Init(CBcj2Dec *p) +{ + unsigned i; + p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; + p->ip = 0; + p->temp = 0; + p->range = 0; + p->code = 0; + for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) + p->probs[i] = kBitModelTotal >> 1; +} + +SRes Bcj2Dec_Decode(CBcj2Dec *p) +{ + UInt32 v = p->temp; + // const Byte *src; + if (p->range <= 5) + { + UInt32 code = p->code; + p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ + for (; p->range != 5; p->range++) + { + if (p->range == 1 && code != 0) + return SZ_ERROR_DATA; + if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + return SZ_OK; + } + code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + p->code = code; + } + if (code == 0xffffffff) + return SZ_ERROR_DATA; + p->range = 0xffffffff; + } + // else + { + unsigned state = p->state; + // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop + if (BCJ2_IS_32BIT_STREAM(state)) + { + const Byte *cur = p->bufs[state]; + if (cur == p->lims[state]) + return SZ_OK; + p->bufs[state] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v = GetBe32a(cur) - ip; + p->ip = ip; + } + state = BCJ2_DEC_STATE_ORIG_0; + } + if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) + { + Byte *dest = p->dest; + for (;;) + { + if (dest == p->destLim) + { + p->state = state; + p->temp = v; + return SZ_OK; + } + *dest++ = (Byte)v; + p->dest = dest; + if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) + break; + v >>= 8; + } + } + } + + // src = p->bufs[BCJ2_STREAM_MAIN]; + for (;;) + { + /* + if (BCJ2_IS_32BIT_STREAM(p->state)) + p->state = BCJ2_DEC_STATE_OK; + else + */ + { + if (p->range < kTopValue) + { + if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + p->temp = v; + return SZ_OK; + } + p->range <<= 8; + p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + } + { + const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; + const Byte *srcLim; + Byte *dest = p->dest; + { + const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); + SizeT num = (SizeT)(p->destLim - dest); + if (num >= rem) + num = rem; + #define NUM_ITERS 4 + #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 + num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) + #else + num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) + #endif + srcLim = src + num; + } + + #define NUM_SHIFT_BITS 24 + #define ONE_ITER(indx) { \ + const unsigned b = src[indx]; \ + *dest++ = (Byte)b; \ + v = (v << NUM_SHIFT_BITS) | b; \ + if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ + if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ + ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ + /* ++dest */; /* v = b; */ } + + if (src != srcLim) + for (;;) + { + /* The dependency chain of 2-cycle for (v) calculation is not big problem here. + But we can remove dependency chain with v = b in the end of loop. */ + ONE_ITER(0) + #if (NUM_ITERS > 1) + ONE_ITER(1) + #if (NUM_ITERS > 2) + ONE_ITER(2) + #if (NUM_ITERS > 3) + ONE_ITER(3) + #if (NUM_ITERS > 4) + ONE_ITER(4) + #if (NUM_ITERS > 5) + ONE_ITER(5) + #if (NUM_ITERS > 6) + ONE_ITER(6) + #if (NUM_ITERS > 7) + ONE_ITER(7) + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + src += NUM_ITERS; + if (src == srcLim) + break; + } + + if (src == srcLim) + #if (NUM_ITERS > 1) + for (;;) + #endif + { + #if (NUM_ITERS > 1) + if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) + #endif + { + const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); + p->bufs[BCJ2_STREAM_MAIN] = src; + p->dest = dest; + p->ip += (UInt32)num; + /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + p->state = + src == p->lims[BCJ2_STREAM_MAIN] ? + (unsigned)BCJ2_STREAM_MAIN : + (unsigned)BCJ2_DEC_STATE_ORIG; + p->temp = v; + return SZ_OK; + } + #if (NUM_ITERS > 1) + ONE_ITER(0) + src++; + #endif + } + + { + const SizeT num = (SizeT)(dest - p->dest); + p->dest = dest; // p->dest += num; + p->bufs[BCJ2_STREAM_MAIN] += num; // = src; + p->ip += (UInt32)num; + } + { + UInt32 bound, ttt; + CBcj2Prob *prob; // unsigned index; + /* + prob = p->probs + (unsigned)((Byte)v == 0xe8 ? + 2 + (Byte)(v >> 8) : + ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1)); + */ + { + const unsigned c = ((v + 0x17) >> 6) & 1; + prob = p->probs + (unsigned) + (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); + // (Byte) + // 8x->0 : e9->1 : xxe8->xx+2 + // 8x->0x100 : e9->0x101 : xxe8->xx + // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); + // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); + } + ttt = *prob; + bound = (p->range >> kNumBitModelTotalBits) * ttt; + if (p->code < bound) + { + // bcj2_stats[prob - p->probs][0]++; + p->range = bound; + *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + continue; + } + { + // bcj2_stats[prob - p->probs][1]++; + p->range -= bound; + p->code -= bound; + *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); + } + } + } + } + { + /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ + // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; + const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; + const Byte *cur = p->bufs[cj]; + Byte *dest; + SizeT rem; + if (cur == p->lims[cj]) + { + p->state = cj; + break; + } + v = GetBe32a(cur); + p->bufs[cj] = cur + 4; + { + const UInt32 ip = p->ip + 4; + v -= ip; + p->ip = ip; + } + dest = p->dest; + rem = (SizeT)(p->destLim - dest); + if (rem < 4) + { + if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; + if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; + if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} + p->temp = v; + p->dest = dest + rem; + p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; + break; + } + SetUi32(dest, v) + v >>= 24; + p->dest = dest + 4; + } + } + + if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC]) + { + p->range <<= 8; + p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; + } + return SZ_OK; +} + +#undef NUM_ITERS +#undef ONE_ITER +#undef NUM_SHIFT_BITS +#undef kTopValue +#undef kNumBitModelTotalBits +#undef kBitModelTotal +#undef kNumMoveBits diff --git a/crnlib/lzma/Bcj2.h b/crnlib/lzma/Bcj2.h new file mode 100644 index 00000000..2eda0c82 --- /dev/null +++ b/crnlib/lzma/Bcj2.h @@ -0,0 +1,332 @@ +/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2) +2023-03-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BCJ2_H +#define ZIP7_INC_BCJ2_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define BCJ2_NUM_STREAMS 4 + +enum +{ + BCJ2_STREAM_MAIN, + BCJ2_STREAM_CALL, + BCJ2_STREAM_JUMP, + BCJ2_STREAM_RC +}; + +enum +{ + BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS, + BCJ2_DEC_STATE_ORIG_1, + BCJ2_DEC_STATE_ORIG_2, + BCJ2_DEC_STATE_ORIG_3, + + BCJ2_DEC_STATE_ORIG, + BCJ2_DEC_STATE_ERROR /* after detected data error */ +}; + +enum +{ + BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, + BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */ +}; + + +/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */ +#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2) + +/* +CBcj2Dec / CBcj2Enc +bufs sizes: + BUF_SIZE(n) = lims[n] - bufs[n] +bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be multiply of 4: + (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 + (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 +*/ + +// typedef UInt32 CBcj2Prob; +typedef UInt16 CBcj2Prob; + +/* +BCJ2 encoder / decoder internal requirements: + - If last bytes of stream contain marker (e8/e8/0f8x), then + there is also encoded symbol (0 : no conversion) in RC stream. + - One case of overlapped instructions is supported, + if last byte of converted instruction is (0f) and next byte is (8x): + marker [xx xx xx 0f] 8x + then the pair (0f 8x) is treated as marker. +*/ + +/* ---------- BCJ2 Decoder ---------- */ + +/* +CBcj2Dec: +(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: + bufs[BCJ2_STREAM_MAIN] >= dest && + bufs[BCJ2_STREAM_MAIN] - dest >= + BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) + reserve = bufs[BCJ2_STREAM_MAIN] - dest - + ( BUF_SIZE(BCJ2_STREAM_CALL) + + BUF_SIZE(BCJ2_STREAM_JUMP) ) + and additional conditions: + if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve != 1) : if (ver < v23.00) + } + else // if there are more than one calls of Bcj2Dec_Decode() after Bcj2Dec_Init()) + { + (reserve >= 6) : if (ver < v23.00) + (reserve >= 4) : if (ver >= v23.00) + We need that (reserve) because after first call of Bcj2Dec_Decode(), + CBcj2Dec::temp can contain up to 4 bytes for writing to (dest). + } + (reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode(). + (reserve == 0) also is allowed in case of multi-call, if we use fixed buffers, + and (reserve) is calculated from full (final) sizes of all streams before first call. +*/ + +typedef struct +{ + const Byte *bufs[BCJ2_NUM_STREAMS]; + const Byte *lims[BCJ2_NUM_STREAMS]; + Byte *dest; + const Byte *destLim; + + unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ + + UInt32 ip; /* property of starting base for decoding */ + UInt32 temp; /* Byte temp[4]; */ + UInt32 range; + UInt32 code; + CBcj2Prob probs[2 + 256]; +} CBcj2Dec; + + +/* Note: + Bcj2Dec_Init() sets (CBcj2Dec::ip = 0) + if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init() +*/ +void Bcj2Dec_Init(CBcj2Dec *p); + + +/* Bcj2Dec_Decode(): + returns: + SZ_OK + SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct +*/ +SRes Bcj2Dec_Decode(CBcj2Dec *p); + +/* To check that decoding was finished you can compare + sizes of processed streams with sizes known from another sources. + You must do at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. + additional optional checks: + - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC) + - the checks Bcj2Dec_IsMaybeFinished*() + also before actual decoding you can check that the + following condition is met for stream sizes: + ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) ) +*/ + +/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for + additional input data in BCJ2_STREAM_MAIN stream. + Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN) + +/* if the stream decoding was finished correctly, then range decoder + part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0). + Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding. +*/ +#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0) + +/* use Bcj2Dec_IsMaybeFinished() only as additional check + after at least one mandatory check from the two following options: + - the check for size of processed output (ORIG) stream. + - the check for size of processed input (MAIN) stream. +*/ +#define Bcj2Dec_IsMaybeFinished(_p_) ( \ + Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \ + Bcj2Dec_IsMaybeFinished_code(_p_)) + + + +/* ---------- BCJ2 Encoder ---------- */ + +typedef enum +{ + BCJ2_ENC_FINISH_MODE_CONTINUE, + BCJ2_ENC_FINISH_MODE_END_BLOCK, + BCJ2_ENC_FINISH_MODE_END_STREAM +} EBcj2Enc_FinishMode; + +/* + BCJ2_ENC_FINISH_MODE_CONTINUE: + process non finished encoding. + It notifies the encoder that additional further calls + can provide more input data (src) than provided by current call. + In that case the CBcj2Enc encoder still can move (src) pointer + up to (srcLim), but CBcj2Enc encoder can store some of the last + processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer. + at return: + (CBcj2Enc::src will point to position that includes + processed data and data copied to (temp[]) buffer) + That data from (temp[]) buffer will be used in further calls. + + BCJ2_ENC_FINISH_MODE_END_BLOCK: + finish encoding of current block (ended at srcLim) without RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG) && + CBcj2Enc::src == CBcj2Enc::srcLim) + : it shows that block encoding was finished. And the encoder is + ready for new (src) data or for stream finish operation. + finished block means + { + CBcj2Enc has completed block encoding up to (srcLim). + (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP cortages will + not cross block boundary at (srcLim). + temporary CBcj2Enc buffer for (ORIG) src data is empty. + 3 output uncompressed streams (MAIN, CALL, JUMP) were flushed. + RC stream was not flushed. And RC stream will cross block boundary. + } + Note: some possible implementation of BCJ2 encoder could + write branch marker (e8/e8/0f8x) in one call of Bcj2Enc_Encode(), + and it could calculate symbol for RC in another call of Bcj2Enc_Encode(). + BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol. + And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls. + So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK + to ensure that RC symbol is calculated and written in proper block. + + BCJ2_ENC_FINISH_MODE_END_STREAM + finish encoding of stream (ended at srcLim) fully including RC flushing. + at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED) + : it shows that stream encoding was finished fully, + and all output streams were flushed fully. + also Bcj2Enc_IsFinished() can be called. +*/ + + +/* + 32-bit relative offset in JUMP/CALL commands is + - (mod 4 GiB) for 32-bit x86 code + - signed Int32 for 64-bit x86-64 code + BCJ2 encoder also does internal relative to absolute address conversions. + And there are 2 possible ways to do it: + before v23: we used 32-bit variables and (mod 4 GiB) conversion + since v23: we use 64-bit variables and (signed Int32 offset) conversion. + The absolute address condition for conversion in v23: + ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64) + note that if (fileSize64 > 2 GiB). there is difference between + old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23). + And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases. +*/ + +/* +// for old (v22) way for conversion: +typedef UInt32 CBcj2Enc_ip_unsigned; +typedef Int32 CBcj2Enc_ip_signed; +#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31) +*/ +typedef UInt64 CBcj2Enc_ip_unsigned; +typedef Int64 CBcj2Enc_ip_signed; + +/* maximum size of file that can be used for conversion condition */ +#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2) + +/* default value of fileSize64_minus1 variable that means + that absolute address limitation will not be used */ +#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1) + +/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */ +#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \ + ((CBcj2Enc_ip_unsigned)(fileSize) - 1) + +/* set CBcj2Enc::fileSize64_minus1 variable from size of file */ +#define Bcj2Enc_SET_FileSize(p, fileSize) \ + (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize); + + +typedef struct +{ + Byte *bufs[BCJ2_NUM_STREAMS]; + const Byte *lims[BCJ2_NUM_STREAMS]; + const Byte *src; + const Byte *srcLim; + + unsigned state; + EBcj2Enc_FinishMode finishMode; + + Byte context; + Byte flushRem; + Byte isFlushState; + + Byte cache; + UInt32 range; + UInt64 low; + UInt64 cacheSize; + + // UInt32 context; // for marker version, it can include marker flag. + + /* (ip64) and (fileIp64) correspond to virtual source stream position + that doesn't include data in temp[] */ + CBcj2Enc_ip_unsigned ip64; /* current (ip) position */ + CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */ + CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */ + UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */ + // UInt32 relatExcludeBits; + + UInt32 tempTarget; + unsigned tempPos; /* the number of bytes that were copied to temp[] buffer + (tempPos <= 4) outside of Bcj2Enc_Encode() */ + // Byte temp[4]; // for marker version + Byte temp[8]; + CBcj2Prob probs[2 + 256]; +} CBcj2Enc; + +void Bcj2Enc_Init(CBcj2Enc *p); + + +/* +Bcj2Enc_Encode(): at exit: + p->State < BCJ2_NUM_STREAMS : we need more buffer space for output stream + (bufs[p->State] == lims[p->State]) + p->State == BCJ2_ENC_STATE_ORIG : we need more data in input src stream + (src == srcLim) + p->State == BCJ2_ENC_STATE_FINISHED : after fully encoded stream +*/ +void Bcj2Enc_Encode(CBcj2Enc *p); + +/* Bcj2Enc encoder can look ahead for up 4 bytes of source stream. + CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer. + (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after + fully processed data and after data copied to temp buffer. + So if the caller needs to get real number of fully processed input + bytes (without look ahead data in temp buffer), + the caller must subtruct (CBcj2Enc::tempPos) value from processed size + value that is calculated based on current (CBcj2Enc::src): + cur_processed_pos = Calc_Big_Processed_Pos(enc.src)) - + Bcj2Enc_Get_AvailInputSize_in_Temp(&enc); +*/ +/* get the size of input data that was stored in temp[] buffer: */ +#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos) + +#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0) + +/* Note : the decoder supports overlapping of marker (0f 80). + But we can eliminate such overlapping cases by setting + the limit for relative offset conversion as + CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB) +*/ +/* default value for CBcj2Enc::relatLimit */ +#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24) +#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31) +// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5 + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Bcj2Enc.c b/crnlib/lzma/Bcj2Enc.c new file mode 100644 index 00000000..155cdae1 --- /dev/null +++ b/crnlib/lzma/Bcj2Enc.c @@ -0,0 +1,506 @@ +/* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2) +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +/* #define SHOW_STAT */ +#ifdef SHOW_STAT +#include +#define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src)); +#else +#define PRF2(s) +#endif + +#include "Bcj2.h" +#include "CpuArch.h" + +#define kTopValue ((UInt32)1 << 24) +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +void Bcj2Enc_Init(CBcj2Enc *p) +{ + unsigned i; + p->state = BCJ2_ENC_STATE_ORIG; + p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; + p->context = 0; + p->flushRem = 5; + p->isFlushState = 0; + p->cache = 0; + p->range = 0xffffffff; + p->low = 0; + p->cacheSize = 1; + p->ip64 = 0; + p->fileIp64 = 0; + p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED; + p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT; + // p->relatExcludeBits = 0; + p->tempPos = 0; + for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) + p->probs[i] = kBitModelTotal >> 1; +} + +// Z7_NO_INLINE +Z7_FORCE_INLINE +static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p) +{ + const UInt32 low = (UInt32)p->low; + const unsigned high = (unsigned) + #if defined(Z7_MSC_VER_ORIGINAL) \ + && defined(MY_CPU_X86) \ + && defined(MY_CPU_LE) \ + && !defined(MY_CPU_64BIT) + // we try to rid of __aullshr() call in MSVS-x86 + (((const UInt32 *)&p->low)[1]); // [1] : for little-endian only + #else + (p->low >> 32); + #endif + if (low < (UInt32)0xff000000 || high != 0) + { + Byte *buf = p->bufs[BCJ2_STREAM_RC]; + do + { + if (buf == p->lims[BCJ2_STREAM_RC]) + { + p->state = BCJ2_STREAM_RC; + p->bufs[BCJ2_STREAM_RC] = buf; + return True; + } + *buf++ = (Byte)(p->cache + high); + p->cache = 0xff; + } + while (--p->cacheSize); + p->bufs[BCJ2_STREAM_RC] = buf; + p->cache = (Byte)(low >> 24); + } + p->cacheSize++; + p->low = low << 8; + return False; +} + + +/* +We can use 2 alternative versions of code: +1) non-marker version: + Byte CBcj2Enc::context + Byte temp[8]; + Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer. + Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction + with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call. + +2) marker version: + UInt32 CBcj2Enc::context + Byte CBcj2Enc::temp[4]; + MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains finded marker. + it's allowed that + one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest, + and another call of Bcj2Enc_Encode_2() does offset conversion. + So different values of (fileIp) and (fileSize) are possible + in these different Bcj2Enc_Encode_2() calls. + +Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop. +So we use non-marker version. +*/ + +/* + Corner cases with overlap in multi-block. + before v23: there was one corner case, where converted instruction + could start in one sub-stream and finish in next sub-stream. + If multi-block (solid) encoding is used, + and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream. + and (0f) is last byte of previous sub-stream + and (8x) is first byte of current sub-stream + then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder. + BCJ2 encoder can converts 32-bit offset for that (0f 8x) cortage, + if that offset meets limit requirements. + If encoder allows 32-bit offset conversion for such overlap case, + then the data in 3 uncompressed BCJ2 streams for some sub-stream + can depend from data of previous sub-stream. + That corner case is not big problem, and it's rare case. + Since v23.00 we do additional check to prevent conversions in such overlap cases. +*/ + +/* + Bcj2Enc_Encode_2() output variables at exit: + { + if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG)) + { + it means that encoder needs more input data. + if (p->srcLim == p->src) at exit, then + { + (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) + all input data were read and processed, and we are ready for + new input data. + } + else + { + (p->srcLim != p->src) + (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) + The encoder have found e8/e9/0f_8x marker, + and p->src points to last byte of that marker, + Bcj2Enc_Encode_2() needs more input data to get totally + 5 bytes (last byte of marker and 32-bit branch offset) + as continuous array starting from p->src. + (p->srcLim - p->src < 5) requirement is met after exit. + So non-processed resedue from p->src to p->srcLim is always less than 5 bytes. + } + } + } +*/ + +Z7_NO_INLINE +static void Bcj2Enc_Encode_2(CBcj2Enc *p) +{ + if (!p->isFlushState) + { + const Byte *src; + UInt32 v; + { + const unsigned state = p->state; + if (BCJ2_IS_32BIT_STREAM(state)) + { + Byte *cur = p->bufs[state]; + if (cur == p->lims[state]) + return; + SetBe32a(cur, p->tempTarget) + p->bufs[state] = cur + 4; + } + } + p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit + src = p->src; + v = p->context; + + // #define WRITE_CONTEXT p->context = v; // for marker version + #define WRITE_CONTEXT p->context = (Byte)v; + #define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT + + for (;;) + { + // const Byte *src; + // UInt32 v; + CBcj2Enc_ip_unsigned ip; + if (p->range < kTopValue) + { + // to reduce register pressure and code size: we save and restore local variables. + WRITE_CONTEXT_AND_SRC + if (Bcj2_RangeEnc_ShiftLow(p)) + return; + p->range <<= 8; + src = p->src; + v = p->context; + } + // src = p->src; + // #define MARKER_FLAG ((UInt32)1 << 17) + // if ((v & MARKER_FLAG) == 0) // for marker version + { + const Byte *srcLim; + Byte *dest = p->bufs[BCJ2_STREAM_MAIN]; + { + const SizeT remSrc = (SizeT)(p->srcLim - src); + SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); + if (rem >= remSrc) + rem = remSrc; + srcLim = src + rem; + } + /* p->context contains context of previous byte: + bits [0 : 7] : src[-1], if (src) was changed in this call + bits [8 : 31] : are undefined for non-marker version + */ + // v = p->context; + #define NUM_SHIFT_BITS 24 + #define CONV_FLAG ((UInt32)1 << 16) + #define ONE_ITER { \ + b = src[0]; \ + *dest++ = (Byte)b; \ + v = (v << NUM_SHIFT_BITS) | b; \ + if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ + if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ + ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ + src++; if (src == srcLim) { break; } } + + if (src != srcLim) + for (;;) + { + /* clang can generate ineffective code with setne instead of two jcc instructions. + we can use 2 iterations and external (unsigned b) to avoid that ineffective code genaration. */ + unsigned b; + ONE_ITER + ONE_ITER + } + + ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]); + p->bufs[BCJ2_STREAM_MAIN] = dest; + p->ip64 = ip; + + if (src == srcLim) + { + WRITE_CONTEXT_AND_SRC + if (src != p->srcLim) + { + p->state = BCJ2_STREAM_MAIN; + return; + } + /* (p->src == p->srcLim) + (p->state == BCJ2_ENC_STATE_ORIG) */ + if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) + return; + /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */ + // (p->flushRem == 5); + p->isFlushState = 1; + break; + } + src++; + // p->src = src; + } + // ip = p->ip; // for marker version + /* marker was found */ + /* (v) contains marker that was found: + bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7] + : value of src[-2] : xx/xx/0f + bits [0 : 7] : value of src[-1] : e8/e9/8x + */ + { + { + #if NUM_SHIFT_BITS != 24 + v &= ~(UInt32)CONV_FLAG; + #endif + // UInt32 relat = 0; + if ((SizeT)(p->srcLim - src) >= 4) + { + /* + if (relat != 0 || (Byte)v != 0xe8) + BoolInt isBigOffset = True; + */ + const UInt32 relat = GetUi32(src); + /* + #define EXCLUDE_FLAG ((UInt32)1 << 4) + #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0) + if (p->relatExcludeBits != 0) + { + const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1); + isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0); + } + // isBigOffset = False; // for debug + */ + ip -= p->fileIp64; + // Use the following if check, if (ip) is 64-bit: + if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate milti-block overlap for (Of 80) and (e8/e9) + if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1) + if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit) + v |= CONV_FLAG; + } + else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE) + { + // (p->srcLim - src < 4) + // /* + // for non-marker version + p->ip64--; // p->ip = ip - 1; + p->bufs[BCJ2_STREAM_MAIN]--; + src--; + v >>= NUM_SHIFT_BITS; + // (0 < p->srcLim - p->src <= 4) + // */ + // v |= MARKER_FLAG; // for marker version + /* (p->state == BCJ2_ENC_STATE_ORIG) */ + WRITE_CONTEXT_AND_SRC + return; + } + { + const unsigned c = ((v + 0x17) >> 6) & 1; + CBcj2Prob *prob = p->probs + (unsigned) + (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); + /* + ((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) : + ((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1)); + */ + const unsigned ttt = *prob; + const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt; + if ((v & CONV_FLAG) == 0) + { + // static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy); + // v = (Byte)v; // for marker version + p->range = bound; + *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + // WRITE_CONTEXT_AND_SRC + continue; + } + p->low += bound; + p->range -= bound; + *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); + } + // p->context = src[3]; + { + // const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP); + const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; + ip = p->ip64; + v = GetUi32(src); // relat + ip += 4; + p->ip64 = ip; + src += 4; + // p->src = src; + { + const UInt32 absol = (UInt32)ip + v; + Byte *cur = p->bufs[cj]; + v >>= 24; + // WRITE_CONTEXT + if (cur == p->lims[cj]) + { + p->state = cj; + p->tempTarget = absol; + WRITE_CONTEXT_AND_SRC + return; + } + SetBe32a(cur, absol) + p->bufs[cj] = cur + 4; + } + } + } + } + } // end of loop + } + + for (; p->flushRem != 0; p->flushRem--) + if (Bcj2_RangeEnc_ShiftLow(p)) + return; + p->state = BCJ2_ENC_STATE_FINISHED; +} + + +/* +BCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer. +So base function Bcj2Enc_Encode_2() + in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with + (p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim) +Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer. + so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(), + then (p->src == p->srcLim). + And the caller's code is simpler with Bcj2Enc_Encode(). +*/ + +Z7_NO_INLINE +void Bcj2Enc_Encode(CBcj2Enc *p) +{ + PRF2("\n----") + if (p->tempPos != 0) + { + /* extra: number of bytes that were copied from (src) to (temp) buffer in this call */ + unsigned extra = 0; + /* We will touch only minimal required number of bytes in input (src) stream. + So we will add input bytes from (src) stream to temp[] with step of 1 byte. + We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call + in first loop iteration because + - previous call of Bcj2Enc_Encode() could use another (finishMode), + - previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG). + the case with full temp[] buffer (p->tempPos == 4) is possible here. + */ + for (;;) + { + // (0 < p->tempPos <= 5) // in non-marker version + /* p->src : the current src data position including extra bytes + that were copied to temp[] buffer in this call */ + const Byte *src = p->src; + const Byte *srcLim = p->srcLim; + const EBcj2Enc_FinishMode finishMode = p->finishMode; + if (src != srcLim) + { + /* if there are some src data after the data copied to temp[], + then we use MODE_CONTINUE for temp data */ + p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; + } + p->src = p->temp; + p->srcLim = p->temp + p->tempPos; + PRF2(" ") + Bcj2Enc_Encode_2(p); + { + const unsigned num = (unsigned)(p->src - p->temp); + const unsigned tempPos = p->tempPos - num; + unsigned i; + p->tempPos = tempPos; + for (i = 0; i < tempPos; i++) + p->temp[i] = p->temp[(SizeT)i + num]; + // tempPos : number of bytes in temp buffer + p->src = src; + p->srcLim = srcLim; + p->finishMode = finishMode; + if (p->state != BCJ2_ENC_STATE_ORIG) + { + // (p->tempPos <= 4) // in non-marker version + /* if (the reason of exit from Bcj2Enc_Encode_2() + is not BCJ2_ENC_STATE_ORIG), + then we exit from Bcj2Enc_Encode() with same reason */ + // optional code begin : we rollback (src) and tempPos, if it's possible: + if (extra >= tempPos) + extra = tempPos; + p->src = src - extra; + p->tempPos = tempPos - extra; + // optional code end : rollback of (src) and tempPos + return; + } + /* (p->tempPos <= 4) + (p->state == BCJ2_ENC_STATE_ORIG) + so encoder needs more data than in temp[] */ + if (src == srcLim) + return; // src buffer has no more input data. + /* (src != srcLim) + so we can provide more input data from src for Bcj2Enc_Encode_2() */ + if (extra >= tempPos) + { + /* (extra >= tempPos) means that temp buffer contains + only data from src buffer of this call. + So now we can encode without temp buffer */ + p->src = src - tempPos; // rollback (src) + p->tempPos = 0; + break; + } + // we append one additional extra byte from (src) to temp[] buffer: + p->temp[tempPos] = *src; + p->tempPos = tempPos + 1; + // (0 < p->tempPos <= 5) // in non-marker version + p->src = src + 1; + extra++; + } + } + } + + PRF2("++++") + // (p->tempPos == 0) + Bcj2Enc_Encode_2(p); + PRF2("====") + + if (p->state == BCJ2_ENC_STATE_ORIG) + { + const Byte *src = p->src; + const Byte *srcLim = p->srcLim; + const unsigned rem = (unsigned)(srcLim - src); + /* (rem <= 4) here. + if (p->src != p->srcLim), then + - we copy non-processed bytes from (p->src) to temp[] buffer, + - we set p->src equal to p->srcLim. + */ + if (rem) + { + unsigned i = 0; + p->src = srcLim; + p->tempPos = rem; + // (0 < p->tempPos <= 4) + do + p->temp[i] = src[i]; + while (++i != rem + 1); + } + // (p->tempPos <= 4) + // (p->src == p->srcLim) + } +} + +#undef PRF2 +#undef CONV_FLAG +#undef MARKER_FLAG +#undef WRITE_CONTEXT +#undef WRITE_CONTEXT_AND_SRC +#undef ONE_ITER +#undef NUM_SHIFT_BITS +#undef kTopValue +#undef kNumBitModelTotalBits +#undef kBitModelTotal +#undef kNumMoveBits diff --git a/crnlib/lzma/Bra.c b/crnlib/lzma/Bra.c new file mode 100644 index 00000000..818cc78b --- /dev/null +++ b/crnlib/lzma/Bra.c @@ -0,0 +1,709 @@ +/* Bra.c -- Branch converters for RISC code +2024-01-20 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" +#include "RotateDefs.h" +#include "CpuArch.h" + +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV(name) z7_ ## name + +#define Z7_BRANCH_FUNC_MAIN(name) \ +static \ +Z7_FORCE_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) + +#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ + { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ + +#ifdef Z7_EXTRACT_ONLY +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) +#else +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1) +#endif + +#if defined(__clang__) +#define BR_EXTERNAL_FOR +#define BR_NEXT_ITERATION continue; +#else +#define BR_EXTERNAL_FOR for (;;) +#define BR_NEXT_ITERATION break; +#endif + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + // GCC is not good for __builtin_expect() here + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // #define Z7_unlikely [[unlikely]] + // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_likely [[likely]] +#else + // #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64) +{ + // Byte *p = data; + const Byte *lim; + const UInt32 flag = (UInt32)1 << (24 - 4); + const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); + size &= ~(SizeT)3; + // if (size == 0) return p; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + BR_EXTERNAL_FOR + { + // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (;;) + { + UInt32 v; + if Z7_UNLIKELY(p == lim) + return p; + v = GetUi32a(p); + p += 4; + if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) + { + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x03ffffff; + v |= 0x94000000; + SetUi32a(p - 4, v) + BR_NEXT_ITERATION + } + // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) + v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) + { + UInt32 z, c; + // v = rotrFixed(v, 8); + v += flag; if Z7_UNLIKELY(v & mask) continue; + z = (v & 0xffffffe0) | (v >> 26); + c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; + BR_CONVERT_VAL(z, c) + v &= 0x1f; + v |= 0x90000000; + v |= z << 26; + v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); + SetUi32a(p - 4, v) + } + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to next instruction */ + pc += 8 - 4; + + for (;;) + { + for (;;) + { + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + } + { + UInt32 v = GetUi32a(p - 4); + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x00ffffff; + v |= 0xeb000000; + SetUi32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_PPC) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + for (;;) + { + UInt32 v; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + // v = GetBe32a(p); + v = *(UInt32 *)(void *)p; + p += 4; + // if ((v & 0xfc000003) == 0x48000001) break; + // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; + if Z7_UNLIKELY( + ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) + & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; + } + { + v = Z7_CONV_NATIVE_TO_BE_32(v); + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= 0x03ffffff; + v |= 0x48000000; + SetBe32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_PPC) + + +#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED +#define BR_SPARC_USE_ROTATE +#endif + +Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC) +{ + // Byte *p = data; + const Byte *lim; + const UInt32 flag = (UInt32)1 << 22; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + for (;;) + { + UInt32 v; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + /* // the code without GetBe32a(): + { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } + */ + v = GetBe32a(p); + p += 4; + #ifdef BR_SPARC_USE_ROTATE + v = rotlFixed(v, 2); + v += (flag << 2) - 1; + if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) + #else + v += (UInt32)5 << 29; + v ^= (UInt32)7 << 29; + v += flag; + if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) + #endif + break; + } + { + // UInt32 v = GetBe32a(p - 4); + #ifndef BR_SPARC_USE_ROTATE + v <<= 2; + #endif + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= (flag << 3) - 1; + #ifdef BR_SPARC_USE_ROTATE + v -= (flag << 2) - 1; + v = rotrFixed(v, 2); + #else + v -= (flag << 2); + v >>= 2; + v |= (UInt32)1 << 30; + #endif + SetBe32a(p - 4, v) + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT) +{ + // Byte *p = data; + Byte *lim; + size &= ~(SizeT)1; + // if (size == 0) return p; + if (size <= 2) return p; + size -= 2; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to the +2 instructions from current instruction */ + // pc += 4 - 4; + // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; + // #define ARMT_TAIL_PROC { goto armt_tail; } + #define ARMT_TAIL_PROC { return p; } + + do + { + /* in MSVC 32-bit x86 compilers: + UInt32 version : it loads value from memory with movzx + Byte version : it loads value to 8-bit register (AL/CL) + movzx version is slightly faster in some cpus + */ + unsigned b1; + // Byte / unsigned + b1 = p[1]; + // optimized version to reduce one (p >= lim) check: + // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) + for (;;) + { + unsigned b3; // Byte / UInt32 + /* (Byte)(b3) normalization can use low byte computations in MSVC. + It gives smaller code, and no loss of speed in some compilers/cpus. + But new MSVC 32-bit x86 compilers use more slow load + from memory to low byte register in that case. + So we try to use full 32-bit computations for faster code. + */ + // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; + } + { + /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation. + But gcc/clang for arm64 can use bfi instruction for full code here */ + UInt32 v = + ((UInt32)GetUi16a(p - 2) << 11) | + ((UInt32)GetUi16a(p) & 0x7FF); + /* + UInt32 v = + ((UInt32)p[1 - 2] << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + */ + p += 2; + { + UInt32 c = BR_PC_GET >> 1; + BR_CONVERT_VAL(v, c) + } + SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) + SetUi16a(p - 2, (UInt16)(v | 0xf800)) + /* + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xf8 | (v >> 8)); + */ + } + } + while (p < lim); + return p; + // armt_tail: + // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; + // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); + // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); + // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT) + + +// #define BR_IA64_NO_INLINE + +Z7_BRANCH_FUNC_MAIN(BranchConv_IA64) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)15; + lim = p + size; + pc -= 1 << 4; + pc >>= 4 - 1; + // pc -= 1 << 1; + + for (;;) + { + unsigned m; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); + p += 16; + pc += 1 << 1; + if (m &= 3) + break; + } + { + p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. + do + { + const UInt32 t = + #if defined(MY_CPU_X86_OR_AMD64) + // we use 32-bit load here to reduce code size on x86: + GetUi32(p); + #else + GetUi16(p); + #endif + UInt32 z = GetUi32(p + 1) >> m; + p += 5; + if (((t >> m) & (0x70 << 1)) == 0 + && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) + { + UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; + z ^= v; + #ifdef BR_IA64_NO_INLINE + v |= (v & ((UInt32)1 << (23 + 1))) >> 3; + { + UInt32 c = pc; + BR_CONVERT_VAL(v, c) + } + v &= (0x1fffff << 1) | 1; + #else + { + if (encoding) + { + // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits + pc &= (0x1fffff << 1) | 1; + v += pc; + } + else + { + // pc |= 0xc00000 << 1; // we need to set at least 2 bits + pc |= ~(UInt32)((0x1fffff << 1) | 1); + v -= pc; + } + } + v &= ~(UInt32)(0x600000 << 1); + #endif + v += (0x700000 << 1); + v &= (0x8fffff << 1) | 1; + z |= v; + z <<= m; + SetUi32(p + 1 - 5, z) + } + m++; + } + while (m &= 3); // while (m < 4); + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_IA64) + + +#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET; +#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET; + +#if 1 && defined(MY_CPU_LE_UNALIGN) + #define RISCV_USE_UNALIGNED_LOAD +#endif + +#ifdef RISCV_USE_UNALIGNED_LOAD + #define RISCV_GET_UI32(p) GetUi32(p) + #define RISCV_SET_UI32(p, v) { SetUi32(p, v) } +#else + #define RISCV_GET_UI32(p) \ + ((UInt32)GetUi16a(p) + \ + ((UInt32)GetUi16a((p) + 2) << 16)) + #define RISCV_SET_UI32(p, v) { \ + SetUi16a(p, (UInt16)(v)) \ + SetUi16a((p) + 2, (UInt16)(v >> 16)) } +#endif + +#if 1 && defined(MY_CPU_LE) + #define RISCV_USE_16BIT_LOAD +#endif + +#ifdef RISCV_USE_16BIT_LOAD + #define RISCV_LOAD_VAL(p) GetUi16a(p) +#else + #define RISCV_LOAD_VAL(p) (*(p)) +#endif + +#define RISCV_INSTR_SIZE 2 +#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE) +#define RISCV_STEP_2 4 +#define RISCV_REG_VAL (2 << 7) +#define RISCV_CMD_VAL 3 +#if 1 + // for code size optimization: + #define RISCV_DELTA_7F 0x7f +#else + #define RISCV_DELTA_7F 0 +#endif + +#define RISCV_CHECK_1(v, b) \ + (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0) + +#if 1 + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \ + << 18) \ + < ((r) & 0x1d)) +#else + // this branch gives larger code, because + // compilers generate larger code for big constants. + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + < ((r) & 0x1d)) +#endif + + +#define RISCV_SCAN_LOOP \ + Byte *lim; \ + size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \ + if (size <= 6) return p; \ + size -= 6; \ + lim = p + size; \ + BR_PC_INIT \ + for (;;) \ + { \ + UInt32 a, v; \ + /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \ + for (;;) \ + { \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \ + if ((a & 0x77) == 0) break; \ + a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \ + p += RISCV_INSTR_SIZE * 2; \ + if ((a & 0x77) == 0) \ + { \ + p -= RISCV_INSTR_SIZE; \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + break; \ + } \ + } +// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL +// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL +// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC +// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC + +Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP + v = a; + a = RISCV_GET_UI32(p); +#ifndef RISCV_USE_16BIT_LOAD + v += (UInt32)p[1] << 8; +#endif + + if ((v & 8) == 0) // JAL + { + if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + v = ((a & 1u << 31) >> 11) + | ((a & 0x3ff << 21) >> 20) + | ((a & 1 << 20) >> 9) + | (a & 0xff << 12); + BR_CONVERT_VAL_ENC(v) + // ((v & 1) == 0) + // v: bits [1 : 20] contain offset bits +#if 0 && defined(RISCV_USE_UNALIGNED_LOAD) + a &= 0xfff; + a |= ((UInt32)(v << 23)) + | ((UInt32)(v << 7) & ((UInt32)0xff << 16)) + | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8)); + RISCV_SET_UI32(p, a) +#else // aligned +#if 0 + SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff))) +#else + p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf)); +#endif + +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v <<= 15; + v = Z7_BSWAP32(v); + SetUi16a(p + 2, (UInt16)v) +#else + p[2] = (Byte)(v >> 9); + p[3] = (Byte)(v >> 1); +#endif +#endif // aligned + } + p += 4; + continue; + } // JAL + + { + // AUIPC + if (v & 0xe80) // (not x0) and (not x2) + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (RISCV_CHECK_1(v, b)) + { + { + const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, temp) + } + a &= 0xfffff000; + { +#if 1 + const int t = -1 >> 1; + if (t != -1) + a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation + else +#endif + a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension). + } + BR_CONVERT_VAL_ENC(a) +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + a = Z7_BSWAP32(a); + RISCV_SET_UI32(p + 4, a) +#else + SetBe32(p + 4, a) +#endif + p += 8; + } + else + p += RISCV_STEP_1; + } + else + { + UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + v = RISCV_GET_UI32(p + 4); + r = (r << 7) + 0x17 + (v & 0xfffff000); + a = (a >> 12) | (v << 20); + RISCV_SET_UI32(p, r) + RISCV_SET_UI32(p + 4, a) + p += 8; + } + else + p += RISCV_STEP_2; + } + } + } // for +} + + +Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP +#ifdef RISCV_USE_16BIT_LOAD + if ((a & 8) == 0) + { +#else + v = a; + a += (UInt32)p[1] << 8; + if ((v & 8) == 0) + { +#endif + // JAL + a -= 0x100 - RISCV_DELTA_7F; + if (a & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff; +#if 0 // unaligned + a = GetUi32(p); + v = (UInt32)(a >> 23) & ((UInt32)0xff << 1) + | (UInt32)(a >> 7) & ((UInt32)0xff << 9) +#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v = GetUi16a(p + 2); + v = Z7_BSWAP32(v) >> 15 +#else + v = (UInt32)p[3] << 1 + | (UInt32)p[2] << 9 +#endif + | (UInt32)((a & 0xf000) << 5); + BR_CONVERT_VAL_DEC(v) + a = a_old + | (v << 11 & 1u << 31) + | (v << 20 & 0x3ff << 21) + | (v << 9 & 1 << 20) + | (v & 0xff << 12); + RISCV_SET_UI32(p, a) + } + p += 4; + continue; + } // JAL + + { + // AUIPC + v = a; +#if 1 && defined(RISCV_USE_UNALIGNED_LOAD) + a = GetUi32(p); +#else + a |= (UInt32)GetUi16a(p + 2) << 16; +#endif + if ((v & 0xe80) == 0) // x0/x2 + { + const UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + UInt32 b; +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + b = RISCV_GET_UI32(p + 4); + b = Z7_BSWAP32(b); +#else + b = GetBe32(p + 4); +#endif + v = a >> 12; + BR_CONVERT_VAL_DEC(b) + a = (r << 7) + 0x17; + a += (b + 0x800) & 0xfffff000; + v |= b << 20; + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + else + p += RISCV_STEP_2; + } + else + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (!RISCV_CHECK_1(v, b)) + p += RISCV_STEP_1; + else + { + v = (a & 0xfffff000) | (b >> 20); + a = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + } + } + } // for +} diff --git a/crnlib/lzma/Bra.h b/crnlib/lzma/Bra.h new file mode 100644 index 00000000..060ee170 --- /dev/null +++ b/crnlib/lzma/Bra.h @@ -0,0 +1,105 @@ +/* Bra.h -- Branch converters for executables +2024-01-20 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_BRA_H +#define ZIP7_INC_BRA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define PPC BAD_PPC_11 // for debug */ + +#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec +#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc +#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec +#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc + +#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc) +#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state) + +typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv)); +typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); + +#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0 +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86)); +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86)); + +#define Z7_BRANCH_FUNCS_DECL(name) \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name)); + +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT) +Z7_BRANCH_FUNCS_DECL (BranchConv_PPC) +Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC) +Z7_BRANCH_FUNCS_DECL (BranchConv_IA64) +Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV) + +/* +These functions convert data that contain CPU instructions. +Each such function converts relative addresses to absolute addresses in some +branch instructions: CALL (in all converters) and JUMP (X86 converter only). +Such conversion allows to increase compression ratio, if we compress that data. + +There are 2 types of converters: + Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc); + Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state); +Each Converter supports 2 versions: one for encoding +and one for decoding (_Enc/_Dec postfixes in function name). + +In params: + data : data buffer + size : size of data + pc : current virtual Program Counter (Instruction Pointer) value +In/Out param: + state : pointer to state variable (for X86 converter only) + +Return: + The pointer to position in (data) buffer after last byte that was processed. + If the caller calls converter again, it must call it starting with that position. + But the caller is allowed to move data in buffer. So pointer to + current processed position also will be changed for next call. + Also the caller must increase internal (pc) value for next call. + +Each converter has some characteristics: Endian, Alignment, LookAhead. + Type Endian Alignment LookAhead + + X86 little 1 4 + ARMT little 2 2 + RISCV little 2 6 + ARM little 4 0 + ARM64 little 4 0 + PPC big 4 0 + SPARC big 4 0 + IA64 little 16 0 + + (data) must be aligned for (Alignment). + processed size can be calculated as: + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0) + it means that converter needs more data for processing. + If (size < Alignment + LookAhead) + then (processed == 0) is allowed. + +Example code for conversion in loop: + UInt32 pc = 0; + size = 0; + for (;;) + { + size += Load_more_input_data(data + size); + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0 && no_more_input_data_after_size) + break; // we stop convert loop + data += processed; + size -= processed; + pc += processed; + } +*/ + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Bra86.c b/crnlib/lzma/Bra86.c new file mode 100644 index 00000000..9c468ba7 --- /dev/null +++ b/crnlib/lzma/Bra86.c @@ -0,0 +1,187 @@ +/* Bra86.c -- Branch converter for X86 code (BCJ) +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" +#include "CpuArch.h" + + +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name + +#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) + +#ifdef MY_CPU_LE_UNALIGN + #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; + #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) +#else + #define BR86_PREPARE_BCJ_SCAN + // bad for MSVC X86 (partial write to byte reg): + #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) + // bad for old MSVC (partial write to byte reg): + // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) +#endif + +static +Z7_FORCE_INLINE +Z7_ATTRIB_NO_VECTOR +Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) +{ + if (size < 5) + return p; + { + // Byte *p = data; + const Byte *lim = p + size - 4; + unsigned mask = (unsigned)*state; // & 7; +#ifdef BR_CONV_USE_OPT_PC_PTR + /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), + because call/jump offset is relative to the next instruction. + if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), + because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. + */ + pc += 4; +#endif + BR_PC_INIT + goto start; + + for (;; mask |= 4) + { + // cont: mask |= 4; + start: + if (p >= lim) + goto fin; + { + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + } + goto main_loop; + + m0: p--; + m1: p--; + m2: p--; + if (mask == 0) + goto a3; + if (p > lim) + goto fin_p; + + // if (((0x17u >> mask) & 1) == 0) + if (mask > 4 || mask == 3) + { + mask >>= 1; + continue; // goto cont; + } + mask >>= 1; + if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) + continue; // goto cont; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + { + mask <<= 3; + if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) + { + v ^= (((UInt32)0x100 << mask) - 1); + #ifdef MY_CPU_X86 + // for X86 : we can recalculate (c) to reduce register pressure + c = BR_PC_GET; + #endif + BR_CONVERT_VAL(v, c) + } + mask = 0; + } + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; + } + + main_loop: + if (p >= lim) + goto fin; + for (;;) + { + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto a0; } + if (BR86_IS_BCJ_BYTE(1)) { goto a1; } + if (BR86_IS_BCJ_BYTE(2)) { goto a2; } + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + if (p >= lim) + goto fin; + } + + a0: p--; + a1: p--; + a2: p--; + a3: + if (p > lim) + goto fin_p; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; + } + } + +fin_p: + p--; +fin: + // the following processing for tail is optional and can be commented + /* + lim += 4; + for (; p < lim; p++, mask >>= 1) + if ((*p & 0xfe) == 0xe8) + break; + */ + *state = (UInt32)mask; + return p; + } +} + + +#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ + { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } + +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) +#ifndef Z7_EXTRACT_ONLY +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) +#endif diff --git a/crnlib/lzma/BraIA64.c b/crnlib/lzma/BraIA64.c new file mode 100644 index 00000000..1b61927c --- /dev/null +++ b/crnlib/lzma/BraIA64.c @@ -0,0 +1,14 @@ +/* BraIA64.c -- Converter for IA-64 code +2023-02-20 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// the code was moved to Bra.c + +#ifdef _MSC_VER +#pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#endif + +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wempty-translation-unit" +#endif diff --git a/crnlib/lzma/Compiler.h b/crnlib/lzma/Compiler.h new file mode 100644 index 00000000..6f8db4ca --- /dev/null +++ b/crnlib/lzma/Compiler.h @@ -0,0 +1,236 @@ +/* Compiler.h : Compiler specific defines and pragmas +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_COMPILER_H +#define ZIP7_INC_COMPILER_H + +#if defined(__clang__) +# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif +#if defined(__clang__) && defined(__apple_build_version__) +# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__clang__) +# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__GNUC__) +# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifdef _MSC_VER +#if !defined(__clang__) && !defined(__GNUC__) +#define Z7_MSC_VER_ORIGINAL _MSC_VER +#endif +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +#define Z7_MINGW +#endif + +#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__)) +#define Z7_MCST_LCC +#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__) +#endif + +/* +#if defined(__AVX2__) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \ + || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \ + || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) + #define Z7_COMPILER_AVX2_SUPPORTED + #endif +#endif +*/ + +// #pragma GCC diagnostic ignored "-Wunknown-pragmas" + +#ifdef __clang__ +// padding size of '' with 4 bytes to alignment boundary +#pragma GCC diagnostic ignored "-Wpadded" + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \ + && defined(__FreeBSD__) +// freebsd: +#pragma GCC diagnostic ignored "-Wexcess-padding" +#endif + +#if __clang_major__ >= 16 +#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" +#endif + +#if __clang_major__ == 13 +#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// cheri +#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast" +#endif +#endif + +#if __clang_major__ == 13 + // for + #pragma GCC diagnostic ignored "-Wreserved-identifier" +#endif + +#endif // __clang__ + +#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +// #pragma GCC diagnostic ignored "-Wcast-function-type-strict" +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +#endif + +typedef void (*Z7_void_Function)(void); +#if defined(__clang__) || defined(__GNUC__) +#define Z7_CAST_FUNC_C (Z7_void_Function) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define Z7_CAST_FUNC_C (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define Z7_CAST_FUNC_C +#endif +/* +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif +*/ +#ifdef __GNUC__ +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif +#endif + + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + +#if defined(_MSC_VER) && _MSC_VER >= 1800 +#pragma warning(disable : 4464) // relative include path contains '..' +#endif + +// == 1200 : -O1 : for __forceinline +// >= 1900 : -O1 : for printf +#pragma warning(disable : 4710) // function not inlined + +#if _MSC_VER < 1900 +// winnt.h: 'Int64ShllMod32' +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#endif + +#if _MSC_VER < 1300 +// #pragma warning(disable : 4702) // unreachable code +// Bra.c : -O1: +#pragma warning(disable : 4714) // function marked as __forceinline not inlined +#endif + +/* +#if _MSC_VER > 1400 && _MSC_VER <= 1900 +// strcat: This function or variable may be unsafe +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif +*/ + +#if _MSC_VER > 1200 +// -Wall warnings + +#pragma warning(disable : 4711) // function selected for automatic inline expansion +#pragma warning(disable : 4820) // '2' bytes padding added after data member + +#if _MSC_VER >= 1400 && _MSC_VER < 1920 +// 1400: string.h: _DBG_MEMCPY_INLINE_ +// 1600 - 191x : smmintrin.h __cplusplus' +// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' +#pragma warning(disable : 4668) + +// 1400 - 1600 : WinDef.h : 'FARPROC' : +// 1900 - 191x : immintrin.h: _readfsbase_u32 +// no function prototype given : converting '()' to '(void)' +#pragma warning(disable : 4255) +#endif + +#if _MSC_VER >= 1914 +// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#pragma warning(disable : 5045) +#endif + +#endif // _MSC_VER > 1200 +#endif // _MSC_VER + + +#if defined(__clang__) && (__clang_major__ >= 4) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("clang loop unroll(disable)") \ + _Pragma("clang loop vectorize(disable)") + #define Z7_ATTRIB_NO_VECTORIZE +#elif defined(__GNUC__) && (__GNUC__ >= 5) \ + && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610)) + #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + // __attribute__((optimize("no-unroll-loops"))); + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE +#elif defined(_MSC_VER) && (_MSC_VER >= 1920) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("loop( no_vector )") + #define Z7_ATTRIB_NO_VECTORIZE +#else + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + #define Z7_ATTRIB_NO_VECTORIZE +#endif + +#if defined(MY_CPU_X86_OR_AMD64) && ( \ + defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5)) + #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse"))) +#else + #define Z7_ATTRIB_NO_SSE +#endif + +#define Z7_ATTRIB_NO_VECTOR \ + Z7_ATTRIB_NO_VECTORIZE \ + Z7_ATTRIB_NO_SSE + + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // GCC is not good for __builtin_expect() + #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_unlikely [[unlikely]] + // #define Z7_likely [[likely]] +#else + #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600)) + +#if (Z7_CLANG_VERSION < 130000) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") +#endif + +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/crnlib/lzma/CpuArch.c b/crnlib/lzma/CpuArch.c new file mode 100644 index 00000000..2560f908 --- /dev/null +++ b/crnlib/lzma/CpuArch.c @@ -0,0 +1,970 @@ +/* CpuArch.c -- CPU specific code +Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #include + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + +#undef NEED_CHECK_FOR_CPUID +#if !defined(MY_CPU_AMD64) +#define NEED_CHECK_FOR_CPUID +#endif + +/* + cpuid instruction supports (subFunction) parameter in ECX, + that is used only with some specific (function) parameter values. + most functions use only (subFunction==0). +*/ +/* + __cpuid(): MSVC and GCC/CLANG use same function/macro name + but parameters are different. + We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. +*/ + +#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \ + || defined(__clang__) /* && (__clang_major__ >= 10) */ + +/* there was some CLANG/GCC compilers that have issues with + rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined). + compiler's contains the macro __cpuid() that is similar to our code. + The history of __cpuid() changes in CLANG/GCC: + GCC: + 2007: it preserved ebx for (__PIC__ && __i386__) + 2013: it preserved rbx and ebx for __PIC__ + 2014: it doesn't preserves rbx and ebx anymore + we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem. + CLANG: + 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check. + Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)? + Do we need __PIC__ test for CLANG or we must care about rbx even if + __PIC__ is not defined? +*/ + +#define ASM_LN "\n" + +#if defined(MY_CPU_AMD64) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%rbx, %q1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%rbx, %q1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#elif defined(MY_CPU_X86) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%ebx, %k1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%ebx, %k1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#else + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "cpuid" \ + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#endif + +#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0) + +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + x86_cpuid_MACRO(p, func) +} + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + x86_cpuid_MACRO_2(p, func, subFunc) +} + + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + UInt32 a; + __asm__ __volatile__ ( + ASM_LN "pushf" + ASM_LN "pushf" + ASM_LN "pop %0" + // ASM_LN "movl %0, %1" + // ASM_LN "xorl $0x200000, %0" + ASM_LN "btc %1, %0" + ASM_LN "push %0" + ASM_LN "popf" + ASM_LN "pushf" + ASM_LN "pop %0" + ASM_LN "xorl (%%esp), %0" + + ASM_LN "popf" + ASM_LN + : "=&r" (a) // "=a" + : "i" (EFALGS_CPUID_BIT) + ); + if ((a & (1 << EFALGS_CPUID_BIT)) == 0) + return 0; + #endif + { + UInt32 p[4]; + x86_cpuid_MACRO(p, 0) + return p[0]; + } +} + +#undef ASM_LN + +#elif !defined(_MSC_VER) + +/* +// for gcc/clang and other: we can try to use __cpuid macro: +#include +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + __cpuid(func, p[0], p[1], p[2], p[3]); +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return (UInt32)__get_cpuid_max(0, NULL); +} +*/ +// for unsupported cpuid: +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(func) + p[0] = p[1] = p[2] = p[3] = 0; +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return 0; +} + +#else // _MSC_VER + +#if !defined(MY_CPU_AMD64) + +UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + __asm pushfd + __asm pushfd + /* + __asm pop eax + // __asm mov edx, eax + __asm btc eax, EFALGS_CPUID_BIT + __asm push eax + */ + __asm btc dword ptr [esp], EFALGS_CPUID_BIT + __asm popfd + __asm pushfd + __asm pop eax + // __asm xor eax, edx + __asm xor eax, [esp] + // __asm push edx + __asm popfd + __asm and eax, (1 shl EFALGS_CPUID_BIT) + __asm jz end_func + #endif + __asm push ebx + __asm xor eax, eax // func + __asm xor ecx, ecx // subFunction (optional) for (func == 0) + __asm cpuid + __asm pop ebx + #if defined(NEED_CHECK_FOR_CPUID) + end_func: + #endif + __asm ret 0 +} + +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm xor ecx, ecx // subfunction (optional) for (func == 0) + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 0 +} + +static +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + UNUSED_VAR(subFunc) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm mov ecx, [esp + 12] // subFunc + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 4 +} + +#else // MY_CPU_AMD64 + + #if _MSC_VER >= 1600 + #include + #define MY_cpuidex __cpuidex + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + __cpuidex((int *)p, func, subFunc); +} + + #else +/* + __cpuid (func == (0 or 7)) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) x64 doesn't clear ECX + We still can use __cpuid for low (func) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FASTCALL / NO_INLINE func. + So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + +DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! +*/ +static +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo) +{ + UNUSED_VAR(subFunction) + __cpuid(CPUInfo, func); +} + #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) + #pragma message("======== MY_cpuidex_HACK WAS USED ========") +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + MY_cpuidex_HACK(subFunc, func, (Int32 *)p); +} + #endif // _MSC_VER >= 1600 + +#if !defined(MY_CPU_AMD64) +/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, + so we disable inlining here */ +Z7_NO_INLINE +#endif +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + MY_cpuidex((Int32 *)p, (Int32)func, 0); +} + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + Int32 a[4]; + MY_cpuidex(a, 0, 0); + return a[0]; +} + +#endif // MY_CPU_AMD64 +#endif // _MSC_VER + +#if defined(NEED_CHECK_FOR_CPUID) +#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; } +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif +#undef NEED_CHECK_FOR_CPUID + + +static +BoolInt x86cpuid_Func_1(UInt32 *p) +{ + CHECK_CPUID_IS_SUPPORTED + z7_x86_cpuid(p, 1); + return True; +} + +/* +static const UInt32 kVendors[][1] = +{ + { 0x756E6547 }, // , 0x49656E69, 0x6C65746E }, + { 0x68747541 }, // , 0x69746E65, 0x444D4163 }, + { 0x746E6543 } // , 0x48727561, 0x736C7561 } +}; +*/ + +/* +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; +int x86cpuid_GetFirm(const Cx86cpuid *p); +#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf)) +#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf)) +#define x86cpuid_ver_GetStepping(ver) (ver & 0xf) + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] + // && v[1] == p->vendor[1] + // && v[2] == p->vendor[2] + ) + return (int)i; + } + return -1; +} + +BoolInt CPU_Is_InOrder() +{ + Cx86cpuid p; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_ver_GetFamily(p.ver); + model = x86cpuid_ver_GetModel(p.ver); + + switch (x86cpuid_GetFirm(&p)) + { + case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + // In-Order Atom CPU + model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 + || model == 0x26 // 45 nm, Z6xx + || model == 0x27 // 32 nm, Z2460 + || model == 0x35 // 32 nm, Z2760 + || model == 0x36 // 32 nm, N2xxx, D2xxx + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return False; // v23 : unknown processors are not In-Order +} +*/ + +#ifdef _WIN32 +#include "7zWindows.h" +#endif + +#if !defined(MY_CPU_AMD64) && defined(_WIN32) + +/* for legacy SSE ia32: there is no user-space cpu instruction to check + that OS supports SSE register storing/restoring on context switches. + So we need some OS-specific function to check that it's safe to use SSE registers. +*/ + +Z7_FORCE_INLINE +static BoolInt CPU_Sys_Is_SSE_Supported(void) +{ +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4996) // `GetVersion': was declared deprecated +#endif + /* low byte is major version of Windows + We suppose that any Windows version since + Windows2000 (major == 5) supports SSE registers */ + return (Byte)GetVersion() >= 5; +#if defined(_MSC_VER) + #pragma warning(pop) +#endif +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + + +#if !defined(MY_CPU_AMD64) + +BoolInt CPU_IsSupported_CMOV(void) +{ + UInt32 a[4]; + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 15) & 1; +} + +BoolInt CPU_IsSupported_SSE(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSE2(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 26) & 1; +} + +#endif + + +static UInt32 x86cpuid_Func_1_ECX(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return a[2]; +} + +BoolInt CPU_IsSupported_AES(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA(void) +{ + CHECK_SYS_SSE_SUPPORT + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + return (BoolInt)(d[1] >> 29) & 1; + } +} + + +BoolInt CPU_IsSupported_SHA512(void) +{ + if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid_subFunc(d, 7, 0); + if (d[0] < 1) // d[0] - is max supported subleaf value + return False; + z7_x86_cpuid_subFunc(d, 7, 1); + return (BoolInt)(d[0]) & 1; + } +} + +/* +MSVC: _xgetbv() intrinsic is available since VS2010SP1. + MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in + that we can use or check. + For any 32-bit x86 we can use asm code in MSVC, + but MSVC asm code is huge after compilation. + So _xgetbv() is better + +ICC: _xgetbv() intrinsic is available (in what version of ICC?) + ICC defines (__GNUC___) and it supports gnu assembler + also ICC supports MASM style code with -use-msasm switch. + but ICC doesn't support __attribute__((__target__)) + +GCC/CLANG 9: + _xgetbv() is macro that works via __builtin_ia32_xgetbv() + and we need __attribute__((__target__("xsave")). + But with __target__("xsave") the function will be not + inlined to function that has no __target__("xsave") attribute. + If we want _xgetbv() call inlining, then we should use asm version + instead of calling _xgetbv(). + Note:intrinsic is broke before GCC 8.2: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684 +*/ + +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \ + || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \ + || defined(__GNUC__) && (__GNUC__ >= 9) \ + || defined(__clang__) && (__clang_major__ >= 9) +// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler +#if defined(__INTEL_COMPILER) +#define ATTRIB_XGETBV +#elif defined(__GNUC__) || defined(__clang__) +// we don't define ATTRIB_XGETBV here, because asm version is better for inlining. +// #define ATTRIB_XGETBV __attribute__((__target__("xsave"))) +#else +#define ATTRIB_XGETBV +#endif +#endif + +#if defined(ATTRIB_XGETBV) +#include +#endif + + +// XFEATURE_ENABLED_MASK/XCR0 +#define MY_XCR_XFEATURE_ENABLED_MASK 0 + +#if defined(ATTRIB_XGETBV) +ATTRIB_XGETBV +#endif +static UInt64 x86_xgetbv_0(UInt32 num) +{ +#if defined(ATTRIB_XGETBV) + { + return + #if (defined(_MSC_VER)) + _xgetbv(num); + #else + __builtin_ia32_xgetbv( + #if !defined(__clang__) + (int) + #endif + num); + #endif + } + +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC) + + UInt32 a, d; + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) + __asm__ + ( + "xgetbv" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #else // is old gcc + __asm__ + ( + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #endif + return ((UInt64)d << 32) | a; + // return a; + +#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64) + + UInt32 a, d; + __asm { + push eax + push edx + push ecx + mov ecx, num; + // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + return ((UInt64)d << 32) | a; + // return a; + +#else // it's unknown compiler + // #error "Need xgetbv function" + UNUSED_VAR(num) + // for MSVC-X64 we could call external function from external file. + /* Actually we had checked OSXSAVE/AVX in cpuid before. + So it's expected that OS supports at least AVX and below. */ + // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0 + return + // (1 << 0) | // x87 + (1 << 1) // SSE + | (1 << 2); // AVX + +#endif +} + +#ifdef _WIN32 +/* + Windows versions do not know about new ISA extensions that + can be introduced. But we still can use new extensions, + even if Windows doesn't report about supporting them, + But we can use new extensions, only if Windows knows about new ISA extension + that changes the number or size of registers: SSE, AVX/XSAVE, AVX512 + So it's enough to check + MY_PF_AVX_INSTRUCTIONS_AVAILABLE + instead of + MY_PF_AVX2_INSTRUCTIONS_AVAILABLE +*/ +#define MY_PF_XSAVE_ENABLED 17 +// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36 +// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37 +// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38 +// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39 +// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40 +// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41 +#endif + +BoolInt CPU_IsSupported_AVX(void) +{ + #ifdef _WIN32 + if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED)) + return False; + /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from + some latest Win10 revisions. But we need AVX in older Windows also. + So we don't use the following check: */ + /* + if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE)) + return False; + */ + #endif + + /* + OS must use new special XSAVE/XRSTOR instructions to save + AVX registers when it required for context switching. + At OS statring: + OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions. + Also OS sets bitmask in XCR0 register that defines what + registers will be processed by XSAVE instruction: + XCR0.SSE[bit 0] - x87 registers and state + XCR0.SSE[bit 1] - SSE registers and state + XCR0.AVX[bit 2] - AVX registers and state + CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27]. + So we can read that bit in user-space. + XCR0 is available for reading in user-space by new XGETBV instruction. + */ + { + const UInt32 c = x86cpuid_Func_1_ECX(); + if (0 == (1 + & (c >> 28) // AVX instructions are supported by hardware + & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS. + return False; + } + + /* also we can check + CPUID.1:ECX.XSAVE [bit 26] : that shows that + XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware. + But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */ + + /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1), + in most cases we expect that OS also will support storing/restoring + for AVX and SSE states at least. + But to be ensure for that we call user-space instruction + XGETBV(0) to get XCR0 value that contains bitmask that defines + what exact states(registers) OS have enabled for storing/restoring. + */ + + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring + } + // since Win7SP1: we can use GetEnabledXStateFeatures(); +} + + +BoolInt CPU_IsSupported_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (BoolInt)(d[1] >> 5); // avx2 + } +} + +#if 0 +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + BoolInt v; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) + return False; + } + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 + } +} +#endif + +BoolInt CPU_IsSupported_VAES_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (BoolInt)(d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB(void) +{ + CHECK_CPUID_IS_SUPPORTED + { + UInt32 d[4]; + z7_x86_cpuid(d, 0x80000000); + if (d[0] < 0x80000001) + return False; + z7_x86_cpuid(d, 0x80000001); + return (BoolInt)(d[3] >> 26) & 1; + } +} + + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include "7zWindows.h" + +BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ +/* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); +*/ + +static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + +BoolInt CPU_IsSupported_CRC32(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +BoolInt CPU_IsSupported_SHA512(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512"); +} + +/* +BoolInt CPU_IsSupported_SHA3(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3"); +} +*/ + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) + #define Z7_GETAUXV_AVAILABLE +#else +// #pragma message("=== is not NEW GLIBC === ") + #if defined __has_include + #if __has_include () +// #pragma message("=== sys/auxv.h is avail=== ") + #define Z7_GETAUXV_AVAILABLE + #endif + #endif +#endif + +#ifdef Z7_GETAUXV_AVAILABLE +// #pragma message("=== Z7_GETAUXV_AVAILABLE === ") +#include +#define USE_HWCAP +#endif + +#ifdef USE_HWCAP + +#if defined(__FreeBSD__) +static unsigned long MY_getauxval(int aux) +{ + unsigned long val; + if (elf_aux_info(aux, &val, sizeof(val))) + return 0; + return val; +} +#else +#define MY_getauxval getauxval + #if defined __has_include + #if __has_include () +#include + #endif + #endif +#endif + + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); } + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) +#if 1 || defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +#endif +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); } + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name(void) { return 0; } +#if defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else + MY_HWCAP_CHECK_FUNC(NEON) +#endif + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) +#ifdef MY_CPU_ARM64 +// supports HWCAP_SHA512 and HWCAP_SHA3 since 2017. +// we define them here, if they are not defined +#ifndef HWCAP_SHA3 +// #define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SHA512 +// #pragma message("=== HWCAP_SHA512 define === ") +#define HWCAP_SHA512 (1 << 21) +#endif +MY_HWCAP_CHECK_FUNC (SHA512) +// MY_HWCAP_CHECK_FUNC (SHA3) +#endif + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + const int res = z7_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + +#endif diff --git a/crnlib/lzma/CpuArch.h b/crnlib/lzma/CpuArch.h new file mode 100644 index 00000000..699aab4b --- /dev/null +++ b/crnlib/lzma/CpuArch.h @@ -0,0 +1,678 @@ +/* CpuArch.h -- CPU specific code +Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_CPU_ARCH_H +#define ZIP7_INC_CPU_ARCH_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +MY_CPU_LE means that CPU is LITTLE ENDIAN. +MY_CPU_BE means that CPU is BIG ENDIAN. +If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. + +MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. + MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) +*/ + +#if !defined(_M_ARM64EC) +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) + #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#endif +#endif + + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_ARM64) \ + || defined(_M_ARM64EC) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 +#if defined(__ILP32__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "arm64-32" + #define MY_CPU_SIZEOF_POINTER 4 +#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) + #define MY_CPU_NAME "arm64-128" + #define MY_CPU_SIZEOF_POINTER 16 +#else +#if defined(_M_ARM64EC) + #define MY_CPU_NAME "arm64ec" +#else + #define MY_CPU_NAME "arm64" +#endif + #define MY_CPU_SIZEOF_POINTER 8 +#endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_ARMT + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_ARM32 + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT +#endif + + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#define MY_CPU_PPC_OR_PPC64 + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#else + #define MY_CPU_NAME "ppc" + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif +#endif + + +#if defined(__sparc__) \ + || defined(__sparc) + #define MY_CPU_SPARC + #if defined(__LP64__) \ + || defined(_LP64) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_NAME "sparcv9" + #define MY_CPU_SIZEOF_POINTER 8 + #define MY_CPU_64BIT + #elif defined(__sparc_v9__) \ + || defined(__sparcv9) + #define MY_CPU_64BIT + #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "sparcv9-32" + #else + #define MY_CPU_NAME "sparcv9m" + #endif + #elif defined(__sparc_v8__) \ + || defined(__sparcv8) + #define MY_CPU_NAME "sparcv8" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "sparc" + #endif +#endif + + +#if defined(__riscv) \ + || defined(__riscv__) + #define MY_CPU_RISCV + #if __riscv_xlen == 32 + #define MY_CPU_NAME "riscv32" + #elif __riscv_xlen == 64 + #define MY_CPU_NAME "riscv64" + #else + #define MY_CPU_NAME "riscv" + #endif +#endif + + +#if defined(__loongarch__) + #define MY_CPU_LOONGARCH + #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64) + #define MY_CPU_64BIT + #endif + #if defined(__loongarch64) + #define MY_CPU_NAME "loongarch64" + #define MY_CPU_LOONGARCH64 + #else + #define MY_CPU_NAME "loongarch" + #endif +#endif + + +// #undef MY_CPU_NAME +// #undef MY_CPU_SIZEOF_POINTER +// #define __e2k__ +// #define __SIZEOF_POINTER__ 4 +#if defined(__e2k__) + #define MY_CPU_E2K + #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "e2k-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "e2k" + #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) +#define MY_CPU_X86_OR_AMD64 +#endif + +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ + || defined(MY_CPU_IA64_LE) \ + || defined(_LITTLE_ENDIAN) \ + || defined(__LITTLE_ENDIAN__) \ + || defined(__ARMEL__) \ + || defined(__THUMBEL__) \ + || defined(__AARCH64EL__) \ + || defined(__MIPSEL__) \ + || defined(__MIPSEL) \ + || defined(_MIPSEL) \ + || defined(__BFIN__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define MY_CPU_LE +#endif + +#if defined(__BIG_ENDIAN__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEB__) \ + || defined(__AARCH64EB__) \ + || defined(__MIPSEB__) \ + || defined(__MIPSEB) \ + || defined(_MIPSEB) \ + || defined(__m68k__) \ + || defined(__s390__) \ + || defined(__s390x__) \ + || defined(__zarch__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define MY_CPU_BE +#endif + + +#if defined(MY_CPU_LE) && defined(MY_CPU_BE) + #error Stop_Compiling_Bad_Endian +#endif + +#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE) + #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time +#endif + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT +#endif + +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + + +#ifndef MY_CPU_NAME + // #define MY_CPU_IS_UNKNOWN + #ifdef MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + +#ifdef __has_builtin + #define Z7_has_builtin(x) __has_builtin(x) +#else + #define Z7_has_builtin(x) 0 +#endif + + +#define Z7_BSWAP32_CONST(v) \ + ( (((UInt32)(v) << 24) ) \ + | (((UInt32)(v) << 8) & (UInt32)0xff0000) \ + | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \ + | (((UInt32)(v) >> 24) )) + + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) + +#include + +/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */ + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +#define Z7_BSWAP16(v) _byteswap_ushort(v) +#define Z7_BSWAP32(v) _byteswap_ulong (v) +#define Z7_BSWAP64(v) _byteswap_uint64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +/* GCC can generate slow code that calls function for __builtin_bswap32() for: + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. + - GCC for SPARC. + The code from CLANG for SPARC also is not fastest. + So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. +*/ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ + && !defined(MY_CPU_SPARC) \ + && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \ + ) + +#define Z7_BSWAP16(v) __builtin_bswap16(v) +#define Z7_BSWAP32(v) __builtin_bswap32(v) +#define Z7_BSWAP64(v) __builtin_bswap64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#else + +#define Z7_BSWAP16(v) ((UInt16) \ + ( ((UInt32)(v) << 8) \ + | ((UInt32)(v) >> 8) \ + )) + +#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v) + +#define Z7_BSWAP64(v) \ + ( ( ( (UInt64)(v) ) << 8 * 7 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \ + | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \ + | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \ + | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \ + | ( ( (UInt64)(v) >> 8 * 7 ) ) \ + ) + +#endif + + + +#ifdef MY_CPU_LE + #if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \ + || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) +/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions. + Description of problems: +problem-1 : 32-bit ARM architecture: + multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM) + require 32-bit (WORD) alignment (by 32-bit ARM architecture). + So there is "Alignment fault exception", if data is not aligned for 32-bit. + +problem-2 : 32-bit kernels and arm64 kernels: + 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception". + So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux. + + But some arm64 kernels do not handle these faults in 32-bit programs. + So we have unhandled exception for such instructions. + Probably some new arm64 kernels have fixed it, and unaligned + paired-access instructions work in new kernels? + +problem-3 : compiler for 32-bit arm: + Compilers use LDRD/STRD/LDM/STM for UInt64 accesses + and for another cases where two 32-bit accesses are fused + to one multi-access instruction. + So UInt64 variables must be aligned for 32-bit, and each + 32-bit access must be aligned for 32-bit, if we want to + avoid "Alignment fault" exception (handled or unhandled). + +problem-4 : performace: + Even if unaligned access is handled by kernel, it will be slow. + So if we allow unaligned access, we can get fast unaligned + single-access, and slow unaligned paired-access. + + We don't allow unaligned access on 32-bit arm, because compiler + genarates paired-access instructions that require 32-bit alignment, + and some arm64 kernels have no handler for these instructions. + Also unaligned paired-access instructions will be slow, if kernel handles them. +*/ + // it must be disabled: + // #define MY_CPU_LE_UNALIGN + #endif +#endif + + +#ifdef MY_CPU_LE_UNALIGN + +#define GetUi16(p) (*(const UInt16 *)(const void *)(p)) +#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 +#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif + +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } + +#else + +#define GetUi16(p) ( (UInt16) ( \ + ((const Byte *)(p))[0] | \ + ((UInt16)((const Byte *)(p))[1] << 8) )) + +#define GetUi32(p) ( \ + ((const Byte *)(p))[0] | \ + ((UInt32)((const Byte *)(p))[1] << 8) | \ + ((UInt32)((const Byte *)(p))[2] << 16) | \ + ((UInt32)((const Byte *)(p))[3] << 24)) + +#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); } + +#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); \ + _ppp_[2] = (Byte)(_vvv_ >> 16); \ + _ppp_[3] = (Byte)(_vvv_ >> 24); } + +#endif + + +#ifndef GetUi64 +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) +#endif + +#ifndef SetUi64 +#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ + SetUi32(_ppp2_ , (UInt32)_vvv2_) \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) } +#endif + + +#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) + +#if 0 +// Z7_BSWAP16 can be slow for x86-msvc +#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p))) +#else +#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16) +#endif + +#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } + +#if defined(MY_CPU_LE_UNALIGN_64) +#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); } +#endif + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte *)(p))[0] << 24) | \ + ((UInt32)((const Byte *)(p))[1] << 16) | \ + ((UInt32)((const Byte *)(p))[2] << 8) | \ + ((const Byte *)(p))[3] ) + +#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 24); \ + _ppp_[1] = (Byte)(_vvv_ >> 16); \ + _ppp_[2] = (Byte)(_vvv_ >> 8); \ + _ppp_[3] = (Byte)_vvv_; } + +#endif + +#ifndef GetBe64 +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#endif + +#ifndef SetBe64 +#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 56); \ + _ppp_[1] = (Byte)(_vvv_ >> 48); \ + _ppp_[2] = (Byte)(_vvv_ >> 40); \ + _ppp_[3] = (Byte)(_vvv_ >> 32); \ + _ppp_[4] = (Byte)(_vvv_ >> 24); \ + _ppp_[5] = (Byte)(_vvv_ >> 16); \ + _ppp_[6] = (Byte)(_vvv_ >> 8); \ + _ppp_[7] = (Byte)_vvv_; } +#endif + +#ifndef GetBe16 +#ifdef GetBe16_to32 +#define GetBe16(p) ( (UInt16) GetBe16_to32(p)) +#else +#define GetBe16(p) ( (UInt16) ( \ + ((UInt16)((const Byte *)(p))[0] << 8) | \ + ((const Byte *)(p))[1] )) +#endif +#endif + + +#if defined(MY_CPU_BE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_NATIVE_TO_BE_32(v) (v) +#elif defined(MY_CPU_LE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +#else +#error Stop_Compiling_Unknown_Endian_CONV +#endif + + +#if defined(MY_CPU_BE) + +#define GetBe64a(p) (*(const UInt64 *)(const void *)(p)) +#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetUi64a(p) GetUi64(p) +#define GetUi32a(p) GetUi32(p) +#define GetUi16a(p) GetUi16(p) +#define SetUi32a(p, v) SetUi32(p, v) +#define SetUi16a(p, v) SetUi16(p, v) + +#elif defined(MY_CPU_LE) + +#define GetUi64a(p) (*(const UInt64 *)(const void *)(p)) +#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetBe64a(p) GetBe64(p) +#define GetBe32a(p) GetBe32(p) +#define GetBe16a(p) GetBe16(p) +#define SetBe32a(p, v) SetBe32(p, v) +#define SetBe16a(p, v) SetBe16(p, v) + +#else +#error Stop_Compiling_Unknown_Endian_CPU_a +#endif + + +#ifndef GetBe16_to32 +#define GetBe16_to32(p) GetBe16(p) +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_OR_ARM64) \ + || defined(MY_CPU_PPC_OR_PPC64) + #define Z7_CPU_FAST_ROTATE_SUPPORTED +#endif + + +#ifdef MY_CPU_X86_OR_AMD64 + +void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function); +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); +#if defined(MY_CPU_AMD64) +#define Z7_IF_X86_CPUID_SUPPORTED +#else +#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc()) +#endif + +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX(void); +BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_CMOV(void); +BoolInt CPU_IsSupported_SSE(void); +BoolInt CPU_IsSupported_SSE2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_SHA512(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif +BoolInt CPU_IsSupported_SHA512(void); + +#endif + +#if defined(__APPLE__) +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +#endif + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Delta.c b/crnlib/lzma/Delta.c new file mode 100644 index 00000000..fc7e9fe9 --- /dev/null +++ b/crnlib/lzma/Delta.c @@ -0,0 +1,169 @@ +/* Delta.c -- Delta converter +2021-02-09 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Delta.h" + +void Delta_Init(Byte *state) +{ + unsigned i; + for (i = 0; i < DELTA_STATE_SIZE; i++) + state[i] = 0; +} + + +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + Byte temp[DELTA_STATE_SIZE]; + + if (size == 0) + return; + + { + unsigned i = 0; + do + temp[i] = state[i]; + while (++i != delta); + } + + if (size <= delta) + { + unsigned i = 0, k; + do + { + Byte b = *data; + *data++ = (Byte)(b - temp[i]); + temp[i] = b; + } + while (++i != size); + + k = 0; + + do + { + if (i == delta) + i = 0; + state[k] = temp[i++]; + } + while (++k != delta); + + return; + } + + { + Byte *p = data + size - delta; + { + unsigned i = 0; + do + state[i] = *p++; + while (++i != delta); + } + { + const Byte *lim = data + delta; + ptrdiff_t dif = -(ptrdiff_t)delta; + + if (((ptrdiff_t)size + dif) & 1) + { + --p; *p = (Byte)(*p - p[dif]); + } + + while (p != lim) + { + --p; *p = (Byte)(*p - p[dif]); + --p; *p = (Byte)(*p - p[dif]); + } + + dif = -dif; + + do + { + --p; *p = (Byte)(*p - temp[--dif]); + } + while (dif != 0); + } + } +} + + +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + unsigned i; + const Byte *lim; + + if (size == 0) + return; + + i = 0; + lim = data + size; + + if (size <= delta) + { + do + *data = (Byte)(*data + state[i++]); + while (++data != lim); + + for (; delta != i; state++, delta--) + *state = state[i]; + data -= i; + } + else + { + /* + #define B(n) b ## n + #define I(n) Byte B(n) = state[n]; + #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); } + #define F(n) if (data != lim) { U(n) } + + if (delta == 1) + { + I(0) + if ((lim - data) & 1) { U(0) } + while (data != lim) { U(0) U(0) } + data -= 1; + } + else if (delta == 2) + { + I(0) I(1) + lim -= 1; while (data < lim) { U(0) U(1) } + lim += 1; F(0) + data -= 2; + } + else if (delta == 3) + { + I(0) I(1) I(2) + lim -= 2; while (data < lim) { U(0) U(1) U(2) } + lim += 2; F(0) F(1) + data -= 3; + } + else if (delta == 4) + { + I(0) I(1) I(2) I(3) + lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) } + lim += 3; F(0) F(1) F(2) + data -= 4; + } + else + */ + { + do + { + *data = (Byte)(*data + state[i++]); + data++; + } + while (i != delta); + + { + ptrdiff_t dif = -(ptrdiff_t)delta; + do + *data = (Byte)(*data + data[dif]); + while (++data != lim); + data += dif; + } + } + } + + do + *state++ = *data; + while (++data != lim); +} diff --git a/crnlib/lzma/Delta.h b/crnlib/lzma/Delta.h new file mode 100644 index 00000000..be77c6c6 --- /dev/null +++ b/crnlib/lzma/Delta.h @@ -0,0 +1,19 @@ +/* Delta.h -- Delta converter +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_DELTA_H +#define ZIP7_INC_DELTA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define DELTA_STATE_SIZE 256 + +void Delta_Init(Byte *state); +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size); +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/DllSecur.c b/crnlib/lzma/DllSecur.c new file mode 100644 index 00000000..d65f7edc --- /dev/null +++ b/crnlib/lzma/DllSecur.c @@ -0,0 +1,99 @@ +/* DllSecur.c -- DLL loading security +2023-12-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#ifdef _WIN32 + +#include "7zWindows.h" + +#include "DllSecur.h" + +#ifndef UNDER_CE + +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + +typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags); + +#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400 +#define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800 + +#define DELIM "\0" + +static const char * const g_Dlls = + "userenv" + DELIM "setupapi" + DELIM "apphelp" + DELIM "propsys" + DELIM "dwmapi" + DELIM "cryptbase" + DELIM "oleacc" + DELIM "clbcatq" + DELIM "version" + #ifndef _CONSOLE + DELIM "uxtheme" + #endif + DELIM; + +#endif + +#ifdef __clang__ + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif +#if defined (_MSC_VER) && _MSC_VER >= 1900 +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif + +#define IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN \ + if ((UInt16)GetVersion() != 6) { \ + const \ + Func_SetDefaultDllDirectories setDllDirs = \ + (Func_SetDefaultDllDirectories) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \ + "SetDefaultDllDirectories"); \ + if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; } + +void My_SetDefaultDllDirectories(void) +{ + #ifndef UNDER_CE + IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN + #endif +} + + +void LoadSecurityDlls(void) +{ + #ifndef UNDER_CE + // at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ??? + IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN + { + wchar_t buf[MAX_PATH + 100]; + const char *dll; + unsigned pos = GetSystemDirectoryW(buf, MAX_PATH + 2); + if (pos == 0 || pos > MAX_PATH) + return; + if (buf[pos - 1] != '\\') + buf[pos++] = '\\'; + for (dll = g_Dlls; *dll != 0;) + { + wchar_t *dest = &buf[pos]; + for (;;) + { + const char c = *dll++; + if (c == 0) + break; + *dest++ = (Byte)c; + } + dest[0] = '.'; + dest[1] = 'd'; + dest[2] = 'l'; + dest[3] = 'l'; + dest[4] = 0; + // lstrcatW(buf, L".dll"); + LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); + } + } + #endif +} + +#endif // _WIN32 diff --git a/crnlib/lzma/DllSecur.h b/crnlib/lzma/DllSecur.h new file mode 100644 index 00000000..87bacc63 --- /dev/null +++ b/crnlib/lzma/DllSecur.h @@ -0,0 +1,20 @@ +/* DllSecur.h -- DLL loading for security +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_DLL_SECUR_H +#define ZIP7_INC_DLL_SECUR_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#ifdef _WIN32 + +void My_SetDefaultDllDirectories(void); +void LoadSecurityDlls(void); + +#endif + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/LzFind.c b/crnlib/lzma/LzFind.c new file mode 100644 index 00000000..8975fc20 --- /dev/null +++ b/crnlib/lzma/LzFind.c @@ -0,0 +1,1746 @@ +/* LzFind.c -- Match finder for LZ algorithms +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include +// #include + +#include "CpuArch.h" +#include "LzFind.h" +#include "LzHash.h" + +#define kBlockMoveAlign (1 << 7) // alignment for memmove() +#define kBlockSizeAlign (1 << 16) // alignment for block allocation +#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary + +#define kEmptyHashValue 0 + +#define kMaxValForNormalize ((UInt32)0) +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug + +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses + +#define GET_AVAIL_BYTES(p) \ + Inline_MatchFinder_GetNumAvailableBytes(p) + + +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) +#define kFix5HashSize kFix4HashSize + +/* + HASH2_CALC: + if (hv) match, then cur[0] and cur[1] also match +*/ +#define HASH2_CALC hv = GetUi16(cur); + +// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255] + +/* + HASH3_CALC: + if (cur[0]) and (h2) match, then cur[1] also match + if (cur[0]) and (hv) match, then cur[1] and cur[2] also match +*/ +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \ + /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \ + hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; } + +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + + +static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) +{ + // if (!p->directInput) + { + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; + } +} + + +static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc) +{ + if (blockSize == 0) + return 0; + if (!p->bufBase || p->blockSize != blockSize) + { + // size_t blockSizeT; + LzInWindow_Free(p, alloc); + p->blockSize = blockSize; + // blockSizeT = blockSize; + + // printf("\nblockSize = 0x%x\n", blockSize); + /* + #if defined _WIN64 + // we can allocate 4GiB, but still use UInt32 for (p->blockSize) + // we use UInt32 type for (p->blockSize), because + // we don't want to wrap over 4 GiB, + // when we use (p->streamPos - p->pos) that is UInt32. + if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign) + { + blockSizeT = ((size_t)1 << 32); + printf("\nchanged to blockSizeT = 4GiB\n"); + } + #endif + */ + + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufBase); + // return 0; // for debug + } + return (p->bufBase != NULL); +} + +static const Byte *MatchFinder_GetPointerToCurrentPos(void *p) +{ + return ((CMatchFinder *)p)->buffer; +} + +static UInt32 MatchFinder_GetNumAvailableBytes(void *p) +{ + return GET_AVAIL_BYTES((CMatchFinder *)p); +} + + +Z7_NO_INLINE +static void MatchFinder_ReadBlock(CMatchFinder *p) +{ + if (p->streamEndWasReached || p->result != SZ_OK) + return; + + /* We use (p->streamPos - p->pos) value. + (p->streamPos < p->pos) is allowed. */ + + if (p->directInput) + { + UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); + if (curSize > p->directInputRem) + curSize = (UInt32)p->directInputRem; + p->streamPos += curSize; + p->directInputRem -= curSize; + if (p->directInputRem == 0) + p->streamEndWasReached = 1; + return; + } + + for (;;) + { + const Byte *dest = p->buffer + GET_AVAIL_BYTES(p); + size_t size = (size_t)(p->bufBase + p->blockSize - dest); + if (size == 0) + { + /* we call ReadBlock() after NeedMove() and MoveBlock(). + NeedMove() and MoveBlock() povide more than (keepSizeAfter) + to the end of (blockSize). + So we don't execute this branch in normal code flow. + We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock(). + */ + // p->result = SZ_ERROR_FAIL; // we can show error here + return; + } + + // #define kRead 3 + // if (size > kRead) size = kRead; // for debug + + /* + // we need cast (Byte *)dest. + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wcast-qual" + #endif + */ + p->result = ISeqInStream_Read(p->stream, + p->bufBase + (dest - p->bufBase), &size); + if (p->result != SZ_OK) + return; + if (size == 0) + { + p->streamEndWasReached = 1; + return; + } + p->streamPos += (UInt32)size; + if (GET_AVAIL_BYTES(p) > p->keepSizeAfter) + return; + /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function + (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */ + } + + // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter) +} + + + +Z7_NO_INLINE +void MatchFinder_MoveBlock(CMatchFinder *p) +{ + const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore; + const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; + p->buffer = p->bufBase + keepBefore; + memmove(p->bufBase, + p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + keepBefore + (size_t)GET_AVAIL_BYTES(p)); +} + +/* We call MoveBlock() before ReadBlock(). + So MoveBlock() can be wasteful operation, if the whole input data + can fit in current block even without calling MoveBlock(). + in important case where (dataSize <= historySize) + condition (p->blockSize > dataSize + p->keepSizeAfter) is met + So there is no MoveBlock() in that case case. +*/ + +int MatchFinder_NeedMove(CMatchFinder *p) +{ + if (p->directInput) + return 0; + if (p->streamEndWasReached || p->result != SZ_OK) + return 0; + return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter); +} + +void MatchFinder_ReadIfRequired(CMatchFinder *p) +{ + if (p->keepSizeAfter >= GET_AVAIL_BYTES(p)) + MatchFinder_ReadBlock(p); +} + + + +static void MatchFinder_SetDefaultSettings(CMatchFinder *p) +{ + p->cutValue = 32; + p->btMode = 1; + p->numHashBytes = 4; + p->numHashBytes_Min = 2; + p->numHashOutBits = 0; + p->bigHash = 0; +} + +#define kCrcPoly 0xEDB88320 + +void MatchFinder_Construct(CMatchFinder *p) +{ + unsigned i; + p->buffer = NULL; + p->bufBase = NULL; + p->directInput = 0; + p->stream = NULL; + p->hash = NULL; + p->expectedDataSize = (UInt64)(Int64)-1; + MatchFinder_SetDefaultSettings(p); + + for (i = 0; i < 256; i++) + { + UInt32 r = (UInt32)i; + unsigned j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); + p->crc[i] = r; + } +} + +#undef kCrcPoly + +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->hash); + p->hash = NULL; +} + +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) +{ + MatchFinder_FreeThisClassMemory(p, alloc); + LzInWindow_Free(p, alloc); +} + +static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) +{ + const size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + return NULL; + return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); +} + +#if (kBlockSizeReserveMin < kBlockSizeAlign * 2) + #error Stop_Compiling_Bad_Reserve +#endif + + + +static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) +{ + UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter); + /* + if (historySize > kMaxHistorySize) + return 0; + */ + // printf("\nhistorySize == 0x%x\n", historySize); + + if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow + return 0; + + { + const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign; + const UInt32 rem = kBlockSizeMax - blockSize; + const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)) + + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here + if (blockSize >= kBlockSizeMax + || rem < kBlockSizeReserveMin) // we reject settings that will be slow + return 0; + if (reserve >= rem) + blockSize = kBlockSizeMax; + else + { + blockSize += reserve; + blockSize &= ~(UInt32)(kBlockSizeAlign - 1); + } + } + // printf("\n LzFind_blockSize = %x\n", blockSize); + // printf("\n LzFind_blockSize = %d\n", blockSize >> 20); + return blockSize; +} + + +// input is historySize +static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + +// input is historySize +static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + hs >>= 1; + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc) +{ + /* we need one additional byte in (p->keepSizeBefore), + since we use MoveBlock() after (p->pos++) and before dictionary using */ + // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + + keepAddBufferAfter += matchMaxLen; + /* we need (p->keepSizeAfter >= p->numHashBytes) */ + if (keepAddBufferAfter < p->numHashBytes) + keepAddBufferAfter = p->numHashBytes; + // keepAddBufferAfter -= 2; // for debug + p->keepSizeAfter = keepAddBufferAfter; + + if (p->directInput) + p->blockSize = 0; + if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) + { + size_t hashSizeSum; + { + UInt32 hs; + UInt32 hsCur; + + if (p->numHashOutBits != 0) + { + unsigned numBits = p->numHashOutBits; + const unsigned nbMax = + (p->numHashBytes == 2 ? 16 : + (p->numHashBytes == 3 ? 24 : 32)); + if (numBits > nbMax) + numBits = nbMax; + if (numBits >= 32) + hs = (UInt32)0 - 1; + else + hs = ((UInt32)1 << numBits) - 1; + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); + if (hs > hs2) + hs = hs2; + } + hsCur = hs; + if (p->expectedDataSize < historySize) + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); + if (hsCur > hs2) + hsCur = hs2; + } + } + else + { + hs = MatchFinder_GetHashMask(p, historySize); + hsCur = hs; + if (p->expectedDataSize < historySize) + { + hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); + if (hsCur > hs) // is it possible? + hsCur = hs; + } + } + + p->hashMask = hsCur; + + hashSizeSum = hs; + hashSizeSum++; + if (hashSizeSum < hs) + return 0; + { + UInt32 fixedHashSize = 0; + if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size; + if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; + hashSizeSum += fixedHashSize; + p->fixedHashSize = fixedHashSize; + } + } + + p->matchMaxLen = matchMaxLen; + + { + size_t newSize; + size_t numSons; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it + p->historySize = historySize; + p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) + + numSons = newCyclicBufferSize; + if (p->btMode) + numSons <<= 1; + newSize = hashSizeSum + numSons; + + if (numSons < newCyclicBufferSize || newSize < numSons) + return 0; + + // aligned size is not required here, but it can be better for some loops + #define NUM_REFS_ALIGN_MASK 0xF + newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; + + // 22.02: we don't reallocate buffer, if old size is enough + if (p->hash && p->numRefs >= newSize) + return 1; + + MatchFinder_FreeThisClassMemory(p, alloc); + p->numRefs = newSize; + p->hash = AllocRefs(newSize, alloc); + + if (p->hash) + { + p->son = p->hash + hashSizeSum; + return 1; + } + } + } + + MatchFinder_Free(p, alloc); + return 0; +} + + +static void MatchFinder_SetLimits(CMatchFinder *p) +{ + UInt32 k; + UInt32 n = kMaxValForNormalize - p->pos; + if (n == 0) + n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0) + + k = p->cyclicBufferSize - p->cyclicBufferPos; + if (k < n) + n = k; + + k = GET_AVAIL_BYTES(p); + { + const UInt32 ksa = p->keepSizeAfter; + UInt32 mm = p->matchMaxLen; + if (k > ksa) + k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock + else if (k >= mm) + { + // the limitation for (p->lenLimit) update + k -= mm; // optimization : to reduce the number of checks + k++; + // k = 1; // non-optimized version : for debug + } + else + { + mm = k; + if (k != 0) + k = 1; + } + p->lenLimit = mm; + } + if (k < n) + n = k; + + p->posLimit = p->pos + n; +} + + +void MatchFinder_Init_LowHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash; + const size_t numItems = p->fixedHashSize; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_HighHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash + p->fixedHashSize; + const size_t numItems = (size_t)p->hashMask + 1; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_4(CMatchFinder *p) +{ + if (!p->directInput) + p->buffer = p->bufBase; + { + /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. + the code in CMatchFinderMt expects (pos = 1) */ + p->pos = + p->streamPos = + 1; // it's smallest optimal value. do not change it + // 0; // for debug + } + p->result = SZ_OK; + p->streamEndWasReached = 0; +} + + +// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug + +void MatchFinder_Init(void *_p) +{ + CMatchFinder *p = (CMatchFinder *)_p; + MatchFinder_Init_HighHash(p); + MatchFinder_Init_LowHash(p); + MatchFinder_Init_4(p); + // if (readData) + MatchFinder_ReadBlock(p); + + /* if we init (cyclicBufferPos = pos), then we can use one variable + instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */ + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos) + // p->cyclicBufferPos = 0; // smallest value + // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses. + MatchFinder_SetLimits(p); +} + + + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + + #define USE_LZFIND_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_256 + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1600) + #define USE_LZFIND_SATUR_SUB_128 + #endif + #if (_MSC_VER >= 1900) + #define USE_LZFIND_SATUR_SUB_256 + #endif + #endif + +#elif defined(MY_CPU_ARM64) \ + /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */ + + #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) + #define USE_LZFIND_SATUR_SUB_128 + #ifdef MY_CPU_ARM64 + // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) + #else + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon"))) + #endif + + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1910) + #define USE_LZFIND_SATUR_SUB_128 + #endif + #endif + + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) + #include + #else + #include + #endif + +#endif + + +#ifdef USE_LZFIND_SATUR_SUB_128 + +// #define Z7_SHOW_HW_STATUS + +#ifdef Z7_SHOW_HW_STATUS +#include +#define PRF(x) x +PRF(;) +#else +#define PRF(x) +#endif + + +#ifdef MY_CPU_ARM_OR_ARM64 + +#ifdef MY_CPU_ARM64 +// #define FORCE_LZFIND_SATUR_SUB_128 +#endif +typedef uint32x4_t LzFind_v128; +#define SASUB_128_V(v, s) \ + vsubq_u32(vmaxq_u32(v, s), s) + +#else // MY_CPU_ARM_OR_ARM64 + +#include // sse4.1 + +typedef __m128i LzFind_v128; +// SSE 4.1 +#define SASUB_128_V(v, s) \ + _mm_sub_epi32(_mm_max_epu32(v, s), s) + +#endif // MY_CPU_ARM_OR_ARM64 + + +#define SASUB_128(i) \ + *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \ + *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2); + + +Z7_NO_INLINE +static +#ifdef LZFIND_ATTRIB_SSE41 +LZFIND_ATTRIB_SSE41 +#endif +void +Z7_FASTCALL +LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + const LzFind_v128 sub2 = + #ifdef MY_CPU_ARM_OR_ARM64 + vdupq_n_u32(subValue); + #else + _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + #endif + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SASUB_128(0) SASUB_128(1) items += 2 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; + } + while (items != lim); +} + + + +#ifdef USE_LZFIND_SATUR_SUB_256 + +#include // avx +/* +clang :immintrin.h uses +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX2__) +#include +#endif +so we need for clang-cl */ + +#if defined(__clang__) +#include +#include +#endif + +// AVX2: +#define SASUB_256(i) \ + *( __m256i *)( void *)(items + (i) * 8) = \ + _mm256_sub_epi32(_mm256_max_epu32( \ + *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); + +Z7_NO_INLINE +static +#ifdef LZFIND_ATTRIB_AVX2 +LZFIND_ATTRIB_AVX2 +#endif +void +Z7_FASTCALL +LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + const __m256i sub2 = _mm256_set_epi32( + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SASUB_256(0) SASUB_256(1) items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; + } + while (items != lim); +} +#endif // USE_LZFIND_SATUR_SUB_256 + +#ifndef FORCE_LZFIND_SATUR_SUB_128 +typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)( + UInt32 subValue, CLzRef *items, const CLzRef *lim); +static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; +#endif // FORCE_LZFIND_SATUR_SUB_128 + +#endif // USE_LZFIND_SATUR_SUB_128 + + +// kEmptyHashValue must be zero +// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; } +#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; } + +#ifdef FORCE_LZFIND_SATUR_SUB_128 + +#define DEFAULT_SaturSub LzFind_SaturSub_128 + +#else + +#define DEFAULT_SaturSub LzFind_SaturSub_32 + +Z7_NO_INLINE +static +void +Z7_FASTCALL +LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + } + while (items != lim); +} + +#endif + + +Z7_NO_INLINE +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) +{ + #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7) + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) + { + SASUB_32(0) + items++; + } + { + const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1); + CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask); + numItems &= k_Align_Mask; + if (items != lim) + { + #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128) + if (g_LzFind_SaturSub) + g_LzFind_SaturSub(subValue, items, lim); + else + #endif + DEFAULT_SaturSub(subValue, items, lim); + } + items = lim; + } + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0; numItems--) + { + SASUB_32(0) + items++; + } +} + + + +// call MatchFinder_CheckLimits() only after (p->pos++) update + +Z7_NO_INLINE +static void MatchFinder_CheckLimits(CMatchFinder *p) +{ + if (// !p->streamEndWasReached && p->result == SZ_OK && + p->keepSizeAfter == GET_AVAIL_BYTES(p)) + { + // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p)) + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); + } + + if (p->pos == kMaxValForNormalize) + if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data. + /* + if we disable normalization for last bytes of data, and + if (data_size == 4 GiB), we don't call wastfull normalization, + but (pos) will be wrapped over Zero (0) in that case. + And we cannot resume later to normal operation + */ + { + // MatchFinder_Normalize(p); + /* after normalization we need (p->pos >= p->historySize + 1); */ + /* we can reduce subValue to aligned value, if want to keep alignment + of (p->pos) and (p->buffer) for speculated accesses. */ + const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; + // const UInt32 subValue = (1 << 15); // for debug + // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); + MatchFinder_REDUCE_OFFSETS(p, subValue) + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize); + { + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + MatchFinder_Normalize3(subValue, p->son, numSonRefs); + } + } + + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + + MatchFinder_SetLimits(p); +} + + +/* + (lenLimit > maxLen) +*/ +Z7_FORCE_INLINE +static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, unsigned maxLen) +{ + /* + son[_cyclicBufferPos] = curMatch; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return d; + { + const Byte *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + UInt32 len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *d++ = len; + *d++ = delta - 1; + if (len == lenLimit) + return d; + } + } + } + } + */ + + const Byte *lim = cur + lenLimit; + son[_cyclicBufferPos] = curMatch; + + do + { + UInt32 delta; + + if (curMatch == 0) + break; + // if (curMatch2 >= curMatch) return NULL; + delta = pos - curMatch; + if (delta >= _cyclicBufferSize) + break; + { + ptrdiff_t diff; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) + { + const Byte *c = cur; + while (*c == c[diff]) + { + if (++c == lim) + { + d[0] = (UInt32)(lim - cur); + d[1] = delta - 1; + return d + 2; + } + } + { + const unsigned len = (unsigned)(c - cur); + if (maxLen < len) + { + maxLen = len; + d[0] = (UInt32)len; + d[1] = delta - 1; + d += 2; + } + } + } + } + } + while (--cutValue); + + return d; +} + + +Z7_FORCE_INLINE +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, UInt32 maxLen) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + + UInt32 cmCheck; + + // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (cmCheck < curMatch) + do + { + const UInt32 delta = pos - curMatch; + { + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + unsigned len = (len0 < len1 ? len0 : len1); + const UInt32 pair0 = pair[0]; + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = (UInt32)len; + *d++ = (UInt32)len; + *d++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair0; + *ptr0 = pair[1]; + return d; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + // const UInt32 curMatch2 = pair[1]; + // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + // curMatch = curMatch2; + curMatch = pair[1]; + ptr1 = pair + 1; + len1 = len; + } + else + { + *ptr0 = curMatch; + curMatch = pair[0]; + ptr0 = pair; + len0 = len; + } + } + } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return d; +} + + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + + UInt32 cmCheck; + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (// curMatch >= pos || // failure + cmCheck < curMatch) + do + { + const UInt32 delta = pos - curMatch; + { + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + unsigned len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + curMatch = pair[1]; + ptr1 = pair + 1; + len1 = len; + } + else + { + *ptr0 = curMatch; + curMatch = pair[0]; + ptr0 = pair; + len0 = len; + } + } + } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return; +} + + +#define MOVE_POS \ + p->cyclicBufferPos++; \ + p->buffer++; \ + { const UInt32 pos1 = p->pos + 1; \ + p->pos = pos1; \ + if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } + +#define MOVE_POS_RET MOVE_POS return distances; + +Z7_NO_INLINE +static void MatchFinder_MovePos(CMatchFinder *p) +{ + /* we go here at the end of stream data, when (avail < num_hash_bytes) + We don't update sons[cyclicBufferPos << btMode]. + So (sons) record will contain junk. And we cannot resume match searching + to normal operation, even if we will provide more input data in buffer. + p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue + if (p->btMode) + p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue + */ + MOVE_POS +} + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ + UInt32 hv; const Byte *cur; UInt32 curMatch; \ + UInt32 lenLimit = p->lenLimit; \ + if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) +#define SKIP_HEADER(minLen) \ + do { GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \ + p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define SKIP_FOOTER \ + SkipMatchesSpec(MF_PARAMS(p)); \ + MOVE_POS \ + } while (--num); + +#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ + distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \ + MOVE_POS_RET + +#define GET_MATCHES_FOOTER_BT(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) + +#define GET_MATCHES_FOOTER_HC(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) + + + +#define UPDATE_maxLen { \ + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ + const Byte *c = cur + maxLen; \ + const Byte *lim = cur + lenLimit; \ + for (; c != lim; c++) if (*(c + diff) != *c) break; \ + maxLen = (unsigned)(c - cur); } + +static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances) +{ + CMatchFinder *p = (CMatchFinder *)_p; + GET_MATCHES_HEADER(2) + HASH2_CALC + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_BT(1) +} + +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_BT(2) +} + + +#define SET_mmm \ + mmm = p->cyclicBufferSize; \ + if (pos < mmm) \ + mmm = pos; + + +static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances) +{ + CMatchFinder *p = (CMatchFinder *)_p; + UInt32 mmm; + UInt32 h2, d2, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(3) + + HASH3_CALC + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash[h2]; + + curMatch = (hash + kFix3HashSize)[hv]; + + hash[h2] = pos; + (hash + kFix3HashSize)[hv] = pos; + + SET_mmm + + maxLen = 2; + + if (d2 < mmm && *(cur - d2) == *cur) + { + UPDATE_maxLen + distances[0] = (UInt32)maxLen; + distances[1] = d2 - 1; + distances += 2; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET + } + } + + GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances) +{ + CMatchFinder *p = (CMatchFinder *)_p; + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + SET_mmm + + maxLen = 3; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET + } + break; + } + + GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances) +{ + CMatchFinder *p = (CMatchFinder *)_p; + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + // d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + // (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + SET_mmm + + maxLen = 4; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET + } + break; + } + + GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances) +{ + CMatchFinder *p = (CMatchFinder *)_p; + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + SET_mmm + + maxLen = 3; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET + } + break; + } + + GET_MATCHES_FOOTER_HC(maxLen) +} + + +static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances) +{ + CMatchFinder *p = (CMatchFinder *)_p; + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + // d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + // (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + SET_mmm + + maxLen = 4; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET + } + break; + } + + GET_MATCHES_FOOTER_HC(maxLen) +} + + +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_HC(2) +} + + +static void Bt2_MatchFinder_Skip(void *_p, UInt32 num) +{ + CMatchFinder *p = (CMatchFinder *)_p; + SKIP_HEADER(2) + { + HASH2_CALC + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + } + SKIP_FOOTER +} + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + SKIP_HEADER(3) + { + HASH_ZIP_CALC + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + } + SKIP_FOOTER +} + +static void Bt3_MatchFinder_Skip(void *_p, UInt32 num) +{ + CMatchFinder *p = (CMatchFinder *)_p; + SKIP_HEADER(3) + { + UInt32 h2; + UInt32 *hash; + HASH3_CALC + hash = p->hash; + curMatch = (hash + kFix3HashSize)[hv]; + hash[h2] = + (hash + kFix3HashSize)[hv] = p->pos; + } + SKIP_FOOTER +} + +static void Bt4_MatchFinder_Skip(void *_p, UInt32 num) +{ + CMatchFinder *p = (CMatchFinder *)_p; + SKIP_HEADER(4) + { + UInt32 h2, h3; + UInt32 *hash; + HASH4_CALC + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; + } + SKIP_FOOTER +} + +static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) +{ + CMatchFinder *p = (CMatchFinder *)_p; + SKIP_HEADER(5) + { + UInt32 h2, h3; + UInt32 *hash; + HASH5_CALC + hash = p->hash; + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; + } + SKIP_FOOTER +} + + +#define HC_SKIP_HEADER(minLen) \ + do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ + const Byte *cur; \ + UInt32 *hash; \ + UInt32 *son; \ + UInt32 pos = p->pos; \ + UInt32 num2 = num; \ + /* (p->pos == p->posLimit) is not allowed here !!! */ \ + { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + num -= num2; \ + { const UInt32 cycPos = p->cyclicBufferPos; \ + son = p->son + cycPos; \ + p->cyclicBufferPos = cycPos + num2; } \ + cur = p->buffer; \ + hash = p->hash; \ + do { \ + UInt32 curMatch; \ + UInt32 hv; + + +#define HC_SKIP_FOOTER \ + cur++; pos++; *son++ = curMatch; \ + } while (--num2); \ + p->buffer = cur; \ + p->pos = pos; \ + if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ + }} while(num); \ + + +static void Hc4_MatchFinder_Skip(void *_p, UInt32 num) +{ + CMatchFinder *p = (CMatchFinder *)_p; + HC_SKIP_HEADER(4) + + UInt32 h2, h3; + HASH4_CALC + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = pos; + + HC_SKIP_FOOTER +} + + +static void Hc5_MatchFinder_Skip(void *_p, UInt32 num) +{ + CMatchFinder *p = (CMatchFinder *)_p; + HC_SKIP_HEADER(5) + + UInt32 h2, h3; + HASH5_CALC + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = pos; + + HC_SKIP_FOOTER +} + + +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + HC_SKIP_HEADER(3) + + HASH_ZIP_CALC + curMatch = hash[hv]; + hash[hv] = pos; + + HC_SKIP_FOOTER +} + + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) +{ + vTable->Init = MatchFinder_Init; + vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos; + if (!p->btMode) + { + if (p->numHashBytes <= 4) + { + vTable->GetMatches = Hc4_MatchFinder_GetMatches; + vTable->Skip = Hc4_MatchFinder_Skip; + } + else + { + vTable->GetMatches = Hc5_MatchFinder_GetMatches; + vTable->Skip = Hc5_MatchFinder_Skip; + } + } + else if (p->numHashBytes == 2) + { + vTable->GetMatches = Bt2_MatchFinder_GetMatches; + vTable->Skip = Bt2_MatchFinder_Skip; + } + else if (p->numHashBytes == 3) + { + vTable->GetMatches = Bt3_MatchFinder_GetMatches; + vTable->Skip = Bt3_MatchFinder_Skip; + } + else if (p->numHashBytes == 4) + { + vTable->GetMatches = Bt4_MatchFinder_GetMatches; + vTable->Skip = Bt4_MatchFinder_Skip; + } + else + { + vTable->GetMatches = Bt5_MatchFinder_GetMatches; + vTable->Skip = Bt5_MatchFinder_Skip; + } +} + + + +void LzFindPrepare(void) +{ + #ifndef FORCE_LZFIND_SATUR_SUB_128 + #ifdef USE_LZFIND_SATUR_SUB_128 + LZFIND_SATUR_SUB_CODE_FUNC f = NULL; + #ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + // #pragma message ("=== LzFind NEON") + PRF(printf("\n=== LzFind NEON\n")); + f = LzFind_SaturSub_128; + } + // f = 0; // for debug + } + #else // MY_CPU_ARM_OR_ARM64 + if (CPU_IsSupported_SSE41()) + { + // #pragma message ("=== LzFind SSE41") + PRF(printf("\n=== LzFind SSE41\n")); + f = LzFind_SaturSub_128; + + #ifdef USE_LZFIND_SATUR_SUB_256 + if (CPU_IsSupported_AVX2()) + { + // #pragma message ("=== LzFind AVX2") + PRF(printf("\n=== LzFind AVX2\n")); + f = LzFind_SaturSub_256; + } + #endif + } + #endif // MY_CPU_ARM_OR_ARM64 + g_LzFind_SaturSub = f; + #endif // USE_LZFIND_SATUR_SUB_128 + #endif // FORCE_LZFIND_SATUR_SUB_128 +} + + +#undef MOVE_POS +#undef MOVE_POS_RET +#undef PRF diff --git a/crnlib/lzma/LzFind.h b/crnlib/lzma/LzFind.h new file mode 100644 index 00000000..fd778747 --- /dev/null +++ b/crnlib/lzma/LzFind.h @@ -0,0 +1,160 @@ +/* LzFind.h -- Match finder for LZ algorithms +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZ_FIND_H +#define ZIP7_INC_LZ_FIND_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef UInt32 CLzRef; + +typedef struct +{ + const Byte *buffer; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + + Byte streamEndWasReached; + Byte btMode; + Byte bigHash; + Byte directInput; + + UInt32 matchMaxLen; + CLzRef *hash; + CLzRef *son; + UInt32 hashMask; + UInt32 cutValue; + + Byte *bufBase; + ISeqInStreamPtr stream; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; + + UInt32 numHashBytes; + size_t directInputRem; + UInt32 historySize; + UInt32 fixedHashSize; + Byte numHashBytes_Min; + Byte numHashOutBits; + Byte _pad2_[2]; + SRes result; + UInt32 crc[256]; + size_t numRefs; + + UInt64 expectedDataSize; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos)) + +/* +#define Inline_MatchFinder_IsFinishedOK(p) \ + ((p)->streamEndWasReached \ + && (p)->streamPos == (p)->pos \ + && (!(p)->directInput || (p)->directInputRem == 0)) +*/ + +int MatchFinder_NeedMove(CMatchFinder *p); +/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */ +void MatchFinder_MoveBlock(CMatchFinder *p); +void MatchFinder_ReadIfRequired(CMatchFinder *p); + +void MatchFinder_Construct(CMatchFinder *p); + +/* (directInput = 0) is default value. + It's required to provide correct (directInput) value + before calling MatchFinder_Create(). + You can set (directInput) by any of the following calls: + - MatchFinder_SET_DIRECT_INPUT_BUF() + - MatchFinder_SET_STREAM() + - MatchFinder_SET_STREAM_MODE() +*/ + +#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \ + (p)->stream = NULL; \ + (p)->directInput = 1; \ + (p)->buffer = (_src_); \ + (p)->directInputRem = (_srcLen_); } + +/* +#define MatchFinder_SET_STREAM_MODE(p) { \ + (p)->directInput = 0; } +*/ + +#define MatchFinder_SET_STREAM(p, _stream_) { \ + (p)->stream = _stream_; \ + (p)->directInput = 0; } + + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); + +/* +#define MatchFinder_INIT_POS(p, val) \ + (p)->pos = (val); \ + (p)->streamPos = (val); +*/ + +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +#define MatchFinder_REDUCE_OFFSETS(p, subValue) \ + (p)->pos -= (subValue); \ + (p)->streamPos -= (subValue); + + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *distances, UInt32 maxLen); + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. + Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef void (*Mf_Init_Func)(void *object); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); +typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); +typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef void (*Mf_Skip_Func)(void *object, UInt32); + +typedef struct +{ + Mf_Init_Func Init; + Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; + Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder2; + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); + +void MatchFinder_Init_LowHash(CMatchFinder *p); +void MatchFinder_Init_HighHash(CMatchFinder *p); +void MatchFinder_Init_4(CMatchFinder *p); +// void MatchFinder_Init(CMatchFinder *p); +void MatchFinder_Init(void *p); + +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); + +void LzFindPrepare(void); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/LzFindMt.c b/crnlib/lzma/LzFindMt.c new file mode 100644 index 00000000..260e3210 --- /dev/null +++ b/crnlib/lzma/LzFindMt.c @@ -0,0 +1,1414 @@ +/* LzFindMt.c -- multithreaded Match finder for LZ algorithms +2024-01-22 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #include + +#include "CpuArch.h" + +#include "LzHash.h" +#include "LzFindMt.h" + +// #define LOG_ITERS + +// #define LOG_THREAD + +#ifdef LOG_THREAD +#include +#define PRF(x) x +#else +#define PRF(x) +#endif + +#ifdef LOG_ITERS +#include +extern UInt64 g_NumIters_Tree; +extern UInt64 g_NumIters_Loop; +extern UInt64 g_NumIters_Bytes; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +#define kMtHashBlockSize ((UInt32)1 << 17) +#define kMtHashNumBlocks (1 << 1) + +#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize) + +#define kMtBtBlockSize ((UInt32)1 << 16) +#define kMtBtNumBlocks (1 << 4) + +#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize) + +/* + HASH functions: + We use raw 8/16 bits from a[1] and a[2], + xored with crc(a[0]) and crc(a[3]). + We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches. + our crc() function provides one-to-one correspondence for low 8-bit values: + (crc[0...0xFF] & 0xFF) <-> [0...0xFF] +*/ + +#define MF(mt) ((mt)->MatchFinder) +#define MF_CRC (p->crc) + +// #define MF(mt) (&(mt)->MatchFinder) +// #define MF_CRC (p->MatchFinder.crc) + +#define MT_HASH2_CALC \ + h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +/* +#define MT_HASH3_CALC__NO_2 { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; } + // (kHash4Size - 1); +*/ + + +Z7_NO_INLINE +static void MtSync_Construct(CMtSync *p) +{ + p->affinity = 0; + p->wasCreated = False; + p->csWasInitialized = False; + p->csWasEntered = False; + Thread_CONSTRUCT(&p->thread) + Event_Construct(&p->canStart); + Event_Construct(&p->wasStopped); + Semaphore_Construct(&p->freeSemaphore); + Semaphore_Construct(&p->filledSemaphore); +} + + +// #define DEBUG_BUFFER_LOCK // define it to debug lock state + +#ifdef DEBUG_BUFFER_LOCK +#include +#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1); +#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1); +#else +#define BUFFER_MUST_BE_LOCKED(p) +#define BUFFER_MUST_BE_UNLOCKED(p) +#endif + +#define LOCK_BUFFER(p) { \ + BUFFER_MUST_BE_UNLOCKED(p); \ + CriticalSection_Enter(&(p)->cs); \ + (p)->csWasEntered = True; } + +#define UNLOCK_BUFFER(p) { \ + BUFFER_MUST_BE_LOCKED(p); \ + CriticalSection_Leave(&(p)->cs); \ + (p)->csWasEntered = False; } + + +Z7_NO_INLINE +static UInt32 MtSync_GetNextBlock(CMtSync *p) +{ + UInt32 numBlocks = 0; + if (p->needStart) + { + BUFFER_MUST_BE_UNLOCKED(p) + p->numProcessedBlocks = 1; + p->needStart = False; + p->stopWriting = False; + p->exit = False; + Event_Reset(&p->wasStopped); + Event_Set(&p->canStart); + } + else + { + UNLOCK_BUFFER(p) + // we free current block + numBlocks = p->numProcessedBlocks++; + Semaphore_Release1(&p->freeSemaphore); + } + + // buffer is UNLOCKED here + Semaphore_Wait(&p->filledSemaphore); + LOCK_BUFFER(p) + return numBlocks; +} + + +/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ + +Z7_NO_INLINE +static void MtSync_StopWriting(CMtSync *p) +{ + if (!Thread_WasCreated(&p->thread) || p->needStart) + return; + + PRF(printf("\nMtSync_StopWriting %p\n", p)); + + if (p->csWasEntered) + { + /* we don't use buffer in this thread after StopWriting(). + So we UNLOCK buffer. + And we restore default UNLOCKED state for stopped thread */ + UNLOCK_BUFFER(p) + } + + /* We send (p->stopWriting) message and release freeSemaphore + to free current block. + So the thread will see (p->stopWriting) at some + iteration after Wait(freeSemaphore). + The thread doesn't need to fill all avail free blocks, + so we can get fast thread stop. + */ + + p->stopWriting = True; + Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!! + + PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p)); + Event_Wait(&p->wasStopped); + PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p)); + + /* 21.03 : we don't restore samaphore counters here. + We will recreate and reinit samaphores in next start */ + + p->needStart = True; +} + + +Z7_NO_INLINE +static void MtSync_Destruct(CMtSync *p) +{ + PRF(printf("\nMtSync_Destruct %p\n", p)); + + if (Thread_WasCreated(&p->thread)) + { + /* we want thread to be in Stopped state before sending EXIT command. + note: stop(btSync) will stop (htSync) also */ + MtSync_StopWriting(p); + /* thread in Stopped state here : (p->needStart == true) */ + p->exit = True; + // if (p->needStart) // it's (true) + Event_Set(&p->canStart); // we send EXIT command to thread + Thread_Wait_Close(&p->thread); // we wait thread finishing + } + + if (p->csWasInitialized) + { + CriticalSection_Delete(&p->cs); + p->csWasInitialized = False; + } + p->csWasEntered = False; + + Event_Close(&p->canStart); + Event_Close(&p->wasStopped); + Semaphore_Close(&p->freeSemaphore); + Semaphore_Close(&p->filledSemaphore); + + p->wasCreated = False; +} + + +// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } +// we want to get real system error codes here instead of SZ_ERROR_THREAD +#define RINOK_THREAD(x) RINOK_WRes(x) + + +// call it before each new file (when new starting is required): +Z7_NO_INLINE +static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) +{ + WRes wres; + // BUFFER_MUST_BE_UNLOCKED(p) + if (!p->needStart || p->csWasEntered) + return SZ_ERROR_FAIL; + wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks); + if (wres == 0) + wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks); + return MY_SRes_HRESULT_FROM_WRes(wres); +} + + +static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) +{ + WRes wres; + + if (p->wasCreated) + return SZ_OK; + + RINOK_THREAD(CriticalSection_Init(&p->cs)) + p->csWasInitialized = True; + p->csWasEntered = False; + + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)) + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)) + + p->needStart = True; + p->exit = True; /* p->exit is unused before (canStart) Event. + But in case of some unexpected code failure we will get fast exit from thread */ + + // return ERROR_TOO_MANY_POSTS; // for debug + // return EINVAL; // for debug + + if (p->affinity != 0) + wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); + else + wres = Thread_Create(&p->thread, startAddress, obj); + + RINOK_THREAD(wres) + p->wasCreated = True; + return SZ_OK; +} + + +Z7_NO_INLINE +static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) +{ + const WRes wres = MtSync_Create_WRes(p, startAddress, obj); + if (wres == 0) + return 0; + MtSync_Destruct(p); + return MY_SRes_HRESULT_FROM_WRes(wres); +} + + +// ---------- HASH THREAD ---------- + +#define kMtMaxValForNormalize 0xFFFFFFFF +// #define kMtMaxValForNormalize ((1 << 21)) // for debug +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses + +#ifdef MY_CPU_LE_UNALIGN + #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8) +#else + #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16)) +#endif + +#define GetHeads_DECL(name) \ + static void GetHeads ## name(const Byte *p, UInt32 pos, \ + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) + +#define GetHeads_LOOP(v) \ + for (; numHeads != 0; numHeads--) { \ + const UInt32 value = (v); \ + p++; \ + *heads++ = pos - hash[value]; \ + hash[value] = pos++; } + +#define DEF_GetHeads2(name, v, action) \ + GetHeads_DECL(name) { action \ + GetHeads_LOOP(v) } + +#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) + +DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask) +DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +// BT3 is not good for crc collisions for big hashMask values. + +/* +GetHeads_DECL(3b) +{ + UNUSED_VAR(hashMask); + UNUSED_VAR(crc); + { + const Byte *pLim = p + numHeads; + if (numHeads == 0) + return; + pLim--; + while (p < pLim) + { + UInt32 v1 = GetUi32(p); + UInt32 v0 = v1 & 0xFFFFFF; + UInt32 h0, h1; + p += 2; + v1 >>= 8; + h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++; + h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++; + heads += 2; + } + if (p == pLim) + { + UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16); + *heads = pos - hash[v0]; + hash[v0] = pos; + } + } +} +*/ + +/* +GetHeads_DECL(4) +{ + unsigned sh = 0; + UNUSED_VAR(crc) + while ((hashMask & 0x80000000) == 0) + { + hashMask <<= 1; + sh++; + } + GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh) +} +#define GetHeads4b GetHeads4 +*/ + +#define USE_GetHeads_LOCAL_CRC + +#ifdef USE_GetHeads_LOCAL_CRC + +GetHeads_DECL(4) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + // crc1[i] = rotlFixed(v, 8) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(4b) +{ + UInt32 crc0[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + crc0[i] = crc[i] & hashMask; + } + GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p)) +} + +GetHeads_DECL(5) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + UInt32 crc2[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + crc2[i] = (v << kLzHash_CrcShift_2) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(5b) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p)) +} + +#else + +DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask) +DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask) +DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask) + +#endif + + +static void HashThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->hashSync; + PRF(printf("\nHashThreadFunc\n")); + + for (;;) + { + UInt32 blockIndex = 0; + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n")); + Event_Wait(&p->canStart); + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n")); + if (p->exit) + { + PRF(printf("\nHashThreadFunc : exit \n")); + return; + } + + MatchFinder_Init_HighHash(MF(mt)); + + for (;;) + { + PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos)); + + { + CMatchFinder *mf = MF(mt); + if (MatchFinder_NeedMove(mf)) + { + CriticalSection_Enter(&mt->btSync.cs); + CriticalSection_Enter(&mt->hashSync.cs); + { + const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf); + ptrdiff_t offset; + MatchFinder_MoveBlock(mf); + offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf); + mt->pointerToCurPos -= offset; + mt->buffer -= offset; + } + CriticalSection_Leave(&mt->hashSync.cs); + CriticalSection_Leave(&mt->btSync.cs); + continue; + } + + Semaphore_Wait(&p->freeSemaphore); + + if (p->exit) // exit is unexpected here. But we check it here for some failure case + return; + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + + MatchFinder_ReadIfRequired(mf); + { + UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++); + UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf); + heads[0] = 2; + heads[1] = num; + + /* heads[1] contains the number of avail bytes: + if (avail < mf->numHashBytes) : + { + it means that stream was finished + HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes. + HASH_THREAD doesn't stop, + HASH_THREAD fills only the header (2 numbers) for all next blocks: + {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0} + } + else + { + HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes; + } + */ + + if (num >= mf->numHashBytes) + { + num = num - mf->numHashBytes + 1; + if (num > kMtHashBlockSize - 2) + num = kMtHashBlockSize - 2; + + if (mf->pos > (UInt32)kMtMaxValForNormalize - num) + { + const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + MatchFinder_REDUCE_OFFSETS(mf, subValue) + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); + } + + heads[0] = 2 + num; + mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + } + + mf->pos += num; // wrap over zero is allowed at the end of stream + mf->buffer += num; + } + } + + Semaphore_Release1(&p->filledSemaphore); + } // for() processing end + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); + } // for() thread end +} + + + + +// ---------- BT THREAD ---------- + +/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap. + here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */ +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug + +#define MFMT_GM_INLINE + +#ifdef MFMT_GM_INLINE + +/* + we use size_t for (pos) instead of UInt32 + to eliminate "movsx" BUG in old MSVC x64 compiler. +*/ + + +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); + +#endif + + +static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) +{ + UInt32 numProcessed = 0; + UInt32 curPos = 2; + + /* GetMatchesSpec() functions don't create (len = 1) + in [len, dist] match pairs, if (p->numHashBytes >= 2) + Also we suppose here that (matchMaxLen >= 2). + So the following code for (reserve) is not required + UInt32 reserve = (p->matchMaxLen * 2); + const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX + if (reserve < kNumHashBytes_Max - 1) + reserve = kNumHashBytes_Max - 1; + const UInt32 limit = kMtBtBlockSize - (reserve); + */ + + const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + + d[1] = p->hashNumAvail; + + if (p->failure_BT) + { + // printf("\n == 1 BtGetMatches() p->failure_BT\n"); + d[0] = 0; + // d[1] = 0; + return; + } + + while (curPos < limit) + { + if (p->hashBufPos == p->hashBufPosLimit) + { + // MatchFinderMt_GetNextBlock_Hash(p); + UInt32 avail; + { + const UInt32 bi = MtSync_GetNextBlock(&p->hashSync); + const UInt32 k = GET_HASH_BLOCK_OFFSET(bi); + const UInt32 *h = p->hashBuf + k; + avail = h[1]; + p->hashBufPosLimit = k + h[0]; + p->hashNumAvail = avail; + p->hashBufPos = k + 2; + } + + { + /* we must prevent UInt32 overflow for avail total value, + if avail was increased with new hash block */ + UInt32 availSum = numProcessed + avail; + if (availSum < numProcessed) + availSum = (UInt32)(Int32)-1; + d[1] = availSum; + } + + if (avail >= p->numHashBytes) + continue; + + // if (p->hashBufPos != p->hashBufPosLimit) exit(1); + + /* (avail < p->numHashBytes) + It means that stream was finished. + And (avail) - is a number of remaining bytes, + we fill (d) for (avail) bytes for LZ_THREAD (receiver). + but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */ + + /* here we suppose that we have space enough: + (kMtBtBlockSize - curPos >= p->hashNumAvail) */ + p->hashNumAvail = 0; + d[0] = curPos + avail; + d += curPos; + for (; avail != 0; avail--) + *d++ = 0; + return; + } + { + UInt32 size = p->hashBufPosLimit - p->hashBufPos; + UInt32 pos = p->pos; + UInt32 cyclicBufferPos = p->cyclicBufferPos; + UInt32 lenLimit = p->matchMaxLen; + if (lenLimit >= p->hashNumAvail) + lenLimit = p->hashNumAvail; + { + UInt32 size2 = p->hashNumAvail - lenLimit + 1; + if (size2 < size) + size = size2; + size2 = p->cyclicBufferSize - cyclicBufferPos; + if (size2 < size) + size = size2; + } + + if (pos > (UInt32)kMtMaxValForNormalize - size) + { + const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1); + pos -= subValue; + p->pos = pos; + MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); + } + + #ifndef MFMT_GM_INLINE + while (curPos < limit && size-- != 0) + { + UInt32 *startDistances = d + curPos; + UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], + pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + startDistances + 1, p->numHashBytes - 1) - startDistances); + *startDistances = num - 1; + curPos += num; + cyclicBufferPos++; + pos++; + p->buffer++; + } + #else + { + UInt32 posRes = pos; + const UInt32 *d_end; + { + d_end = GetMatchesSpecN_2( + p->buffer + lenLimit - 1, + pos, p->buffer, p->son, p->cutValue, d + curPos, + p->numHashBytes - 1, p->hashBuf + p->hashBufPos, + d + limit, p->hashBuf + p->hashBufPos + size, + cyclicBufferPos, p->cyclicBufferSize, + &posRes); + } + { + if (!d_end) + { + // printf("\n == 2 BtGetMatches() p->failure_BT\n"); + // internal data failure + p->failure_BT = True; + d[0] = 0; + // d[1] = 0; + return; + } + } + curPos = (UInt32)(d_end - d); + { + const UInt32 processed = posRes - pos; + pos = posRes; + p->hashBufPos += processed; + cyclicBufferPos += processed; + p->buffer += processed; + } + } + #endif + + { + const UInt32 processed = pos - p->pos; + numProcessed += processed; + p->hashNumAvail -= processed; + p->pos = pos; + } + if (cyclicBufferPos == p->cyclicBufferSize) + cyclicBufferPos = 0; + p->cyclicBufferPos = cyclicBufferPos; + } + } + + d[0] = curPos; +} + + +static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) +{ + CMtSync *sync = &p->hashSync; + + BUFFER_MUST_BE_UNLOCKED(sync) + + if (!sync->needStart) + { + LOCK_BUFFER(sync) + } + + BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex)); + + /* We suppose that we have called GetNextBlock() from start. + So buffer is LOCKED */ + + UNLOCK_BUFFER(sync) +} + + +Z7_NO_INLINE +static void BtThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->btSync; + for (;;) + { + UInt32 blockIndex = 0; + Event_Wait(&p->canStart); + + for (;;) + { + PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos)); + /* (p->exit == true) is possible after (p->canStart) at first loop iteration + and is unexpected after more Wait(freeSemaphore) iterations */ + if (p->exit) + return; + + Semaphore_Wait(&p->freeSemaphore); + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + + BtFillBlock(mt, blockIndex++); + + Semaphore_Release1(&p->filledSemaphore); + } + + // we stop HASH_THREAD here + MtSync_StopWriting(&mt->hashSync); + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); + } +} + + +void MatchFinderMt_Construct(CMatchFinderMt *p) +{ + p->hashBuf = NULL; + MtSync_Construct(&p->hashSync); + MtSync_Construct(&p->btSync); +} + +static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->hashBuf); + p->hashBuf = NULL; +} + +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) +{ + /* + HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs. + So we must be sure that HASH_THREAD will not use CriticalSection(s) + after deleting CriticalSection here. + + we call ReleaseStream(p) + that calls StopWriting(btSync) + that calls StopWriting(hashSync), if it's required to stop HASH_THREAD. + after StopWriting() it's safe to destruct MtSync(s) in any order */ + + MatchFinderMt_ReleaseStream(p); + + MtSync_Destruct(&p->btSync); + MtSync_Destruct(&p->hashSync); + + LOG_ITER( + printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n", + (UInt32)(g_NumIters_Tree / 1000), + (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), + (UInt32)(g_NumIters_Loop / 1000), + (UInt32)(g_NumIters_Bytes / 1000) + )); + + MatchFinderMt_FreeMem(p, alloc); +} + + +#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) +#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + + +static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } +static THREAD_FUNC_DECL BtThreadFunc2(void *p) +{ + Byte allocaDummy[0x180]; + unsigned i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)0; + if (allocaDummy[0] == 0) + BtThreadFunc((CMatchFinderMt *)p); + return 0; +} + + +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) +{ + CMatchFinder *mf = MF(p); + p->historySize = historySize; + if (kMtBtBlockSize <= matchMaxLen * 4) + return SZ_ERROR_PARAM; + if (!p->hashBuf) + { + p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32)); + if (!p->hashBuf) + return SZ_ERROR_MEM; + p->btBuf = p->hashBuf + kHashBufferSize; + } + keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); + keepAddBufferAfter += kMtHashBlockSize; + if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) + return SZ_ERROR_MEM; + + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)) + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)) + return SZ_OK; +} + + +SRes MatchFinderMt_InitMt(CMatchFinderMt *p) +{ + RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)) + return MtSync_Init(&p->btSync, kMtBtNumBlocks); +} + + +static void MatchFinderMt_Init(void *_p) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + CMatchFinder *mf = MF(p); + + p->btBufPos = + p->btBufPosLimit = NULL; + p->hashBufPos = + p->hashBufPosLimit = 0; + p->hashNumAvail = 0; // 21.03 + + p->failure_BT = False; + + /* Init without data reading. We don't want to read data in this thread */ + MatchFinder_Init_4(mf); + + MatchFinder_Init_LowHash(mf); + + p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); + p->btNumAvailBytes = 0; + p->failure_LZ_BT = False; + // p->failure_LZ_LZ = False; + + p->lzPos = + 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug + + p->hash = mf->hash; + p->fixedHashSize = mf->fixedHashSize; + // p->hash4Mask = mf->hash4Mask; + p->crc = mf->crc; + // memcpy(p->crc, mf->crc, sizeof(mf->crc)); + + p->son = mf->son; + p->matchMaxLen = mf->matchMaxLen; + p->numHashBytes = mf->numHashBytes; + + /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */ + // mf->streamPos = mf->pos = 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug + + /* we must init (p->pos = mf->pos) for BT, because + BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */ + p->pos = mf->pos; // do not change it + + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); + p->cyclicBufferSize = mf->cyclicBufferSize; + p->buffer = mf->buffer; + p->cutValue = mf->cutValue; + // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses. +} + + +/* ReleaseStream is required to finish multithreading */ +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) +{ + // Sleep(1); // for debug + MtSync_StopWriting(&p->btSync); + // Sleep(200); // for debug + /* p->MatchFinder->ReleaseStream(); */ +} + + +Z7_NO_INLINE +static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) +{ + if (p->failure_LZ_BT) + p->btBufPos = p->failureBuf; + else + { + const UInt32 bi = MtSync_GetNextBlock(&p->btSync); + const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi); + { + const UInt32 numItems = bt[0]; + p->btBufPosLimit = bt + numItems; + p->btNumAvailBytes = bt[1]; + p->btBufPos = bt + 2; + if (numItems < 2 || numItems > kMtBtBlockSize) + { + p->failureBuf[0] = 0; + p->btBufPos = p->failureBuf; + p->btBufPosLimit = p->failureBuf + 1; + p->failure_LZ_BT = True; + // p->btNumAvailBytes = 0; + /* we don't want to decrease AvailBytes, that was load before. + that can be unxepected for the code that have loaded anopther value before */ + } + } + + if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize) + { + /* we don't check (lzPos) over exact avail bytes in (btBuf). + (fixedHashSize) is small, so normalization is fast */ + const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + p->lzPos -= subValue; + MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize); + } + } + return p->btNumAvailBytes; +} + + + +static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + return p->pointerToCurPos; +} + + +#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); + + +static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + if (p->btBufPos != p->btBufPosLimit) + return p->btNumAvailBytes; + return MatchFinderMt_GetNextBlock_Bt(p); +} + + +// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; } +#define CHECK_FAILURE_LZ(_match_, _pos_) + +static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, c2; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH2_CALC + + c2 = hash[h2]; + hash[h2] = m; + + if (c2 >= matchMinPos) + { + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 2; + *d++ = m - c2 - 1; + } + } + + return d; +} + +static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, c2, c3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos) + { + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + return d + 2; + } + d[0] = 2; + d += 2; + } + } + + if (c3 >= matchMinPos) + { + CHECK_FAILURE_LZ(c3, m) + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + } + + return d; +} + + +#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; + +/* +static +UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + UInt32 matchMinPos; + UInt32 avail = p->btNumAvailBytes - 1; + p->btBufPos = btLim; + + { + p->btNumAvailBytes = avail; + + #define BT_HASH_BYTES_MAX 5 + + matchMinPos = p->lzPos; + + if (len != 0) + matchMinPos -= bt[1]; + else if (avail < (BT_HASH_BYTES_MAX - 1) - 1) + { + INCREASE_LZ_POS + return d; + } + else + { + const UInt32 hs = p->historySize; + if (matchMinPos > hs) + matchMinPos -= hs; + else + matchMinPos = 1; + } + } + + for (;;) + { + + UInt32 h2, h3, c2, c3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + d += 2; + break; + } + // else + { + d[0] = 2; + d += 2; + } + } + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + break; + } + + if (len != 0) + { + do + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while (bt != btLim); + } + INCREASE_LZ_POS + return d; +} +*/ + + +static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH3_CALC + // MT_HASH4_CALC + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + // c4 = (hash + kFix4HashSize)[h4]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + // (hash + kFix4HashSize)[h4] = m; + + // #define BT5_USE_H2 + // #ifdef BT5_USE_H2 + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3; + // return d + 2; + + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + + #ifdef BT5_USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + return d; + } + d[0] = 2; + d += 2; + } + // #endif + + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c3 - 1; + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + } + + #ifdef BT5_USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + + return d; +} + + +static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + p->btBufPos = btLim; + p->btNumAvailBytes--; + INCREASE_LZ_POS + { + while (bt != btLim) + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + } + return d; +} + + + +static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + const UInt32 *bt = p->btBufPos; + UInt32 len = *bt++; + const UInt32 avail = p->btNumAvailBytes - 1; + p->btNumAvailBytes = avail; + p->btBufPos = bt + len; + if (len == 0) + { + #define BT_HASH_BYTES_MAX 5 + if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) + { + UInt32 m = p->lzPos; + if (m > p->historySize) + m -= p->historySize; + else + m = 1; + d = p->MixMatchesFunc(p, m, d); + } + } + else + { + /* + first match pair from BinTree: (match_len, match_dist), + (match_len >= numHashBytes). + MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) + */ + d = p->MixMatchesFunc(p, p->lzPos - bt[1], d); + // if (d) // check for failure + do + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while (len -= 2); + } + INCREASE_LZ_POS + return d; +} + +#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED +#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; +#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); + +static void MatchFinderMt0_Skip(void *_p, UInt32 num) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + SKIP_HEADER2_MT { p->btNumAvailBytes--; + SKIP_FOOTER_MT +} + +static void MatchFinderMt2_Skip(void *_p, UInt32 num) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + SKIP_HEADER_MT(2) + UInt32 h2; + MT_HASH2_CALC + hash[h2] = p->lzPos; + SKIP_FOOTER_MT +} + +static void MatchFinderMt3_Skip(void *_p, UInt32 num) +{ + CMatchFinderMt *p = (CMatchFinderMt *)_p; + SKIP_HEADER_MT(3) + UInt32 h2, h3; + MT_HASH3_CALC + (hash + kFix3HashSize)[h3] = + hash[ h2] = + p->lzPos; + SKIP_FOOTER_MT +} + +/* +// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip(). +// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream. + +static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(4) + UInt32 h2, h3; // h4 + MT_HASH3_CALC + // MT_HASH4_CALC + // (hash + kFix4HashSize)[h4] = + (hash + kFix3HashSize)[h3] = + hash[ h2] = + p->lzPos; + SKIP_FOOTER_MT +} +*/ + +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) +{ + vTable->Init = MatchFinderMt_Init; + vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = MatchFinderMt_GetMatches; + + switch (MF(p)->numHashBytes) + { + case 2: + p->GetHeadsFunc = GetHeads2; + p->MixMatchesFunc = NULL; + vTable->Skip = MatchFinderMt0_Skip; + vTable->GetMatches = MatchFinderMt2_GetMatches; + break; + case 3: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; + p->MixMatchesFunc = MixMatches2; + vTable->Skip = MatchFinderMt2_Skip; + break; + case 4: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; + + // it's fast inline version of GetMatches() + // vTable->GetMatches = MatchFinderMt_GetMatches_Bt4; + + p->MixMatchesFunc = MixMatches3; + vTable->Skip = MatchFinderMt3_Skip; + break; + default: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; + p->MixMatchesFunc = MixMatches4; + vTable->Skip = + MatchFinderMt3_Skip; + // MatchFinderMt4_Skip; + break; + } +} + +#undef RINOK_THREAD +#undef PRF +#undef MF +#undef GetUi24hi_from32 +#undef LOCK_BUFFER +#undef UNLOCK_BUFFER diff --git a/crnlib/lzma/LzFindMt.h b/crnlib/lzma/LzFindMt.h new file mode 100644 index 00000000..0eb2a430 --- /dev/null +++ b/crnlib/lzma/LzFindMt.h @@ -0,0 +1,112 @@ +/* LzFindMt.h -- multithreaded Match finder for LZ algorithms +2024-01-22 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZ_FIND_MT_H +#define ZIP7_INC_LZ_FIND_MT_H + +#include "LzFind.h" +#include "Threads.h" + +EXTERN_C_BEGIN + +typedef struct +{ + UInt32 numProcessedBlocks; + CThread thread; + UInt64 affinity; + + BoolInt wasCreated; + BoolInt needStart; + BoolInt csWasInitialized; + BoolInt csWasEntered; + + BoolInt exit; + BoolInt stopWriting; + + CAutoResetEvent canStart; + CAutoResetEvent wasStopped; + CSemaphore freeSemaphore; + CSemaphore filledSemaphore; + CCriticalSection cs; + // UInt32 numBlocks_Sent; +} CMtSync; + + +struct CMatchFinderMt_; + +typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances); + +/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ +#define kMtCacheLineDummy 128 + +typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); + +typedef struct CMatchFinderMt_ +{ + /* LZ */ + const Byte *pointerToCurPos; + UInt32 *btBuf; + const UInt32 *btBufPos; + const UInt32 *btBufPosLimit; + UInt32 lzPos; + UInt32 btNumAvailBytes; + + UInt32 *hash; + UInt32 fixedHashSize; + // UInt32 hash4Mask; + UInt32 historySize; + const UInt32 *crc; + + Mf_Mix_Matches MixMatchesFunc; + UInt32 failure_LZ_BT; // failure in BT transfered to LZ + // UInt32 failure_LZ_LZ; // failure in LZ tables + UInt32 failureBuf[1]; + // UInt32 crc[256]; + + /* LZ + BT */ + CMtSync btSync; + Byte btDummy[kMtCacheLineDummy]; + + /* BT */ + UInt32 *hashBuf; + UInt32 hashBufPos; + UInt32 hashBufPosLimit; + UInt32 hashNumAvail; + UInt32 failure_BT; + + + CLzRef *son; + UInt32 matchMaxLen; + UInt32 numHashBytes; + UInt32 pos; + const Byte *buffer; + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + UInt32 cutValue; + + /* BT + Hash */ + CMtSync hashSync; + /* Byte hashDummy[kMtCacheLineDummy]; */ + + /* Hash */ + Mf_GetHeads GetHeadsFunc; + CMatchFinder *MatchFinder; + // CMatchFinder MatchFinder; +} CMatchFinderMt; + +// only for Mt part +void MatchFinderMt_Construct(CMatchFinderMt *p); +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc); + +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable); + +/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */ +SRes MatchFinderMt_InitMt(CMatchFinderMt *p); +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/LzFindOpt.c b/crnlib/lzma/LzFindOpt.c new file mode 100644 index 00000000..e590aac1 --- /dev/null +++ b/crnlib/lzma/LzFindOpt.c @@ -0,0 +1,578 @@ +/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" +#include "LzFind.h" + +// #include "LzFindMt.h" + +// #define LOG_ITERS + +// #define LOG_THREAD + +#ifdef LOG_THREAD +#include +#define PRF(x) x +#else +// #define PRF(x) +#endif + +#ifdef LOG_ITERS +#include +UInt64 g_NumIters_Tree; +UInt64 g_NumIters_Loop; +UInt64 g_NumIters_Bytes; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +// ---------- BT THREAD ---------- + +#define USE_SON_PREFETCH +#define USE_LONG_MATCH_OPT + +#define kEmptyHashValue 0 + +// #define CYC_TO_POS_OFFSET 0 + +// #define CYC_TO_POS_OFFSET 1 // for debug + +/* +Z7_NO_INLINE +UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) +{ + do + { + UInt32 delta; + if (hash == size) + break; + delta = *hash++; + + if (delta == 0 || delta > (UInt32)pos) + return NULL; + + lenLimit++; + + if (delta == (UInt32)pos) + { + CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2; + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1; + CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + + const Byte *len0 = cur, *len1 = cur; + UInt32 cutValue = _cutValue; + const Byte *maxLen = cur + _maxLen; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1); + const Byte *len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + hash++; + pos++; + cur++; + lenLimit++; + { + CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; + #else + const UInt32 p0 = ptr[0 + (diff * 2)]; + const UInt32 p1 = ptr[1 + (diff * 2)]; + ptr[0] = p0; + ptr[1] = p1; + // ptr[0] = ptr[0 + (diff * 2)]; + // ptr[1] = ptr[1 + (diff * 2)]; + #endif + } + // PrintSon(son + 2, pos - 1); + // printf("\npos = %x delta = %x\n", pos, delta); + len++; + *d++ = 2; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + if (delta >= curMatch) + return NULL; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + if (delta >= curMatch) + return NULL; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= pos) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ + +/* define cbs if you use 2 functions. + GetMatchesSpecN_1() : (pos < _cyclicBufferSize) + GetMatchesSpecN_2() : (pos >= _cyclicBufferSize) + + do not define cbs if you use 1 function: + GetMatchesSpecN_2() +*/ + +// #define cbs _cyclicBufferSize + +/* + we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32 + to eliminate "movsx" BUG in old MSVC x64 compiler. +*/ + +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); + +Z7_NO_INLINE +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + lenLimit++; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + + UInt32 cutValue = _cutValue; + const Byte *len0 = cur, *len1 = cur; + const Byte *maxLen = cur + _maxLen; + + // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // SPEC code + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + const Byte *len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + *d++ = 2; + *d++ = (UInt32)(lenLimit - cur); + *d++ = delta - 1; + cur++; + lenLimit++; + // SPEC + _cyclicBufferPos++; + { + // SPEC code + CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1); + // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + pos++; + hash++; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } // for() end for long matches + } + #endif + + break; // break from TREE iterations + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + if (delta >= curMatch) + return NULL; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + if (delta >= curMatch) + return NULL; + } + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= cbs) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} + + + +/* +typedef UInt32 uint32plus; // size_t + +UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + UInt32 *_distances = ++d; + uint32plus len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + uint32plus maxLen = _maxLen; + // lenLimit++; // const Byte *lenLimit = cur + _lenLimit; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + const Byte *pb = cur - delta; + uint32plus len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)len; + *d++ = delta - 1; + if (len == lenLimit) + { + { + const UInt32 pair1 = pair[1]; + *ptr0 = pair1; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + } + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + for (;;) + { + *d++ = 2; + *d++ = (UInt32)lenLimit; + *d++ = delta - 1; + _cyclicBufferPos++; + { + CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1); + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + hash++; + pos++; + cur++; + pb++; + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; + if (pb[len] < cur[len]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + { + if (delta >= curMatch) + return NULL; + delta = (UInt32)pos - delta; + if (delta >= cbs + // delta >= _cyclicBufferSize || delta >= pos + || --cutValue == 0) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ diff --git a/crnlib/lzma/LzHash.h b/crnlib/lzma/LzHash.h new file mode 100644 index 00000000..3b172ebc --- /dev/null +++ b/crnlib/lzma/LzHash.h @@ -0,0 +1,34 @@ +/* LzHash.h -- HASH constants for LZ algorithms +2023-03-05 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZ_HASH_H +#define ZIP7_INC_LZ_HASH_H + +/* + (kHash2Size >= (1 << 8)) : Required + (kHash3Size >= (1 << 16)) : Required +*/ + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +// #define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +/* + We use up to 3 crc values for hash: + crc0 + crc1 << Shift_1 + crc2 << Shift_2 + (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff. + Small values for Shift are not good for collision rate. + Big value for Shift_2 increases the minimum size + of hash table, that will be slow for small files. +*/ + +#define kLzHash_CrcShift_1 5 +#define kLzHash_CrcShift_2 10 + +#endif diff --git a/crnlib/lzma/Lzma2Dec.c b/crnlib/lzma/Lzma2Dec.c new file mode 100644 index 00000000..64420ef0 --- /dev/null +++ b/crnlib/lzma/Lzma2Dec.c @@ -0,0 +1,493 @@ +/* Lzma2Dec.c -- LZMA2 Decoder +2024-03-01 : Igor Pavlov : Public domain */ + +/* #define SHOW_DEBUG_INFO */ + +#include "Precomp.h" + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#include + +#include "Lzma2Dec.h" + +/* +00000000 - End of data +00000001 U U - Uncompressed, reset dic, need reset state and set new prop +00000010 U U - Uncompressed, no reset +100uuuuu U U P P - LZMA, no reset +101uuuuu U U P P - LZMA, reset state +110uuuuu U U P P S - LZMA, reset state + set new prop +111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic + + u, U - Unpack Size + P - Pack Size + S - Props +*/ + +#define LZMA2_CONTROL_COPY_RESET_DIC 1 + +#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0) + +#define LZMA2_LCLP_MAX 4 +#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) + +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif + +typedef enum +{ + LZMA2_STATE_CONTROL, + LZMA2_STATE_UNPACK0, + LZMA2_STATE_UNPACK1, + LZMA2_STATE_PACK0, + LZMA2_STATE_PACK1, + LZMA2_STATE_PROP, + LZMA2_STATE_DATA, + LZMA2_STATE_DATA_CONT, + LZMA2_STATE_FINISHED, + LZMA2_STATE_ERROR +} ELzma2State; + +static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props) +{ + UInt32 dicSize; + if (prop > 40) + return SZ_ERROR_UNSUPPORTED; + dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); + props[0] = (Byte)LZMA2_LCLP_MAX; + props[1] = (Byte)(dicSize); + props[2] = (Byte)(dicSize >> 8); + props[3] = (Byte)(dicSize >> 16); + props[4] = (Byte)(dicSize >> 24); + return SZ_OK; +} + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)) + return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)) + return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +void Lzma2Dec_Init(CLzma2Dec *p) +{ + p->state = LZMA2_STATE_CONTROL; + p->needInitLevel = 0xE0; + p->isExtraMode = False; + p->unpackSize = 0; + + // p->decoder.dicPos = 0; // we can use it instead of full init + LzmaDec_Init(&p->decoder); +} + +// ELzma2State +static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +{ + switch (p->state) + { + case LZMA2_STATE_CONTROL: + p->isExtraMode = False; + p->control = b; + PRF(printf("\n %8X", (unsigned)p->decoder.dicPos)); + PRF(printf(" %02X", (unsigned)b)); + if (b == 0) + return LZMA2_STATE_FINISHED; + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (b == LZMA2_CONTROL_COPY_RESET_DIC) + p->needInitLevel = 0xC0; + else if (b > 2 || p->needInitLevel == 0xE0) + return LZMA2_STATE_ERROR; + } + else + { + if (b < p->needInitLevel) + return LZMA2_STATE_ERROR; + p->needInitLevel = 0; + p->unpackSize = (UInt32)(b & 0x1F) << 16; + } + return LZMA2_STATE_UNPACK0; + + case LZMA2_STATE_UNPACK0: + p->unpackSize |= (UInt32)b << 8; + return LZMA2_STATE_UNPACK1; + + case LZMA2_STATE_UNPACK1: + p->unpackSize |= (UInt32)b; + p->unpackSize++; + PRF(printf(" %7u", (unsigned)p->unpackSize)); + return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; + + case LZMA2_STATE_PACK0: + p->packSize = (UInt32)b << 8; + return LZMA2_STATE_PACK1; + + case LZMA2_STATE_PACK1: + p->packSize |= (UInt32)b; + p->packSize++; + // if (p->packSize < 5) return LZMA2_STATE_ERROR; + PRF(printf(" %5u", (unsigned)p->packSize)); + return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA; + + case LZMA2_STATE_PROP: + { + unsigned lc, lp; + if (b >= (9 * 5 * 5)) + return LZMA2_STATE_ERROR; + lc = b % 9; + b /= 9; + p->decoder.prop.pb = (Byte)(b / 5); + lp = b % 5; + if (lc + lp > LZMA2_LCLP_MAX) + return LZMA2_STATE_ERROR; + p->decoder.prop.lc = (Byte)lc; + p->decoder.prop.lp = (Byte)lp; + return LZMA2_STATE_DATA; + } + + default: + return LZMA2_STATE_ERROR; + } +} + +static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) +{ + memcpy(p->dic + p->dicPos, src, size); + p->dicPos += size; + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) + p->checkDicSize = p->prop.dicSize; + p->processedPos += (UInt32)size; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); + + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->state != LZMA2_STATE_ERROR) + { + SizeT dicPos; + + if (p->state == LZMA2_STATE_FINISHED) + { + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + dicPos = p->decoder.dicPos; + + if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + (*srcLen)++; + p->state = Lzma2Dec_UpdateState(p, *src++); + if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED) + break; + continue; + } + + { + SizeT inCur = inSize - *srcLen; + SizeT outCur = dicLimit - dicPos; + ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY; + + if (outCur >= p->unpackSize) + { + outCur = (SizeT)p->unpackSize; + curFinishMode = LZMA_FINISH_END; + } + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + if (p->state == LZMA2_STATE_DATA) + { + BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); + LzmaDec_InitDicAndState(&p->decoder, initDic, False); + } + + if (inCur > outCur) + inCur = outCur; + if (inCur == 0) + break; + + LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur); + + src += inCur; + *srcLen += inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + SRes res; + + if (p->state == LZMA2_STATE_DATA) + { + BoolInt initDic = (p->control >= 0xE0); + BoolInt initState = (p->control >= 0xA0); + LzmaDec_InitDicAndState(&p->decoder, initDic, initState); + p->state = LZMA2_STATE_DATA_CONT; + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status); + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + outCur = p->decoder.dicPos - dicPos; + p->unpackSize -= (UInt32)outCur; + + if (res != 0) + break; + + if (*status == LZMA_STATUS_NEEDS_MORE_INPUT) + { + if (p->packSize == 0) + break; + return SZ_OK; + } + + if (inCur == 0 && outCur == 0) + { + if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + || p->unpackSize != 0 + || p->packSize != 0) + break; + p->state = LZMA2_STATE_CONTROL; + } + + *status = LZMA_STATUS_NOT_SPECIFIED; + } + } + } + + *status = LZMA_STATUS_NOT_SPECIFIED; + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; +} + + + + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, + const Byte *src, SizeT *srcLen, + int checkFinishBlock) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + + while (p->state != LZMA2_STATE_ERROR) + { + if (p->state == LZMA2_STATE_FINISHED) + return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK; + + if (outSize == 0 && !checkFinishBlock) + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + (*srcLen)++; + + p->state = Lzma2Dec_UpdateState(p, *src++); + + if (p->state == LZMA2_STATE_UNPACK0) + { + // if (p->decoder.dicPos != 0) + if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0) + return LZMA2_PARSE_STATUS_NEW_BLOCK; + // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED; + } + + // The following code can be commented. + // It's not big problem, if we read additional input bytes. + // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state. + + if (outSize == 0 && p->state != LZMA2_STATE_FINISHED) + { + // checkFinishBlock is true. So we expect that block must be finished, + // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here + // break; + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + } + + if (p->state == LZMA2_STATE_DATA) + return LZMA2_PARSE_STATUS_NEW_CHUNK; + + continue; + } + + if (outSize == 0) + return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; + + { + SizeT inCur = inSize - *srcLen; + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + if (inCur > p->unpackSize) + inCur = p->unpackSize; + if (inCur > outSize) + inCur = outSize; + p->decoder.dicPos += inCur; + src += inCur; + *srcLen += inCur; + outSize -= inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + p->isExtraMode = True; + + if (inCur == 0) + { + if (p->packSize != 0) + return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; + } + else if (p->state == LZMA2_STATE_DATA) + { + p->state = LZMA2_STATE_DATA_CONT; + if (*src != 0) + { + // first byte of lzma chunk must be Zero + *srcLen += 1; + p->packSize--; + break; + } + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + + if (p->packSize == 0) + { + SizeT rem = outSize; + if (rem > p->unpackSize) + rem = p->unpackSize; + p->decoder.dicPos += rem; + p->unpackSize -= (UInt32)rem; + outSize -= rem; + if (p->unpackSize == 0) + p->state = LZMA2_STATE_CONTROL; + } + } + } + } + + p->state = LZMA2_STATE_ERROR; + return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED; +} + + + + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen, inSize = *srcLen; + *srcLen = *destLen = 0; + + for (;;) + { + SizeT inCur = inSize, outCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + + if (p->decoder.dicPos == p->decoder.dicBufSize) + p->decoder.dicPos = 0; + dicPos = p->decoder.dicPos; + curFinishMode = LZMA_FINISH_ANY; + outCur = p->decoder.dicBufSize - dicPos; + + if (outCur >= outSize) + { + outCur = outSize; + curFinishMode = finishMode; + } + + res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status); + + src += inCur; + inSize -= inCur; + *srcLen += inCur; + outCur = p->decoder.dicPos - dicPos; + memcpy(dest, p->decoder.dic + dicPos, outCur); + dest += outCur; + outSize -= outCur; + *destLen += outCur; + if (res != 0) + return res; + if (outCur == 0 || outSize == 0) + return SZ_OK; + } +} + + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzma2Dec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + Lzma2Dec_CONSTRUCT(&p) + RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)) + p.decoder.dic = dest; + p.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&p); + *srcLen = inSize; + res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.decoder.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + Lzma2Dec_FreeProbs(&p, alloc); + return res; +} + +#undef PRF diff --git a/crnlib/lzma/Lzma2Dec.h b/crnlib/lzma/Lzma2Dec.h new file mode 100644 index 00000000..9b15ab04 --- /dev/null +++ b/crnlib/lzma/Lzma2Dec.h @@ -0,0 +1,121 @@ +/* Lzma2Dec.h -- LZMA2 Decoder +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA2_DEC_H +#define ZIP7_INC_LZMA2_DEC_H + +#include "LzmaDec.h" + +EXTERN_C_BEGIN + +/* ---------- State Interface ---------- */ + +typedef struct +{ + unsigned state; + Byte control; + Byte needInitLevel; + Byte isExtraMode; + Byte _pad_; + UInt32 packSize; + UInt32 unpackSize; + CLzmaDec decoder; +} CLzma2Dec; + +#define Lzma2Dec_CONSTRUCT(p) LzmaDec_CONSTRUCT(&(p)->decoder) +#define Lzma2Dec_Construct(p) Lzma2Dec_CONSTRUCT(p) +#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) +#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); +void Lzma2Dec_Init(CLzma2Dec *p); + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen or dicLimit). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + SZ_ERROR_DATA - Data error +*/ + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- LZMA2 block and chunk parsing ---------- */ + +/* +Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data. +It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code: + - LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input. + - LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read. + CLzma2Dec::unpackSize contains unpack size of that chunk +*/ + +typedef enum +{ +/* + LZMA_STATUS_NOT_SPECIFIED // data error + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED // + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK // unused +*/ + LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1, + LZMA2_PARSE_STATUS_NEW_CHUNK +} ELzma2ParseStatus; + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, // output size + const Byte *src, SizeT *srcLen, + int checkFinishBlock // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position. + ); + +/* +LZMA2 parser doesn't decode LZMA chunks, so we must read + full input LZMA chunk to decode some part of LZMA chunk. + +Lzma2Dec_GetUnpackExtra() returns the value that shows + max possible number of output bytes that can be output by decoder + at current input positon. +*/ + +#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0) + + +/* ---------- One Call Interface ---------- */ + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Lzma2DecMt.c b/crnlib/lzma/Lzma2DecMt.c new file mode 100644 index 00000000..e1038a12 --- /dev/null +++ b/crnlib/lzma/Lzma2DecMt.c @@ -0,0 +1,1095 @@ +/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread +2023-04-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #define SHOW_DEBUG_INFO +// #define Z7_ST + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#include "Alloc.h" + +#include "Lzma2Dec.h" +#include "Lzma2DecMt.h" + +#ifndef Z7_ST +#include "MtDec.h" + +#define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28) +#endif + + +#ifndef Z7_ST +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif +#define PRF_STR(s) PRF(printf("\n" s "\n");) +#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2);) +#endif + + +void Lzma2DecMtProps_Init(CLzma2DecMtProps *p) +{ + p->inBufSize_ST = 1 << 20; + p->outStep_ST = 1 << 20; + + #ifndef Z7_ST + p->numThreads = 1; + p->inBufSize_MT = 1 << 18; + p->outBlockMax = LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT; + p->inBlockMax = p->outBlockMax + p->outBlockMax / 16; + #endif +} + + + +#ifndef Z7_ST + +/* ---------- CLzma2DecMtThread ---------- */ + +typedef struct +{ + CLzma2Dec dec; + Byte dec_created; + Byte needInit; + + Byte *outBuf; + size_t outBufSize; + + EMtDecParseState state; + ELzma2ParseStatus parseStatus; + + size_t inPreSize; + size_t outPreSize; + + size_t inCodeSize; + size_t outCodeSize; + SRes codeRes; + + CAlignOffsetAlloc alloc; + + Byte mtPad[1 << 7]; +} CLzma2DecMtThread; + +#endif + + +/* ---------- CLzma2DecMt ---------- */ + +struct CLzma2DecMt +{ + // ISzAllocPtr alloc; + ISzAllocPtr allocMid; + + CAlignOffsetAlloc alignOffsetAlloc; + CLzma2DecMtProps props; + Byte prop; + + ISeqInStreamPtr inStream; + ISeqOutStreamPtr outStream; + ICompressProgressPtr progress; + + BoolInt finishMode; + BoolInt outSize_Defined; + UInt64 outSize; + + UInt64 outProcessed; + UInt64 inProcessed; + BoolInt readWasFinished; + SRes readRes; + + Byte *inBuf; + size_t inBufSize; + Byte dec_created; + CLzma2Dec dec; + + size_t inPos; + size_t inLim; + + #ifndef Z7_ST + UInt64 outProcessed_Parse; + BoolInt mtc_WasConstructed; + CMtDec mtc; + CLzma2DecMtThread coders[MTDEC_THREADS_MAX]; + #endif +}; + + + +CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid) +{ + CLzma2DecMt *p = (CLzma2DecMt *)ISzAlloc_Alloc(alloc, sizeof(CLzma2DecMt)); + if (!p) + return NULL; + + // p->alloc = alloc; + p->allocMid = allocMid; + + AlignOffsetAlloc_CreateVTable(&p->alignOffsetAlloc); + p->alignOffsetAlloc.numAlignBits = 7; + p->alignOffsetAlloc.offset = 0; + p->alignOffsetAlloc.baseAlloc = alloc; + + p->inBuf = NULL; + p->inBufSize = 0; + p->dec_created = False; + + // Lzma2DecMtProps_Init(&p->props); + + #ifndef Z7_ST + p->mtc_WasConstructed = False; + { + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CLzma2DecMtThread *t = &p->coders[i]; + t->dec_created = False; + t->outBuf = NULL; + t->outBufSize = 0; + } + } + #endif + + return (CLzma2DecMtHandle)(void *)p; +} + + +#ifndef Z7_ST + +static void Lzma2DecMt_FreeOutBufs(CLzma2DecMt *p) +{ + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CLzma2DecMtThread *t = &p->coders[i]; + if (t->outBuf) + { + ISzAlloc_Free(p->allocMid, t->outBuf); + t->outBuf = NULL; + t->outBufSize = 0; + } + } +} + +#endif + + +static void Lzma2DecMt_FreeSt(CLzma2DecMt *p) +{ + if (p->dec_created) + { + Lzma2Dec_Free(&p->dec, &p->alignOffsetAlloc.vt); + p->dec_created = False; + } + if (p->inBuf) + { + ISzAlloc_Free(p->allocMid, p->inBuf); + p->inBuf = NULL; + } + p->inBufSize = 0; +} + + +// #define GET_CLzma2DecMt_p CLzma2DecMt *p = (CLzma2DecMt *)(void *)pp; + +void Lzma2DecMt_Destroy(CLzma2DecMtHandle p) +{ + // GET_CLzma2DecMt_p + + Lzma2DecMt_FreeSt(p); + + #ifndef Z7_ST + + if (p->mtc_WasConstructed) + { + MtDec_Destruct(&p->mtc); + p->mtc_WasConstructed = False; + } + { + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CLzma2DecMtThread *t = &p->coders[i]; + if (t->dec_created) + { + // we don't need to free dict here + Lzma2Dec_FreeProbs(&t->dec, &t->alloc.vt); // p->alloc !!! + t->dec_created = False; + } + } + } + Lzma2DecMt_FreeOutBufs(p); + + #endif + + ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p); +} + + + +#ifndef Z7_ST + +static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc) +{ + CLzma2DecMt *me = (CLzma2DecMt *)obj; + CLzma2DecMtThread *t = &me->coders[coderIndex]; + + PRF_STR_INT_2("Parse", coderIndex, cc->srcSize) + + cc->state = MTDEC_PARSE_CONTINUE; + + if (cc->startCall) + { + if (!t->dec_created) + { + Lzma2Dec_CONSTRUCT(&t->dec) + t->dec_created = True; + AlignOffsetAlloc_CreateVTable(&t->alloc); + { + /* (1 << 12) is expected size of one way in data cache. + We optimize alignment for cache line size of 128 bytes and smaller */ + const unsigned kNumAlignBits = 12; + const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */ + t->alloc.numAlignBits = kNumAlignBits; + t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits)); + t->alloc.baseAlloc = me->alignOffsetAlloc.baseAlloc; + } + } + Lzma2Dec_Init(&t->dec); + + t->inPreSize = 0; + t->outPreSize = 0; + // t->blockWasFinished = False; + // t->finishedWithMark = False; + t->parseStatus = (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED; + t->state = MTDEC_PARSE_CONTINUE; + + t->inCodeSize = 0; + t->outCodeSize = 0; + t->codeRes = SZ_OK; + + // (cc->srcSize == 0) is allowed + } + + { + ELzma2ParseStatus status; + BoolInt overflow; + UInt32 unpackRem = 0; + + int checkFinishBlock = True; + size_t limit = me->props.outBlockMax; + if (me->outSize_Defined) + { + UInt64 rem = me->outSize - me->outProcessed_Parse; + if (limit >= rem) + { + limit = (size_t)rem; + if (!me->finishMode) + checkFinishBlock = False; + } + } + + // checkFinishBlock = False, if we want to decode partial data + // that must be finished at position <= outBlockMax. + + { + const size_t srcOrig = cc->srcSize; + SizeT srcSize_Point = 0; + SizeT dicPos_Point = 0; + + cc->srcSize = 0; + overflow = False; + + for (;;) + { + SizeT srcCur = (SizeT)(srcOrig - cc->srcSize); + + status = Lzma2Dec_Parse(&t->dec, + (SizeT)limit - t->dec.decoder.dicPos, + cc->src + cc->srcSize, &srcCur, + checkFinishBlock); + + cc->srcSize += srcCur; + + if (status == LZMA2_PARSE_STATUS_NEW_CHUNK) + { + if (t->dec.unpackSize > me->props.outBlockMax - t->dec.decoder.dicPos) + { + overflow = True; + break; + } + continue; + } + + if (status == LZMA2_PARSE_STATUS_NEW_BLOCK) + { + if (t->dec.decoder.dicPos == 0) + continue; + // we decode small blocks in one thread + if (t->dec.decoder.dicPos >= (1 << 14)) + break; + dicPos_Point = t->dec.decoder.dicPos; + srcSize_Point = (SizeT)cc->srcSize; + continue; + } + + if ((int)status == LZMA_STATUS_NOT_FINISHED && checkFinishBlock + // && limit == t->dec.decoder.dicPos + // && limit == me->props.outBlockMax + ) + { + overflow = True; + break; + } + + unpackRem = Lzma2Dec_GetUnpackExtra(&t->dec); + break; + } + + if (dicPos_Point != 0 + && (int)status != LZMA2_PARSE_STATUS_NEW_BLOCK + && (int)status != LZMA_STATUS_FINISHED_WITH_MARK + && (int)status != LZMA_STATUS_NOT_SPECIFIED) + { + // we revert to latest newBlock state + status = LZMA2_PARSE_STATUS_NEW_BLOCK; + unpackRem = 0; + t->dec.decoder.dicPos = dicPos_Point; + cc->srcSize = srcSize_Point; + overflow = False; + } + } + + t->inPreSize += cc->srcSize; + t->parseStatus = status; + + if (overflow) + cc->state = MTDEC_PARSE_OVERFLOW; + else + { + size_t dicPos = t->dec.decoder.dicPos; + + if ((int)status != LZMA_STATUS_NEEDS_MORE_INPUT) + { + if (status == LZMA2_PARSE_STATUS_NEW_BLOCK) + { + cc->state = MTDEC_PARSE_NEW; + cc->srcSize--; // we don't need control byte of next block + t->inPreSize--; + } + else + { + cc->state = MTDEC_PARSE_END; + if ((int)status != LZMA_STATUS_FINISHED_WITH_MARK) + { + // (status == LZMA_STATUS_NOT_SPECIFIED) + // (status == LZMA_STATUS_NOT_FINISHED) + if (unpackRem != 0) + { + /* we also reserve space for max possible number of output bytes of current LZMA chunk */ + size_t rem = limit - dicPos; + if (rem > unpackRem) + rem = unpackRem; + dicPos += rem; + } + } + } + + me->outProcessed_Parse += dicPos; + } + + cc->outPos = dicPos; + t->outPreSize = (size_t)dicPos; + } + + t->state = cc->state; + return; + } +} + + +static SRes Lzma2DecMt_MtCallback_PreCode(void *pp, unsigned coderIndex) +{ + CLzma2DecMt *me = (CLzma2DecMt *)pp; + CLzma2DecMtThread *t = &me->coders[coderIndex]; + Byte *dest = t->outBuf; + + if (t->inPreSize == 0) + { + t->codeRes = SZ_ERROR_DATA; + return t->codeRes; + } + + if (!dest || t->outBufSize < t->outPreSize) + { + if (dest) + { + ISzAlloc_Free(me->allocMid, dest); + t->outBuf = NULL; + t->outBufSize = 0; + } + + dest = (Byte *)ISzAlloc_Alloc(me->allocMid, t->outPreSize + // + (1 << 28) + ); + // Sleep(200); + if (!dest) + return SZ_ERROR_MEM; + t->outBuf = dest; + t->outBufSize = t->outPreSize; + } + + t->dec.decoder.dic = dest; + t->dec.decoder.dicBufSize = (SizeT)t->outPreSize; + + t->needInit = True; + + return Lzma2Dec_AllocateProbs(&t->dec, me->prop, &t->alloc.vt); // alloc.vt +} + + +static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex, + const Byte *src, size_t srcSize, int srcFinished, + // int finished, int blockFinished, + UInt64 *inCodePos, UInt64 *outCodePos, int *stop) +{ + CLzma2DecMt *me = (CLzma2DecMt *)pp; + CLzma2DecMtThread *t = &me->coders[coderIndex]; + + UNUSED_VAR(srcFinished) + + PRF_STR_INT_2("Code", coderIndex, srcSize) + + *inCodePos = t->inCodeSize; + *outCodePos = 0; + *stop = True; + + if (t->needInit) + { + Lzma2Dec_Init(&t->dec); + t->needInit = False; + } + + { + ELzmaStatus status; + SizeT srcProcessed = (SizeT)srcSize; + BoolInt blockWasFinished = + ((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK + || t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK); + + SRes res = Lzma2Dec_DecodeToDic(&t->dec, + (SizeT)t->outPreSize, + src, &srcProcessed, + blockWasFinished ? LZMA_FINISH_END : LZMA_FINISH_ANY, + &status); + + t->codeRes = res; + + t->inCodeSize += srcProcessed; + *inCodePos = t->inCodeSize; + t->outCodeSize = t->dec.decoder.dicPos; + *outCodePos = t->dec.decoder.dicPos; + + if (res != SZ_OK) + return res; + + if (srcProcessed == srcSize) + *stop = False; + + if (blockWasFinished) + { + if (srcSize != srcProcessed) + return SZ_ERROR_FAIL; + + if (t->inPreSize == t->inCodeSize) + { + if (t->outPreSize != t->outCodeSize) + return SZ_ERROR_FAIL; + *stop = True; + } + } + else + { + if (t->outPreSize == t->outCodeSize) + *stop = True; + } + + return SZ_OK; + } +} + + +#define LZMA2DECMT_STREAM_WRITE_STEP (1 << 24) + +static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex, + BoolInt needWriteToStream, + const Byte *src, size_t srcSize, BoolInt isCross, + BoolInt *needContinue, BoolInt *canRecode) +{ + CLzma2DecMt *me = (CLzma2DecMt *)pp; + const CLzma2DecMtThread *t = &me->coders[coderIndex]; + size_t size = t->outCodeSize; + const Byte *data = t->outBuf; + BoolInt needContinue2 = True; + + UNUSED_VAR(src) + UNUSED_VAR(srcSize) + UNUSED_VAR(isCross) + + PRF_STR_INT_2("Write", coderIndex, srcSize) + + *needContinue = False; + *canRecode = True; + + if ( + // t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK + t->state == MTDEC_PARSE_OVERFLOW + || t->state == MTDEC_PARSE_END) + needContinue2 = False; + + + if (!needWriteToStream) + return SZ_OK; + + me->mtc.inProcessed += t->inCodeSize; + + if (t->codeRes == SZ_OK) + if ((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK + || t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK) + if (t->outPreSize != t->outCodeSize + || t->inPreSize != t->inCodeSize) + return SZ_ERROR_FAIL; + + *canRecode = False; + + if (me->outStream) + { + for (;;) + { + size_t cur = size; + size_t written; + if (cur > LZMA2DECMT_STREAM_WRITE_STEP) + cur = LZMA2DECMT_STREAM_WRITE_STEP; + + written = ISeqOutStream_Write(me->outStream, data, cur); + + me->outProcessed += written; + // me->mtc.writtenTotal += written; + if (written != cur) + return SZ_ERROR_WRITE; + data += cur; + size -= cur; + if (size == 0) + { + *needContinue = needContinue2; + return SZ_OK; + } + RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0)) + } + } + + return SZ_ERROR_FAIL; + /* + if (size > me->outBufSize) + return SZ_ERROR_OUTPUT_EOF; + memcpy(me->outBuf, data, size); + me->outBufSize -= size; + me->outBuf += size; + *needContinue = needContinue2; + return SZ_OK; + */ +} + +#endif + + +static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p) +{ + if (!p->dec_created) + { + Lzma2Dec_CONSTRUCT(&p->dec) + p->dec_created = True; + } + + RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt)) + + if (!p->inBuf || p->inBufSize != p->props.inBufSize_ST) + { + ISzAlloc_Free(p->allocMid, p->inBuf); + p->inBufSize = 0; + p->inBuf = (Byte *)ISzAlloc_Alloc(p->allocMid, p->props.inBufSize_ST); + if (!p->inBuf) + return SZ_ERROR_MEM; + p->inBufSize = p->props.inBufSize_ST; + } + + Lzma2Dec_Init(&p->dec); + + return SZ_OK; +} + + +static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p + #ifndef Z7_ST + , BoolInt tMode + #endif + ) +{ + SizeT wrPos; + size_t inPos, inLim; + const Byte *inData; + UInt64 inPrev, outPrev; + + CLzma2Dec *dec; + + #ifndef Z7_ST + if (tMode) + { + Lzma2DecMt_FreeOutBufs(p); + tMode = MtDec_PrepareRead(&p->mtc); + } + #endif + + RINOK(Lzma2Dec_Prepare_ST(p)) + + dec = &p->dec; + + inPrev = p->inProcessed; + outPrev = p->outProcessed; + + inPos = 0; + inLim = 0; + inData = NULL; + wrPos = dec->decoder.dicPos; + + for (;;) + { + SizeT dicPos; + SizeT size; + ELzmaFinishMode finishMode; + SizeT inProcessed; + ELzmaStatus status; + SRes res; + + SizeT outProcessed; + BoolInt outFinished; + BoolInt needStop; + + if (inPos == inLim) + { + #ifndef Z7_ST + if (tMode) + { + inData = MtDec_Read(&p->mtc, &inLim); + inPos = 0; + if (inData) + continue; + tMode = False; + inLim = 0; + } + #endif + + if (!p->readWasFinished) + { + inPos = 0; + inLim = p->inBufSize; + inData = p->inBuf; + p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim); + // p->readProcessed += inLim; + // inLim -= 5; p->readWasFinished = True; // for test + if (inLim == 0 || p->readRes != SZ_OK) + p->readWasFinished = True; + } + } + + dicPos = dec->decoder.dicPos; + { + SizeT next = dec->decoder.dicBufSize; + if (next - wrPos > p->props.outStep_ST) + next = wrPos + (SizeT)p->props.outStep_ST; + size = next - dicPos; + } + + finishMode = LZMA_FINISH_ANY; + if (p->outSize_Defined) + { + const UInt64 rem = p->outSize - p->outProcessed; + if (size >= rem) + { + size = (SizeT)rem; + if (p->finishMode) + finishMode = LZMA_FINISH_END; + } + } + + inProcessed = (SizeT)(inLim - inPos); + + res = Lzma2Dec_DecodeToDic(dec, dicPos + size, inData + inPos, &inProcessed, finishMode, &status); + + inPos += inProcessed; + p->inProcessed += inProcessed; + outProcessed = dec->decoder.dicPos - dicPos; + p->outProcessed += outProcessed; + + outFinished = (p->outSize_Defined && p->outSize <= p->outProcessed); + + needStop = (res != SZ_OK + || (inProcessed == 0 && outProcessed == 0) + || status == LZMA_STATUS_FINISHED_WITH_MARK + || (!p->finishMode && outFinished)); + + if (needStop || outProcessed >= size) + { + SRes res2; + { + size_t writeSize = dec->decoder.dicPos - wrPos; + size_t written = ISeqOutStream_Write(p->outStream, dec->decoder.dic + wrPos, writeSize); + res2 = (written == writeSize) ? SZ_OK : SZ_ERROR_WRITE; + } + + if (dec->decoder.dicPos == dec->decoder.dicBufSize) + dec->decoder.dicPos = 0; + wrPos = dec->decoder.dicPos; + + RINOK(res2) + + if (needStop) + { + if (res != SZ_OK) + return res; + + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + { + if (p->finishMode) + { + if (p->outSize_Defined && p->outSize != p->outProcessed) + return SZ_ERROR_DATA; + } + return SZ_OK; + } + + if (!p->finishMode && outFinished) + return SZ_OK; + + if (status == LZMA_STATUS_NEEDS_MORE_INPUT) + return SZ_ERROR_INPUT_EOF; + + return SZ_ERROR_DATA; + } + } + + if (p->progress) + { + UInt64 inDelta = p->inProcessed - inPrev; + UInt64 outDelta = p->outProcessed - outPrev; + if (inDelta >= (1 << 22) || outDelta >= (1 << 22)) + { + RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed)) + inPrev = p->inProcessed; + outPrev = p->outProcessed; + } + } + } +} + + + +SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p, + Byte prop, + const CLzma2DecMtProps *props, + ISeqOutStreamPtr outStream, const UInt64 *outDataSize, int finishMode, + // Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + // const Byte *inData, size_t inDataSize, + UInt64 *inProcessed, + // UInt64 *outProcessed, + int *isMT, + ICompressProgressPtr progress) +{ + // GET_CLzma2DecMt_p + #ifndef Z7_ST + BoolInt tMode; + #endif + + *inProcessed = 0; + + if (prop > 40) + return SZ_ERROR_UNSUPPORTED; + + p->prop = prop; + p->props = *props; + + p->inStream = inStream; + p->outStream = outStream; + p->progress = progress; + + p->outSize = 0; + p->outSize_Defined = False; + if (outDataSize) + { + p->outSize_Defined = True; + p->outSize = *outDataSize; + } + p->finishMode = finishMode; + + p->outProcessed = 0; + p->inProcessed = 0; + + p->readWasFinished = False; + p->readRes = SZ_OK; + + *isMT = False; + + + #ifndef Z7_ST + + tMode = False; + + // p->mtc.parseRes = SZ_OK; + + // p->mtc.numFilledThreads = 0; + // p->mtc.crossStart = 0; + // p->mtc.crossEnd = 0; + // p->mtc.allocError_for_Read_BlockIndex = 0; + // p->mtc.isAllocError = False; + + if (p->props.numThreads > 1) + { + IMtDecCallback2 vt; + + Lzma2DecMt_FreeSt(p); + + p->outProcessed_Parse = 0; + + if (!p->mtc_WasConstructed) + { + p->mtc_WasConstructed = True; + MtDec_Construct(&p->mtc); + } + + p->mtc.progress = progress; + p->mtc.inStream = inStream; + + // p->outBuf = NULL; + // p->outBufSize = 0; + /* + if (!outStream) + { + // p->outBuf = outBuf; + // p->outBufSize = *outBufSize; + // *outBufSize = 0; + return SZ_ERROR_PARAM; + } + */ + + // p->mtc.inBlockMax = p->props.inBlockMax; + p->mtc.alloc = &p->alignOffsetAlloc.vt; + // p->alignOffsetAlloc.baseAlloc; + // p->mtc.inData = inData; + // p->mtc.inDataSize = inDataSize; + p->mtc.mtCallback = &vt; + p->mtc.mtCallbackObject = p; + + p->mtc.inBufSize = p->props.inBufSize_MT; + + p->mtc.numThreadsMax = p->props.numThreads; + + *isMT = True; + + vt.Parse = Lzma2DecMt_MtCallback_Parse; + vt.PreCode = Lzma2DecMt_MtCallback_PreCode; + vt.Code = Lzma2DecMt_MtCallback_Code; + vt.Write = Lzma2DecMt_MtCallback_Write; + + { + BoolInt needContinue = False; + + SRes res = MtDec_Code(&p->mtc); + + /* + if (!outStream) + *outBufSize = p->outBuf - outBuf; + */ + + *inProcessed = p->mtc.inProcessed; + + needContinue = False; + + if (res == SZ_OK) + { + if (p->mtc.mtProgress.res != SZ_OK) + res = p->mtc.mtProgress.res; + else + needContinue = p->mtc.needContinue; + } + + if (!needContinue) + { + if (res == SZ_OK) + return p->mtc.readRes; + return res; + } + + tMode = True; + p->readRes = p->mtc.readRes; + p->readWasFinished = p->mtc.readWasFinished; + p->inProcessed = p->mtc.inProcessed; + + PRF_STR("----- decoding ST -----") + } + } + + #endif + + + *isMT = False; + + { + SRes res = Lzma2Dec_Decode_ST(p + #ifndef Z7_ST + , tMode + #endif + ); + + *inProcessed = p->inProcessed; + + // res = SZ_OK; // for test + if (res == SZ_ERROR_INPUT_EOF) + { + if (p->readRes != SZ_OK) + res = p->readRes; + } + else if (res == SZ_OK && p->readRes != SZ_OK) + res = p->readRes; + + /* + #ifndef Z7_ST + if (res == SZ_OK && tMode && p->mtc.parseRes != SZ_OK) + res = p->mtc.parseRes; + #endif + */ + + return res; + } +} + + +/* ---------- Read from CLzma2DecMtHandle Interface ---------- */ + +SRes Lzma2DecMt_Init(CLzma2DecMtHandle p, + Byte prop, + const CLzma2DecMtProps *props, + const UInt64 *outDataSize, int finishMode, + ISeqInStreamPtr inStream) +{ + // GET_CLzma2DecMt_p + + if (prop > 40) + return SZ_ERROR_UNSUPPORTED; + + p->prop = prop; + p->props = *props; + + p->inStream = inStream; + + p->outSize = 0; + p->outSize_Defined = False; + if (outDataSize) + { + p->outSize_Defined = True; + p->outSize = *outDataSize; + } + p->finishMode = finishMode; + + p->outProcessed = 0; + p->inProcessed = 0; + + p->inPos = 0; + p->inLim = 0; + + return Lzma2Dec_Prepare_ST(p); +} + + +SRes Lzma2DecMt_Read(CLzma2DecMtHandle p, + Byte *data, size_t *outSize, + UInt64 *inStreamProcessed) +{ + // GET_CLzma2DecMt_p + ELzmaFinishMode finishMode; + SRes readRes; + size_t size = *outSize; + + *outSize = 0; + *inStreamProcessed = 0; + + finishMode = LZMA_FINISH_ANY; + if (p->outSize_Defined) + { + const UInt64 rem = p->outSize - p->outProcessed; + if (size >= rem) + { + size = (size_t)rem; + if (p->finishMode) + finishMode = LZMA_FINISH_END; + } + } + + readRes = SZ_OK; + + for (;;) + { + SizeT inCur; + SizeT outCur; + ELzmaStatus status; + SRes res; + + if (p->inPos == p->inLim && readRes == SZ_OK) + { + p->inPos = 0; + p->inLim = p->props.inBufSize_ST; + readRes = ISeqInStream_Read(p->inStream, p->inBuf, &p->inLim); + } + + inCur = (SizeT)(p->inLim - p->inPos); + outCur = (SizeT)size; + + res = Lzma2Dec_DecodeToBuf(&p->dec, data, &outCur, + p->inBuf + p->inPos, &inCur, finishMode, &status); + + p->inPos += inCur; + p->inProcessed += inCur; + *inStreamProcessed += inCur; + p->outProcessed += outCur; + *outSize += outCur; + size -= outCur; + data += outCur; + + if (res != 0) + return res; + + /* + if (status == LZMA_STATUS_FINISHED_WITH_MARK) + return readRes; + + if (size == 0 && status != LZMA_STATUS_NEEDS_MORE_INPUT) + { + if (p->finishMode && p->outSize_Defined && p->outProcessed >= p->outSize) + return SZ_ERROR_DATA; + return readRes; + } + */ + + if (inCur == 0 && outCur == 0) + return readRes; + } +} + +#undef PRF +#undef PRF_STR +#undef PRF_STR_INT_2 diff --git a/crnlib/lzma/Lzma2DecMt.h b/crnlib/lzma/Lzma2DecMt.h new file mode 100644 index 00000000..cb7c8f1e --- /dev/null +++ b/crnlib/lzma/Lzma2DecMt.h @@ -0,0 +1,81 @@ +/* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread +2023-04-13 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA2_DEC_MT_H +#define ZIP7_INC_LZMA2_DEC_MT_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef struct +{ + size_t inBufSize_ST; + size_t outStep_ST; + + #ifndef Z7_ST + unsigned numThreads; + size_t inBufSize_MT; + size_t outBlockMax; + size_t inBlockMax; + #endif +} CLzma2DecMtProps; + +/* init to single-thread mode */ +void Lzma2DecMtProps_Init(CLzma2DecMtProps *p); + + +/* ---------- CLzma2DecMtHandle Interface ---------- */ + +/* Lzma2DecMt_ * functions can return the following exit codes: +SRes: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - ISeqOutStream write callback error + // SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) +*/ + +typedef struct CLzma2DecMt CLzma2DecMt; +typedef CLzma2DecMt * CLzma2DecMtHandle; +// Z7_DECLARE_HANDLE(CLzma2DecMtHandle) + +CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); +void Lzma2DecMt_Destroy(CLzma2DecMtHandle p); + +SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p, + Byte prop, + const CLzma2DecMtProps *props, + ISeqOutStreamPtr outStream, + const UInt64 *outDataSize, // NULL means undefined + int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished + // Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + // const Byte *inData, size_t inDataSize, + + // out variables: + UInt64 *inProcessed, + int *isMT, /* out: (*isMT == 0), if single thread decoding was used */ + + // UInt64 *outProcessed, + ICompressProgressPtr progress); + + +/* ---------- Read from CLzma2DecMtHandle Interface ---------- */ + +SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp, + Byte prop, + const CLzma2DecMtProps *props, + const UInt64 *outDataSize, int finishMode, + ISeqInStreamPtr inStream); + +SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, + Byte *data, size_t *outSize, + UInt64 *inStreamProcessed); + + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Lzma2Enc.c b/crnlib/lzma/Lzma2Enc.c new file mode 100644 index 00000000..85aa80d7 --- /dev/null +++ b/crnlib/lzma/Lzma2Enc.c @@ -0,0 +1,805 @@ +/* Lzma2Enc.c -- LZMA2 Encoder +2023-04-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define Z7_ST */ + +#include "Lzma2Enc.h" + +#ifndef Z7_ST +#include "MtCoder.h" +#else +#define MTCODER_THREADS_MAX 1 +#endif + +#define LZMA2_CONTROL_LZMA (1 << 7) +#define LZMA2_CONTROL_COPY_NO_RESET 2 +#define LZMA2_CONTROL_COPY_RESET_DIC 1 +#define LZMA2_CONTROL_EOF 0 + +#define LZMA2_LCLP_MAX 4 + +#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) + +#define LZMA2_PACK_SIZE_MAX (1 << 16) +#define LZMA2_COPY_CHUNK_SIZE LZMA2_PACK_SIZE_MAX +#define LZMA2_UNPACK_SIZE_MAX (1 << 21) +#define LZMA2_KEEP_WINDOW_SIZE LZMA2_UNPACK_SIZE_MAX + +#define LZMA2_CHUNK_SIZE_COMPRESSED_MAX ((1 << 16) + 16) + + +#define PRF(x) /* x */ + + +/* ---------- CLimitedSeqInStream ---------- */ + +typedef struct +{ + ISeqInStream vt; + ISeqInStreamPtr realStream; + UInt64 limit; + UInt64 processed; + int finished; +} CLimitedSeqInStream; + +static void LimitedSeqInStream_Init(CLimitedSeqInStream *p) +{ + p->limit = (UInt64)(Int64)-1; + p->processed = 0; + p->finished = 0; +} + +static SRes LimitedSeqInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLimitedSeqInStream) + size_t size2 = *size; + SRes res = SZ_OK; + + if (p->limit != (UInt64)(Int64)-1) + { + const UInt64 rem = p->limit - p->processed; + if (size2 > rem) + size2 = (size_t)rem; + } + if (size2 != 0) + { + res = ISeqInStream_Read(p->realStream, data, &size2); + p->finished = (size2 == 0 ? 1 : 0); + p->processed += size2; + } + *size = size2; + return res; +} + + +/* ---------- CLzma2EncInt ---------- */ + +typedef struct +{ + CLzmaEncHandle enc; + Byte propsAreSet; + Byte propsByte; + Byte needInitState; + Byte needInitProp; + UInt64 srcPos; +} CLzma2EncInt; + + +static SRes Lzma2EncInt_InitStream(CLzma2EncInt *p, const CLzma2EncProps *props) +{ + if (!p->propsAreSet) + { + SizeT propsSize = LZMA_PROPS_SIZE; + Byte propsEncoded[LZMA_PROPS_SIZE]; + RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps)) + RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize)) + p->propsByte = propsEncoded[0]; + p->propsAreSet = True; + } + return SZ_OK; +} + +static void Lzma2EncInt_InitBlock(CLzma2EncInt *p) +{ + p->srcPos = 0; + p->needInitState = True; + p->needInitProp = True; +} + + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p); +void LzmaEnc_Finish(CLzmaEncHandle p); +void LzmaEnc_SaveState(CLzmaEncHandle p); +void LzmaEnc_RestoreState(CLzmaEncHandle p); + +/* +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p); +*/ + +static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf, + size_t *packSizeRes, ISeqOutStreamPtr outStream) +{ + size_t packSizeLimit = *packSizeRes; + size_t packSize = packSizeLimit; + UInt32 unpackSize = LZMA2_UNPACK_SIZE_MAX; + unsigned lzHeaderSize = 5 + (p->needInitProp ? 1 : 0); + BoolInt useCopyBlock; + SRes res; + + *packSizeRes = 0; + if (packSize < lzHeaderSize) + return SZ_ERROR_OUTPUT_EOF; + packSize -= lzHeaderSize; + + LzmaEnc_SaveState(p->enc); + res = LzmaEnc_CodeOneMemBlock(p->enc, p->needInitState, + outBuf + lzHeaderSize, &packSize, LZMA2_PACK_SIZE_MAX, &unpackSize); + + PRF(printf("\npackSize = %7d unpackSize = %7d ", packSize, unpackSize)); + + if (unpackSize == 0) + return res; + + if (res == SZ_OK) + useCopyBlock = (packSize + 2 >= unpackSize || packSize > (1 << 16)); + else + { + if (res != SZ_ERROR_OUTPUT_EOF) + return res; + res = SZ_OK; + useCopyBlock = True; + } + + if (useCopyBlock) + { + size_t destPos = 0; + PRF(printf("################# COPY ")); + + while (unpackSize > 0) + { + const UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE; + if (packSizeLimit - destPos < u + 3) + return SZ_ERROR_OUTPUT_EOF; + outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET); + outBuf[destPos++] = (Byte)((u - 1) >> 8); + outBuf[destPos++] = (Byte)(u - 1); + memcpy(outBuf + destPos, LzmaEnc_GetCurBuf(p->enc) - unpackSize, u); + unpackSize -= u; + destPos += u; + p->srcPos += u; + + if (outStream) + { + *packSizeRes += destPos; + if (ISeqOutStream_Write(outStream, outBuf, destPos) != destPos) + return SZ_ERROR_WRITE; + destPos = 0; + } + else + *packSizeRes = destPos; + /* needInitState = True; */ + } + + LzmaEnc_RestoreState(p->enc); + return SZ_OK; + } + + { + size_t destPos = 0; + const UInt32 u = unpackSize - 1; + const UInt32 pm = (UInt32)(packSize - 1); + const unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0); + + PRF(printf(" ")); + + outBuf[destPos++] = (Byte)(LZMA2_CONTROL_LZMA | (mode << 5) | ((u >> 16) & 0x1F)); + outBuf[destPos++] = (Byte)(u >> 8); + outBuf[destPos++] = (Byte)u; + outBuf[destPos++] = (Byte)(pm >> 8); + outBuf[destPos++] = (Byte)pm; + + if (p->needInitProp) + outBuf[destPos++] = p->propsByte; + + p->needInitProp = False; + p->needInitState = False; + destPos += packSize; + p->srcPos += unpackSize; + + if (outStream) + if (ISeqOutStream_Write(outStream, outBuf, destPos) != destPos) + return SZ_ERROR_WRITE; + + *packSizeRes = destPos; + return SZ_OK; + } +} + + +/* ---------- Lzma2 Props ---------- */ + +void Lzma2EncProps_Init(CLzma2EncProps *p) +{ + LzmaEncProps_Init(&p->lzmaProps); + p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO; + p->numBlockThreads_Reduced = -1; + p->numBlockThreads_Max = -1; + p->numTotalThreads = -1; +} + +void Lzma2EncProps_Normalize(CLzma2EncProps *p) +{ + UInt64 fileSize; + int t1, t1n, t2, t2r, t3; + { + CLzmaEncProps lzmaProps = p->lzmaProps; + LzmaEncProps_Normalize(&lzmaProps); + t1n = lzmaProps.numThreads; + } + + t1 = p->lzmaProps.numThreads; + t2 = p->numBlockThreads_Max; + t3 = p->numTotalThreads; + + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; + + if (t3 <= 0) + { + if (t2 <= 0) + t2 = 1; + t3 = t1n * t2; + } + else if (t2 <= 0) + { + t2 = t3 / t1n; + if (t2 == 0) + { + t1 = 1; + t2 = t3; + } + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; + } + else if (t1 <= 0) + { + t1 = t3 / t2; + if (t1 == 0) + t1 = 1; + } + else + t3 = t1n * t2; + + p->lzmaProps.numThreads = t1; + + t2r = t2; + + fileSize = p->lzmaProps.reduceSize; + + if ( p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + && p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO + && (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1)) + p->lzmaProps.reduceSize = p->blockSize; + + LzmaEncProps_Normalize(&p->lzmaProps); + + p->lzmaProps.reduceSize = fileSize; + + t1 = p->lzmaProps.numThreads; + + if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) + { + t2r = t2 = 1; + t3 = t1; + } + else if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO && t2 <= 1) + { + /* if there is no block multi-threading, we use SOLID block */ + p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID; + } + else + { + if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) + { + const UInt32 kMinSize = (UInt32)1 << 20; + const UInt32 kMaxSize = (UInt32)1 << 28; + const UInt32 dictSize = p->lzmaProps.dictSize; + UInt64 blockSize = (UInt64)dictSize << 2; + if (blockSize < kMinSize) blockSize = kMinSize; + if (blockSize > kMaxSize) blockSize = kMaxSize; + if (blockSize < dictSize) blockSize = dictSize; + blockSize += (kMinSize - 1); + blockSize &= ~(UInt64)(kMinSize - 1); + p->blockSize = blockSize; + } + + if (t2 > 1 && fileSize != (UInt64)(Int64)-1) + { + UInt64 numBlocks = fileSize / p->blockSize; + if (numBlocks * p->blockSize != fileSize) + numBlocks++; + if (numBlocks < (unsigned)t2) + { + t2r = (int)numBlocks; + if (t2r == 0) + t2r = 1; + t3 = t1 * t2r; + } + } + } + + p->numBlockThreads_Max = t2; + p->numBlockThreads_Reduced = t2r; + p->numTotalThreads = t3; +} + + +static SRes Progress(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize) +{ + return (p && ICompressProgress_Progress(p, inSize, outSize) != SZ_OK) ? SZ_ERROR_PROGRESS : SZ_OK; +} + + +/* ---------- Lzma2 ---------- */ + +struct CLzma2Enc +{ + Byte propEncoded; + CLzma2EncProps props; + UInt64 expectedDataSize; + + Byte *tempBufLzma; + + ISzAllocPtr alloc; + ISzAllocPtr allocBig; + + CLzma2EncInt coders[MTCODER_THREADS_MAX]; + + #ifndef Z7_ST + + ISeqOutStreamPtr outStream; + Byte *outBuf; + size_t outBuf_Rem; /* remainder in outBuf */ + + size_t outBufSize; /* size of allocated outBufs[i] */ + size_t outBufsDataSizes[MTCODER_BLOCKS_MAX]; + BoolInt mtCoder_WasConstructed; + CMtCoder mtCoder; + Byte *outBufs[MTCODER_BLOCKS_MAX]; + + #endif +}; + + + +CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzma2Enc *p = (CLzma2Enc *)ISzAlloc_Alloc(alloc, sizeof(CLzma2Enc)); + if (!p) + return NULL; + Lzma2EncProps_Init(&p->props); + Lzma2EncProps_Normalize(&p->props); + p->expectedDataSize = (UInt64)(Int64)-1; + p->tempBufLzma = NULL; + p->alloc = alloc; + p->allocBig = allocBig; + { + unsigned i; + for (i = 0; i < MTCODER_THREADS_MAX; i++) + p->coders[i].enc = NULL; + } + + #ifndef Z7_ST + p->mtCoder_WasConstructed = False; + { + unsigned i; + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + p->outBufs[i] = NULL; + p->outBufSize = 0; + } + #endif + + return (CLzma2EncHandle)p; +} + + +#ifndef Z7_ST + +static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p) +{ + unsigned i; + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + if (p->outBufs[i]) + { + ISzAlloc_Free(p->alloc, p->outBufs[i]); + p->outBufs[i] = NULL; + } + p->outBufSize = 0; +} + +#endif + +// #define GET_CLzma2Enc_p CLzma2Enc *p = (CLzma2Enc *)(void *)p; + +void Lzma2Enc_Destroy(CLzma2EncHandle p) +{ + // GET_CLzma2Enc_p + unsigned i; + for (i = 0; i < MTCODER_THREADS_MAX; i++) + { + CLzma2EncInt *t = &p->coders[i]; + if (t->enc) + { + LzmaEnc_Destroy(t->enc, p->alloc, p->allocBig); + t->enc = NULL; + } + } + + + #ifndef Z7_ST + if (p->mtCoder_WasConstructed) + { + MtCoder_Destruct(&p->mtCoder); + p->mtCoder_WasConstructed = False; + } + Lzma2Enc_FreeOutBufs(p); + #endif + + ISzAlloc_Free(p->alloc, p->tempBufLzma); + p->tempBufLzma = NULL; + + ISzAlloc_Free(p->alloc, p); +} + + +SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props) +{ + // GET_CLzma2Enc_p + CLzmaEncProps lzmaProps = props->lzmaProps; + LzmaEncProps_Normalize(&lzmaProps); + if (lzmaProps.lc + lzmaProps.lp > LZMA2_LCLP_MAX) + return SZ_ERROR_PARAM; + p->props = *props; + Lzma2EncProps_Normalize(&p->props); + return SZ_OK; +} + + +void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize) +{ + // GET_CLzma2Enc_p + p->expectedDataSize = expectedDataSiize; +} + + +Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p) +{ + // GET_CLzma2Enc_p + unsigned i; + UInt32 dicSize = LzmaEncProps_GetDictSize(&p->props.lzmaProps); + for (i = 0; i < 40; i++) + if (dicSize <= LZMA2_DIC_SIZE_FROM_PROP(i)) + break; + return (Byte)i; +} + + +static SRes Lzma2Enc_EncodeMt1( + CLzma2Enc *me, + CLzma2EncInt *p, + ISeqOutStreamPtr outStream, + Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + const Byte *inData, size_t inDataSize, + int finished, + ICompressProgressPtr progress) +{ + UInt64 unpackTotal = 0; + UInt64 packTotal = 0; + size_t outLim = 0; + CLimitedSeqInStream limitedInStream; + + if (outBuf) + { + outLim = *outBufSize; + *outBufSize = 0; + } + + if (!p->enc) + { + p->propsAreSet = False; + p->enc = LzmaEnc_Create(me->alloc); + if (!p->enc) + return SZ_ERROR_MEM; + } + + limitedInStream.realStream = inStream; + if (inStream) + { + limitedInStream.vt.Read = LimitedSeqInStream_Read; + } + + if (!outBuf) + { + // outStream version works only in one thread. So we use CLzma2Enc::tempBufLzma + if (!me->tempBufLzma) + { + me->tempBufLzma = (Byte *)ISzAlloc_Alloc(me->alloc, LZMA2_CHUNK_SIZE_COMPRESSED_MAX); + if (!me->tempBufLzma) + return SZ_ERROR_MEM; + } + } + + RINOK(Lzma2EncInt_InitStream(p, &me->props)) + + for (;;) + { + SRes res = SZ_OK; + SizeT inSizeCur = 0; + + Lzma2EncInt_InitBlock(p); + + LimitedSeqInStream_Init(&limitedInStream); + limitedInStream.limit = me->props.blockSize; + + if (inStream) + { + UInt64 expected = (UInt64)(Int64)-1; + // inStream version works only in one thread. So we use CLzma2Enc::expectedDataSize + if (me->expectedDataSize != (UInt64)(Int64)-1 + && me->expectedDataSize >= unpackTotal) + expected = me->expectedDataSize - unpackTotal; + if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + && expected > me->props.blockSize) + expected = (size_t)me->props.blockSize; + + LzmaEnc_SetDataSize(p->enc, expected); + + RINOK(LzmaEnc_PrepareForLzma2(p->enc, + &limitedInStream.vt, + LZMA2_KEEP_WINDOW_SIZE, + me->alloc, + me->allocBig)) + } + else + { + inSizeCur = (SizeT)(inDataSize - (size_t)unpackTotal); + if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + && inSizeCur > me->props.blockSize) + inSizeCur = (SizeT)(size_t)me->props.blockSize; + + // LzmaEnc_SetDataSize(p->enc, inSizeCur); + + RINOK(LzmaEnc_MemPrepare(p->enc, + inData + (size_t)unpackTotal, inSizeCur, + LZMA2_KEEP_WINDOW_SIZE, + me->alloc, + me->allocBig)) + } + + for (;;) + { + size_t packSize = LZMA2_CHUNK_SIZE_COMPRESSED_MAX; + if (outBuf) + packSize = outLim - (size_t)packTotal; + + res = Lzma2EncInt_EncodeSubblock(p, + outBuf ? outBuf + (size_t)packTotal : me->tempBufLzma, &packSize, + outBuf ? NULL : outStream); + + if (res != SZ_OK) + break; + + packTotal += packSize; + if (outBuf) + *outBufSize = (size_t)packTotal; + + res = Progress(progress, unpackTotal + p->srcPos, packTotal); + if (res != SZ_OK) + break; + + /* + if (LzmaEnc_GetNumAvailableBytes(p->enc) == 0) + break; + */ + + if (packSize == 0) + break; + } + + LzmaEnc_Finish(p->enc); + + unpackTotal += p->srcPos; + + RINOK(res) + + if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur)) + return SZ_ERROR_FAIL; + + if (inStream ? limitedInStream.finished : (unpackTotal == inDataSize)) + { + if (finished) + { + if (outBuf) + { + const size_t destPos = *outBufSize; + if (destPos >= outLim) + return SZ_ERROR_OUTPUT_EOF; + outBuf[destPos] = LZMA2_CONTROL_EOF; // 0 + *outBufSize = destPos + 1; + } + else + { + const Byte b = LZMA2_CONTROL_EOF; // 0; + if (ISeqOutStream_Write(outStream, &b, 1) != 1) + return SZ_ERROR_WRITE; + } + } + return SZ_OK; + } + } +} + + + +#ifndef Z7_ST + +static SRes Lzma2Enc_MtCallback_Code(void *p, unsigned coderIndex, unsigned outBufIndex, + const Byte *src, size_t srcSize, int finished) +{ + CLzma2Enc *me = (CLzma2Enc *)p; + size_t destSize = me->outBufSize; + SRes res; + CMtProgressThunk progressThunk; + + Byte *dest = me->outBufs[outBufIndex]; + + me->outBufsDataSizes[outBufIndex] = 0; + + if (!dest) + { + dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize); + if (!dest) + return SZ_ERROR_MEM; + me->outBufs[outBufIndex] = dest; + } + + MtProgressThunk_CreateVTable(&progressThunk); + progressThunk.mtProgress = &me->mtCoder.mtProgress; + progressThunk.inSize = 0; + progressThunk.outSize = 0; + + res = Lzma2Enc_EncodeMt1(me, + &me->coders[coderIndex], + NULL, dest, &destSize, + NULL, src, srcSize, + finished, + &progressThunk.vt); + + me->outBufsDataSizes[outBufIndex] = destSize; + + return res; +} + + +static SRes Lzma2Enc_MtCallback_Write(void *p, unsigned outBufIndex) +{ + CLzma2Enc *me = (CLzma2Enc *)p; + size_t size = me->outBufsDataSizes[outBufIndex]; + const Byte *data = me->outBufs[outBufIndex]; + + if (me->outStream) + return ISeqOutStream_Write(me->outStream, data, size) == size ? SZ_OK : SZ_ERROR_WRITE; + + if (size > me->outBuf_Rem) + return SZ_ERROR_OUTPUT_EOF; + memcpy(me->outBuf, data, size); + me->outBuf_Rem -= size; + me->outBuf += size; + return SZ_OK; +} + +#endif + + + +SRes Lzma2Enc_Encode2(CLzma2EncHandle p, + ISeqOutStreamPtr outStream, + Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + const Byte *inData, size_t inDataSize, + ICompressProgressPtr progress) +{ + // GET_CLzma2Enc_p + + if (inStream && inData) + return SZ_ERROR_PARAM; + + if (outStream && outBuf) + return SZ_ERROR_PARAM; + + { + unsigned i; + for (i = 0; i < MTCODER_THREADS_MAX; i++) + p->coders[i].propsAreSet = False; + } + + #ifndef Z7_ST + + if (p->props.numBlockThreads_Reduced > 1) + { + IMtCoderCallback2 vt; + + if (!p->mtCoder_WasConstructed) + { + p->mtCoder_WasConstructed = True; + MtCoder_Construct(&p->mtCoder); + } + + vt.Code = Lzma2Enc_MtCallback_Code; + vt.Write = Lzma2Enc_MtCallback_Write; + + p->outStream = outStream; + p->outBuf = NULL; + p->outBuf_Rem = 0; + if (!outStream) + { + p->outBuf = outBuf; + p->outBuf_Rem = *outBufSize; + *outBufSize = 0; + } + + p->mtCoder.allocBig = p->allocBig; + p->mtCoder.progress = progress; + p->mtCoder.inStream = inStream; + p->mtCoder.inData = inData; + p->mtCoder.inDataSize = inDataSize; + p->mtCoder.mtCallback = &vt; + p->mtCoder.mtCallbackObject = p; + + p->mtCoder.blockSize = (size_t)p->props.blockSize; + if (p->mtCoder.blockSize != p->props.blockSize) + return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */ + + { + const size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16; + if (destBlockSize < p->mtCoder.blockSize) + return SZ_ERROR_PARAM; + if (p->outBufSize != destBlockSize) + Lzma2Enc_FreeOutBufs(p); + p->outBufSize = destBlockSize; + } + + p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max; + p->mtCoder.expectedDataSize = p->expectedDataSize; + + { + const SRes res = MtCoder_Code(&p->mtCoder); + if (!outStream) + *outBufSize = (size_t)(p->outBuf - outBuf); + return res; + } + } + + #endif + + + return Lzma2Enc_EncodeMt1(p, + &p->coders[0], + outStream, outBuf, outBufSize, + inStream, inData, inDataSize, + True, /* finished */ + progress); +} + +#undef PRF diff --git a/crnlib/lzma/Lzma2Enc.h b/crnlib/lzma/Lzma2Enc.h new file mode 100644 index 00000000..bead0fc5 --- /dev/null +++ b/crnlib/lzma/Lzma2Enc.h @@ -0,0 +1,57 @@ +/* Lzma2Enc.h -- LZMA2 Encoder +2023-04-13 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA2_ENC_H +#define ZIP7_INC_LZMA2_ENC_H + +#include "LzmaEnc.h" + +EXTERN_C_BEGIN + +#define LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO 0 +#define LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID ((UInt64)(Int64)-1) + +typedef struct +{ + CLzmaEncProps lzmaProps; + UInt64 blockSize; + int numBlockThreads_Reduced; + int numBlockThreads_Max; + int numTotalThreads; +} CLzma2EncProps; + +void Lzma2EncProps_Init(CLzma2EncProps *p); +void Lzma2EncProps_Normalize(CLzma2EncProps *p); + +/* ---------- CLzmaEnc2Handle Interface ---------- */ + +/* Lzma2Enc_* functions can return the following exit codes: +SRes: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) +*/ + +typedef struct CLzma2Enc CLzma2Enc; +typedef CLzma2Enc * CLzma2EncHandle; +// Z7_DECLARE_HANDLE(CLzma2EncHandle) + +CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig); +void Lzma2Enc_Destroy(CLzma2EncHandle p); +SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props); +void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize); +Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p); +SRes Lzma2Enc_Encode2(CLzma2EncHandle p, + ISeqOutStreamPtr outStream, + Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + const Byte *inData, size_t inDataSize, + ICompressProgressPtr progress); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Lzma86.h b/crnlib/lzma/Lzma86.h new file mode 100644 index 00000000..9127d73b --- /dev/null +++ b/crnlib/lzma/Lzma86.h @@ -0,0 +1,111 @@ +/* Lzma86.h -- LZMA + x86 (BCJ) Filter +2023-03-03 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA86_H +#define ZIP7_INC_LZMA86_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define LZMA86_SIZE_OFFSET (1 + 5) +#define LZMA86_HEADER_SIZE (LZMA86_SIZE_OFFSET + 8) + +/* +It's an example for LZMA + x86 Filter use. +You can use .lzma86 extension, if you write that stream to file. +.lzma86 header adds one additional byte to standard .lzma header. +.lzma86 header (14 bytes): + Offset Size Description + 0 1 = 0 - no filter, pure LZMA + = 1 - x86 filter + LZMA + 1 1 lc, lp and pb in encoded form + 2 4 dictSize (little endian) + 6 8 uncompressed size (little endian) + + +Lzma86_Encode +------------- +level - compression level: 0 <= level <= 9, the default value for "level" is 5. + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes, for level = 5. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + For better compression ratio dictSize must be >= inSize. + +filterMode: + SZ_FILTER_NO - no Filter + SZ_FILTER_YES - x86 Filter + SZ_FILTER_AUTO - it tries both alternatives to select best. + Encoder will use 2 or 3 passes: + 2 passes when FILTER_NO provides better compression. + 3 passes when FILTER_YES provides better compression. + +Lzma86Encode allocates Data with MyAlloc functions. +RAM Requirements for compressing: + RamSize = dictionarySize * 11.5 + 6MB + FilterBlockSize + filterMode FilterBlockSize + SZ_FILTER_NO 0 + SZ_FILTER_YES inSize + SZ_FILTER_AUTO inSize + + +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +enum ESzFilterMode +{ + SZ_FILTER_NO, + SZ_FILTER_YES, + SZ_FILTER_AUTO +}; + +SRes Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, + int level, UInt32 dictSize, int filterMode); + + +/* +Lzma86_GetUnpackSize: + In: + src - input data + srcLen - input data size + Out: + unpackSize - size of uncompressed stream + Return code: + SZ_OK - OK + SZ_ERROR_INPUT_EOF - Error in headers +*/ + +SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize); + +/* +Lzma86_Decode: + In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size + Out: + destLen - processed output size + srcLen - processed input size + Return code: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - unsupported file + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer +*/ + +SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Lzma86Dec.c b/crnlib/lzma/Lzma86Dec.c new file mode 100644 index 00000000..9b15f363 --- /dev/null +++ b/crnlib/lzma/Lzma86Dec.c @@ -0,0 +1,53 @@ +/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder +2023-03-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Lzma86.h" + +#include "Alloc.h" +#include "Bra.h" +#include "LzmaDec.h" + +SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize) +{ + unsigned i; + if (srcLen < LZMA86_HEADER_SIZE) + return SZ_ERROR_INPUT_EOF; + *unpackSize = 0; + for (i = 0; i < sizeof(UInt64); i++) + *unpackSize += ((UInt64)src[LZMA86_SIZE_OFFSET + i]) << (8 * i); + return SZ_OK; +} + +SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen) +{ + SRes res; + int useFilter; + SizeT inSizePure; + ELzmaStatus status; + + if (*srcLen < LZMA86_HEADER_SIZE) + return SZ_ERROR_INPUT_EOF; + + useFilter = src[0]; + + if (useFilter > 1) + { + *destLen = 0; + return SZ_ERROR_UNSUPPORTED; + } + + inSizePure = *srcLen - LZMA86_HEADER_SIZE; + res = LzmaDecode(dest, destLen, src + LZMA86_HEADER_SIZE, &inSizePure, + src + 1, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc); + *srcLen = inSizePure + LZMA86_HEADER_SIZE; + if (res != SZ_OK) + return res; + if (useFilter == 1) + { + UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Dec(dest, *destLen, 0, &x86State); + } + return SZ_OK; +} diff --git a/crnlib/lzma/Lzma86Enc.c b/crnlib/lzma/Lzma86Enc.c new file mode 100644 index 00000000..1c034a74 --- /dev/null +++ b/crnlib/lzma/Lzma86Enc.c @@ -0,0 +1,103 @@ +/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder +2023-03-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "Lzma86.h" + +#include "Alloc.h" +#include "Bra.h" +#include "LzmaEnc.h" + +int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, + int level, UInt32 dictSize, int filterMode) +{ + size_t outSize2 = *destLen; + Byte *filteredStream; + BoolInt useFilter; + int mainResult = SZ_ERROR_OUTPUT_EOF; + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + + *destLen = 0; + if (outSize2 < LZMA86_HEADER_SIZE) + return SZ_ERROR_OUTPUT_EOF; + + { + int i; + UInt64 t = srcLen; + for (i = 0; i < 8; i++, t >>= 8) + dest[LZMA86_SIZE_OFFSET + i] = (Byte)t; + } + + filteredStream = 0; + useFilter = (filterMode != SZ_FILTER_NO); + if (useFilter) + { + if (srcLen != 0) + { + filteredStream = (Byte *)MyAlloc(srcLen); + if (filteredStream == 0) + return SZ_ERROR_MEM; + memcpy(filteredStream, src, srcLen); + } + { + UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + z7_BranchConvSt_X86_Enc(filteredStream, srcLen, 0, &x86State); + } + } + + { + size_t minSize = 0; + BoolInt bestIsFiltered = False; + + /* passes for SZ_FILTER_AUTO: + 0 - BCJ + LZMA + 1 - LZMA + 2 - BCJ + LZMA agaian, if pass 0 (BCJ + LZMA) is better. + */ + int numPasses = (filterMode == SZ_FILTER_AUTO) ? 3 : 1; + + int i; + for (i = 0; i < numPasses; i++) + { + size_t outSizeProcessed = outSize2 - LZMA86_HEADER_SIZE; + size_t outPropsSize = 5; + SRes curRes; + BoolInt curModeIsFiltered = (numPasses > 1 && i == numPasses - 1); + if (curModeIsFiltered && !bestIsFiltered) + break; + if (useFilter && i == 0) + curModeIsFiltered = True; + + curRes = LzmaEncode(dest + LZMA86_HEADER_SIZE, &outSizeProcessed, + curModeIsFiltered ? filteredStream : src, srcLen, + &props, dest + 1, &outPropsSize, 0, + NULL, &g_Alloc, &g_Alloc); + + if (curRes != SZ_ERROR_OUTPUT_EOF) + { + if (curRes != SZ_OK) + { + mainResult = curRes; + break; + } + if (outSizeProcessed <= minSize || mainResult != SZ_OK) + { + minSize = outSizeProcessed; + bestIsFiltered = curModeIsFiltered; + mainResult = SZ_OK; + } + } + } + dest[0] = (Byte)(bestIsFiltered ? 1 : 0); + *destLen = LZMA86_HEADER_SIZE + minSize; + } + if (useFilter) + MyFree(filteredStream); + return mainResult; +} diff --git a/crnlib/lzma/LzmaDec.c b/crnlib/lzma/LzmaDec.c new file mode 100644 index 00000000..b6bcd509 --- /dev/null +++ b/crnlib/lzma/LzmaDec.c @@ -0,0 +1,1363 @@ +/* LzmaDec.c -- LZMA Decoder +2023-04-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #include "CpuArch.h" */ +#include "LzmaDec.h" + +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) + +#define RC_INIT_SIZE 5 + +#ifndef Z7_LZMA_DEC_OPT + +#define kNumMoveBits 5 +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p) i = (i + i); A0; } else \ + { UPDATE_1(p) i = (i + i) + 1; A1; } + +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i) A0; } else \ + { UPDATE_1(p + i) A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define Z7_LZMA_SIZE_OPT */ + +#ifdef Z7_LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + i -= 0x40; } +#endif + +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) +#define MATCHED_LITER_DEC \ + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + +#endif // Z7_LZMA_DEC_OPT + + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK i = (i + i); A0; } else \ + { UPDATE_1_CHECK i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK i += m; m += m; } else \ + { UPDATE_1_CHECK m += m; i += m; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) + +#define kNumStates 12 +#define kNumStates2 16 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + +/* External ASM code needs same CLzmaProb array layout. So don't change it. */ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) + +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign +#endif + +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) + +#define LZMA_DIC_MIN (1 << 12) + +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. + } + +Processing: + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. + +Out: + RangeCoder is normalized + Result: + SZ_OK - OK + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined +*/ + + +#ifdef Z7_LZMA_DEC_OPT + +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob) + prob = probs + Literal; + if (processedPos != 0 || checkDicSize != 0) + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef Z7_LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + #ifdef Z7_LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob) + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob) + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + UPDATE_0(prob) + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob) + } + else + { + UInt32 distance; + UPDATE_1(prob) + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + distance = rep1; + } + else + { + UPDATE_1(prob) + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob) + distance = rep2; + } + else + { + UPDATE_1(prob) + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef Z7_LZMA_SIZE_OPT + { + unsigned lim, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen) + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen) + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + lim = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, lim, len) + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE; + len = 1; + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + len -= 8; + } + else + { + UPDATE_1(probLen) + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen) + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + } + else + { + UPDATE_1(probLen) + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len) + len += kLenNumLowSymbols * 2; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance) + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos; + { + UInt32 m = 1; + distance++; + do + { + REV_BIT_VAR(prob, distance, m) + } + while (--numDirectBits); + distance -= m; + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + REV_BIT_CONST(prob, i, 1) + REV_BIT_CONST(prob, i, 2) + REV_BIT_CONST(prob, i, 4) + REV_BIT_LAST (prob, i, 8) + distance |= i; + } + if (distance == (UInt32)0xFFFFFFFF) + { + len = kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) + { + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; + } + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); + + processedPos += (UInt32)curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += (SizeT)curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE + + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = (UInt32)state; + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; + return SZ_OK; +} +#endif + + + +static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; + { + SizeT dicPos = p->dicPos; + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += (UInt32)len; + p->remainLen -= (UInt32)len; + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do + { + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + } + while (--len); + p->dicPos = dicPos; + } +} + + +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). + +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + +static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + return res; + } +} + + + +typedef enum +{ + DUMMY_INPUT_EOF, /* need more input data */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = *bufOut; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + ELzmaDummy res; + + for (;;) + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + break; + } + else + { + UPDATE_1_CHECK + } + } + else + { + UPDATE_1_CHECK + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + } + else + { + UPDATE_1_CHECK + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + } + else + { + UPDATE_1_CHECK + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len) + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot) + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + unsigned m = 1; + do + { + REV_BIT_CHECK(prob, i, m) + } + while (--numDirectBits); + } + } + } + } + break; + } + NORMALIZE_CHECK + + *bufOut = buf; + return res; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) +{ + p->remainLen = kMatchSpecLenStart + 1; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->remainLen = kMatchSpecLenStart + 2; + } + if (initState) + p->remainLen = kMatchSpecLenStart + 2; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + + +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. +When the decoder reaches dicLimit, it looks (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead + if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position + +When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is noit + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN_NOT_FINISHED_FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + if (p->remainLen > kMatchSpecLenStart) + { + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA; + + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; + + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } + + p->remainLen = 0; + } + + for (;;) + { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + + int checkEndMarkNow = 0; + + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + RETURN_NOT_FINISHED_FOR_FINISH + } + checkEndMarkNow = 1; + } + + // (p->remainLen == 0) + + if (p->tempBufSize == 0) + { + const Byte *bufLimit; + int dummyProcessed = -1; + + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; + (*srcLen) += inSize; + p->tempBufSize = (unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + unsigned i; + (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN_NOT_FINISHED_FOR_FINISH + } + + bufLimit = src; + // we will decode only one iteration + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + + p->buf = src; + + { + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) + { + if (processed > inSize) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + continue; + } + + { + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN_NOT_FINISHED_FOR_FINISH + } + } + + p->buf = p->tempBuf; + + { + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + } + } + } + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; +} + + + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = (Byte)(d % 9); + d /= 9; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) + return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_CONSTRUCT(&p) + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)) + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/crnlib/lzma_LzmaDec.h b/crnlib/lzma/LzmaDec.h similarity index 62% rename from crnlib/lzma_LzmaDec.h rename to crnlib/lzma/LzmaDec.h index 254a3cd9..6432f608 100644 --- a/crnlib/lzma_LzmaDec.h +++ b/crnlib/lzma/LzmaDec.h @@ -1,29 +1,36 @@ /* LzmaDec.h -- LZMA Decoder -2008-10-04 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMADEC_H -#define __LZMADEC_H +#ifndef ZIP7_INC_LZMA_DEC_H +#define ZIP7_INC_LZMA_DEC_H -#include "lzma_Types.h" +#include "7zTypes.h" -namespace crnlib { +EXTERN_C_BEGIN -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, +/* #define Z7_LZMA_PROB32 */ +/* Z7_LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 +typedef +#ifdef Z7_LZMA_PROB32 + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; + /* ---------- LZMA Properties ---------- */ #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaProps { - unsigned lc, lp, pb; +typedef struct +{ + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; UInt32 dicSize; } CLzmaProps; @@ -33,7 +40,8 @@ typedef struct _CLzmaProps { SZ_ERROR_UNSUPPORTED - Unsupported properties */ -SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size); +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + /* ---------- LZMA Decoder state ---------- */ @@ -44,40 +52,40 @@ SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size); typedef struct { + /* Don't change this structure. ASM code can use it. */ CLzmaProps prop; - CLzmaProb* probs; - Byte* dic; - const Byte* buf; - UInt32 range, code; - SizeT dicPos; + CLzmaProb *probs; + CLzmaProb *probs_1664; + Byte *dic; SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; UInt32 processedPos; UInt32 checkDicSize; - unsigned state; UInt32 reps[4]; - unsigned remainLen; - int needFlush; - int needInitState; + UInt32 state; + UInt32 remainLen; + UInt32 numProbs; unsigned tempBufSize; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) \ - { \ - (p)->dic = 0; \ - (p)->probs = 0; \ - } +#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p) -void LzmaDec_Init(CLzmaDec* p); +void LzmaDec_Init(CLzmaDec *p); /* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ -typedef enum { - LZMA_FINISH_ANY, /* finish at any point */ - LZMA_FINISH_END /* block must be finished at the end */ +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ } ELzmaFinishMode; /* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! @@ -95,16 +103,18 @@ typedef enum { 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. You must use correct finish mode in that case. */ -typedef enum { - LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ - LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ - LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ - LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ } ELzmaStatus; /* ELzmaStatus is used only as output value for function call */ + /* ---------- Interfaces ---------- */ /* There are 3 levels of interfaces: @@ -114,6 +124,7 @@ typedef enum { You can select any of these interfaces, but don't mix functions from different groups for same object. */ + /* There are two variants to allocate state for Dictionary Interface: 1) LzmaDec_Allocate / LzmaDec_Free 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs @@ -125,12 +136,12 @@ LzmaDec_Allocate* can return: SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties */ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); -SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc); -void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc); - -SRes LzmaDec_Allocate(CLzmaDec* state, const Byte* prop, unsigned propsSize, ISzAlloc* alloc); -void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc); +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); /* ---------- Dictionary Interface ---------- */ @@ -139,7 +150,7 @@ void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc); You must work with CLzmaDec variables directly in this interface. STEPS: - LzmaDec_Constr() + LzmaDec_Construct() LzmaDec_Allocate() for (each new stream) { @@ -171,10 +182,12 @@ void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc); LZMA_STATUS_NEEDS_MORE_INPUT LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ -SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + /* ---------- Buffer Interface ---------- */ @@ -189,8 +202,9 @@ SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, LZMA_FINISH_END - Stream must be finished after (*destLen). */ -SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, - const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + /* ---------- One Call Interface ---------- */ @@ -211,11 +225,13 @@ SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ -SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus* status, ISzAlloc* alloc); -} +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END #endif diff --git a/crnlib/lzma/LzmaEnc.c b/crnlib/lzma/LzmaEnc.c new file mode 100644 index 00000000..166e01b8 --- /dev/null +++ b/crnlib/lzma/LzmaEnc.c @@ -0,0 +1,3144 @@ +/* LzmaEnc.c -- LZMA Encoder +Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "CpuArch.h" +#include "LzmaEnc.h" + +#include "LzFind.h" +#ifndef Z7_ST +#include "LzFindMt.h" +#endif + +/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p); +void LzmaEnc_Finish(CLzmaEncHandle p); +void LzmaEnc_SaveState(CLzmaEncHandle p); +void LzmaEnc_RestoreState(CLzmaEncHandle p); + +#ifdef SHOW_STAT +static unsigned g_STAT_OFFSET = 0; +#endif + +/* for good normalization speed we still reserve 256 MB before 4 GB range */ +#define kLzmaMaxHistorySize ((UInt32)15 << 28) + +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 +// #define kBitPrice (1 << kNumBitPriceShiftBits) + +#define REP_LEN_COUNT 64 + +void LzmaEncProps_Init(CLzmaEncProps *p) +{ + p->level = 5; + p->dictSize = p->mc = 0; + p->reduceSize = (UInt64)(Int64)-1; + p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->numHashOutBits = 0; + p->writeEndMark = 0; + p->affinity = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps *p) +{ + int level = p->level; + if (level < 0) level = 5; + p->level = level; + + if (p->dictSize == 0) + p->dictSize = (unsigned)level <= 4 ? + (UInt32)1 << (level * 2 + 16) : + (unsigned)level <= sizeof(size_t) / 2 + 4 ? + (UInt32)1 << (level + 20) : + (UInt32)1 << (sizeof(size_t) / 2 + 24); + + if (p->dictSize > p->reduceSize) + { + UInt32 v = (UInt32)p->reduceSize; + const UInt32 kReduceMin = ((UInt32)1 << 12); + if (v < kReduceMin) + v = kReduceMin; + if (p->dictSize > v) + p->dictSize = v; + } + + if (p->lc < 0) p->lc = 3; + if (p->lp < 0) p->lp = 0; + if (p->pb < 0) p->pb = 2; + + if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1; + if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64; + if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); + if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); + + if (p->numThreads < 0) + p->numThreads = + #ifndef Z7_ST + ((p->btMode && p->algo) ? 2 : 1); + #else + 1; + #endif +} + +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) +{ + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + return props.dictSize; +} + + +/* +x86/x64: + +BSR: + IF (SRC == 0) ZF = 1, DEST is undefined; + AMD : DEST is unchanged; + IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit + BSR is slow in some processors + +LZCNT: + IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64) + IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits + IF (DEST == 0) ZF = 1; + +LZCNT works only in new processors starting from Haswell. +if LZCNT is not supported by processor, then it's executed as BSR. +LZCNT can be faster than BSR, if supported. +*/ + +// #define LZMA_LOG_BSR + +#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */ + + #if (defined(__clang__) && (__clang_major__ >= 6)) \ + || (defined(__GNUC__) && (__GNUC__ >= 6)) + #define LZMA_LOG_BSR + #elif defined(_MSC_VER) && (_MSC_VER >= 1300) + // #if defined(MY_CPU_ARM_OR_ARM64) + #define LZMA_LOG_BSR + // #endif + #endif +#endif + +// #include + +#ifdef LZMA_LOG_BSR + +#if defined(__clang__) \ + || defined(__GNUC__) + +/* + C code: : (30 - __builtin_clz(x)) + gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31) + clang10 for x64 : 31 + (bsr(x) xor -32) +*/ + + #define MY_clz(x) ((unsigned)__builtin_clz(x)) + // __lzcnt32 + // __builtin_ia32_lzcnt_u32 + +#else // #if defined(_MSC_VER) + + #ifdef MY_CPU_ARM_OR_ARM64 + + #define MY_clz _CountLeadingZeros + + #else // if defined(MY_CPU_X86_OR_AMD64) + + // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU) + // _BitScanReverse code is not optimal for some MSVC compilers + #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \ + res = (zz + zz) + (pos >> zz); } + + #endif // MY_CPU_X86_OR_AMD64 + +#endif // _MSC_VER + + +#ifndef BSR2_RET + + #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \ + res = (zz + zz) + (pos >> zz); } + +#endif + + +unsigned GetPosSlot1(UInt32 pos); +unsigned GetPosSlot1(UInt32 pos) +{ + unsigned res; + BSR2_RET(pos, res) + return res; +} +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) } + + +#else // ! LZMA_LOG_BSR + +#define kNumLogBits (11 + sizeof(size_t) / 8 * 3) + +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + +static void LzmaEnc_FastPosInit(Byte *g_FastPos) +{ + unsigned slot; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + g_FastPos += 2; + + for (slot = 2; slot < kNumLogBits * 2; slot++) + { + size_t k = ((size_t)1 << ((slot >> 1) - 1)); + size_t j; + for (j = 0; j < k; j++) + g_FastPos[j] = (Byte)slot; + g_FastPos += k; + } +} + +/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } + +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } + +#endif // LZMA_LOG_BSR + + +#define LZMA_NUM_REPS 4 + +typedef UInt16 CState; +typedef UInt16 CExtra; + +typedef struct +{ + UInt32 price; + CState state; + CExtra extra; + // 0 : normal + // 1 : LIT : MATCH + // > 1 : MATCH (extra-1) : LIT : REP0 (len) + UInt32 len; + UInt32 dist; + UInt32 reps[LZMA_NUM_REPS]; +} COptimal; + + +// 18.06 +#define kNumOpts (1 << 11) +#define kPackReserve (kNumOpts * 8) +// #define kNumOpts (1 << 12) +// #define kPackReserve (1 + kNumOpts * 2) + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +// #define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +typedef +#ifdef Z7_LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + +#define LZMA_PB_MAX 4 +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) +#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 + + +typedef struct +{ + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; + CLzmaProb high[kLenNumHighSymbols]; +} CLenEnc; + + +typedef struct +{ + unsigned tableSize; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2]; + // UInt32 prices2[kLenNumSymbolsTotal]; +} CLenPriceEnc; + +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN]) + +/* +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9))) +*/ + +typedef struct +{ + UInt32 range; + unsigned cache; + UInt64 low; + UInt64 cacheSize; + Byte *buf; + Byte *bufLim; + Byte *bufBase; + ISeqOutStreamPtr outStream; + UInt64 processed; + SRes res; +} CRangeEnc; + + +typedef struct +{ + CLzmaProb *litProbs; + + unsigned state; + UInt32 reps[LZMA_NUM_REPS]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + +} CSaveState; + + +typedef UInt32 CProbPrice; + + +struct CLzmaEnc +{ + void *matchFinderObj; + IMatchFinder2 matchFinder; + + unsigned optCur; + unsigned optEnd; + + unsigned longestMatchLen; + unsigned numPairs; + UInt32 numAvail; + + unsigned state; + unsigned numFastBytes; + unsigned additionalOffset; + UInt32 reps[LZMA_NUM_REPS]; + unsigned lpMask, pbMask; + CLzmaProb *litProbs; + CRangeEnc rc; + + UInt32 backRes; + + unsigned lc, lp, pb; + unsigned lclp; + + BoolInt fastMode; + BoolInt writeEndMark; + BoolInt finished; + BoolInt multiThread; + BoolInt needInit; + // BoolInt _maxMode; + + UInt64 nowPos64; + + unsigned matchPriceCount; + // unsigned alignPriceCount; + int repLenEncCounter; + + unsigned distTableSize; + + UInt32 dictSize; + SRes result; + + #ifndef Z7_ST + BoolInt mtMode; + // begin of CMatchFinderMt is used in LZ thread + CMatchFinderMt matchFinderMt; + // end of CMatchFinderMt is used in BT and HASH threads + // #else + // CMatchFinder matchFinderBase; + #endif + CMatchFinder matchFinderBase; + + + // we suppose that we have 8-bytes alignment after CMatchFinder + + #ifndef Z7_ST + Byte pad[128]; + #endif + + // LZ thread + CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; + + // we want {len , dist} pairs to be 8-bytes aligned in matches array + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2]; + + // we want 8-bytes alignment here + UInt32 alignPrices[kAlignTableSize]; + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + COptimal opt[kNumOpts]; + + CSaveState saveState; + + // BoolInt mf_Failure; + #ifndef Z7_ST + Byte pad2[128]; + #endif +}; + + +#define MFB (p->matchFinderBase) +/* +#ifndef Z7_ST +#define MFB (p->matchFinderMt.MatchFinder) +#endif +*/ + +// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p; +// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p; + +#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr)); + +#define COPY_LZMA_ENC_STATE(d, s, p) \ + (d)->state = (s)->state; \ + COPY_ARR(d, s, reps) \ + COPY_ARR(d, s, posAlignEncoder) \ + COPY_ARR(d, s, isRep) \ + COPY_ARR(d, s, isRepG0) \ + COPY_ARR(d, s, isRepG1) \ + COPY_ARR(d, s, isRepG2) \ + COPY_ARR(d, s, isMatch) \ + COPY_ARR(d, s, isRep0Long) \ + COPY_ARR(d, s, posSlotEncoder) \ + COPY_ARR(d, s, posEncoders) \ + (d)->lenProbs = (s)->lenProbs; \ + (d)->repLenProbs = (s)->repLenProbs; \ + memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp); + +void LzmaEnc_SaveState(CLzmaEncHandle p) +{ + // GET_CLzmaEnc_p + CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(v, p, p) +} + +void LzmaEnc_RestoreState(CLzmaEncHandle p) +{ + // GET_CLzmaEnc_p + const CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(p, v, p) +} + + +Z7_NO_INLINE +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) +{ + // GET_CLzmaEnc_p + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + + if (props.lc > LZMA_LC_MAX + || props.lp > LZMA_LP_MAX + || props.pb > LZMA_PB_MAX) + return SZ_ERROR_PARAM; + + + if (props.dictSize > kLzmaMaxHistorySize) + props.dictSize = kLzmaMaxHistorySize; + + #ifndef LZMA_LOG_BSR + { + const UInt64 dict64 = props.dictSize; + if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress)) + return SZ_ERROR_PARAM; + } + #endif + + p->dictSize = props.dictSize; + { + unsigned fb = (unsigned)props.fb; + if (fb < 5) + fb = 5; + if (fb > LZMA_MATCH_LEN_MAX) + fb = LZMA_MATCH_LEN_MAX; + p->numFastBytes = fb; + } + p->lc = (unsigned)props.lc; + p->lp = (unsigned)props.lp; + p->pb = (unsigned)props.pb; + p->fastMode = (props.algo == 0); + // p->_maxMode = True; + MFB.btMode = (Byte)(props.btMode ? 1 : 0); + // MFB.btMode = (Byte)(props.btMode); + { + unsigned numHashBytes = 4; + if (props.btMode) + { + if (props.numHashBytes < 2) numHashBytes = 2; + else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes; + } + if (props.numHashBytes >= 5) numHashBytes = 5; + + MFB.numHashBytes = numHashBytes; + // MFB.numHashBytes_Min = 2; + MFB.numHashOutBits = (Byte)props.numHashOutBits; + } + + MFB.cutValue = props.mc; + + p->writeEndMark = (BoolInt)props.writeEndMark; + + #ifndef Z7_ST + /* + if (newMultiThread != _multiThread) + { + ReleaseMatchFinder(); + _multiThread = newMultiThread; + } + */ + p->multiThread = (props.numThreads > 1); + p->matchFinderMt.btSync.affinity = + p->matchFinderMt.hashSync.affinity = props.affinity; + #endif + + return SZ_OK; +} + + +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize) +{ + // GET_CLzmaEnc_p + MFB.expectedDataSize = expectedDataSiize; +} + + +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 + +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) + +#define kInfinityPrice (1 << 30) + +static void RangeEnc_Construct(CRangeEnc *p) +{ + p->outStream = NULL; + p->bufBase = NULL; +} + +#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) + +#define RC_BUF_SIZE (1 << 16) + +static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) +{ + if (!p->bufBase) + { + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); + if (!p->bufBase) + return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + } + return 1; +} + +static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; +} + +static void RangeEnc_Init(CRangeEnc *p) +{ + p->range = 0xFFFFFFFF; + p->cache = 0; + p->low = 0; + p->cacheSize = 0; + + p->buf = p->bufBase; + + p->processed = 0; + p->res = SZ_OK; +} + +Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) +{ + const size_t num = (size_t)(p->buf - p->bufBase); + if (p->res == SZ_OK) + { + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + } + p->processed += num; + p->buf = p->bufBase; +} + +Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p) +{ + UInt32 low = (UInt32)p->low; + unsigned high = (unsigned)(p->low >> 32); + p->low = (UInt32)(low << 8); + if (low < (UInt32)0xFF000000 || high != 0) + { + { + Byte *buf = p->buf; + *buf++ = (Byte)(p->cache + high); + p->cache = (unsigned)(low >> 24); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (p->cacheSize == 0) + return; + } + high += 0xFF; + for (;;) + { + Byte *buf = p->buf; + *buf++ = (Byte)(high); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (--p->cacheSize == 0) + return; + } + } + p->cacheSize++; +} + +static void RangeEnc_FlushData(CRangeEnc *p) +{ + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} + +#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } + +#define RC_BIT_PRE(p, prob) \ + ttt = *(prob); \ + newBound = (range >> kNumBitModelTotalBits) * ttt; + +// #define Z7_LZMA_ENC_USE_BRANCH + +#ifdef Z7_LZMA_ENC_USE_BRANCH + +#define RC_BIT(p, prob, bit) { \ + RC_BIT_PRE(p, prob) \ + if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ + else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#else + +#define RC_BIT(p, prob, bit) { \ + UInt32 mask; \ + RC_BIT_PRE(p, prob) \ + mask = 0 - (UInt32)bit; \ + range &= mask; \ + mask &= newBound; \ + range -= mask; \ + (p)->low += mask; \ + mask = (UInt32)bit - 1; \ + range += newBound & mask; \ + mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ + mask += ((1 << kNumMoveBits) - 1); \ + ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#endif + + + + +#define RC_BIT_0_BASE(p, prob) \ + range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + +#define RC_BIT_1_BASE(p, prob) \ + range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ + +#define RC_BIT_0(p, prob) \ + RC_BIT_0_BASE(p, prob) \ + RC_NORM(p) + +#define RC_BIT_1(p, prob) \ + RC_BIT_1_BASE(p, prob) \ + RC_NORM(p) + +static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) +{ + UInt32 range, ttt, newBound; + range = p->range; + RC_BIT_PRE(p, prob) + RC_BIT_0(p, prob) + p->range = range; +} + +static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) +{ + UInt32 range = p->range; + sym |= 0x100; + do + { + UInt32 ttt, newBound; + // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1); + CLzmaProb *prob = probs + (sym >> 8); + UInt32 bit = (sym >> 7) & 1; + sym <<= 1; + RC_BIT(p, prob, bit) + } + while (sym < 0x10000); + p->range = range; +} + +static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte) +{ + UInt32 range = p->range; + UInt32 offs = 0x100; + sym |= 0x100; + do + { + UInt32 ttt, newBound; + CLzmaProb *prob; + UInt32 bit; + matchByte <<= 1; + // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1); + prob = probs + (offs + (matchByte & offs) + (sym >> 8)); + bit = (sym >> 7) & 1; + sym <<= 1; + offs &= ~(matchByte ^ sym); + RC_BIT(p, prob, bit) + } + while (sym < 0x10000); + p->range = range; +} + + + +static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) +{ + UInt32 i; + for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) + { + const unsigned kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); + unsigned bitCount = 0; + unsigned j; + for (j = 0; j < kCyclesBits; j++) + { + w = w * w; + bitCount <<= 1; + while (w >= ((UInt32)1 << 16)) + { + w >>= 1; + bitCount++; + } + } + ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + // printf("\n%3d: %5d", i, ProbPrices[i]); + } +} + + +#define GET_PRICE(prob, bit) \ + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] + +#define GET_PRICEa(prob, bit) \ + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + + +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices) +{ + UInt32 price = 0; + sym |= 0x100; + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + return price; +} + + +static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices) +{ + UInt32 price = 0; + UInt32 offs = 0x100; + sym |= 0x100; + do + { + matchByte <<= 1; + price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1); + sym <<= 1; + offs &= ~(matchByte ^ sym); + } + while (sym < 0x10000); + return price; +} + + +static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym) +{ + UInt32 range = rc->range; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + unsigned bit = sym & 1; + // RangeEnc_EncodeBit(rc, probs + m, bit); + sym >>= 1; + RC_BIT(rc, probs + m, bit) + m = (m << 1) | bit; + } + while (--numBits); + rc->range = range; +} + + + +static void LenEnc_Init(CLenEnc *p) +{ + unsigned i; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) + p->low[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} + +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState) +{ + UInt32 range, ttt, newBound; + CLzmaProb *probs = p->low; + range = rc->range; + RC_BIT_PRE(rc, probs) + if (sym >= kLenNumLowSymbols) + { + RC_BIT_1(rc, probs) + probs += kLenNumLowSymbols; + RC_BIT_PRE(rc, probs) + if (sym >= kLenNumLowSymbols * 2) + { + RC_BIT_1(rc, probs) + rc->range = range; + // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); + LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); + return; + } + sym -= kLenNumLowSymbols; + } + + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym); + { + unsigned m; + unsigned bit; + RC_BIT_0(rc, probs) + probs += (posState << (1 + kLenNumLowBits)); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit) + rc->range = range; + } +} + +static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) +{ + unsigned i; + for (i = 0; i < 8; i += 2) + { + UInt32 price = startPrice; + UInt32 prob; + price += GET_PRICEa(probs[1 ], (i >> 2)); + price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); + prob = probs[4 + (i >> 1)]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + } +} + + +Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( + CLenPriceEnc *p, + unsigned numPosStates, + const CLenEnc *enc, + const CProbPrice *ProbPrices) +{ + UInt32 b; + + { + unsigned prob = enc->low[0]; + UInt32 a, c; + unsigned posState; + b = GET_PRICEa_1(prob); + a = GET_PRICEa_0(prob); + c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (posState = 0; posState < numPosStates; posState++) + { + UInt32 *prices = p->prices[posState]; + const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits)); + SetPrices_3(probs, a, prices, ProbPrices); + SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices); + } + } + + /* + { + unsigned i; + UInt32 b; + a = GET_PRICEa_0(enc->low[0]); + for (i = 0; i < kLenNumLowSymbols; i++) + p->prices2[i] = a; + a = GET_PRICEa_1(enc->low[0]); + b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++) + p->prices2[i] = b; + a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + } + */ + + // p->counter = numSymbols; + // p->counter = 64; + + { + unsigned i = p->tableSize; + + if (i > kLenNumLowSymbols * 2) + { + const CLzmaProb *probs = enc->high; + UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2; + i -= kLenNumLowSymbols * 2 - 1; + i >>= 1; + b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + do + { + /* + p->prices2[i] = a + + // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); + LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices); + */ + // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices); + unsigned sym = --i + (1 << (kLenNumHighBits - 1)); + UInt32 price = b; + do + { + const unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + + { + const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); + prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); + } + } + while (i); + + { + unsigned posState; + const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + for (posState = 1; posState < numPosStates; posState++) + memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); + } + } + } +} + +/* + #ifdef SHOW_STAT + g_STAT_OFFSET += num; + printf("\n MovePos %u", num); + #endif +*/ + +#define MOVE_POS(p, num) { \ + p->additionalOffset += (num); \ + p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); } + + +static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) +{ + unsigned numPairs; + + p->additionalOffset++; + p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); + { + const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; } + numPairs = (unsigned)(d - p->matches); + } + *numPairsRes = numPairs; + + #ifdef SHOW_STAT + printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); + g_STAT_OFFSET++; + { + unsigned i; + for (i = 0; i < numPairs; i += 2) + printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); + } + #endif + + if (numPairs == 0) + return 0; + { + const unsigned len = p->matches[(size_t)numPairs - 2]; + if (len != p->numFastBytes) + return len; + { + UInt32 numAvail = p->numAvail; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + { + const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + const Byte *p2 = p1 + len; + const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; + const Byte *lim = p1 + numAvail; + for (; p2 != lim && *p2 == p2[dif]; p2++) + {} + return (unsigned)(p2 - p1); + } + } + } +} + +#define MARK_LIT ((UInt32)(Int32)-1) + +#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } +#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } +#define IsShortRep(p) ((p)->dist == 0) + + +#define GetPrice_ShortRep(p, state, posState) \ + ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) + +#define GetPrice_Rep_0(p, state, posState) ( \ + GET_PRICE_1(p->isMatch[state][posState]) \ + + GET_PRICE_1(p->isRep0Long[state][posState])) \ + + GET_PRICE_1(p->isRep[state]) \ + + GET_PRICE_0(p->isRepG0[state]) + +Z7_FORCE_INLINE +static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) +{ + UInt32 price; + UInt32 prob = p->isRepG0[state]; + if (repIndex == 0) + { + price = GET_PRICE_0(prob); + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } + else + { + price = GET_PRICE_1(prob); + prob = p->isRepG1[state]; + if (repIndex == 1) + price += GET_PRICE_0(prob); + else + { + price += GET_PRICE_1(prob); + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} + + +static unsigned Backward(CLzmaEnc *p, unsigned cur) +{ + unsigned wr = cur + 1; + p->optEnd = wr; + + for (;;) + { + UInt32 dist = p->opt[cur].dist; + unsigned len = (unsigned)p->opt[cur].len; + unsigned extra = (unsigned)p->opt[cur].extra; + cur -= len; + + if (extra) + { + wr--; + p->opt[wr].len = (UInt32)len; + cur -= extra; + len = extra; + if (extra == 1) + { + p->opt[wr].dist = dist; + dist = MARK_LIT; + } + else + { + p->opt[wr].dist = 0; + len--; + wr--; + p->opt[wr].dist = MARK_LIT; + p->opt[wr].len = 1; + } + } + + if (cur == 0) + { + p->backRes = dist; + p->optCur = wr; + return len; + } + + wr--; + p->opt[wr].dist = dist; + p->opt[wr].len = (UInt32)len; + } +} + + + +#define LIT_PROBS(pos, prevByte) \ + (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc)) + + +static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) +{ + unsigned last, cur; + UInt32 reps[LZMA_NUM_REPS]; + unsigned repLens[LZMA_NUM_REPS]; + UInt32 *matches; + + { + UInt32 numAvail; + unsigned numPairs, mainLen, repMaxIndex, i, posState; + UInt32 matchPrice, repMatchPrice; + const Byte *data; + Byte curByte, matchByte; + + p->optCur = p->optEnd = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLen; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + if (numAvail < 2) + { + p->backRes = MARK_LIT; + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len; + const Byte *data2; + reps[i] = p->reps[i]; + data2 = data - reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + { + repLens[i] = 0; + continue; + } + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + repLens[i] = len; + if (len > repLens[repMaxIndex]) + repMaxIndex = i; + if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization + break; + } + + if (repLens[repMaxIndex] >= p->numFastBytes) + { + unsigned len; + p->backRes = (UInt32)repMaxIndex; + len = repLens[repMaxIndex]; + MOVE_POS(p, len - 1) + return len; + } + + matches = p->matches; + #define MATCHES matches + // #define MATCHES p->matches + + if (mainLen >= p->numFastBytes) + { + p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + curByte = *data; + matchByte = *(data - reps[0]); + + last = repLens[repMaxIndex]; + if (last <= mainLen) + last = mainLen; + + if (last < 2 && curByte != matchByte) + { + p->backRes = MARK_LIT; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsLitState(p->state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAs_Lit(&p->opt[1]) + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + // 18.06 + if (matchByte == curByte && repLens[0] == 0) + { + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) + { + p->opt[1].price = shortRepPrice; + MakeAs_ShortRep(&p->opt[1]) + } + if (last < 2) + { + p->backRes = p->opt[1].dist; + return 1; + } + } + + p->opt[1].len = 1; + + p->opt[0].reps[0] = reps[0]; + p->opt[0].reps[1] = reps[1]; + p->opt[0].reps[2] = reps[2]; + p->opt[0].reps[3] = reps[3]; + + // ---------- REP ---------- + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); + do + { + UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen); + COptimal *opt = &p->opt[repLen]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)repLen; + opt->dist = (UInt32)i; + opt->extra = 0; + } + } + while (--repLen >= 2); + } + + + // ---------- MATCH ---------- + { + unsigned len = repLens[0] + 1; + if (len <= mainLen) + { + unsigned offs = 0; + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + if (len < 2) + len = 2; + else + while (len > MATCHES[offs]) + offs += 2; + + for (; ; len++) + { + COptimal *opt; + UInt32 dist = MATCHES[(size_t)offs + 1]; + UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); + unsigned lenToPosState = GetLenToPosState(len); + + if (dist < kNumFullDistances) + price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + { + unsigned slot; + GetPosSlot2(dist, slot) + price += p->alignPrices[dist & kAlignMask]; + price += p->posSlotPrices[lenToPosState][slot]; + } + + opt = &p->opt[len]; + + if (price < opt->price) + { + opt->price = price; + opt->len = (UInt32)len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + + if (len == MATCHES[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } + } + } + + + cur = 0; + + #ifdef SHOW_STAT2 + /* if (position >= 0) */ + { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= last; i++) + printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); + } + #endif + } + + + + // ---------- Optimal Parsing ---------- + + for (;;) + { + unsigned numAvail; + UInt32 numAvailFull; + unsigned newLen, numPairs, prev, state, posState, startLen; + UInt32 litPrice, matchPrice, repMatchPrice; + BoolInt nextIsLit; + Byte curByte, matchByte; + const Byte *data; + COptimal *curOpt, *nextOpt; + + if (++cur == last) + break; + + // 18.06 + if (cur >= kNumOpts - 64) + { + unsigned j, best; + UInt32 price = p->opt[cur].price; + best = cur; + for (j = cur + 1; j <= last; j++) + { + UInt32 price2 = p->opt[j].price; + if (price >= price2) + { + price = price2; + best = j; + } + } + { + unsigned delta = best - cur; + if (delta != 0) + { + MOVE_POS(p, delta) + } + } + cur = best; + break; + } + + newLen = ReadMatchDistances(p, &numPairs); + + if (newLen >= p->numFastBytes) + { + p->numPairs = numPairs; + p->longestMatchLen = newLen; + break; + } + + curOpt = &p->opt[cur]; + + position++; + + // we need that check here, if skip_items in p->opt are possible + /* + if (curOpt->price >= kInfinityPrice) + continue; + */ + + prev = cur - curOpt->len; + + if (curOpt->len == 1) + { + state = (unsigned)p->opt[prev].state; + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } + else + { + const COptimal *prevOpt; + UInt32 b0; + UInt32 dist = curOpt->dist; + + if (curOpt->extra) + { + prev -= (unsigned)curOpt->extra; + state = kState_RepAfterLit; + if (curOpt->extra == 1) + state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit); + } + else + { + state = (unsigned)p->opt[prev].state; + if (dist < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + + prevOpt = &p->opt[prev]; + b0 = prevOpt->reps[0]; + + if (dist < LZMA_NUM_REPS) + { + if (dist == 0) + { + reps[0] = b0; + reps[1] = prevOpt->reps[1]; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[1] = b0; + b0 = prevOpt->reps[1]; + if (dist == 1) + { + reps[0] = b0; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[2] = b0; + reps[0] = prevOpt->reps[dist]; + reps[3] = prevOpt->reps[dist ^ 1]; + } + } + } + else + { + reps[0] = (dist - LZMA_NUM_REPS + 1); + reps[1] = b0; + reps[2] = prevOpt->reps[1]; + reps[3] = prevOpt->reps[2]; + } + } + + curOpt->state = (CState)state; + curOpt->reps[0] = reps[0]; + curOpt->reps[1] = reps[1]; + curOpt->reps[2] = reps[2]; + curOpt->reps[3] = reps[3]; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + curByte = *data; + matchByte = *(data - reps[0]); + + posState = (position & p->pbMask); + + /* + The order of Price checks: + < LIT + <= SHORT_REP + < LIT : REP_0 + < REP [ : LIT : REP_0 ] + < MATCH [ : LIT : REP_0 ] + */ + + { + UInt32 curPrice = curOpt->price; + unsigned prob = p->isMatch[state][posState]; + matchPrice = curPrice + GET_PRICE_1(prob); + litPrice = curPrice + GET_PRICE_0(prob); + } + + nextOpt = &p->opt[(size_t)cur + 1]; + nextIsLit = False; + + // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice) + // 18.new.06 + if ((nextOpt->price < kInfinityPrice + // && !IsLitState(state) + && matchByte == curByte) + || litPrice > nextOpt->price + ) + litPrice = 0; + else + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + litPrice += (!IsLitState(state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + + if (litPrice < nextOpt->price) + { + nextOpt->price = litPrice; + nextOpt->len = 1; + MakeAs_Lit(nextOpt) + nextIsLit = True; + } + } + + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + numAvailFull = p->numAvail; + { + unsigned temp = kNumOpts - 1 - cur; + if (numAvailFull > temp) + numAvailFull = (UInt32)temp; + } + + // 18.06 + // ---------- SHORT_REP ---------- + if (IsLitState(state)) // 18.new + if (matchByte == curByte) + if (repMatchPrice < nextOpt->price) // 18.new + // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1)) + if ( + // nextOpt->price >= kInfinityPrice || + nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt + || (nextOpt->dist != 0 + // && nextOpt->extra <= 1 // 17.old + ) + ) + { + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); + // if (shortRepPrice <= nextOpt->price) // 17.old + if (shortRepPrice < nextOpt->price) // 18.new + { + nextOpt->price = shortRepPrice; + nextOpt->len = 1; + MakeAs_ShortRep(nextOpt) + nextIsLit = False; + } + } + + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); + + // numAvail <= p->numFastBytes + + // ---------- LIT : REP_0 ---------- + + if (!nextIsLit + && litPrice != 0 // 18.new + && matchByte != curByte + && numAvailFull > 2) + { + const Byte *data2 = data - reps[0]; + if (data[1] == data2[1] && data[2] == data2[2]) + { + unsigned len; + unsigned limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + for (len = 3; len < limit && data[len] == data2[len]; len++) + {} + + { + unsigned state2 = kLiteralNextStates[state]; + unsigned posState2 = (position + 1) & p->pbMask; + UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); + { + unsigned offset = cur + len; + + if (last < offset) + last = offset; + + // do + { + UInt32 price2; + COptimal *opt; + len--; + // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len); + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len; + opt->dist = 0; + opt->extra = 1; + } + } + // while (len >= 3); + } + } + } + } + + startLen = 2; /* speed optimization */ + + { + // ---------- REP ---------- + unsigned repIndex = 0; // 17.old + // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused + for (; repIndex < LZMA_NUM_REPS; repIndex++) + { + unsigned len; + UInt32 price; + const Byte *data2 = data - reps[repIndex]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + + // if (len < startLen) continue; // 18.new: speed optimization + + { + unsigned offset = cur + len; + if (last < offset) + last = offset; + } + { + unsigned len2 = len; + price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); + do + { + UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2); + COptimal *opt = &p->opt[cur + len2]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->dist = (UInt32)repIndex; + opt->extra = 0; + } + } + while (--len2 >= 2); + } + + if (repIndex == 0) startLen = len + 1; // 17.old + // startLen = len + 1; // 18.new + + /* if (_maxMode) */ + { + // ---------- REP : LIT : REP_0 ---------- + // numFastBytes + 1 + numFastBytes + + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; + if (limit > numAvailFull) + limit = numAvailFull; + + len2 += 2; + if (len2 <= limit) + if (data[len2 - 2] == data2[len2 - 2]) + if (data[len2 - 1] == data2[len2 - 1]) + { + unsigned state2 = kRepNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + price += GET_PRICE_LEN(&p->repLenEnc, posState, len) + + GET_PRICE_0(p->isMatch[state2][posState2]) + + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterRep; + posState2 = (posState2 + 1) & p->pbMask; + + + price += GetPrice_Rep_0(p, state2, posState2); + + for (; len2 < limit && data[len2] == data2[len2]; len2++) + {} + + len2 -= len; + // if (len2 >= 3) + { + { + unsigned offset = cur + len + len2; + + if (last < offset) + last = offset; + // do + { + UInt32 price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->extra = (CExtra)(len + 1); + opt->dist = (UInt32)repIndex; + } + } + // while (len2 >= 3); + } + } + } + } + } + } + + + // ---------- MATCH ---------- + /* for (unsigned len = 2; len <= newLen; len++) */ + if (newLen > numAvail) + { + newLen = numAvail; + for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2); + MATCHES[numPairs] = (UInt32)newLen; + numPairs += 2; + } + + // startLen = 2; /* speed optimization */ + + if (newLen >= startLen) + { + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + UInt32 dist; + unsigned offs, posSlot, len; + + { + unsigned offset = cur + newLen; + if (last < offset) + last = offset; + } + + offs = 0; + while (startLen > MATCHES[offs]) + offs += 2; + dist = MATCHES[(size_t)offs + 1]; + + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot) + + for (len = /*2*/ startLen; ; len++) + { + UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); + { + COptimal *opt; + unsigned lenNorm = len - 2; + lenNorm = GetLenToPosState2(lenNorm); + if (dist < kNumFullDistances) + price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)]; + else + price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask]; + + opt = &p->opt[cur + len]; + if (price < opt->price) + { + opt->price = price; + opt->len = (UInt32)len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + } + + if (len == MATCHES[offs]) + { + // if (p->_maxMode) { + // MATCH : LIT : REP_0 + + const Byte *data2 = data - dist - 1; + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; + if (limit > numAvailFull) + limit = numAvailFull; + + len2 += 2; + if (len2 <= limit) + if (data[len2 - 2] == data2[len2 - 2]) + if (data[len2 - 1] == data2[len2 - 1]) + { + for (; len2 < limit && data[len2] == data2[len2]; len2++) + {} + + len2 -= len; + + // if (len2 >= 3) + { + unsigned state2 = kMatchNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + unsigned offset; + price += GET_PRICE_0(p->isMatch[state2][posState2]); + price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterMatch; + + posState2 = (posState2 + 1) & p->pbMask; + price += GetPrice_Rep_0(p, state2, posState2); + + offset = cur + len + len2; + + if (last < offset) + last = offset; + // do + { + UInt32 price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->extra = (CExtra)(len + 1); + opt->dist = dist + LZMA_NUM_REPS; + } + } + // while (len2 >= 3); + } + + } + + offs += 2; + if (offs == numPairs) + break; + dist = MATCHES[(size_t)offs + 1]; + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot) + } + } + } + } + + do + p->opt[last].price = kInfinityPrice; + while (--last); + + return Backward(p, cur); +} + + + +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + + + +static unsigned GetOptimumFast(CLzmaEnc *p) +{ + UInt32 numAvail, mainDist; + unsigned mainLen, numPairs, repIndex, repLen, i; + const Byte *data; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLen; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + p->backRes = MARK_LIT; + if (numAvail < 2) + return 1; + // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repLen = repIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len; + const Byte *data2 = data - p->reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + if (len >= p->numFastBytes) + { + p->backRes = (UInt32)i; + MOVE_POS(p, len - 1) + return len; + } + if (len > repLen) + { + repIndex = i; + repLen = len; + } + } + + if (mainLen >= p->numFastBytes) + { + p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + mainDist = 0; /* for GCC */ + + if (mainLen >= 2) + { + mainDist = p->matches[(size_t)numPairs - 1]; + while (numPairs > 2) + { + UInt32 dist2; + if (mainLen != p->matches[(size_t)numPairs - 4] + 1) + break; + dist2 = p->matches[(size_t)numPairs - 3]; + if (!ChangePair(dist2, mainDist)) + break; + numPairs -= 2; + mainLen--; + mainDist = dist2; + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + + if (repLen >= 2) + if ( repLen + 1 >= mainLen + || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) + || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) + { + p->backRes = (UInt32)repIndex; + MOVE_POS(p, repLen - 1) + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + { + unsigned len1 = ReadMatchDistances(p, &p->numPairs); + p->longestMatchLen = len1; + + if (len1 >= 2) + { + UInt32 newDist = p->matches[(size_t)p->numPairs - 1]; + if ( (len1 >= mainLen && newDist < mainDist) + || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist)) + || (len1 > mainLen + 1) + || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) + return 1; + } + } + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len, limit; + const Byte *data2 = data - p->reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + limit = mainLen - 1; + for (len = 2;; len++) + { + if (len >= limit) + return 1; + if (data[len] != data2[len]) + break; + } + } + + p->backRes = mainDist + LZMA_NUM_REPS; + if (mainLen != 2) + { + MOVE_POS(p, mainLen - 2) + } + return mainLen; +} + + + + +static void WriteEndMarker(CLzmaEnc *p, unsigned posState) +{ + UInt32 range; + range = p->rc.range; + { + UInt32 ttt, newBound; + CLzmaProb *prob = &p->isMatch[p->state][posState]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_1(&p->rc, prob) + prob = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_0(&p->rc, prob) + } + p->state = kMatchNextStates[p->state]; + + p->rc.range = range; + LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); + range = p->rc.range; + + { + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1); + CLzmaProb *probs = p->posSlotEncoder[0]; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m) + m = (m << 1) + 1; + } + while (m < (1 << kNumPosSlotBits)); + } + { + // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range; + unsigned numBits = 30 - kNumAlignBits; + do + { + range >>= 1; + p->rc.low += range; + RC_NORM(&p->rc) + } + while (--numBits); + } + + { + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + CLzmaProb *probs = p->posAlignEncoder; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m) + m = (m << 1) + 1; + } + while (m < kAlignTableSize); + } + p->rc.range = range; +} + + +static SRes CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + + #ifndef Z7_ST + if ( + // p->mf_Failure || + (p->mtMode && + ( // p->matchFinderMt.failure_LZ_LZ || + p->matchFinderMt.failure_LZ_BT)) + ) + { + p->result = MY_HRES_ERROR_INTERNAL_ERROR; + // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); + } + #endif + + if (MFB.result != SZ_OK) + p->result = SZ_ERROR_READ; + + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} + + +Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) +{ + /* ReleaseMFStream(); */ + p->finished = True; + if (p->writeEndMark) + WriteEndMarker(p, nowPos & p->pbMask); + RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + return CheckErrors(p); +} + + +Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) +{ + unsigned i; + const CProbPrice *ProbPrices = p->ProbPrices; + const CLzmaProb *probs = p->posAlignEncoder; + // p->alignPriceCount = 0; + for (i = 0; i < kAlignTableSize / 2; i++) + { + UInt32 price = 0; + unsigned sym = i; + unsigned m = 1; + unsigned bit; + UInt32 prob; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + prob = probs[m]; + p->alignPrices[i ] = price + GET_PRICEa_0(prob); + p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); + // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + } +} + + +Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) +{ + // int y; for (y = 0; y < 100; y++) { + + UInt32 tempPrices[kNumFullDistances]; + unsigned i, lps; + + const CProbPrice *ProbPrices = p->ProbPrices; + p->matchPriceCount = 0; + + for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) + { + unsigned posSlot = GetPosSlot1(i); + unsigned footerBits = (posSlot >> 1) - 1; + unsigned base = ((2 | (posSlot & 1)) << footerBits); + const CLzmaProb *probs = p->posEncoders + (size_t)base * 2; + // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); + UInt32 price = 0; + unsigned m = 1; + unsigned sym = i; + unsigned offset = (unsigned)1 << footerBits; + base += i; + + if (footerBits) + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) + bit; + } + while (--footerBits); + + { + unsigned prob = probs[m]; + tempPrices[base ] = price + GET_PRICEa_0(prob); + tempPrices[base + offset] = price + GET_PRICEa_1(prob); + } + } + + for (lps = 0; lps < kNumLenToPosStates; lps++) + { + unsigned slot; + unsigned distTableSize2 = (p->distTableSize + 1) >> 1; + UInt32 *posSlotPrices = p->posSlotPrices[lps]; + const CLzmaProb *probs = p->posSlotEncoder[lps]; + + for (slot = 0; slot < distTableSize2; slot++) + { + // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices); + UInt32 price; + unsigned bit; + unsigned sym = slot + (1 << (kNumPosSlotBits - 1)); + unsigned prob; + bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))]; + posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob); + posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob); + } + + { + UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++) + { + posSlotPrices[(size_t)slot * 2 ] += delta; + posSlotPrices[(size_t)slot * 2 + 1] += delta; + delta += ((UInt32)1 << kNumBitPriceShiftBits); + } + } + + { + UInt32 *dp = p->distancesPrices[lps]; + + dp[0] = posSlotPrices[0]; + dp[1] = posSlotPrices[1]; + dp[2] = posSlotPrices[2]; + dp[3] = posSlotPrices[3]; + + for (i = 4; i < kNumFullDistances; i += 2) + { + UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; + dp[i ] = slotPrice + tempPrices[i]; + dp[i + 1] = slotPrice + tempPrices[i + 1]; + } + } + } + // } +} + + + +static void LzmaEnc_Construct(CLzmaEnc *p) +{ + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&MFB); + + #ifndef Z7_ST + p->matchFinderMt.MatchFinder = &MFB; + MatchFinderMt_Construct(&p->matchFinderMt); + #endif + + { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props); + } + + #ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); + #endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = NULL; + p->saveState.litProbs = NULL; +} + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) +{ + void *p; + p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); + if (p) + LzmaEnc_Construct((CLzmaEnc *)p); + return p; +} + +static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->litProbs); + ISzAlloc_Free(alloc, p->saveState.litProbs); + p->litProbs = NULL; + p->saveState.litProbs = NULL; +} + +static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + #ifndef Z7_ST + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); + #endif + + MatchFinder_Free(&MFB, allocBig); + LzmaEnc_FreeLits(p, alloc); + RangeEnc_Free(&p->rc, alloc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + // GET_CLzmaEnc_p + LzmaEnc_Destruct(p, alloc, allocBig); + ISzAlloc_Free(alloc, p); +} + + +Z7_NO_INLINE +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) +{ + UInt32 nowPos32, startPos32; + if (p->needInit) + { + #ifndef Z7_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)) + } + #endif + p->matchFinder.Init(p->matchFinderObj); + p->needInit = 0; + } + + if (p->finished) + return p->result; + RINOK(CheckErrors(p)) + + nowPos32 = (UInt32)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) + { + unsigned numPairs; + Byte curByte; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); + RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); + // p->state = kLiteralNextStates[p->state]; + curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + + for (;;) + { + UInt32 dist; + unsigned len, posState; + UInt32 range, ttt, newBound; + CLzmaProb *probs; + + if (p->fastMode) + len = GetOptimumFast(p); + else + { + unsigned oci = p->optCur; + if (p->optEnd == oci) + len = GetOptimum(p, nowPos32); + else + { + const COptimal *opt = &p->opt[oci]; + len = opt->len; + p->backRes = opt->dist; + p->optCur = oci + 1; + } + } + + posState = (unsigned)nowPos32 & p->pbMask; + range = p->rc.range; + probs = &p->isMatch[p->state][posState]; + + RC_BIT_PRE(&p->rc, probs) + + dist = p->backRes; + + #ifdef SHOW_STAT2 + printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); + #endif + + if (dist == MARK_LIT) + { + Byte curByte; + const Byte *data; + unsigned state; + + RC_BIT_0(&p->rc, probs) + p->rc.range = range; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; + probs = LIT_PROBS(nowPos32, *(data - 1)); + curByte = *data; + state = p->state; + p->state = kLiteralNextStates[state]; + if (IsLitState(state)) + LitEnc_Encode(&p->rc, probs, curByte); + else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); + } + else + { + RC_BIT_1(&p->rc, probs) + probs = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, probs) + + if (dist < LZMA_NUM_REPS) + { + RC_BIT_1(&p->rc, probs) + probs = &p->isRepG0[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 0) + { + RC_BIT_0(&p->rc, probs) + probs = &p->isRep0Long[p->state][posState]; + RC_BIT_PRE(&p->rc, probs) + if (len != 1) + { + RC_BIT_1_BASE(&p->rc, probs) + } + else + { + RC_BIT_0_BASE(&p->rc, probs) + p->state = kShortRepNextStates[p->state]; + } + } + else + { + RC_BIT_1(&p->rc, probs) + probs = &p->isRepG1[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 1) + { + RC_BIT_0_BASE(&p->rc, probs) + dist = p->reps[1]; + } + else + { + RC_BIT_1(&p->rc, probs) + probs = &p->isRepG2[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 2) + { + RC_BIT_0_BASE(&p->rc, probs) + dist = p->reps[2]; + } + else + { + RC_BIT_1_BASE(&p->rc, probs) + dist = p->reps[3]; + p->reps[3] = p->reps[2]; + } + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; + p->reps[0] = dist; + } + + RC_NORM(&p->rc) + + p->rc.range = range; + + if (len != 1) + { + LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + --p->repLenEncCounter; + p->state = kRepNextStates[p->state]; + } + } + else + { + unsigned posSlot; + RC_BIT_0(&p->rc, probs) + p->rc.range = range; + p->state = kMatchNextStates[p->state]; + + LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + // --p->lenEnc.counter; + + dist -= LZMA_NUM_REPS; + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = dist + 1; + + p->matchPriceCount++; + GetPosSlot(dist, posSlot) + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); + { + UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); + range = p->rc.range; + probs = p->posSlotEncoder[GetLenToPosState(len)]; + do + { + CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); + UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; + sym <<= 1; + RC_BIT(&p->rc, prob, bit) + } + while (sym < (1 << kNumPosSlotBits * 2)); + p->rc.range = range; + } + + if (dist >= kStartPosModelIndex) + { + unsigned footerBits = ((posSlot >> 1) - 1); + + if (dist < kNumFullDistances) + { + unsigned base = ((2 | (posSlot & 1)) << footerBits); + RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */)); + } + else + { + UInt32 pos2 = (dist | 0xF) << (32 - footerBits); + range = p->rc.range; + // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + /* + do + { + range >>= 1; + p->rc.low += range & (0 - ((dist >> --footerBits) & 1)); + RC_NORM(&p->rc) + } + while (footerBits > kNumAlignBits); + */ + do + { + range >>= 1; + p->rc.low += range & (0 - (pos2 >> 31)); + pos2 += pos2; + RC_NORM(&p->rc) + } + while (pos2 != 0xF0000000); + + + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + + { + unsigned m = 1; + unsigned bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) + p->rc.range = range; + // p->alignPriceCount++; + } + } + } + } + } + + nowPos32 += (UInt32)len; + p->additionalOffset -= len; + + if (p->additionalOffset == 0) + { + UInt32 processed; + + if (!p->fastMode) + { + /* + if (p->alignPriceCount >= 16) // kAlignTableSize + FillAlignPrices(p); + if (p->matchPriceCount >= 128) + FillDistancesPrices(p); + if (p->lenEnc.counter <= 0) + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + */ + if (p->matchPriceCount >= 64) + { + FillAlignPrices(p); + // { int y; for (y = 0; y < 100; y++) { + FillDistancesPrices(p); + // }} + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + } + if (p->repLenEncCounter <= 0) + { + p->repLenEncCounter = REP_LEN_COUNT; + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); + } + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; + + if (maxPackSize) + { + if (processed + kNumOpts + 300 >= maxUnpackSize + || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) + break; + } + else if (processed >= (1 << 17)) + { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} + + + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + UInt32 beforeSize = kNumOpts; + UInt32 dictSize; + + if (!RangeEnc_Alloc(&p->rc, alloc)) + return SZ_ERROR_MEM; + + #ifndef Z7_ST + p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); + #endif + + { + const unsigned lclp = p->lc + p->lp; + if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) + { + LzmaEnc_FreeLits(p, alloc); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); + if (!p->litProbs || !p->saveState.litProbs) + { + LzmaEnc_FreeLits(p, alloc); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + + + dictSize = p->dictSize; + if (dictSize == ((UInt32)2 << 30) || + dictSize == ((UInt32)3 << 30)) + { + /* 21.03 : here we reduce the dictionary for 2 reasons: + 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary. + 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases, + where data size is aligned for 1 GB: 5/6/8 GB. + That reducing must be >= 1 for such corner cases. */ + dictSize -= 1; + } + + if (beforeSize + dictSize < keepWindowSize) + beforeSize = keepWindowSize - dictSize; + + /* in worst case we can look ahead for + max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes. + we send larger value for (keepAfter) to MantchFinder_Create(): + (numFastBytes + LZMA_MATCH_LEN_MAX + 1) + */ + + #ifndef Z7_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ + , allocBig)) + p->matchFinderObj = &p->matchFinderMt; + MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0); + MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } + else + #endif + { + if (!MatchFinder_Create(&MFB, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */ + , allocBig)) + return SZ_ERROR_MEM; + p->matchFinderObj = &MFB; + MatchFinder_CreateVTable(&MFB, &p->matchFinder); + } + + return SZ_OK; +} + +static void LzmaEnc_Init(CLzmaEnc *p) +{ + unsigned i; + p->state = 0; + p->reps[0] = + p->reps[1] = + p->reps[2] = + p->reps[3] = 1; + + RangeEnc_Init(&p->rc); + + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + + for (i = 0; i < kNumStates; i++) + { + unsigned j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; i++) + { + CLzmaProb *probs = p->posSlotEncoder[i]; + unsigned j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { + for (i = 0; i < kNumFullDistances; i++) + p->posEncoders[i] = kProbInitValue; + } + + { + const size_t num = (size_t)0x300 << (p->lp + p->lc); + size_t k; + CLzmaProb *probs = p->litProbs; + for (k = 0; k < num; k++) + probs[k] = kProbInitValue; + } + + + LenEnc_Init(&p->lenProbs); + LenEnc_Init(&p->repLenProbs); + + p->optEnd = 0; + p->optCur = 0; + + { + for (i = 0; i < kNumOpts; i++) + p->opt[i].price = kInfinityPrice; + } + + p->additionalOffset = 0; + + p->pbMask = ((unsigned)1 << p->pb) - 1; + p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); + + // p->mf_Failure = False; +} + + +static void LzmaEnc_InitPrices(CLzmaEnc *p) +{ + if (!p->fastMode) + { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + + p->repLenEncCounter = REP_LEN_COUNT; + + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); +} + +static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + unsigned i; + for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) + if (p->dictSize <= ((UInt32)1 << i)) + break; + p->distTableSize = i * 2; + + p->finished = False; + p->result = SZ_OK; + p->nowPos64 = 0; + p->needInit = 1; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + return SZ_OK; +} + +static SRes LzmaEnc_Prepare(CLzmaEncHandle p, + ISeqOutStreamPtr outStream, + ISeqInStreamPtr inStream, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) + p->rc.outStream = outStream; + return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); +} + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, + ISeqInStreamPtr inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, + const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + // GET_CLzmaEnc_p + MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen) + LzmaEnc_SetDataSize(p, srcLen); + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +void LzmaEnc_Finish(CLzmaEncHandle p) +{ + #ifndef Z7_ST + // GET_CLzmaEnc_p + if (p->mtMode) + MatchFinderMt_ReleaseStream(&p->matchFinderMt); + #else + UNUSED_VAR(p) + #endif +} + + +typedef struct +{ + ISeqOutStream vt; + Byte *data; + size_t rem; + BoolInt overflow; +} CLzmaEnc_SeqOutStreamBuf; + +static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf) + if (p->rem < size) + { + size = p->rem; + p->overflow = True; + } + if (size != 0) + { + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + } + return size; +} + + +/* +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p) +{ + GET_const_CLzmaEnc_p + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +} +*/ + +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p) +{ + // GET_const_CLzmaEnc_p + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + + +// (desiredPackSize == 0) is not allowed +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) +{ + // GET_CLzmaEnc_p + UInt64 nowPos64; + SRes res; + CLzmaEnc_SeqOutStreamBuf outStream; + + outStream.vt.Write = SeqOutStreamBuf_Write; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = False; + p->finished = False; + p->result = SZ_OK; + + if (reInit) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + RangeEnc_Init(&p->rc); + p->rc.outStream = &outStream.vt; + nowPos64 = p->nowPos64; + + res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); + + *unpackSize = (UInt32)(p->nowPos64 - nowPos64); + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + + return res; +} + + +Z7_NO_INLINE +static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress) +{ + SRes res = SZ_OK; + + #ifndef Z7_ST + Byte allocaDummy[0x300]; + allocaDummy[0] = 0; + allocaDummy[1] = allocaDummy[0]; + #endif + + for (;;) + { + res = LzmaEnc_CodeOneBlock(p, 0, 0); + if (res != SZ_OK || p->finished) + break; + if (progress) + { + res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + if (res != SZ_OK) + { + res = SZ_ERROR_PROGRESS; + break; + } + } + } + + LzmaEnc_Finish((CLzmaEncHandle)(void *)p); + + /* + if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) + res = SZ_ERROR_FAIL; + } + */ + + return res; +} + + +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + // GET_CLzmaEnc_p + RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig)) + return LzmaEnc_Encode2(p, progress); +} + + +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size) +{ + if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + { + // GET_CLzmaEnc_p + const UInt32 dictSize = p->dictSize; + UInt32 v; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + // we write aligned dictionary value to properties for lzma decoder + if (dictSize >= ((UInt32)1 << 21)) + { + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; + v = (dictSize + kDictMask) & ~kDictMask; + if (v < dictSize) + v = dictSize; + } + else + { + unsigned i = 11 * 2; + do + { + v = (UInt32)(2 + (i & 1)) << (i >> 1); + i++; + } + while (v < dictSize); + } + + SetUi32(props + 1, v) + return SZ_OK; + } +} + + +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p) +{ + // GET_CLzmaEnc_p + return (unsigned)p->writeEndMark; +} + + +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + SRes res; + // GET_CLzmaEnc_p + + CLzmaEnc_SeqOutStreamBuf outStream; + + outStream.vt.Write = SeqOutStreamBuf_Write; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = writeEndMark; + p->rc.outStream = &outStream.vt; + + res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig); + + if (res == SZ_OK) + { + res = LzmaEnc_Encode2(p, progress); + if (res == SZ_OK && p->nowPos64 != srcLen) + res = SZ_ERROR_FAIL; + } + + *destLen -= (SizeT)outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + return res; +} + + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEncHandle p = LzmaEnc_Create(alloc); + SRes res; + if (!p) + return SZ_ERROR_MEM; + + res = LzmaEnc_SetProps(p, props); + if (res == SZ_OK) + { + res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); + if (res == SZ_OK) + res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, + writeEndMark, progress, alloc, allocBig); + } + + LzmaEnc_Destroy(p, alloc, allocBig); + return res; +} + + +/* +#ifndef Z7_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2]) +{ + GET_const_CLzmaEnc_p + lz_threads[0] = p->matchFinderMt.hashSync.thread; + lz_threads[1] = p->matchFinderMt.btSync.thread; +} +#endif +*/ diff --git a/crnlib/lzma/LzmaEnc.h b/crnlib/lzma/LzmaEnc.h new file mode 100644 index 00000000..08711cba --- /dev/null +++ b/crnlib/lzma/LzmaEnc.h @@ -0,0 +1,83 @@ +/* LzmaEnc.h -- LZMA Encoder +2023-04-13 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_LZMA_ENC_H +#define ZIP7_INC_LZMA_ENC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define LZMA_PROPS_SIZE 5 + +typedef struct +{ + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (3 << 29) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + unsigned numHashOutBits; /* default = ? */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ + + // int _pad; + + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. + Encoder uses this value to reduce dictionary size */ + + UInt64 affinity; +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps *p); +void LzmaEncProps_Normalize(CLzmaEncProps *p); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); + + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc* functions can return the following exit codes: +SRes: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) +*/ + +typedef struct CLzmaEnc CLzmaEnc; +typedef CLzmaEnc * CLzmaEncHandle; +// Z7_DECLARE_HANDLE(CLzmaEncHandle) + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); + +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + + +/* ---------- One Call Interface ---------- */ + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/LzmaLib.c b/crnlib/lzma/LzmaLib.c new file mode 100644 index 00000000..350c1953 --- /dev/null +++ b/crnlib/lzma/LzmaLib.c @@ -0,0 +1,42 @@ +/* LzmaLib.c -- LZMA library wrapper +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Alloc.h" +#include "LzmaDec.h" +#include "LzmaEnc.h" +#include "LzmaLib.h" + +Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ +) +{ + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + props.lc = lc; + props.lp = lp; + props.pb = pb; + props.fb = fb; + props.numThreads = numThreads; + + return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, + NULL, &g_Alloc, &g_Alloc); +} + + +Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, + const unsigned char *props, size_t propsSize) +{ + ELzmaStatus status; + return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); +} diff --git a/crnlib/lzma_LzmaLib.h b/crnlib/lzma/LzmaLib.h similarity index 63% rename from crnlib/lzma_LzmaLib.h rename to crnlib/lzma/LzmaLib.h index 69f27c94..79612a42 100644 --- a/crnlib/lzma_LzmaLib.h +++ b/crnlib/lzma/LzmaLib.h @@ -1,26 +1,14 @@ /* LzmaLib.h -- LZMA library interface -2008-08-05 -Igor Pavlov -Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMALIB_H -#define __LZMALIB_H +#ifndef ZIP7_INC_LZMA_LIB_H +#define ZIP7_INC_LZMA_LIB_H -#include "lzma_Types.h" +#include "7zTypes.h" -namespace crnlib { +EXTERN_C_BEGIN -#if 0 -#ifdef __cplusplus -#define MY_EXTERN_C extern "C" -#else -#define MY_EXTERN_C extern -#endif - -#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL -#else -#define MY_STDAPI int MY_STD_CALL -#endif +#define Z7_STDAPI int Z7_STDCALL #define LZMA_PROPS_SIZE 5 @@ -52,14 +40,16 @@ outPropsSize - level - compression level: 0 <= level <= 9; level dictSize algo fb - 0: 16 KB 0 32 - 1: 64 KB 0 32 - 2: 256 KB 0 32 - 3: 1 MB 0 32 - 4: 4 MB 0 32 + 0: 64 KB 0 32 + 1: 256 KB 0 32 + 2: 1 MB 0 32 + 3: 4 MB 0 32 + 4: 16 MB 0 32 5: 16 MB 1 32 6: 32 MB 1 32 - 7+: 64 MB 1 64 + 7: 32 MB 1 64 + 8: 64 MB 1 64 + 9: 64 MB 1 64 The default value for "level" is 5. @@ -95,6 +85,11 @@ fb - Word size (the number of fast bytes). numThreads - The number of thereads. 1 or 2. The default value is 2. Fast mode (algo = 0) can use only 1 thread. +In: + dest - output data buffer + destLen - output data buffer size + src - input data + srcLen - input data size Out: destLen - processed output size Returns: @@ -105,23 +100,23 @@ numThreads - The number of thereads. 1 or 2. The default value is 2. SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ -MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, - unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* default = (1 << 24) */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ); +Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); /* LzmaUncompress -------------- In: - dest - output data - destLen - output data size + dest - output data buffer + destLen - output data buffer size src - input data srcLen - input data size Out: @@ -135,11 +130,9 @@ LzmaUncompress SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) */ -MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, SizeT* srcLen, - const unsigned char* props, size_t propsSize); +Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, + const unsigned char *props, size_t propsSize); -#define LZMA_COMPRESS_FUNC_EXPORT "LzmaCompress" -#define LZMA_UNCOMPRESS_FUNC_EXPORT "LzmaUncompress" -} +EXTERN_C_END #endif diff --git a/crnlib/lzma/MtCoder.c b/crnlib/lzma/MtCoder.c new file mode 100644 index 00000000..10ad5822 --- /dev/null +++ b/crnlib/lzma/MtCoder.c @@ -0,0 +1,571 @@ +/* MtCoder.c -- Multi-thread Coder +2023-09-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "MtCoder.h" + +#ifndef Z7_ST + +static SRes MtProgressThunk_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMtProgressThunk) + UInt64 inSize2 = 0; + UInt64 outSize2 = 0; + if (inSize != (UInt64)(Int64)-1) + { + inSize2 = inSize - p->inSize; + p->inSize = inSize; + } + if (outSize != (UInt64)(Int64)-1) + { + outSize2 = outSize - p->outSize; + p->outSize = outSize; + } + return MtProgress_ProgressAdd(p->mtProgress, inSize2, outSize2); +} + + +void MtProgressThunk_CreateVTable(CMtProgressThunk *p) +{ + p->vt.Progress = MtProgressThunk_Progress; +} + + + +#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } + + +static THREAD_FUNC_DECL ThreadFunc(void *pp); + + +static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) +{ + WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent); + if (wres == 0) + { + t->stop = False; + if (!Thread_WasCreated(&t->thread)) + wres = Thread_Create(&t->thread, ThreadFunc, t); + if (wres == 0) + wres = Event_Set(&t->startEvent); + } + if (wres == 0) + return SZ_OK; + return MY_SRes_HRESULT_FROM_WRes(wres); +} + + +static void MtCoderThread_Destruct(CMtCoderThread *t) +{ + if (Thread_WasCreated(&t->thread)) + { + t->stop = 1; + Event_Set(&t->startEvent); + Thread_Wait_Close(&t->thread); + } + + Event_Close(&t->startEvent); + + if (t->inBuf) + { + ISzAlloc_Free(t->mtCoder->allocBig, t->inBuf); + t->inBuf = NULL; + } +} + + + + +/* + ThreadFunc2() returns: + SZ_OK - in all normal cases (even for stream error or memory allocation error) + SZ_ERROR_THREAD - in case of failure in system synch function +*/ + +static SRes ThreadFunc2(CMtCoderThread *t) +{ + CMtCoder *mtc = t->mtCoder; + + for (;;) + { + unsigned bi; + SRes res; + SRes res2; + BoolInt finished; + unsigned bufIndex; + size_t size; + const Byte *inData; + UInt64 readProcessed = 0; + + RINOK_THREAD(Event_Wait(&mtc->readEvent)) + + /* after Event_Wait(&mtc->readEvent) we must call Event_Set(&mtc->readEvent) in any case to unlock another threads */ + + if (mtc->stopReading) + { + return Event_Set(&mtc->readEvent) == 0 ? SZ_OK : SZ_ERROR_THREAD; + } + + res = MtProgress_GetError(&mtc->mtProgress); + + size = 0; + inData = NULL; + finished = True; + + if (res == SZ_OK) + { + size = mtc->blockSize; + if (mtc->inStream) + { + if (!t->inBuf) + { + t->inBuf = (Byte *)ISzAlloc_Alloc(mtc->allocBig, mtc->blockSize); + if (!t->inBuf) + res = SZ_ERROR_MEM; + } + if (res == SZ_OK) + { + res = SeqInStream_ReadMax(mtc->inStream, t->inBuf, &size); + readProcessed = mtc->readProcessed + size; + mtc->readProcessed = readProcessed; + } + if (res != SZ_OK) + { + mtc->readRes = res; + /* after reading error - we can stop encoding of previous blocks */ + MtProgress_SetError(&mtc->mtProgress, res); + } + else + finished = (size != mtc->blockSize); + } + else + { + size_t rem; + readProcessed = mtc->readProcessed; + rem = mtc->inDataSize - (size_t)readProcessed; + if (size > rem) + size = rem; + inData = mtc->inData + (size_t)readProcessed; + readProcessed += size; + mtc->readProcessed = readProcessed; + finished = (mtc->inDataSize == (size_t)readProcessed); + } + } + + /* we must get some block from blocksSemaphore before Event_Set(&mtc->readEvent) */ + + res2 = SZ_OK; + + if (Semaphore_Wait(&mtc->blocksSemaphore) != 0) + { + res2 = SZ_ERROR_THREAD; + if (res == SZ_OK) + { + res = res2; + // MtProgress_SetError(&mtc->mtProgress, res); + } + } + + bi = mtc->blockIndex; + + if (++mtc->blockIndex >= mtc->numBlocksMax) + mtc->blockIndex = 0; + + bufIndex = (unsigned)(int)-1; + + if (res == SZ_OK) + res = MtProgress_GetError(&mtc->mtProgress); + + if (res != SZ_OK) + finished = True; + + if (!finished) + { + if (mtc->numStartedThreads < mtc->numStartedThreadsLimit + && mtc->expectedDataSize != readProcessed) + { + res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]); + if (res == SZ_OK) + mtc->numStartedThreads++; + else + { + MtProgress_SetError(&mtc->mtProgress, res); + finished = True; + } + } + } + + if (finished) + mtc->stopReading = True; + + RINOK_THREAD(Event_Set(&mtc->readEvent)) + + if (res2 != SZ_OK) + return res2; + + if (res == SZ_OK) + { + CriticalSection_Enter(&mtc->cs); + bufIndex = mtc->freeBlockHead; + mtc->freeBlockHead = mtc->freeBlockList[bufIndex]; + CriticalSection_Leave(&mtc->cs); + + res = mtc->mtCallback->Code(mtc->mtCallbackObject, t->index, bufIndex, + mtc->inStream ? t->inBuf : inData, size, finished); + + // MtProgress_Reinit(&mtc->mtProgress, t->index); + + if (res != SZ_OK) + MtProgress_SetError(&mtc->mtProgress, res); + } + + { + CMtCoderBlock *block = &mtc->blocks[bi]; + block->res = res; + block->bufIndex = bufIndex; + block->finished = finished; + } + + #ifdef MTCODER_USE_WRITE_THREAD + RINOK_THREAD(Event_Set(&mtc->writeEvents[bi])) + #else + { + unsigned wi; + { + CriticalSection_Enter(&mtc->cs); + wi = mtc->writeIndex; + if (wi == bi) + mtc->writeIndex = (unsigned)(int)-1; + else + mtc->ReadyBlocks[bi] = True; + CriticalSection_Leave(&mtc->cs); + } + + if (wi != bi) + { + if (res != SZ_OK || finished) + return 0; + continue; + } + + if (mtc->writeRes != SZ_OK) + res = mtc->writeRes; + + for (;;) + { + if (res == SZ_OK && bufIndex != (unsigned)(int)-1) + { + res = mtc->mtCallback->Write(mtc->mtCallbackObject, bufIndex); + if (res != SZ_OK) + { + mtc->writeRes = res; + MtProgress_SetError(&mtc->mtProgress, res); + } + } + + if (++wi >= mtc->numBlocksMax) + wi = 0; + { + BoolInt isReady; + + CriticalSection_Enter(&mtc->cs); + + if (bufIndex != (unsigned)(int)-1) + { + mtc->freeBlockList[bufIndex] = mtc->freeBlockHead; + mtc->freeBlockHead = bufIndex; + } + + isReady = mtc->ReadyBlocks[wi]; + + if (isReady) + mtc->ReadyBlocks[wi] = False; + else + mtc->writeIndex = wi; + + CriticalSection_Leave(&mtc->cs); + + RINOK_THREAD(Semaphore_Release1(&mtc->blocksSemaphore)) + + if (!isReady) + break; + } + + { + CMtCoderBlock *block = &mtc->blocks[wi]; + if (res == SZ_OK && block->res != SZ_OK) + res = block->res; + bufIndex = block->bufIndex; + finished = block->finished; + } + } + } + #endif + + if (finished || res != SZ_OK) + return 0; + } +} + + +static THREAD_FUNC_DECL ThreadFunc(void *pp) +{ + CMtCoderThread *t = (CMtCoderThread *)pp; + for (;;) + { + if (Event_Wait(&t->startEvent) != 0) + return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; + if (t->stop) + return 0; + { + SRes res = ThreadFunc2(t); + CMtCoder *mtc = t->mtCoder; + if (res != SZ_OK) + { + MtProgress_SetError(&mtc->mtProgress, res); + } + + #ifndef MTCODER_USE_WRITE_THREAD + { + unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); + if (numFinished == mtc->numStartedThreads) + if (Event_Set(&mtc->finishedEvent) != 0) + return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; + } + #endif + } + } +} + + + +void MtCoder_Construct(CMtCoder *p) +{ + unsigned i; + + p->blockSize = 0; + p->numThreadsMax = 0; + p->expectedDataSize = (UInt64)(Int64)-1; + + p->inStream = NULL; + p->inData = NULL; + p->inDataSize = 0; + + p->progress = NULL; + p->allocBig = NULL; + + p->mtCallback = NULL; + p->mtCallbackObject = NULL; + + p->allocatedBufsSize = 0; + + Event_Construct(&p->readEvent); + Semaphore_Construct(&p->blocksSemaphore); + + for (i = 0; i < MTCODER_THREADS_MAX; i++) + { + CMtCoderThread *t = &p->threads[i]; + t->mtCoder = p; + t->index = i; + t->inBuf = NULL; + t->stop = False; + Event_Construct(&t->startEvent); + Thread_CONSTRUCT(&t->thread) + } + + #ifdef MTCODER_USE_WRITE_THREAD + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + Event_Construct(&p->writeEvents[i]); + #else + Event_Construct(&p->finishedEvent); + #endif + + CriticalSection_Init(&p->cs); + CriticalSection_Init(&p->mtProgress.cs); +} + + + + +static void MtCoder_Free(CMtCoder *p) +{ + unsigned i; + + /* + p->stopReading = True; + if (Event_IsCreated(&p->readEvent)) + Event_Set(&p->readEvent); + */ + + for (i = 0; i < MTCODER_THREADS_MAX; i++) + MtCoderThread_Destruct(&p->threads[i]); + + Event_Close(&p->readEvent); + Semaphore_Close(&p->blocksSemaphore); + + #ifdef MTCODER_USE_WRITE_THREAD + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + Event_Close(&p->writeEvents[i]); + #else + Event_Close(&p->finishedEvent); + #endif +} + + +void MtCoder_Destruct(CMtCoder *p) +{ + MtCoder_Free(p); + + CriticalSection_Delete(&p->cs); + CriticalSection_Delete(&p->mtProgress.cs); +} + + +SRes MtCoder_Code(CMtCoder *p) +{ + unsigned numThreads = p->numThreadsMax; + unsigned numBlocksMax; + unsigned i; + SRes res = SZ_OK; + + if (numThreads > MTCODER_THREADS_MAX) + numThreads = MTCODER_THREADS_MAX; + numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads); + + if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++; + if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++; + if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++; + + if (numBlocksMax > MTCODER_BLOCKS_MAX) + numBlocksMax = MTCODER_BLOCKS_MAX; + + if (p->blockSize != p->allocatedBufsSize) + { + for (i = 0; i < MTCODER_THREADS_MAX; i++) + { + CMtCoderThread *t = &p->threads[i]; + if (t->inBuf) + { + ISzAlloc_Free(p->allocBig, t->inBuf); + t->inBuf = NULL; + } + } + p->allocatedBufsSize = p->blockSize; + } + + p->readRes = SZ_OK; + + MtProgress_Init(&p->mtProgress, p->progress); + + #ifdef MTCODER_USE_WRITE_THREAD + for (i = 0; i < numBlocksMax; i++) + { + RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->writeEvents[i])) + } + #else + RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent)) + #endif + + { + RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent)) + RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, (UInt32)numBlocksMax, (UInt32)numBlocksMax)) + } + + for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++) + p->freeBlockList[i] = i + 1; + p->freeBlockList[MTCODER_BLOCKS_MAX - 1] = (unsigned)(int)-1; + p->freeBlockHead = 0; + + p->readProcessed = 0; + p->blockIndex = 0; + p->numBlocksMax = numBlocksMax; + p->stopReading = False; + + #ifndef MTCODER_USE_WRITE_THREAD + p->writeIndex = 0; + p->writeRes = SZ_OK; + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + p->ReadyBlocks[i] = False; + p->numFinishedThreads = 0; + #endif + + p->numStartedThreadsLimit = numThreads; + p->numStartedThreads = 0; + + // for (i = 0; i < numThreads; i++) + { + CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++]; + RINOK(MtCoderThread_CreateAndStart(nextThread)) + } + + RINOK_THREAD(Event_Set(&p->readEvent)) + + #ifdef MTCODER_USE_WRITE_THREAD + { + unsigned bi = 0; + + for (;; bi++) + { + if (bi >= numBlocksMax) + bi = 0; + + RINOK_THREAD(Event_Wait(&p->writeEvents[bi])) + + { + const CMtCoderBlock *block = &p->blocks[bi]; + unsigned bufIndex = block->bufIndex; + BoolInt finished = block->finished; + if (res == SZ_OK && block->res != SZ_OK) + res = block->res; + + if (bufIndex != (unsigned)(int)-1) + { + if (res == SZ_OK) + { + res = p->mtCallback->Write(p->mtCallbackObject, bufIndex); + if (res != SZ_OK) + MtProgress_SetError(&p->mtProgress, res); + } + + CriticalSection_Enter(&p->cs); + { + p->freeBlockList[bufIndex] = p->freeBlockHead; + p->freeBlockHead = bufIndex; + } + CriticalSection_Leave(&p->cs); + } + + RINOK_THREAD(Semaphore_Release1(&p->blocksSemaphore)) + + if (finished) + break; + } + } + } + #else + { + WRes wres = Event_Wait(&p->finishedEvent); + res = MY_SRes_HRESULT_FROM_WRes(wres); + } + #endif + + if (res == SZ_OK) + res = p->readRes; + + if (res == SZ_OK) + res = p->mtProgress.res; + + #ifndef MTCODER_USE_WRITE_THREAD + if (res == SZ_OK) + res = p->writeRes; + #endif + + if (res != SZ_OK) + MtCoder_Free(p); + return res; +} + +#endif + +#undef RINOK_THREAD diff --git a/crnlib/lzma/MtCoder.h b/crnlib/lzma/MtCoder.h new file mode 100644 index 00000000..c031fe0f --- /dev/null +++ b/crnlib/lzma/MtCoder.h @@ -0,0 +1,141 @@ +/* MtCoder.h -- Multi-thread Coder +2023-04-13 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_MT_CODER_H +#define ZIP7_INC_MT_CODER_H + +#include "MtDec.h" + +EXTERN_C_BEGIN + +/* + if ( defined MTCODER_USE_WRITE_THREAD) : main thread writes all data blocks to output stream + if (not defined MTCODER_USE_WRITE_THREAD) : any coder thread can write data blocks to output stream +*/ +/* #define MTCODER_USE_WRITE_THREAD */ + +#ifndef Z7_ST + #define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1) + #define MTCODER_THREADS_MAX 64 + #define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3) +#else + #define MTCODER_THREADS_MAX 1 + #define MTCODER_BLOCKS_MAX 1 +#endif + + +#ifndef Z7_ST + + +typedef struct +{ + ICompressProgress vt; + CMtProgress *mtProgress; + UInt64 inSize; + UInt64 outSize; +} CMtProgressThunk; + +void MtProgressThunk_CreateVTable(CMtProgressThunk *p); + +#define MtProgressThunk_INIT(p) { (p)->inSize = 0; (p)->outSize = 0; } + + +struct CMtCoder_; + + +typedef struct +{ + struct CMtCoder_ *mtCoder; + unsigned index; + int stop; + Byte *inBuf; + + CAutoResetEvent startEvent; + CThread thread; +} CMtCoderThread; + + +typedef struct +{ + SRes (*Code)(void *p, unsigned coderIndex, unsigned outBufIndex, + const Byte *src, size_t srcSize, int finished); + SRes (*Write)(void *p, unsigned outBufIndex); +} IMtCoderCallback2; + + +typedef struct +{ + SRes res; + unsigned bufIndex; + BoolInt finished; +} CMtCoderBlock; + + +typedef struct CMtCoder_ +{ + /* input variables */ + + size_t blockSize; /* size of input block */ + unsigned numThreadsMax; + UInt64 expectedDataSize; + + ISeqInStreamPtr inStream; + const Byte *inData; + size_t inDataSize; + + ICompressProgressPtr progress; + ISzAllocPtr allocBig; + + IMtCoderCallback2 *mtCallback; + void *mtCallbackObject; + + + /* internal variables */ + + size_t allocatedBufsSize; + + CAutoResetEvent readEvent; + CSemaphore blocksSemaphore; + + BoolInt stopReading; + SRes readRes; + + #ifdef MTCODER_USE_WRITE_THREAD + CAutoResetEvent writeEvents[MTCODER_BLOCKS_MAX]; + #else + CAutoResetEvent finishedEvent; + SRes writeRes; + unsigned writeIndex; + Byte ReadyBlocks[MTCODER_BLOCKS_MAX]; + LONG numFinishedThreads; + #endif + + unsigned numStartedThreadsLimit; + unsigned numStartedThreads; + + unsigned numBlocksMax; + unsigned blockIndex; + UInt64 readProcessed; + + CCriticalSection cs; + + unsigned freeBlockHead; + unsigned freeBlockList[MTCODER_BLOCKS_MAX]; + + CMtProgress mtProgress; + CMtCoderBlock blocks[MTCODER_BLOCKS_MAX]; + CMtCoderThread threads[MTCODER_THREADS_MAX]; +} CMtCoder; + + +void MtCoder_Construct(CMtCoder *p); +void MtCoder_Destruct(CMtCoder *p); +SRes MtCoder_Code(CMtCoder *p); + + +#endif + + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/MtDec.c b/crnlib/lzma/MtDec.c new file mode 100644 index 00000000..c7952042 --- /dev/null +++ b/crnlib/lzma/MtDec.c @@ -0,0 +1,1124 @@ +/* MtDec.c -- Multi-thread Decoder +2024-02-20 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #define SHOW_DEBUG_INFO + +// #include +#include + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#include "MtDec.h" + +#ifndef Z7_ST + +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif + +#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) + +void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress) +{ + p->progress = progress; + p->res = SZ_OK; + p->totalInSize = 0; + p->totalOutSize = 0; +} + + +SRes MtProgress_Progress_ST(CMtProgress *p) +{ + if (p->res == SZ_OK && p->progress) + if (ICompressProgress_Progress(p->progress, p->totalInSize, p->totalOutSize) != SZ_OK) + p->res = SZ_ERROR_PROGRESS; + return p->res; +} + + +SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize) +{ + SRes res; + CriticalSection_Enter(&p->cs); + + p->totalInSize += inSize; + p->totalOutSize += outSize; + if (p->res == SZ_OK && p->progress) + if (ICompressProgress_Progress(p->progress, p->totalInSize, p->totalOutSize) != SZ_OK) + p->res = SZ_ERROR_PROGRESS; + res = p->res; + + CriticalSection_Leave(&p->cs); + return res; +} + + +SRes MtProgress_GetError(CMtProgress *p) +{ + SRes res; + CriticalSection_Enter(&p->cs); + res = p->res; + CriticalSection_Leave(&p->cs); + return res; +} + + +void MtProgress_SetError(CMtProgress *p, SRes res) +{ + CriticalSection_Enter(&p->cs); + if (p->res == SZ_OK) + p->res = res; + CriticalSection_Leave(&p->cs); +} + + +#define RINOK_THREAD(x) RINOK_WRes(x) + + +struct CMtDecBufLink_ +{ + struct CMtDecBufLink_ *next; + void *pad[3]; +}; + +typedef struct CMtDecBufLink_ CMtDecBufLink; + +#define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink) +#define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET) + + + +static THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp); + + +static WRes MtDecThread_CreateEvents(CMtDecThread *t) +{ + WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->canWrite); + if (wres == 0) + { + wres = AutoResetEvent_OptCreate_And_Reset(&t->canRead); + if (wres == 0) + return SZ_OK; + } + return wres; +} + + +static SRes MtDecThread_CreateAndStart(CMtDecThread *t) +{ + WRes wres = MtDecThread_CreateEvents(t); + // wres = 17; // for test + if (wres == 0) + { + if (Thread_WasCreated(&t->thread)) + return SZ_OK; + wres = Thread_Create(&t->thread, MtDec_ThreadFunc, t); + if (wres == 0) + return SZ_OK; + } + return MY_SRes_HRESULT_FROM_WRes(wres); +} + + +void MtDecThread_FreeInBufs(CMtDecThread *t) +{ + if (t->inBuf) + { + void *link = t->inBuf; + t->inBuf = NULL; + do + { + void *next = ((CMtDecBufLink *)link)->next; + ISzAlloc_Free(t->mtDec->alloc, link); + link = next; + } + while (link); + } +} + + +static void MtDecThread_CloseThread(CMtDecThread *t) +{ + if (Thread_WasCreated(&t->thread)) + { + Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */ + Event_Set(&t->canRead); + Thread_Wait_Close(&t->thread); + } + + Event_Close(&t->canRead); + Event_Close(&t->canWrite); +} + +static void MtDec_CloseThreads(CMtDec *p) +{ + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + MtDecThread_CloseThread(&p->threads[i]); +} + +static void MtDecThread_Destruct(CMtDecThread *t) +{ + MtDecThread_CloseThread(t); + MtDecThread_FreeInBufs(t); +} + + + +static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted) +{ + SRes res; + CriticalSection_Enter(&p->mtProgress.cs); + *wasInterrupted = (p->needInterrupt && interruptIndex > p->interruptIndex); + res = p->mtProgress.res; + CriticalSection_Leave(&p->mtProgress.cs); + return res; +} + +static SRes MtDec_Progress_GetError_Spec(CMtDec *p, UInt64 inSize, UInt64 outSize, UInt64 interruptIndex, BoolInt *wasInterrupted) +{ + SRes res; + CriticalSection_Enter(&p->mtProgress.cs); + + p->mtProgress.totalInSize += inSize; + p->mtProgress.totalOutSize += outSize; + if (p->mtProgress.res == SZ_OK && p->mtProgress.progress) + if (ICompressProgress_Progress(p->mtProgress.progress, p->mtProgress.totalInSize, p->mtProgress.totalOutSize) != SZ_OK) + p->mtProgress.res = SZ_ERROR_PROGRESS; + + *wasInterrupted = (p->needInterrupt && interruptIndex > p->interruptIndex); + res = p->mtProgress.res; + + CriticalSection_Leave(&p->mtProgress.cs); + + return res; +} + +static void MtDec_Interrupt(CMtDec *p, UInt64 interruptIndex) +{ + CriticalSection_Enter(&p->mtProgress.cs); + if (!p->needInterrupt || interruptIndex < p->interruptIndex) + { + p->interruptIndex = interruptIndex; + p->needInterrupt = True; + } + CriticalSection_Leave(&p->mtProgress.cs); +} + +Byte *MtDec_GetCrossBuff(CMtDec *p) +{ + Byte *cr = p->crossBlock; + if (!cr) + { + cr = (Byte *)ISzAlloc_Alloc(p->alloc, MTDEC__LINK_DATA_OFFSET + p->inBufSize); + if (!cr) + return NULL; + p->crossBlock = cr; + } + return MTDEC__DATA_PTR_FROM_LINK(cr); +} + + +/* + MtDec_ThreadFunc2() returns: + 0 - in all normal cases (even for stream error or memory allocation error) + (!= 0) - WRes error return by system threading function +*/ + +// #define MTDEC_ProgessStep (1 << 22) +#define MTDEC_ProgessStep (1 << 0) + +static WRes MtDec_ThreadFunc2(CMtDecThread *t) +{ + CMtDec *p = t->mtDec; + + PRF_STR_INT("MtDec_ThreadFunc2", t->index) + + // SetThreadAffinityMask(GetCurrentThread(), 1 << t->index); + + for (;;) + { + SRes res, codeRes; + BoolInt wasInterrupted, isAllocError, overflow, finish; + SRes threadingErrorSRes; + BoolInt needCode, needWrite, needContinue; + + size_t inDataSize_Start; + UInt64 inDataSize; + // UInt64 inDataSize_Full; + + UInt64 blockIndex; + + UInt64 inPrev = 0; + UInt64 outPrev = 0; + UInt64 inCodePos; + UInt64 outCodePos; + + Byte *afterEndData = NULL; + size_t afterEndData_Size = 0; + BoolInt afterEndData_IsCross = False; + + BoolInt canCreateNewThread = False; + // CMtDecCallbackInfo parse; + CMtDecThread *nextThread; + + PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index) + + RINOK_THREAD(Event_Wait(&t->canRead)) + if (p->exitThread) + return 0; + + PRF_STR_INT("after Event_Wait(&t->canRead)", t->index) + + // if (t->index == 3) return 19; // for test + + blockIndex = p->blockIndex++; + + // PRF(printf("\ncanRead\n")) + + res = MtDec_Progress_GetError_Spec(p, 0, 0, blockIndex, &wasInterrupted); + + finish = p->readWasFinished; + needCode = False; + needWrite = False; + isAllocError = False; + overflow = False; + + inDataSize_Start = 0; + inDataSize = 0; + // inDataSize_Full = 0; + + if (res == SZ_OK && !wasInterrupted) + { + // if (p->inStream) + { + CMtDecBufLink *prev = NULL; + CMtDecBufLink *link = (CMtDecBufLink *)t->inBuf; + size_t crossSize = p->crossEnd - p->crossStart; + + PRF(printf("\ncrossSize = %d\n", crossSize)); + + for (;;) + { + if (!link) + { + link = (CMtDecBufLink *)ISzAlloc_Alloc(p->alloc, MTDEC__LINK_DATA_OFFSET + p->inBufSize); + if (!link) + { + finish = True; + // p->allocError_for_Read_BlockIndex = blockIndex; + isAllocError = True; + break; + } + link->next = NULL; + if (prev) + { + // static unsigned g_num = 0; + // printf("\n%6d : %x", ++g_num, (unsigned)(size_t)((Byte *)link - (Byte *)prev)); + prev->next = link; + } + else + t->inBuf = (void *)link; + } + + { + Byte *data = MTDEC__DATA_PTR_FROM_LINK(link); + Byte *parseData = data; + size_t size; + + if (crossSize != 0) + { + inDataSize = crossSize; + // inDataSize_Full = inDataSize; + inDataSize_Start = crossSize; + size = crossSize; + parseData = MTDEC__DATA_PTR_FROM_LINK(p->crossBlock) + p->crossStart; + PRF(printf("\ncross : crossStart = %7d crossEnd = %7d finish = %1d", + (int)p->crossStart, (int)p->crossEnd, (int)finish)); + } + else + { + size = p->inBufSize; + + res = SeqInStream_ReadMax(p->inStream, data, &size); + + // size = 10; // test + + inDataSize += size; + // inDataSize_Full = inDataSize; + if (!prev) + inDataSize_Start = size; + + p->readProcessed += size; + finish = (size != p->inBufSize); + if (finish) + p->readWasFinished = True; + + // res = E_INVALIDARG; // test + + if (res != SZ_OK) + { + // PRF(printf("\nRead error = %d\n", res)) + // we want to decode all data before error + p->readRes = res; + // p->readError_BlockIndex = blockIndex; + p->readWasFinished = True; + finish = True; + res = SZ_OK; + // break; + } + + if (inDataSize - inPrev >= MTDEC_ProgessStep) + { + res = MtDec_Progress_GetError_Spec(p, 0, 0, blockIndex, &wasInterrupted); + if (res != SZ_OK || wasInterrupted) + break; + inPrev = inDataSize; + } + } + + { + CMtDecCallbackInfo parse; + + parse.startCall = (prev == NULL); + parse.src = parseData; + parse.srcSize = size; + parse.srcFinished = finish; + parse.canCreateNewThread = True; + + PRF(printf("\nParse size = %d\n", (unsigned)size)); + + p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse); + + PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state)); + + needWrite = True; + canCreateNewThread = parse.canCreateNewThread; + + // printf("\n\n%12I64u %12I64u", (UInt64)p->mtProgress.totalInSize, (UInt64)p->mtProgress.totalOutSize); + + if ( + // parseRes != SZ_OK || + // inDataSize - (size - parse.srcSize) > p->inBlockMax + // || + parse.state == MTDEC_PARSE_OVERFLOW + // || wasInterrupted + ) + { + // Overflow or Parse error - switch from MT decoding to ST decoding + finish = True; + overflow = True; + + { + PRF(printf("\n Overflow")); + // PRF(printf("\nisBlockFinished = %d", (unsigned)parse.blockWasFinished)); + PRF(printf("\n inDataSize = %d", (unsigned)inDataSize)); + } + + if (crossSize != 0) + memcpy(data, parseData, size); + p->crossStart = 0; + p->crossEnd = 0; + break; + } + + if (crossSize != 0) + { + memcpy(data, parseData, parse.srcSize); + p->crossStart += parse.srcSize; + } + + if (parse.state != MTDEC_PARSE_CONTINUE || finish) + { + // we don't need to parse in current thread anymore + + if (parse.state == MTDEC_PARSE_END) + finish = True; + + needCode = True; + // p->crossFinished = finish; + + if (parse.srcSize == size) + { + // full parsed - no cross transfer + p->crossStart = 0; + p->crossEnd = 0; + break; + } + + if (parse.state == MTDEC_PARSE_END) + { + afterEndData = parseData + parse.srcSize; + afterEndData_Size = size - parse.srcSize; + if (crossSize != 0) + afterEndData_IsCross = True; + // we reduce data size to required bytes (parsed only) + inDataSize -= afterEndData_Size; + if (!prev) + inDataSize_Start = parse.srcSize; + break; + } + + { + // partial parsed - need cross transfer + if (crossSize != 0) + inDataSize = parse.srcSize; // it's only parsed now + else + { + // partial parsed - is not in initial cross block - we need to copy new data to cross block + Byte *cr = MtDec_GetCrossBuff(p); + if (!cr) + { + { + PRF(printf("\ncross alloc error error\n")); + // res = SZ_ERROR_MEM; + finish = True; + // p->allocError_for_Read_BlockIndex = blockIndex; + isAllocError = True; + break; + } + } + + { + size_t crSize = size - parse.srcSize; + inDataSize -= crSize; + p->crossEnd = crSize; + p->crossStart = 0; + memcpy(cr, parseData + parse.srcSize, crSize); + } + } + + // inDataSize_Full = inDataSize; + if (!prev) + inDataSize_Start = parse.srcSize; // it's partial size (parsed only) + + finish = False; + break; + } + } + + if (parse.srcSize != size) + { + res = SZ_ERROR_FAIL; + PRF(printf("\nfinished error SZ_ERROR_FAIL = %d\n", res)); + break; + } + } + } + + prev = link; + link = link->next; + + if (crossSize != 0) + { + crossSize = 0; + p->crossStart = 0; + p->crossEnd = 0; + } + } + } + + if (res == SZ_OK) + res = MtDec_GetError_Spec(p, blockIndex, &wasInterrupted); + } + + codeRes = SZ_OK; + + if (res == SZ_OK && needCode && !wasInterrupted) + { + codeRes = p->mtCallback->PreCode(p->mtCallbackObject, t->index); + if (codeRes != SZ_OK) + { + needCode = False; + finish = True; + // SZ_ERROR_MEM is expected error here. + // if (codeRes == SZ_ERROR_MEM) - we will try single-thread decoding later. + // if (codeRes != SZ_ERROR_MEM) - we can stop decoding or try single-thread decoding. + } + } + + if (res != SZ_OK || wasInterrupted) + finish = True; + + nextThread = NULL; + threadingErrorSRes = SZ_OK; + + if (!finish) + { + if (p->numStartedThreads < p->numStartedThreads_Limit && canCreateNewThread) + { + SRes res2 = MtDecThread_CreateAndStart(&p->threads[p->numStartedThreads]); + if (res2 == SZ_OK) + { + // if (p->numStartedThreads % 1000 == 0) PRF(printf("\n numStartedThreads=%d\n", p->numStartedThreads)); + p->numStartedThreads++; + } + else + { + PRF(printf("\nERROR: numStartedThreads=%d\n", p->numStartedThreads)); + if (p->numStartedThreads == 1) + { + // if only one thread is possible, we leave muti-threading code + finish = True; + needCode = False; + threadingErrorSRes = res2; + } + else + p->numStartedThreads_Limit = p->numStartedThreads; + } + } + + if (!finish) + { + unsigned nextIndex = t->index + 1; + nextThread = &p->threads[nextIndex >= p->numStartedThreads ? 0 : nextIndex]; + RINOK_THREAD(Event_Set(&nextThread->canRead)) + // We have started executing for new iteration (with next thread) + // And that next thread now is responsible for possible exit from decoding (threading_code) + } + } + + // each call of Event_Set(&nextThread->canRead) must be followed by call of Event_Set(&nextThread->canWrite) + // if ( !finish ) we must call Event_Set(&nextThread->canWrite) in any case + // if ( finish ) we switch to single-thread mode and there are 2 ways at the end of current iteration (current block): + // - if (needContinue) after Write(&needContinue), we restore decoding with new iteration + // - otherwise we stop decoding and exit from MtDec_ThreadFunc2() + + // Don't change (finish) variable in the further code + + + // ---------- CODE ---------- + + inPrev = 0; + outPrev = 0; + inCodePos = 0; + outCodePos = 0; + + if (res == SZ_OK && needCode && codeRes == SZ_OK) + { + BoolInt isStartBlock = True; + CMtDecBufLink *link = (CMtDecBufLink *)t->inBuf; + + for (;;) + { + size_t inSize; + int stop; + + if (isStartBlock) + inSize = inDataSize_Start; + else + { + UInt64 rem = inDataSize - inCodePos; + inSize = p->inBufSize; + if (inSize > rem) + inSize = (size_t)rem; + } + + inCodePos += inSize; + stop = True; + + codeRes = p->mtCallback->Code(p->mtCallbackObject, t->index, + (const Byte *)MTDEC__DATA_PTR_FROM_LINK(link), inSize, + (inCodePos == inDataSize), // srcFinished + &inCodePos, &outCodePos, &stop); + + if (codeRes != SZ_OK) + { + PRF(printf("\nCode Interrupt error = %x\n", codeRes)); + // we interrupt only later blocks + MtDec_Interrupt(p, blockIndex); + break; + } + + if (stop || inCodePos == inDataSize) + break; + + { + const UInt64 inDelta = inCodePos - inPrev; + const UInt64 outDelta = outCodePos - outPrev; + if (inDelta >= MTDEC_ProgessStep || outDelta >= MTDEC_ProgessStep) + { + // Sleep(1); + res = MtDec_Progress_GetError_Spec(p, inDelta, outDelta, blockIndex, &wasInterrupted); + if (res != SZ_OK || wasInterrupted) + break; + inPrev = inCodePos; + outPrev = outCodePos; + } + } + + link = link->next; + isStartBlock = False; + } + } + + + // ---------- WRITE ---------- + + RINOK_THREAD(Event_Wait(&t->canWrite)) + + { + BoolInt isErrorMode = False; + BoolInt canRecode = True; + BoolInt needWriteToStream = needWrite; + + if (p->exitThread) return 0; // it's never executed in normal cases + + if (p->wasInterrupted) + wasInterrupted = True; + else + { + if (codeRes != SZ_OK) // || !needCode // check it !!! + { + p->wasInterrupted = True; + p->codeRes = codeRes; + if (codeRes == SZ_ERROR_MEM) + isAllocError = True; + } + + if (threadingErrorSRes) + { + p->wasInterrupted = True; + p->threadingErrorSRes = threadingErrorSRes; + needWriteToStream = False; + } + if (isAllocError) + { + p->wasInterrupted = True; + p->isAllocError = True; + needWriteToStream = False; + } + if (overflow) + { + p->wasInterrupted = True; + p->overflow = True; + needWriteToStream = False; + } + } + + if (needCode) + { + if (wasInterrupted) + { + inCodePos = 0; + outCodePos = 0; + } + { + const UInt64 inDelta = inCodePos - inPrev; + const UInt64 outDelta = outCodePos - outPrev; + // if (inDelta != 0 || outDelta != 0) + res = MtProgress_ProgressAdd(&p->mtProgress, inDelta, outDelta); + } + } + + needContinue = (!finish); + + // if (res == SZ_OK && needWrite && !wasInterrupted) + if (needWrite) + { + // p->inProcessed += inCodePos; + + PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size)); + + res = p->mtCallback->Write(p->mtCallbackObject, t->index, + res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite + afterEndData, afterEndData_Size, afterEndData_IsCross, + &needContinue, + &canRecode); + + // res = SZ_ERROR_FAIL; // for test + + PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue)); + PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed)); + + if (res != SZ_OK) + { + PRF(printf("\nWrite error = %d\n", res)); + isErrorMode = True; + p->wasInterrupted = True; + } + if (res != SZ_OK + || (!needContinue && !finish)) + { + PRF(printf("\nWrite Interrupt error = %x\n", res)); + MtDec_Interrupt(p, blockIndex); + } + } + + if (canRecode) + if (!needCode + || res != SZ_OK + || p->wasInterrupted + || codeRes != SZ_OK + || wasInterrupted + || p->numFilledThreads != 0 + || isErrorMode) + { + if (p->numFilledThreads == 0) + p->filledThreadStart = t->index; + if (inDataSize != 0 || !finish) + { + t->inDataSize_Start = inDataSize_Start; + t->inDataSize = inDataSize; + p->numFilledThreads++; + } + PRF(printf("\np->numFilledThreads = %d\n", p->numFilledThreads)); + PRF(printf("p->filledThreadStart = %d\n", p->filledThreadStart)); + } + + if (!finish) + { + RINOK_THREAD(Event_Set(&nextThread->canWrite)) + } + else + { + if (needContinue) + { + // we restore decoding with new iteration + RINOK_THREAD(Event_Set(&p->threads[0].canWrite)) + } + else + { + // we exit from decoding + if (t->index == 0) + return SZ_OK; + p->exitThread = True; + } + RINOK_THREAD(Event_Set(&p->threads[0].canRead)) + } + } + } +} + +#ifdef _WIN32 +#define USE_ALLOCA +#endif + +#ifdef USE_ALLOCA +#ifdef _WIN32 +#include +#else +#include +#endif +#endif + + +typedef + #ifdef _WIN32 + UINT_PTR + #elif 1 + uintptr_t + #else + ptrdiff_t + #endif + MY_uintptr_t; + +static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp) +{ + WRes res; + + CMtDecThread *t = (CMtDecThread *)pp; + CMtDec *p; + + // fprintf(stdout, "\n%d = %p\n", t->index, &t); + + res = MtDec_ThreadFunc2(t); + p = t->mtDec; + if (res == 0) + return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)p->exitThreadWRes; + { + // it's unexpected situation for some threading function error + if (p->exitThreadWRes == 0) + p->exitThreadWRes = res; + PRF(printf("\nthread exit error = %d\n", res)); + p->exitThread = True; + Event_Set(&p->threads[0].canRead); + Event_Set(&p->threads[0].canWrite); + MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res)); + } + return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)res; +} + +static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp) +{ + #ifdef USE_ALLOCA + CMtDecThread *t = (CMtDecThread *)pp; + // fprintf(stderr, "\n%d = %p - before", t->index, &t); + t->allocaPtr = alloca(t->index * 128); + #endif + return MtDec_ThreadFunc1(pp); +} + + +int MtDec_PrepareRead(CMtDec *p) +{ + if (p->crossBlock && p->crossStart == p->crossEnd) + { + ISzAlloc_Free(p->alloc, p->crossBlock); + p->crossBlock = NULL; + } + + { + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + if (i > p->numStartedThreads + || p->numFilledThreads <= + (i >= p->filledThreadStart ? + i - p->filledThreadStart : + i + p->numStartedThreads - p->filledThreadStart)) + MtDecThread_FreeInBufs(&p->threads[i]); + } + + return (p->numFilledThreads != 0) || (p->crossStart != p->crossEnd); +} + + +const Byte *MtDec_Read(CMtDec *p, size_t *inLim) +{ + while (p->numFilledThreads != 0) + { + CMtDecThread *t = &p->threads[p->filledThreadStart]; + + if (*inLim != 0) + { + { + void *link = t->inBuf; + void *next = ((CMtDecBufLink *)link)->next; + ISzAlloc_Free(p->alloc, link); + t->inBuf = next; + } + + if (t->inDataSize == 0) + { + MtDecThread_FreeInBufs(t); + if (--p->numFilledThreads == 0) + break; + if (++p->filledThreadStart == p->numStartedThreads) + p->filledThreadStart = 0; + t = &p->threads[p->filledThreadStart]; + } + } + + { + size_t lim = t->inDataSize_Start; + if (lim != 0) + t->inDataSize_Start = 0; + else + { + UInt64 rem = t->inDataSize; + lim = p->inBufSize; + if (lim > rem) + lim = (size_t)rem; + } + t->inDataSize -= lim; + *inLim = lim; + return (const Byte *)MTDEC__DATA_PTR_FROM_LINK(t->inBuf); + } + } + + { + size_t crossSize = p->crossEnd - p->crossStart; + if (crossSize != 0) + { + const Byte *data = MTDEC__DATA_PTR_FROM_LINK(p->crossBlock) + p->crossStart; + *inLim = crossSize; + p->crossStart = 0; + p->crossEnd = 0; + return data; + } + *inLim = 0; + if (p->crossBlock) + { + ISzAlloc_Free(p->alloc, p->crossBlock); + p->crossBlock = NULL; + } + return NULL; + } +} + + +void MtDec_Construct(CMtDec *p) +{ + unsigned i; + + p->inBufSize = (size_t)1 << 18; + + p->numThreadsMax = 0; + + p->inStream = NULL; + + // p->inData = NULL; + // p->inDataSize = 0; + + p->crossBlock = NULL; + p->crossStart = 0; + p->crossEnd = 0; + + p->numFilledThreads = 0; + + p->progress = NULL; + p->alloc = NULL; + + p->mtCallback = NULL; + p->mtCallbackObject = NULL; + + p->allocatedBufsSize = 0; + + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CMtDecThread *t = &p->threads[i]; + t->mtDec = p; + t->index = i; + t->inBuf = NULL; + Event_Construct(&t->canRead); + Event_Construct(&t->canWrite); + Thread_CONSTRUCT(&t->thread) + } + + // Event_Construct(&p->finishedEvent); + + CriticalSection_Init(&p->mtProgress.cs); +} + + +static void MtDec_Free(CMtDec *p) +{ + unsigned i; + + p->exitThread = True; + + for (i = 0; i < MTDEC_THREADS_MAX; i++) + MtDecThread_Destruct(&p->threads[i]); + + // Event_Close(&p->finishedEvent); + + if (p->crossBlock) + { + ISzAlloc_Free(p->alloc, p->crossBlock); + p->crossBlock = NULL; + } +} + + +void MtDec_Destruct(CMtDec *p) +{ + MtDec_Free(p); + + CriticalSection_Delete(&p->mtProgress.cs); +} + + +SRes MtDec_Code(CMtDec *p) +{ + unsigned i; + + p->inProcessed = 0; + + p->blockIndex = 1; // it must be larger than not_defined index (0) + p->isAllocError = False; + p->overflow = False; + p->threadingErrorSRes = SZ_OK; + + p->needContinue = True; + + p->readWasFinished = False; + p->needInterrupt = False; + p->interruptIndex = (UInt64)(Int64)-1; + + p->readProcessed = 0; + p->readRes = SZ_OK; + p->codeRes = SZ_OK; + p->wasInterrupted = False; + + p->crossStart = 0; + p->crossEnd = 0; + + p->filledThreadStart = 0; + p->numFilledThreads = 0; + + { + unsigned numThreads = p->numThreadsMax; + if (numThreads > MTDEC_THREADS_MAX) + numThreads = MTDEC_THREADS_MAX; + p->numStartedThreads_Limit = numThreads; + p->numStartedThreads = 0; + } + + if (p->inBufSize != p->allocatedBufsSize) + { + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CMtDecThread *t = &p->threads[i]; + if (t->inBuf) + MtDecThread_FreeInBufs(t); + } + if (p->crossBlock) + { + ISzAlloc_Free(p->alloc, p->crossBlock); + p->crossBlock = NULL; + } + + p->allocatedBufsSize = p->inBufSize; + } + + MtProgress_Init(&p->mtProgress, p->progress); + + // RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent)) + p->exitThread = False; + p->exitThreadWRes = 0; + + { + WRes wres; + SRes sres; + CMtDecThread *nextThread = &p->threads[p->numStartedThreads++]; + // wres = MtDecThread_CreateAndStart(nextThread); + wres = MtDecThread_CreateEvents(nextThread); + if (wres == 0) { wres = Event_Set(&nextThread->canWrite); + if (wres == 0) { wres = Event_Set(&nextThread->canRead); + if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread); + wres = (WRes)(MY_uintptr_t)res; + if (wres != 0) + { + p->needContinue = False; + MtDec_CloseThreads(p); + }}}} + + // wres = 17; // for test + // wres = Event_Wait(&p->finishedEvent); + + sres = MY_SRes_HRESULT_FROM_WRes(wres); + + if (sres != 0) + p->threadingErrorSRes = sres; + + if ( + // wres == 0 + // wres != 0 + // || p->mtc.codeRes == SZ_ERROR_MEM + p->isAllocError + || p->threadingErrorSRes != SZ_OK + || p->overflow) + { + // p->needContinue = True; + } + else + p->needContinue = False; + + if (p->needContinue) + return SZ_OK; + + // if (sres != SZ_OK) + return sres; + // return SZ_ERROR_FAIL; + } +} + +#endif + +#undef PRF diff --git a/crnlib/lzma/MtDec.h b/crnlib/lzma/MtDec.h new file mode 100644 index 00000000..f214b3ab --- /dev/null +++ b/crnlib/lzma/MtDec.h @@ -0,0 +1,202 @@ +/* MtDec.h -- Multi-thread Decoder +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_MT_DEC_H +#define ZIP7_INC_MT_DEC_H + +#include "7zTypes.h" + +#ifndef Z7_ST +#include "Threads.h" +#endif + +EXTERN_C_BEGIN + +#ifndef Z7_ST + +#ifndef Z7_ST + #define MTDEC_THREADS_MAX 32 +#else + #define MTDEC_THREADS_MAX 1 +#endif + + +typedef struct +{ + ICompressProgressPtr progress; + SRes res; + UInt64 totalInSize; + UInt64 totalOutSize; + CCriticalSection cs; +} CMtProgress; + +void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress); +SRes MtProgress_Progress_ST(CMtProgress *p); +SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize); +SRes MtProgress_GetError(CMtProgress *p); +void MtProgress_SetError(CMtProgress *p, SRes res); + +struct CMtDec; + +typedef struct +{ + struct CMtDec_ *mtDec; + unsigned index; + void *inBuf; + + size_t inDataSize_Start; // size of input data in start block + UInt64 inDataSize; // total size of input data in all blocks + + CThread thread; + CAutoResetEvent canRead; + CAutoResetEvent canWrite; + void *allocaPtr; +} CMtDecThread; + +void MtDecThread_FreeInBufs(CMtDecThread *t); + + +typedef enum +{ + MTDEC_PARSE_CONTINUE, // continue this block with more input data + MTDEC_PARSE_OVERFLOW, // MT buffers overflow, need switch to single-thread + MTDEC_PARSE_NEW, // new block + MTDEC_PARSE_END // end of block threading. But we still can return to threading after Write(&needContinue) +} EMtDecParseState; + +typedef struct +{ + // in + int startCall; + const Byte *src; + size_t srcSize; + // in : (srcSize == 0) is allowed + // out : it's allowed to return less that actually was used ? + int srcFinished; + + // out + EMtDecParseState state; + BoolInt canCreateNewThread; + UInt64 outPos; // check it (size_t) +} CMtDecCallbackInfo; + + +typedef struct +{ + void (*Parse)(void *p, unsigned coderIndex, CMtDecCallbackInfo *ci); + + // PreCode() and Code(): + // (SRes_return_result != SZ_OK) means stop decoding, no need another blocks + SRes (*PreCode)(void *p, unsigned coderIndex); + SRes (*Code)(void *p, unsigned coderIndex, + const Byte *src, size_t srcSize, int srcFinished, + UInt64 *inCodePos, UInt64 *outCodePos, int *stop); + // stop - means stop another Code calls + + + /* Write() must be called, if Parse() was called + set (needWrite) if + { + && (was not interrupted by progress) + && (was not interrupted in previous block) + } + + out: + if (*needContinue), decoder still need to continue decoding with new iteration, + even after MTDEC_PARSE_END + if (*canRecode), we didn't flush current block data, so we still can decode current block later. + */ + SRes (*Write)(void *p, unsigned coderIndex, + BoolInt needWriteToStream, + const Byte *src, size_t srcSize, BoolInt isCross, + // int srcFinished, + BoolInt *needContinue, + BoolInt *canRecode); + +} IMtDecCallback2; + + + +typedef struct CMtDec_ +{ + /* input variables */ + + size_t inBufSize; /* size of input block */ + unsigned numThreadsMax; + // size_t inBlockMax; + unsigned numThreadsMax_2; + + ISeqInStreamPtr inStream; + // const Byte *inData; + // size_t inDataSize; + + ICompressProgressPtr progress; + ISzAllocPtr alloc; + + IMtDecCallback2 *mtCallback; + void *mtCallbackObject; + + + /* internal variables */ + + size_t allocatedBufsSize; + + BoolInt exitThread; + WRes exitThreadWRes; + + UInt64 blockIndex; + BoolInt isAllocError; + BoolInt overflow; + SRes threadingErrorSRes; + + BoolInt needContinue; + + // CAutoResetEvent finishedEvent; + + SRes readRes; + SRes codeRes; + + BoolInt wasInterrupted; + + unsigned numStartedThreads_Limit; + unsigned numStartedThreads; + + Byte *crossBlock; + size_t crossStart; + size_t crossEnd; + UInt64 readProcessed; + BoolInt readWasFinished; + UInt64 inProcessed; + + unsigned filledThreadStart; + unsigned numFilledThreads; + + #ifndef Z7_ST + BoolInt needInterrupt; + UInt64 interruptIndex; + CMtProgress mtProgress; + CMtDecThread threads[MTDEC_THREADS_MAX]; + #endif +} CMtDec; + + +void MtDec_Construct(CMtDec *p); +void MtDec_Destruct(CMtDec *p); + +/* +MtDec_Code() returns: + SZ_OK - in most cases + MY_SRes_HRESULT_FROM_WRes(WRes_error) - in case of unexpected error in threading function +*/ + +SRes MtDec_Code(CMtDec *p); +Byte *MtDec_GetCrossBuff(CMtDec *p); + +int MtDec_PrepareRead(CMtDec *p); +const Byte *MtDec_Read(CMtDec *p, size_t *inLim); + +#endif + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Ppmd.h b/crnlib/lzma/Ppmd.h new file mode 100644 index 00000000..da1ed375 --- /dev/null +++ b/crnlib/lzma/Ppmd.h @@ -0,0 +1,169 @@ +/* Ppmd.h -- PPMD codec common code +2023-03-05 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#ifndef ZIP7_INC_PPMD_H +#define ZIP7_INC_PPMD_H + +#include "CpuArch.h" + +EXTERN_C_BEGIN + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +/* + PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block. + if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields. + if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields. + if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed, + if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional, + and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit. + PPMD code works slightly faster in (PPMD_32BIT) mode. +*/ + #define PPMD_32BIT +#endif + +#define PPMD_INT_BITS 7 +#define PPMD_PERIOD_BITS 7 +#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS)) + +#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift)) +#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2) +#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob)) +#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob)) + +#define PPMD_N1 4 +#define PPMD_N2 4 +#define PPMD_N3 4 +#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) +#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) + +MY_CPU_pragma_pack_push_1 +/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */ + +/* SEE-contexts for PPM-contexts with masked symbols */ +typedef struct +{ + UInt16 Summ; /* Freq */ + Byte Shift; /* Speed of Freq change; low Shift is for fast change */ + Byte Count; /* Count to next change of Shift */ +} CPpmd_See; + +#define Ppmd_See_UPDATE(p) \ + { if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ + { (p)->Summ = (UInt16)((p)->Summ << 1); \ + (p)->Count = (Byte)(3 << (p)->Shift++); }} + + +typedef struct +{ + Byte Symbol; + Byte Freq; + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State; + +typedef struct CPpmd_State2_ +{ + Byte Symbol; + Byte Freq; +} CPpmd_State2; + +typedef struct CPpmd_State4_ +{ + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State4; + +MY_CPU_pragma_pop + +/* + PPMD code can write full CPpmd_State structure data to CPpmd*_Context + at (byte offset = 2) instead of some fields of original CPpmd*_Context structure. + + If we use pointers to different types, but that point to shared + memory space, we can have aliasing problem (strict aliasing). + + XLC compiler in -O2 mode can change the order of memory write instructions + in relation to read instructions, if we have use pointers to different types. + + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. +*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. +*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif + + +#define PPMD_SetAllBitsIn256Bytes(p) \ + { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ + p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }} + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Ppmd7.c b/crnlib/lzma/Ppmd7.c new file mode 100644 index 00000000..29ff621c --- /dev/null +++ b/crnlib/lzma/Ppmd7.c @@ -0,0 +1,1131 @@ +/* Ppmd7.c -- PPMdH codec +2023-09-07 : Igor Pavlov : Public domain +This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ + +#include "Precomp.h" + +#include + +#include "Ppmd7.h" + +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. code */ +// #define PPMD7_ORDER_0_SUPPPORT + +MY_ALIGN(16) +static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) +static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; + +#define MAX_FREQ 124 +#define UNIT_SIZE 12 + +#define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) +#define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) +#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx]) + +#define REF(ptr) Ppmd_GetRef(p, ptr) + +#define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +#define STATS(ctx) Ppmd7_GetStats(p, ctx) +#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx) +#define SUFFIX(ctx) CTX((ctx)->Suffix) + +typedef CPpmd7_Context * PPMD7_CTX_PTR; + +struct CPpmd7_Node_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref; + +typedef struct CPpmd7_Node_ +{ + UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */ + UInt16 NU; + CPpmd7_Node_Ref Next; /* must be at offset >= 4 */ + CPpmd7_Node_Ref Prev; +} CPpmd7_Node; + +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node) + +void Ppmd7_Construct(CPpmd7 *p) +{ + unsigned i, k, m; + + p->Base = NULL; + + for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) + { + unsigned step = (i >= 12 ? 4 : (i >> 2) + 1); + do { p->Units2Indx[k++] = (Byte)i; } while (--step); + p->Indx2Units[i] = (Byte)k; + } + + p->NS2BSIndx[0] = (0 << 1); + p->NS2BSIndx[1] = (1 << 1); + memset(p->NS2BSIndx + 2, (2 << 1), 9); + memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11); + + for (i = 0; i < 3; i++) + p->NS2Indx[i] = (Byte)i; + + for (m = i, k = 1; i < 256; i++) + { + p->NS2Indx[i] = (Byte)m; + if (--k == 0) + k = (++m) - 2; + } + + memcpy(p->ExpEscape, PPMD7_kExpEscape, 16); +} + + +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->Base); + p->Size = 0; + p->Base = NULL; +} + + +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) +{ + if (!p->Base || p->Size != size) + { + Ppmd7_Free(p, alloc); + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) + return False; + p->Size = size; + } + return True; +} + + + +// ---------- Internal Memory Allocator ---------- + +/* We can use CPpmd7_Node in list of free units (as in Ppmd8) + But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks(). + So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode() +*/ + +#define EMPTY_NODE 0 + + +static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx) +{ + *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; + // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; + + p->FreeList[indx] = REF(node); + +} + + +static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx) +{ + CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); + p->FreeList[indx] = *node; + // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]); + // p->FreeList[indx] = node->Next; + return node; +} + + +static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) +{ + unsigned i, nu = I2U(oldIndx) - I2U(newIndx); + ptr = (Byte *)ptr + U2B(I2U(newIndx)); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); + } + Ppmd7_InsertNode(p, ptr, i); +} + + +/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ + +typedef union +{ + CPpmd7_Node Node; + CPpmd7_Node_Ref NextRef; +} CPpmd7_Node_Union; + +/* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks() + we use single linked list similar to Ppmd8 code */ + + +static void Ppmd7_GlueFreeBlocks(CPpmd7 *p) +{ + /* + we use first UInt16 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0 + CPpmd7_Context { UInt16 NumStats; : NumStats != 0 + CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record + : Stamp == 1 for head record and guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record. + */ + CPpmd7_Node_Ref head, n = 0; + + p->GlueCount = 255; + + + /* we set guard NODE at LoUnit */ + if (p->LoUnit != p->HiUnit) + ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1; + + { + /* Create list of free blocks. + We still need one additional list walk pass before Glue. */ + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) + { + const UInt16 nu = I2U_UInt16(i); + CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + /* Don't change the order of the following commands: */ + CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next); + const CPpmd7_Node_Ref tmp = next; + next = un->NextRef; + un->Node.Stamp = EMPTY_NODE; + un->Node.NU = nu; + un->Node.Next = n; + n = tmp; + } + } + } + + head = n; + /* Glue and Fill must walk the list in same direction */ + { + /* Glue free blocks */ + CPpmd7_Node_Ref *prev = &head; + while (n) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + n = node->Next; + if (nu == 0) + { + *prev = n; + continue; + } + prev = &node->Next; + for (;;) + { + CPpmd7_Node *node2 = node + nu; + nu += node2->NU; + if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) + break; + node->NU = (UInt16)nu; + node2->NU = 0; + } + } + } + + /* Fill lists of free blocks */ + for (n = head; n != 0;) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) + Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1); + if (I2U(i = U2I(nu)) != nu) + { + unsigned k = I2U(--i); + Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1); + } + Ppmd7_InsertNode(p, node, i); + } +} + + +Z7_NO_INLINE +static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx) +{ + unsigned i; + + if (p->GlueCount == 0) + { + Ppmd7_GlueFreeBlocks(p); + if (p->FreeList[indx] != 0) + return Ppmd7_RemoveNode(p, indx); + } + + i = indx; + + do + { + if (++i == PPMD_NUM_INDEXES) + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; + p->GlueCount--; + return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL; + } + } + while (p->FreeList[i] == 0); + + { + void *block = Ppmd7_RemoveNode(p, i); + Ppmd7_SplitBlock(p, block, i, indx); + return block; + } +} + + +static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx) +{ + if (p->FreeList[indx] != 0) + return Ppmd7_RemoveNode(p, indx); + { + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } + } + return Ppmd7_AllocUnitsRare(p, indx); +} + + +#define MEM_12_CPY(dest, src, num) \ + { UInt32 *d = (UInt32 *)(dest); \ + const UInt32 *z = (const UInt32 *)(src); \ + unsigned n = (num); \ + do { \ + d[0] = z[0]; \ + d[1] = z[1]; \ + d[2] = z[2]; \ + z += 3; \ + d += 3; \ + } while (--n); \ + } + + +/* +static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU) +{ + unsigned i0 = U2I(oldNU); + unsigned i1 = U2I(newNU); + if (i0 == i1) + return oldPtr; + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd7_RemoveNode(p, i1); + MEM_12_CPY(ptr, oldPtr, newNU) + Ppmd7_InsertNode(p, oldPtr, i0); + return ptr; + } + Ppmd7_SplitBlock(p, oldPtr, i0, i1); + return oldPtr; +} +*/ + + +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) +{ + Ppmd_SET_SUCCESSOR(p, v) +} + + + +Z7_NO_INLINE +static +void Ppmd7_RestartModel(CPpmd7 *p) +{ + unsigned i, k; + + memset(p->FreeList, 0, sizeof(p->FreeList)); + + p->Text = p->Base + p->AlignOffset; + p->HiUnit = p->Text + p->Size; + p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; + p->GlueCount = 0; + + p->OrderFall = p->MaxOrder; + p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1; + p->PrevSuccess = 0; + + { + CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + + mc->NumStats = 256; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + #ifdef PPMD7_ORDER_0_SUPPPORT + if (p->MaxOrder == 0) + { + CPpmd_Void_Ref r = REF(mc); + s = p->FoundState; + for (i = 0; i < 256; i++, s++) + SetSuccessor(s, r); + return; + } + #endif + } + + for (i = 0; i < 128; i++) + + + + for (k = 0; k < 8; k++) + { + unsigned m; + UInt16 *dest = p->BinSumm[i] + k; + const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2)); + for (m = 0; m < 64; m += 8) + dest[m] = val; + } + + + for (i = 0; i < 25; i++) + { + + CPpmd_See *s = p->See[i]; + + + + unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 16; k++, s++) + { + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); + s->Count = 4; + } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ +} + + +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) +{ + p->MaxOrder = maxOrder; + + Ppmd7_RestartModel(p); +} + + + +/* + Ppmd7_CreateSuccessors() + It's called when (FoundState->Successor) is RAW-Successor, + that is the link to position in Raw text. + So we create Context records and write the links to + FoundState->Successor and to identical RAW-Successors in suffix + contexts of MinContex. + + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +Z7_NO_INLINE +static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p) +{ + PPMD7_CTX_PTR c = p->MinContext; + CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); + Byte newSym, newFreq; + unsigned numPs = 0; + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) + ps[numPs++] = p->FoundState; + + while (c->Suffix) + { + CPpmd_Void_Ref successor; + CPpmd_State *s; + c = SUFFIX(c); + + + if (c->NumStats != 1) + { + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + + } + else + { + s = ONE_STATE(c); + + } + successor = SUCCESSOR(s); + if (successor != upBranch) + { + // (c) is real record Context here, + c = CTX(successor); + if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. + return c; + } + break; + } + ps[numPs++] = s; + } + + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + + + if (c->NumStats == 1) + newFreq = ONE_STATE(c)->Freq; + else + { + UInt32 cf, s0; + CPpmd_State *s; + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. + s0 - is commulative frequency sum of another symbols from parent context. + max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); + } + + // Create new single-symbol contexts from low order to high order in loop + + do + { + PPMD7_CTX_PTR c1; + /* = AllocContext(p); */ + if (p->HiUnit != p->LoUnit) + c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); + else if (p->FreeList[0] != 0) + c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0); + else + { + c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0); + if (!c1) + return NULL; + } + + c1->NumStats = 1; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); + c1->Suffix = REF(c); + SetSuccessor(ps[--numPs], REF(c1)); + c = c1; + } + while (numPs != 0); + + return c; +} + + + +#define SWAP_STATES(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +Z7_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) +{ + CPpmd_Void_Ref maxSuccessor, minSuccessor; + PPMD7_CTX_PTR c, mc; + unsigned s0, ns; + + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) + { + /* Update Freqs in Suffix Context */ + + c = SUFFIX(p->MinContext); + + if (c->NumStats == 1) + { + CPpmd_State *s = ONE_STATE(c); + if (s->Freq < 32) + s->Freq++; + } + else + { + CPpmd_State *s = STATS(c); + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) + { + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + + if (s[0].Freq >= s[-1].Freq) + { + SWAP_STATES(s) + s--; + } + } + + if (s->Freq < MAX_FREQ - 9) + { + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); + } + } + } + + + if (p->OrderFall == 0) + { + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. */ + p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p); + if (!p->MinContext) + { + Ppmd7_RestartModel(p); + return; + } + SetSuccessor(p->FoundState, REF(p->MinContext)); + return; + } + + + /* NON-MAX ORDER context */ + + { + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + Ppmd7_RestartModel(p); + return; + } + maxSuccessor = REF(text); + } + + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) + { + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) + { + // minSuccessor is RAW-Successor. So we will create real contexts records: + PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p); + if (!cs) + { + Ppmd7_RestartModel(p); + return; + } + minSuccessor = REF(cs); + } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + + if (--p->OrderFall == 0) + { + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. + */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. + } + */ + p->Text -= (p->MaxContext != p->MinContext); + } + } + else + { + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. + */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); + } + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do + { + unsigned ns1; + UInt32 sum; + + if ((ns1 = c->NumStats) != 1) + { + if ((ns1 & 1) == 0) + { + /* Expand for one UNIT */ + const unsigned oldNU = ns1 >> 1; + const unsigned i = U2I(oldNU); + if (i != U2I((size_t)oldNU + 1)) + { + void *ptr = Ppmd7_AllocUnits(p, i + 1); + void *oldPtr; + if (!ptr) + { + Ppmd7_RestartModel(p); + return; + } + oldPtr = STATS(c); + MEM_12_CPY(ptr, oldPtr, oldNU) + Ppmd7_InsertNode(p, oldPtr, i); + c->Union4.Stats = STATS_REF(ptr); + } + } + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1))); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; + } + else + { + // instead of One-symbol context we create 2-symbol context + CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0); + if (!s) + { + Ppmd7_RestartModel(p); + return; + } + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here + sum = (UInt32)(freq + p->InitEsc + (ns > 3)); + } + } + + { + CPpmd_State *s = STATS(c) + ns1; + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; + s->Symbol = p->FoundState->Symbol; + c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; + } + c = SUFFIX(c); + } + while (c != mc); +} + + + +Z7_NO_INLINE +static void Ppmd7_Rescale(CPpmd7 *p) +{ + unsigned i, adder, sumFreq, escFreq; + CPpmd_State *stats = STATS(p->MinContext); + CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) + { + CPpmd_State tmp = *s; + do + s[0] = s[-1]; + while (--s != stats); + *s = tmp; + } + + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; + + do + { + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + CPpmd_State tmp = *s; + CPpmd_State *s1 = s; + do + { + s1[0] = s1[-1]; + } + while (--s1 != stats && freq > s1[-1].Freq); + *s1 = tmp; + } + } + while (--i); + + if (s->Freq == 0) + { + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. + So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ + escFreq += i; + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) + { + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + + do + { + escFreq >>= 1; + freq = (freq + 1) >> 1; + } + while (escFreq > 1); + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + Ppmd7_InsertNode(p, stats, U2I(n0)); + return; + } + + n1 = (numStatsNew + 1) >> 1; + if (n0 != n1) + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = Ppmd7_RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MEM_12_CPY(ptr, (const void *)stats, n1) + Ppmd7_InsertNode(p, stats, i0); + } + else + Ppmd7_SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); + } +} + + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) +{ + CPpmd_See *see; + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) + { + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + + p->HiBitsFlag; + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + const unsigned summ = (UInt16)see->Summ; // & 0xFFFF + const unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + *escFreq = (UInt32)(r + (r == 0)); + } + } + else + { + see = &p->DummySee; + *escFreq = 1; + } + return see; +} + + +static void Ppmd7_NextContext(CPpmd7 *p) +{ + PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); +} + + +void Ppmd7_Update1(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) + { + SWAP_STATES(s) + p->FoundState = --s; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + } + Ppmd7_NextContext(p); +} + + +void Ppmd7_Update1_0(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + const unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (Int32)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + Ppmd7_NextContext(p); +} + + +/* +void Ppmd7_UpdateBin(CPpmd7 *p) +{ + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); + p->PrevSuccess = 1; + p->RunLength++; + Ppmd7_NextContext(p); +} +*/ + +void Ppmd7_Update2(CPpmd7 *p) +{ + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; + p->RunLength = p->InitRL; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Ppmd7_Rescale(p); + Ppmd7_UpdateModel(p); +} + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ HiUnit ] CPpmd7_Context records + [ Size ] end of array +} + +These addresses don't cross at any time. +And the following condtions is true for addresses: + (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size) + +Raw text is BYTE--aligned. +the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs. + +Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. +The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. +The code doesn't free UNITs allocated for CPpmd7_Context records. + +The code calls Ppmd7_RestartModel(), when there is no free memory for allocation. +And Ppmd7_RestartModel() changes the state to orignal start state, with full free block. + + +The code allocates UNITs with the following order: + +Allocation of 1 UNIT for Context record + - from free space (HiUnit) down to (LoUnit) + - from FreeList[0] + - Ppmd7_AllocUnitsRare() + +Ppmd7_AllocUnits() for CPpmd_State vectors: + - from FreeList[i] + - from free space (LoUnit) up to (HiUnit) + - Ppmd7_AllocUnitsRare() + +Ppmd7_AllocUnitsRare() + - if (GlueCount == 0) + { Glue lists, GlueCount = 255, allocate from FreeList[i]] } + - loop for all higher sized FreeList[...] lists + - from (UnitsStart - Text), GlueCount-- + - ERROR + + +Each Record with Context contains the CPpmd_State vector, where each +CPpmd_State contains the link to Successor. +There are 3 types of Successor: + 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored + only in 0-order Root Context Record. + We use 0 value as NULL-Successor + 2) RAW-Successor - the link to position in raw text, + that "RAW-Successor" is being created after first + occurrence of new symbol for some existing context record. + (RAW-Successor > 0). + 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1), + that record is being created when we go via RAW-Successor again. + +For any successors at any time: the following condtions are true for Successor links: +(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor) + + +---------- Symbol Frequency, SummFreq and Range in Range_Coder ---------- + +CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq + +The PPMd code tries to fulfill the condition: + (SummFreq <= (256 * 128 = RC::kBot)) + +We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) +So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. +If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. +SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions. +Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for +max-order context. + +When the PPMd code still break (Total <= RC::Range) condition in range coder, +we have two ways to resolve that problem: + 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. + 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. +*/ + +#undef MAX_FREQ +#undef UNIT_SIZE +#undef U2B +#undef U2I +#undef I2U +#undef I2U_UInt16 +#undef REF +#undef STATS_REF +#undef CTX +#undef STATS +#undef ONE_STATE +#undef SUFFIX +#undef NODE +#undef EMPTY_NODE +#undef MEM_12_CPY +#undef SUCCESSOR +#undef SWAP_STATES diff --git a/crnlib/lzma/Ppmd7.h b/crnlib/lzma/Ppmd7.h new file mode 100644 index 00000000..65c22ae9 --- /dev/null +++ b/crnlib/lzma/Ppmd7.h @@ -0,0 +1,181 @@ +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2023-04-02 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#ifndef ZIP7_INC_PPMD7_H +#define ZIP7_INC_PPMD7_H + +#include "Ppmd.h" + +EXTERN_C_BEGIN + +#define PPMD7_MIN_ORDER 2 +#define PPMD7_MAX_ORDER 64 + +#define PPMD7_MIN_MEM_SIZE (1 << 11) +#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3) + +struct CPpmd7_Context_; + +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 + +typedef struct CPpmd7_Context_ +{ + UInt16 NumStats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + + CPpmd7_Context_Ref Suffix; +} CPpmd7_Context; + +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteInPtr Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte _dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOutPtr Stream; +} CPpmd7z_RangeEnc; + + +typedef struct +{ + CPpmd7_Context *MinContext, *MaxContext; + CPpmd_State *FoundState; + unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag; + Int32 RunLength, InitRL; /* must be 32-bit at least */ + + UInt32 Size; + UInt32 GlueCount; + UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; + + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment + Byte Units2Indx[128]; + CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; + CPpmd_See DummySee, See[25][16]; + UInt16 BinSumm[128][64]; + // int LastSymbol; +} CPpmd7; + + +void Ppmd7_Construct(CPpmd7 *p); +BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); +void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); +void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); +#define Ppmd7_WasAllocated(p) ((p)->Base != NULL) + + +/* ---------- Internal Functions ---------- */ + +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) + +void Ppmd7_Update1(CPpmd7 *p); +void Ppmd7_Update1_0(CPpmd7 *p); +void Ppmd7_Update2(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4)) + +#define Ppmd7_GetBinSumm(p) \ + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] + +CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); + + +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + +/* ---------- Decode ---------- */ + +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) + +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() + +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ + +/* Ppmd7a_* : original PPMdH */ +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); + +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); + + +/* ---------- Encode ---------- */ + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Ppmd7Dec.c b/crnlib/lzma/Ppmd7Dec.c new file mode 100644 index 00000000..e601feb1 --- /dev/null +++ b/crnlib/lzma/Ppmd7Dec.c @@ -0,0 +1,312 @@ +/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2023-09-07 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTopValue ((UInt32)1 << 24) + + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) +{ + unsigned i; + p->Code = 0; + p->Range = 0xFFFFFFFF; + if (READ_BYTE(p) != 0) + return False; + for (i = 0; i < 4; i++) + p->Code = (p->Code << 8) | READ_BYTE(p); + return (p->Code < 0xFFFFFFFF); +} + +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; + +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + + + R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) +} + +#define RC_Decode(start, size) Ppmd7z_RD_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) + + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +// typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((Byte *)charMask)[sym] +// Z7_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + unsigned i; + UInt32 count, hiCnt; + const UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) + { + Byte sym; + RC_DecodeFinal(0, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1_0(p); + return sym; + } + + p->PrevSuccess = 0; + i = (unsigned)p->MinContext->NumStats - 1; + + do + { + if ((Int32)(count -= (++s)->Freq) < 0) + { + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update1(p); + return sym; + } + } + while (--i); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt16 *prob = Ppmd7_GetBinSumm(p); + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) + { + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CPpmd7_Context *c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= size0; + RC_NORM_LOCAL(R) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_State *s, *s2; + UInt32 freqSum, count, hiCnt; + + CPpmd_See *see; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); + } + while (mc->NumStats == numMasked); + + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0))); + hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1))); + } + while (--num2); + } + + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); + freqSum += hiCnt; + + + + + count = RC_GetThreshold(freqSum); + + if (count < hiCnt) + { + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (UInt32)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + } + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq) + + // new (see->Summ) value can overflow over 16-bits in some rare cases + Ppmd_See_UPDATE(see) + p->FoundState = s; + sym = s->Symbol; + Ppmd7_Update2(p); + return sym; + } + + if (count >= freqSum) + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt) + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases + see->Summ = (UInt16)(see->Summ + freqSum); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); + } +} + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; + } + while (++buf < lim); + p->LastSymbol = sym; + return buf; +} +*/ + +#undef kTopValue +#undef READ_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Decode +#undef RC_DecodeFinal +#undef RC_GetThreshold +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/crnlib/lzma/Ppmd7Enc.c b/crnlib/lzma/Ppmd7Enc.c new file mode 100644 index 00000000..fc3a15d2 --- /dev/null +++ b/crnlib/lzma/Ppmd7Enc.c @@ -0,0 +1,337 @@ +/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder +2023-09-07 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + + +#include "Precomp.h" + +#include "Ppmd7.h" + +#define kTopValue ((UInt32)1 << 24) + +#define R (&p->rc.enc) + +void Ppmd7z_Init_RangeEnc(CPpmd7 *p) +{ + R->Low = 0; + R->Range = 0xFFFFFFFF; + R->Cache = 0; + R->CacheSize = 1; +} + +Z7_NO_INLINE +static void Ppmd7z_RangeEnc_ShiftLow(CPpmd7 *p) +{ + if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0) + { + Byte temp = R->Cache; + do + { + IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32))); + temp = 0xFF; + } + while (--R->CacheSize != 0); + R->Cache = (Byte)((UInt32)R->Low >> 24); + } + R->CacheSize++; + R->Low = (UInt32)((UInt32)R->Low << 8); +} + +#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; Ppmd7z_RangeEnc_ShiftLow(p); +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +/* +#define Ppmd7z_RangeEnc_Encode(p, start, _size_) \ + { UInt32 size = _size_; \ + R->Low += start * R->Range; \ + R->Range *= size; \ + RC_NORM_LOCAL(p); } +*/ + +Z7_FORCE_INLINE +// Z7_NO_INLINE +static void Ppmd7z_RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size) +{ + R->Low += (UInt64)start * (UInt64)R->Range; + R->Range *= size; + RC_NORM_LOCAL(p) +} + +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p) +{ + unsigned i; + for (i = 0; i < 5; i++) + Ppmd7z_RangeEnc_ShiftLow(p); +} + + + +#define RC_Encode(start, size) Ppmd7z_RangeEnc_Encode(p, start, size); +#define RC_EncodeFinal(start, size) RC_Encode(start, size) RC_NORM_REMOTE(p) + +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +#define SUFFIX(ctx) CTX((ctx)->Suffix) +// typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) + +void Ppmd7_UpdateModel(CPpmd7 *p); + +#define MASK(sym) ((Byte *)charMask)[sym] + +Z7_FORCE_INLINE +static +void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) +{ + size_t charMask[256 / sizeof(size_t)]; + + if (p->MinContext->NumStats != 1) + { + CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); + UInt32 sum; + unsigned i; + + + + + R->Range /= p->MinContext->Union2.SummFreq; + + if (s->Symbol == symbol) + { + // R->Range /= p->MinContext->Union2.SummFreq; + RC_EncodeFinal(0, s->Freq) + p->FoundState = s; + Ppmd7_Update1_0(p); + return; + } + p->PrevSuccess = 0; + sum = s->Freq; + i = (unsigned)p->MinContext->NumStats - 1; + do + { + if ((++s)->Symbol == symbol) + { + // R->Range /= p->MinContext->Union2.SummFreq; + RC_EncodeFinal(sum, s->Freq) + p->FoundState = s; + Ppmd7_Update1(p); + return; + } + sum += s->Freq; + } + while (--i); + + // R->Range /= p->MinContext->Union2.SummFreq; + RC_Encode(sum, p->MinContext->Union2.SummFreq - sum) + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); + PPMD_SetAllBitsIn256Bytes(charMask) + // MASK(s->Symbol) = 0; + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } + else + { + UInt16 *prob = Ppmd7_GetBinSumm(p); + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); + UInt32 pr = *prob; + const UInt32 bound = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + if (s->Symbol == symbol) + { + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + // RangeEnc_EncodeBit_0(p, bound); + R->Range = bound; + RC_NORM_1(p) + + // p->FoundState = s; + // Ppmd7_UpdateBin(p); + { + const unsigned freq = s->Freq; + CPpmd7_Context *c = CTX(SUCCESSOR(s)); + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return; + } + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + // RangeEnc_EncodeBit_1(p, bound); + R->Low += bound; + R->Range -= bound; + RC_NORM_LOCAL(p) + + PPMD_SetAllBitsIn256Bytes(charMask) + MASK(s->Symbol) = 0; + p->PrevSuccess = 0; + } + + for (;;) + { + CPpmd_See *see; + CPpmd_State *s; + UInt32 sum, escFreq; + CPpmd7_Context *mc; + unsigned i, numMasked; + + RC_NORM_REMOTE(p) + + mc = p->MinContext; + numMasked = mc->NumStats; + + do + { + p->OrderFall++; + if (!mc->Suffix) + return; /* EndMarker (symbol = -1) */ + mc = Ppmd7_GetContext(p, mc->Suffix); + i = mc->NumStats; + } + while (i == numMasked); + + p->MinContext = mc; + + // see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq); + { + if (i != 256) + { + unsigned nonMasked = i - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + p->HiBitsFlag + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * i) + + 4 * (unsigned)(numMasked > nonMasked); + { + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); + escFreq = r + (r == 0); + } + } + else + { + see = &p->DummySee; + escFreq = 1; + } + } + + s = Ppmd7_GetStats(p, mc); + sum = 0; + // i = mc->NumStats; + + do + { + const unsigned cur = s->Symbol; + if ((int)cur == symbol) + { + const UInt32 low = sum; + const UInt32 freq = s->Freq; + unsigned num2; + + Ppmd_See_UPDATE(see) + p->FoundState = s; + sum += escFreq; + + num2 = i / 2; + i &= 1; + sum += freq & (0 - (UInt32)i); + if (num2 != 0) + { + s += i; + do + { + const unsigned sym0 = s[0].Symbol; + const unsigned sym1 = s[1].Symbol; + s += 2; + sum += (s[-2].Freq & (unsigned)(MASK(sym0))); + sum += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + + + R->Range /= sum; + RC_EncodeFinal(low, freq) + Ppmd7_Update2(p); + return; + } + sum += (s->Freq & (unsigned)(MASK(cur))); + s++; + } + while (--i); + + { + const UInt32 total = sum + escFreq; + see->Summ = (UInt16)(see->Summ + total); + + R->Range /= total; + RC_Encode(sum, escFreq) + } + + { + const CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + s--; + MASK(s->Symbol) = 0; + do + { + const unsigned sym0 = s2[0].Symbol; + const unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } + } +} + + +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim) +{ + for (; buf < lim; buf++) + { + Ppmd7z_EncodeSymbol(p, *buf); + } +} + +#undef kTopValue +#undef WRITE_BYTE +#undef RC_NORM_BASE +#undef RC_NORM_1 +#undef RC_NORM +#undef RC_NORM_LOCAL +#undef RC_NORM_REMOTE +#undef R +#undef RC_Encode +#undef RC_EncodeFinal +#undef SUFFIX +#undef CTX +#undef SUCCESSOR +#undef MASK diff --git a/crnlib/lzma/Precomp.h b/crnlib/lzma/Precomp.h new file mode 100644 index 00000000..97652281 --- /dev/null +++ b/crnlib/lzma/Precomp.h @@ -0,0 +1,127 @@ +/* Precomp.h -- precompilation file +2024-01-25 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_PRECOMP_H +#define ZIP7_INC_PRECOMP_H + +/* + this file must be included before another *.h files and before . + this file is included from the following files: + C\*.c + C\Util\*\Precomp.h <- C\Util\*\*.c + CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp + + this file can set the following macros: + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 +*/ + +#include "Compiler.h" + +#ifdef _MSC_VER +// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#if _MSC_VER >= 1912 +// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#endif +#endif + +/* +// for debug: +#define UNICODE 1 +#define _UNICODE 1 +#define _WIN32_WINNT 0x0500 // win2000 +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +*/ + +#ifdef _WIN32 +/* + this "Precomp.h" file must be included before , + if we want to define _WIN32_WINNT before . +*/ + +#ifndef Z7_LARGE_PAGES +#ifndef Z7_NO_LARGE_PAGES +#define Z7_LARGE_PAGES 1 +#endif +#endif + +#ifndef Z7_LONG_PATH +#ifndef Z7_NO_LONG_PATH +#define Z7_LONG_PATH 1 +#endif +#endif + +#ifndef Z7_DEVICE_FILE +#ifndef Z7_NO_DEVICE_FILE +// #define Z7_DEVICE_FILE 1 +#endif +#endif + +// we don't change macros if included after +#ifndef _WINDOWS_ + +#ifndef Z7_WIN32_WINNT_MIN + #if defined(_M_ARM64) || defined(__aarch64__) + // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT) + // #define Z7_WIN32_WINNT_MIN 0x0602 // win8 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64) + #define Z7_WIN32_WINNT_MIN 0x0503 // win2003 + // #elif defined(_M_IX86) || defined(__i386__) + // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + #else // x86 and another(old) systems + #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug + #endif +#endif // Z7_WIN32_WINNT_MIN + + +#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT +#ifdef _WIN32_WINNT + // #error Stop_Compiling_Bad_WIN32_WINNT +#else + #ifndef Z7_NO_DEFINE_WIN32_WINNT +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define _WIN32_WINNT Z7_WIN32_WINNT_MIN +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #endif +#endif // _WIN32_WINNT + +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT + + +#ifndef _MBCS +#ifndef Z7_NO_UNICODE +// UNICODE and _UNICODE are used by and by 7-zip code. + +#ifndef UNICODE +#define UNICODE 1 +#endif + +#ifndef _UNICODE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _UNICODE 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // Z7_NO_UNICODE +#endif // _MBCS +#endif // _WINDOWS_ + +// #include "7zWindows.h" + +#endif // _WIN32 + +#endif diff --git a/crnlib/lzma/RotateDefs.h b/crnlib/lzma/RotateDefs.h new file mode 100644 index 00000000..8026cd10 --- /dev/null +++ b/crnlib/lzma/RotateDefs.h @@ -0,0 +1,50 @@ +/* RotateDefs.h -- Rotate functions +2023-06-18 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_ROTATE_DEFS_H +#define ZIP7_INC_ROTATE_DEFS_H + +#ifdef _MSC_VER + +#include + +/* don't use _rotl with old MINGW. It can insert slow call to function. */ + +/* #if (_MSC_VER >= 1200) */ +#pragma intrinsic(_rotl) +#pragma intrinsic(_rotr) +/* #endif */ + +#define rotlFixed(x, n) _rotl((x), (n)) +#define rotrFixed(x, n) _rotr((x), (n)) + +#if (_MSC_VER >= 1300) +#define Z7_ROTL64(x, n) _rotl64((x), (n)) +#define Z7_ROTR64(x, n) _rotr64((x), (n)) +#else +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#else + +/* new compilers can translate these macros to fast commands. */ + +#if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5) +/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63))) +#else +/* for old GCC / clang: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#endif + +#endif diff --git a/crnlib/lzma/Sha256.c b/crnlib/lzma/Sha256.c new file mode 100644 index 00000000..b29f06bc --- /dev/null +++ b/crnlib/lzma/Sha256.c @@ -0,0 +1,492 @@ +/* Sha256.c -- SHA-256 Hash +: Igor Pavlov : Public domain +This code is based on public domain code from Wei Dai's Crypto++ library. */ + +#include "Precomp.h" + +#include + +#include "Sha256.h" +#include "RotateDefs.h" +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \ + || defined(_MSC_VER) && (_MSC_VER >= 1200) + #define Z7_COMPILER_SHA256_SUPPORTED + #endif +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_SHA2) \ + || defined(__ARM_FEATURE_CRYPTO) + #define Z7_COMPILER_SHA256_SUPPORTED + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) + #define Z7_COMPILER_SHA256_SUPPORTED + #endif + #endif + #endif + #endif +#endif + +void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +#ifdef Z7_COMPILER_SHA256_SUPPORTED + void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); + + static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks; + static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW; + + #define SHA256_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks +#else + #define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks +#endif + + +BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo) +{ + SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks; + + #ifdef Z7_COMPILER_SHA256_SUPPORTED + if (algo != SHA256_ALGO_SW) + { + if (algo == SHA256_ALGO_DEFAULT) + func = g_SHA256_FUNC_UPDATE_BLOCKS; + else + { + if (algo != SHA256_ALGO_HW) + return False; + func = g_SHA256_FUNC_UPDATE_BLOCKS_HW; + if (!func) + return False; + } + } + #else + if (algo > 1) + return False; + #endif + + p->v.vars.func_UpdateBlocks = func; + return True; +} + + +/* define it for speed optimization */ + +#ifdef Z7_SFX + #define STEP_PRE 1 + #define STEP_MAIN 1 +#else + #define STEP_PRE 2 + #define STEP_MAIN 4 + // #define Z7_SHA256_UNROLL +#endif + +#undef Z7_SHA256_BIG_W +#if STEP_MAIN != 16 + #define Z7_SHA256_BIG_W +#endif + + + + +void Sha256_InitState(CSha256 *p) +{ + p->v.vars.count = 0; + p->state[0] = 0x6a09e667; + p->state[1] = 0xbb67ae85; + p->state[2] = 0x3c6ef372; + p->state[3] = 0xa54ff53a; + p->state[4] = 0x510e527f; + p->state[5] = 0x9b05688c; + p->state[6] = 0x1f83d9ab; + p->state[7] = 0x5be0cd19; +} + + + + + + + + +void Sha256_Init(CSha256 *p) +{ + p->v.vars.func_UpdateBlocks = + #ifdef Z7_COMPILER_SHA256_SUPPORTED + g_SHA256_FUNC_UPDATE_BLOCKS; + #else + NULL; + #endif + Sha256_InitState(p); +} + +#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x,22)) +#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x,25)) +#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3)) +#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >>10)) + +#define Ch(x,y,z) (z^(x&(y^z))) +#define Maj(x,y,z) ((x&y)|(z&(x|y))) + + +#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4)) + +#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15)) + +#ifdef Z7_SHA256_BIG_W + // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned. + #define w(j, i) W[(size_t)(j) + i] + #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i)) +#else + #if STEP_MAIN == 16 + #define w(j, i) W[(i) & 15] + #else + #define w(j, i) W[((size_t)(j) + (i)) & 15] + #endif + #define blk2(j, i) (w(j, i) += blk2_main(j, i)) +#endif + +#define W_MAIN(i) blk2(j, i) + + +#define T1(wx, i) \ + tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + h = g; \ + g = f; \ + f = e; \ + e = d + tmp; \ + tmp += S0(a) + Maj(a, b, c); \ + d = c; \ + c = b; \ + b = a; \ + a = tmp; \ + +#define R1_PRE(i) T1( W_PRE, i) +#define R1_MAIN(i) T1( W_MAIN, i) + +#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4) +#define R2_MAIN(i) \ + R1_MAIN(i) \ + R1_MAIN(i + 1) \ + +#endif + + + +#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8 + +#define T4( a,b,c,d,e,f,g,h, wx, i) \ + h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + tmp = h; \ + h += d; \ + d = tmp + S0(a) + Maj(a, b, c); \ + +#define R4( wx, i) \ + T4 ( a,b,c,d,e,f,g,h, wx, (i )); \ + T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \ + T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \ + T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \ + +#define R4_PRE(i) R4( W_PRE, i) +#define R4_MAIN(i) R4( W_MAIN, i) + + +#define T8( a,b,c,d,e,f,g,h, wx, i) \ + h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ + d += h; \ + h += S0(a) + Maj(a, b, c); \ + +#define R8( wx, i) \ + T8 ( a,b,c,d,e,f,g,h, wx, i ); \ + T8 ( h,a,b,c,d,e,f,g, wx, i+1); \ + T8 ( g,h,a,b,c,d,e,f, wx, i+2); \ + T8 ( f,g,h,a,b,c,d,e, wx, i+3); \ + T8 ( e,f,g,h,a,b,c,d, wx, i+4); \ + T8 ( d,e,f,g,h,a,b,c, wx, i+5); \ + T8 ( c,d,e,f,g,h,a,b, wx, i+6); \ + T8 ( b,c,d,e,f,g,h,a, wx, i+7); \ + +#define R8_PRE(i) R8( W_PRE, i) +#define R8_MAIN(i) R8( W_MAIN, i) + +#endif + + +extern +MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64]; +MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + + + + + +#define K SHA256_K_ARRAY + +Z7_NO_INLINE +void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + UInt32 W +#ifdef Z7_SHA256_BIG_W + [64]; +#else + [16]; +#endif + unsigned j; + UInt32 a,b,c,d,e,f,g,h; +#if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4) + UInt32 tmp; +#endif + + if (numBlocks == 0) return; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + do + { + + for (j = 0; j < 16; j += STEP_PRE) + { + #if STEP_PRE > 4 + + #if STEP_PRE < 8 + R4_PRE(0); + #else + R8_PRE(0); + #if STEP_PRE == 16 + R8_PRE(8); + #endif + #endif + + #else + + R1_PRE(0) + #if STEP_PRE >= 2 + R1_PRE(1) + #if STEP_PRE >= 4 + R1_PRE(2) + R1_PRE(3) + #endif + #endif + + #endif + } + + for (j = 16; j < 64; j += STEP_MAIN) + { + #if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8 + + #if STEP_MAIN < 8 + R4_MAIN(0) + #else + R8_MAIN(0) + #if STEP_MAIN == 16 + R8_MAIN(8) + #endif + #endif + + #else + + R1_MAIN(0) + #if STEP_MAIN >= 2 + R1_MAIN(1) + #if STEP_MAIN >= 4 + R2_MAIN(2) + #if STEP_MAIN >= 8 + R2_MAIN(4) + R2_MAIN(6) + #if STEP_MAIN >= 16 + R2_MAIN(8) + R2_MAIN(10) + R2_MAIN(12) + R2_MAIN(14) + #endif + #endif + #endif + #endif + #endif + } + + a += state[0]; state[0] = a; + b += state[1]; state[1] = b; + c += state[2]; state[2] = c; + d += state[3]; state[3] = d; + e += state[4]; state[4] = e; + f += state[5]; state[5] = f; + g += state[6]; state[6] = g; + h += state[7]; state[7] = h; + + data += SHA256_BLOCK_SIZE; + } + while (--numBlocks); +} + + +#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1) + +void Sha256_Update(CSha256 *p, const Byte *data, size_t size) +{ + if (size == 0) + return; + { + const unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1); + const unsigned num = SHA256_BLOCK_SIZE - pos; + p->v.vars.count += size; + if (num > size) + { + memcpy(p->buffer + pos, data, size); + return; + } + if (pos != 0) + { + size -= num; + memcpy(p->buffer + pos, data, num); + data += num; + Sha256_UpdateBlock(p); + } + } + { + const size_t numBlocks = size >> 6; + // if (numBlocks) + SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks); + size &= SHA256_BLOCK_SIZE - 1; + if (size == 0) + return; + data += (numBlocks << 6); + memcpy(p->buffer, data, size); + } +} + + +void Sha256_Final(CSha256 *p, Byte *digest) +{ + unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1); + p->buffer[pos++] = 0x80; + if (pos > (SHA256_BLOCK_SIZE - 4 * 2)) + { + while (pos != SHA256_BLOCK_SIZE) { p->buffer[pos++] = 0; } + // memset(&p->buf.buffer[pos], 0, SHA256_BLOCK_SIZE - pos); + Sha256_UpdateBlock(p); + pos = 0; + } + memset(&p->buffer[pos], 0, (SHA256_BLOCK_SIZE - 4 * 2) - pos); + { + const UInt64 numBits = p->v.vars.count << 3; + SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4 * 2, (UInt32)(numBits >> 32)) + SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4 * 1, (UInt32)(numBits)) + } + Sha256_UpdateBlock(p); +#if 1 && defined(MY_CPU_BE) + memcpy(digest, p->state, SHA256_DIGEST_SIZE); +#else + { + unsigned i; + for (i = 0; i < 8; i += 2) + { + const UInt32 v0 = p->state[i]; + const UInt32 v1 = p->state[(size_t)i + 1]; + SetBe32(digest , v0) + SetBe32(digest + 4, v1) + digest += 4 * 2; + } + } + + + + +#endif + Sha256_InitState(p); +} + + +void Sha256Prepare(void) +{ +#ifdef Z7_COMPILER_SHA256_SUPPORTED + SHA256_FUNC_UPDATE_BLOCKS f, f_hw; + f = Sha256_UpdateBlocks; + f_hw = NULL; +#ifdef MY_CPU_X86_OR_AMD64 + if (CPU_IsSupported_SHA() + && CPU_IsSupported_SSSE3() + ) +#else + if (CPU_IsSupported_SHA2()) +#endif + { + // printf("\n========== HW SHA256 ======== \n"); + f = f_hw = Sha256_UpdateBlocks_HW; + } + g_SHA256_FUNC_UPDATE_BLOCKS = f; + g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw; +#endif +} + +#undef U64C +#undef K +#undef S0 +#undef S1 +#undef s0 +#undef s1 +#undef Ch +#undef Maj +#undef W_MAIN +#undef W_PRE +#undef w +#undef blk2_main +#undef blk2 +#undef T1 +#undef T4 +#undef T8 +#undef R1_PRE +#undef R1_MAIN +#undef R2_MAIN +#undef R4 +#undef R4_PRE +#undef R4_MAIN +#undef R8 +#undef R8_PRE +#undef R8_MAIN +#undef STEP_PRE +#undef STEP_MAIN +#undef Z7_SHA256_BIG_W +#undef Z7_SHA256_UNROLL +#undef Z7_COMPILER_SHA256_SUPPORTED diff --git a/crnlib/lzma/Sha256.h b/crnlib/lzma/Sha256.h new file mode 100644 index 00000000..17c35a1e --- /dev/null +++ b/crnlib/lzma/Sha256.h @@ -0,0 +1,86 @@ +/* Sha256.h -- SHA-256 Hash +: Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_SHA256_H +#define ZIP7_INC_SHA256_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define SHA256_NUM_BLOCK_WORDS 16 +#define SHA256_NUM_DIGEST_WORDS 8 + +#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4) +#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4) + + + + +typedef void (Z7_FASTCALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks); + +/* + if (the system supports different SHA256 code implementations) + { + (CSha256::func_UpdateBlocks) will be used + (CSha256::func_UpdateBlocks) can be set by + Sha256_Init() - to default (fastest) + Sha256_SetFunction() - to any algo + } + else + { + (CSha256::func_UpdateBlocks) is ignored. + } +*/ + +typedef struct +{ + union + { + struct + { + SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks; + UInt64 count; + } vars; + UInt64 _pad_64bit[4]; + void *_pad_align_ptr[2]; + } v; + UInt32 state[SHA256_NUM_DIGEST_WORDS]; + + Byte buffer[SHA256_BLOCK_SIZE]; +} CSha256; + + +#define SHA256_ALGO_DEFAULT 0 +#define SHA256_ALGO_SW 1 +#define SHA256_ALGO_HW 2 + +/* +Sha256_SetFunction() +return: + 0 - (algo) value is not supported, and func_UpdateBlocks was not changed + 1 - func_UpdateBlocks was set according (algo) value. +*/ + +BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo); + +void Sha256_InitState(CSha256 *p); +void Sha256_Init(CSha256 *p); +void Sha256_Update(CSha256 *p, const Byte *data, size_t size); +void Sha256_Final(CSha256 *p, Byte *digest); + + + + +// void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); + +/* +call Sha256Prepare() once at program start. +It prepares all supported implementations, and detects the fastest implementation. +*/ + +void Sha256Prepare(void); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Sha256Opt.c b/crnlib/lzma/Sha256Opt.c new file mode 100644 index 00000000..291e71c1 --- /dev/null +++ b/crnlib/lzma/Sha256Opt.c @@ -0,0 +1,451 @@ +/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions +: Igor Pavlov : Public domain */ + +#include "Precomp.h" +#include "Compiler.h" +#include "CpuArch.h" + +// #define Z7_USE_HW_SHA_STUB // for debug +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check + #define USE_HW_SHA + #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) + #define USE_HW_SHA + #if !defined(__INTEL_COMPILER) + // icc defines __GNUC__, but icc doesn't support __attribute__(__target__) + #if !defined(__SHA__) || !defined(__SSSE3__) + #define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) + #endif + #endif + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1900) + #define USE_HW_SHA + #else + #define Z7_USE_HW_SHA_STUB + #endif + #endif +// #endif // MY_CPU_X86_OR_AMD64 +#ifndef USE_HW_SHA + // #define Z7_USE_HW_SHA_STUB // for debug +#endif + +#ifdef USE_HW_SHA + +// #pragma message("Sha256 HW") + + + + +// sse/sse2/ssse3: +#include +// sha*: +#include + +#if defined (__clang__) && defined(_MSC_VER) + #if !defined(__SHA__) + #include + #endif +#else + +#endif + +/* +SHA256 uses: +SSE2: + _mm_loadu_si128 + _mm_storeu_si128 + _mm_set_epi32 + _mm_add_epi32 + _mm_shuffle_epi32 / pshufd + + + +SSSE3: + _mm_shuffle_epi8 / pshufb + _mm_alignr_epi8 +SHA: + _mm_sha256* +*/ + +// K array must be aligned for 16-bytes at least. +// The compiler can look align attribute and selects +// movdqu - for code without align attribute +// movdqa - for code with align attribute +extern +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; +#define K SHA256_K_ARRAY + + +#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); +#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); +#define SHA256_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); + +#define LOAD_SHUFFLE(m, k) \ + m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \ + m = _mm_shuffle_epi8(m, mask); \ + +#define NNN(m0, m1, m2, m3) + +#define SM1(m1, m2, m3, m0) \ + SHA256_MSG1(m0, m1); \ + +#define SM2(m2, m3, m0, m1) \ + ADD_EPI32(m0, _mm_alignr_epi8(m3, m2, 4)) \ + SHA256_MSG2(m0, m3); \ + +#define RND2(t0, t1) \ + t0 = _mm_sha256rnds2_epu32(t0, t1, msg); + + + +#define R4(k, m0, m1, m2, m3, OP0, OP1) \ + msg = _mm_add_epi32(m0, *(const __m128i *) (const void *) &K[(k) * 4]); \ + RND2(state0, state1); \ + msg = _mm_shuffle_epi32(msg, 0x0E); \ + OP0(m0, m1, m2, m3) \ + RND2(state1, state0); \ + OP1(m0, m1, m2, m3) \ + +#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ + R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \ + +#define PREPARE_STATE \ + tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ + state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \ + state1 = state0; \ + state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \ + state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ + + +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); + + + __m128i tmp, state0, state1; + + if (numBlocks == 0) + return; + + state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); + state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]); + + PREPARE_STATE + + do + { + __m128i state0_save, state1_save; + __m128i m0, m1, m2, m3; + __m128i msg; + // #define msg tmp + + state0_save = state0; + state1_save = state1; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + + + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) + + ADD_EPI32(state0, state0_save) + ADD_EPI32(state1, state1_save) + + data += 64; + } + while (--numBlocks); + + PREPARE_STATE + + _mm_storeu_si128((__m128i *) (void *) &state[0], state0); + _mm_storeu_si128((__m128i *) (void *) &state[4], state1); +} + +#endif // USE_HW_SHA + +#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) + + #if defined(__ARM_FEATURE_SHA2) \ + || defined(__ARM_FEATURE_CRYPTO) + #define USE_HW_SHA + #else + #if defined(MY_CPU_ARM64) \ + || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \ + || defined(Z7_MSC_VER_ORIGINAL) + #if defined(__ARM_FP) && \ + ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) \ + ) \ + || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) && \ + (Z7_CLANG_VERSION < 170000 || \ + Z7_CLANG_VERSION > 170001) + #define USE_HW_SHA + #endif + #endif + #endif + #endif + +#ifdef USE_HW_SHA + +// #pragma message("=== Sha256 HW === ") + + +#if defined(__clang__) || defined(__GNUC__) +#if !defined(__ARM_FEATURE_SHA2) && \ + !defined(__ARM_FEATURE_CRYPTO) + #ifdef MY_CPU_ARM64 +#if defined(__clang__) + #define ATTRIB_SHA __attribute__((__target__("crypto"))) +#else + #define ATTRIB_SHA __attribute__((__target__("+crypto"))) +#endif + #else +#if defined(__clang__) && (__clang_major__ >= 1) + #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2"))) +#else + #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) +#endif + #endif +#endif +#else + // _MSC_VER + // for arm32 + #define _ARM_USE_NEW_NEON_INTRINSICS +#endif + +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) +#include +#else + +#if defined(__clang__) && __clang_major__ < 16 +#if !defined(__ARM_FEATURE_SHA2) && \ + !defined(__ARM_FEATURE_CRYPTO) +// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ") + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1 +// #if defined(__clang__) && __clang_major__ < 13 + #define __ARM_FEATURE_CRYPTO 1 +// #else + #define __ARM_FEATURE_SHA2 1 +// #endif + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif // clang + +#if defined(__clang__) + +#if defined(__ARM_ARCH) && __ARM_ARCH < 8 + Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #pragma message("#define __ARM_ARCH 8") + #undef __ARM_ARCH + #define __ARM_ARCH 8 + Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // clang + +#include + +#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \ + defined(__ARM_FEATURE_CRYPTO) && \ + defined(__ARM_FEATURE_SHA2) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #undef __ARM_FEATURE_CRYPTO + #undef __ARM_FEATURE_SHA2 + #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ") +#endif + +#endif // Z7_MSC_VER_ORIGINAL + +typedef uint32x4_t v128; +// typedef __n128 v128; // MSVC + +#ifdef MY_CPU_BE + #define MY_rev32_for_LE(x) x +#else + #define MY_rev32_for_LE(x) vrev32q_u8(x) +#endif + +#if 1 // 0 for debug +// for arm32: it works slower by some reason than direct code +/* +for arm32 it generates: +MSVC-2022, GCC-9: + vld1.32 {d18,d19}, [r10] + vst1.32 {d4,d5}, [r3] + vld1.8 {d20-d21}, [r4] +there is no align hint (like [r10:128]). So instruction allows unaligned access +*/ +#define LOAD_128_32(_p) vld1q_u32(_p) +#define LOAD_128_8(_p) vld1q_u8 (_p) +#define STORE_128_32(_p, _v) vst1q_u32(_p, _v) +#else +/* +for arm32: +MSVC-2022: + vldm r10,{d18,d19} + vstm r3,{d4,d5} + does it require strict alignment? +GCC-9: + vld1.64 {d30-d31}, [r0:64] + vldr d28, [r0, #16] + vldr d29, [r0, #24] + vst1.64 {d30-d31}, [r0:64] + vstr d28, [r0, #16] + vstr d29, [r0, #24] +there is hint [r0:64], so does it requires 64-bit alignment. +*/ +#define LOAD_128_32(_p) (*(const v128 *)(const void *)(_p)) +#define LOAD_128_8(_p) vreinterpretq_u8_u32(*(const v128 *)(const void *)(_p)) +#define STORE_128_32(_p, _v) *(v128 *)(void *)(_p) = (_v) +#endif + +#define LOAD_SHUFFLE(m, k) \ + m = vreinterpretq_u32_u8( \ + MY_rev32_for_LE( \ + LOAD_128_8(data + (k) * 16))); \ + +// K array must be aligned for 16-bytes at least. +extern +MY_ALIGN(64) +const UInt32 SHA256_K_ARRAY[64]; +#define K SHA256_K_ARRAY + +#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src); +#define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3); + +#define SM1(m0, m1, m2, m3) SHA256_SU0(m3, m0) +#define SM2(m0, m1, m2, m3) SHA256_SU1(m2, m0, m1) +#define NNN(m0, m1, m2, m3) + +#define R4(k, m0, m1, m2, m3, OP0, OP1) \ + msg = vaddq_u32(m0, *(const v128 *) (const void *) &K[(k) * 4]); \ + tmp = state0; \ + state0 = vsha256hq_u32( state0, state1, msg ); \ + state1 = vsha256h2q_u32( state1, tmp, msg ); \ + OP0(m0, m1, m2, m3); \ + OP1(m0, m1, m2, m3); \ + + +#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ + R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ + R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ + R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ + R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ + + +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +#ifdef ATTRIB_SHA +ATTRIB_SHA +#endif +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + v128 state0, state1; + + if (numBlocks == 0) + return; + + state0 = LOAD_128_32(&state[0]); + state1 = LOAD_128_32(&state[4]); + + do + { + v128 state0_save, state1_save; + v128 m0, m1, m2, m3; + v128 msg, tmp; + + state0_save = state0; + state1_save = state1; + + LOAD_SHUFFLE (m0, 0) + LOAD_SHUFFLE (m1, 1) + LOAD_SHUFFLE (m2, 2) + LOAD_SHUFFLE (m3, 3) + + R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ) + R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ) + R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ) + + state0 = vaddq_u32(state0, state0_save); + state1 = vaddq_u32(state1, state1_save); + + data += 64; + } + while (--numBlocks); + + STORE_128_32(&state[0], state0); + STORE_128_32(&state[4], state1); +} + +#endif // USE_HW_SHA + +#endif // MY_CPU_ARM_OR_ARM64 + + +#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB) +// #error Stop_Compiling_UNSUPPORTED_SHA +// #include +// We can compile this file with another C compiler, +// or we can compile asm version. +// So we can generate real code instead of this stub function. +// #include "Sha256.h" +// #if defined(_MSC_VER) +#pragma message("Sha256 HW-SW stub was used") +// #endif +void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); +void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) +{ + Sha256_UpdateBlocks(state, data, numBlocks); + /* + UNUSED_VAR(state); + UNUSED_VAR(data); + UNUSED_VAR(numBlocks); + exit(1); + return; + */ +} +#endif + + +#undef K +#undef RND2 +#undef MY_rev32_for_LE + +#undef NNN +#undef LOAD_128 +#undef STORE_128 +#undef LOAD_SHUFFLE +#undef SM1 +#undef SM2 + + +#undef R4 +#undef R16 +#undef PREPARE_STATE +#undef USE_HW_SHA +#undef ATTRIB_SHA +#undef USE_VER_MIN +#undef Z7_USE_HW_SHA_STUB diff --git a/crnlib/lzma/Sort.c b/crnlib/lzma/Sort.c new file mode 100644 index 00000000..73dcbf05 --- /dev/null +++ b/crnlib/lzma/Sort.c @@ -0,0 +1,141 @@ +/* Sort.c -- Sort functions +2014-04-05 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Sort.h" + +#define HeapSortDown(p, k, size, temp) \ + { for (;;) { \ + size_t s = (k << 1); \ + if (s > size) break; \ + if (s < size && p[s + 1] > p[s]) s++; \ + if (temp >= p[s]) break; \ + p[k] = p[s]; k = s; \ + } p[k] = temp; } + +void HeapSort(UInt32 *p, size_t size) +{ + if (size <= 1) + return; + p--; + { + size_t i = size / 2; + do + { + UInt32 temp = p[i]; + size_t k = i; + HeapSortDown(p, k, size, temp) + } + while (--i != 0); + } + /* + do + { + size_t k = 1; + UInt32 temp = p[size]; + p[size--] = p[1]; + HeapSortDown(p, k, size, temp) + } + while (size > 1); + */ + while (size > 3) + { + UInt32 temp = p[size]; + size_t k = (p[3] > p[2]) ? 3 : 2; + p[size--] = p[1]; + p[1] = p[k]; + HeapSortDown(p, k, size, temp) + } + { + UInt32 temp = p[size]; + p[size] = p[1]; + if (size > 2 && p[2] < temp) + { + p[1] = p[2]; + p[2] = temp; + } + else + p[1] = temp; + } +} + +void HeapSort64(UInt64 *p, size_t size) +{ + if (size <= 1) + return; + p--; + { + size_t i = size / 2; + do + { + UInt64 temp = p[i]; + size_t k = i; + HeapSortDown(p, k, size, temp) + } + while (--i != 0); + } + /* + do + { + size_t k = 1; + UInt64 temp = p[size]; + p[size--] = p[1]; + HeapSortDown(p, k, size, temp) + } + while (size > 1); + */ + while (size > 3) + { + UInt64 temp = p[size]; + size_t k = (p[3] > p[2]) ? 3 : 2; + p[size--] = p[1]; + p[1] = p[k]; + HeapSortDown(p, k, size, temp) + } + { + UInt64 temp = p[size]; + p[size] = p[1]; + if (size > 2 && p[2] < temp) + { + p[1] = p[2]; + p[2] = temp; + } + else + p[1] = temp; + } +} + +/* +#define HeapSortRefDown(p, vals, n, size, temp) \ + { size_t k = n; UInt32 val = vals[temp]; for (;;) { \ + size_t s = (k << 1); \ + if (s > size) break; \ + if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \ + if (val >= vals[p[s]]) break; \ + p[k] = p[s]; k = s; \ + } p[k] = temp; } + +void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size) +{ + if (size <= 1) + return; + p--; + { + size_t i = size / 2; + do + { + UInt32 temp = p[i]; + HeapSortRefDown(p, vals, i, size, temp); + } + while (--i != 0); + } + do + { + UInt32 temp = p[size]; + p[size--] = p[1]; + HeapSortRefDown(p, vals, 1, size, temp); + } + while (size > 1); +} +*/ diff --git a/crnlib/lzma/Sort.h b/crnlib/lzma/Sort.h new file mode 100644 index 00000000..1bb2b1e7 --- /dev/null +++ b/crnlib/lzma/Sort.h @@ -0,0 +1,18 @@ +/* Sort.h -- Sort functions +2023-03-05 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_SORT_H +#define ZIP7_INC_SORT_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void HeapSort(UInt32 *p, size_t size); +void HeapSort64(UInt64 *p, size_t size); + +/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */ + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/SwapBytes.c b/crnlib/lzma/SwapBytes.c new file mode 100644 index 00000000..d382c327 --- /dev/null +++ b/crnlib/lzma/SwapBytes.c @@ -0,0 +1,835 @@ +/* SwapBytes.c -- Byte Swap conversion filter +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Compiler.h" +#include "CpuArch.h" +#include "RotateDefs.h" +#include "SwapBytes.h" + +typedef UInt16 CSwapUInt16; +typedef UInt32 CSwapUInt32; + +// #define k_SwapBytes_Mode_BASE 0 + +#ifdef MY_CPU_X86_OR_AMD64 + +#define k_SwapBytes_Mode_SSE2 1 +#define k_SwapBytes_Mode_SSSE3 2 +#define k_SwapBytes_Mode_AVX2 3 + + // #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + #if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_AVX2 + #define SWAP_ATTRIB_SSE2 __attribute__((__target__("sse2"))) + #define SWAP_ATTRIB_SSSE3 __attribute__((__target__("ssse3"))) + #define SWAP_ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #elif defined(_MSC_VER) + #if (_MSC_VER == 1900) + #pragma warning(disable : 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX + #endif + #if (_MSC_VER >= 1900) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_AVX2 + #elif (_MSC_VER >= 1500) // (VS2008) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_SSSE3 + #elif (_MSC_VER >= 1310) // (VS2003) + #define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_SSE2 + #endif + #endif // _MSC_VER + +/* +// for debug +#ifdef k_SwapBytes_Mode_MAX +#undef k_SwapBytes_Mode_MAX +#endif +*/ + +#ifndef k_SwapBytes_Mode_MAX +#define k_SwapBytes_Mode_MAX 0 +#endif + +#if (k_SwapBytes_Mode_MAX != 0) && defined(MY_CPU_AMD64) + #define k_SwapBytes_Mode_MIN k_SwapBytes_Mode_SSE2 +#else + #define k_SwapBytes_Mode_MIN 0 +#endif + +#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_AVX2) + #define USE_SWAP_AVX2 +#endif +#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSSE3) + #define USE_SWAP_SSSE3 +#endif +#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSE2) + #define USE_SWAP_128 +#endif + +#if k_SwapBytes_Mode_MAX <= k_SwapBytes_Mode_MIN || !defined(USE_SWAP_128) +#define FORCE_SWAP_MODE +#endif + + +#ifdef USE_SWAP_128 +/* + MMX + SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2 + SSE4A + AES + AVX, AVX2, FMA +*/ + +#include // sse2 +// typedef __m128i v128; + +#define SWAP2_128(i) { \ + const __m128i v = *(const __m128i *)(const void *)(items + (i) * 8); \ + *( __m128i *)( void *)(items + (i) * 8) = \ + _mm_or_si128( \ + _mm_slli_epi16(v, 8), \ + _mm_srli_epi16(v, 8)); } +// _mm_or_si128() has more ports to execute than _mm_add_epi16(). + +static +#ifdef SWAP_ATTRIB_SSE2 +SWAP_ATTRIB_SSE2 +#endif +void +Z7_FASTCALL +SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + } + while (items != lim); +} + +/* +// sse2 +#define SWAP4_128_pack(i) { \ + __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \ + __m128i v0 = _mm_unpacklo_epi8(v, mask); \ + __m128i v1 = _mm_unpackhi_epi8(v, mask); \ + v0 = _mm_shufflelo_epi16(v0, 0x1b); \ + v1 = _mm_shufflelo_epi16(v1, 0x1b); \ + v0 = _mm_shufflehi_epi16(v0, 0x1b); \ + v1 = _mm_shufflehi_epi16(v1, 0x1b); \ + *(__m128i *)(void *)(items + (i) * 4) = _mm_packus_epi16(v0, v1); } + +static +#ifdef SWAP_ATTRIB_SSE2 +SWAP_ATTRIB_SSE2 +#endif +void +Z7_FASTCALL +SwapBytes4_128_pack(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + const __m128i mask = _mm_setzero_si128(); + // const __m128i mask = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_128_pack(0); items += 1 * 4; + // SWAP4_128_pack(0); SWAP4_128_pack(1); items += 2 * 4; + } + while (items != lim); +} + +// sse2 +#define SWAP4_128_shift(i) { \ + __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \ + __m128i v2; \ + v2 = _mm_or_si128( \ + _mm_slli_si128(_mm_and_si128(v, mask), 1), \ + _mm_and_si128(_mm_srli_si128(v, 1), mask)); \ + v = _mm_or_si128( \ + _mm_slli_epi32(v, 24), \ + _mm_srli_epi32(v, 24)); \ + *(__m128i *)(void *)(items + (i) * 4) = _mm_or_si128(v2, v); } + +static +#ifdef SWAP_ATTRIB_SSE2 +SWAP_ATTRIB_SSE2 +#endif +void +Z7_FASTCALL +SwapBytes4_128_shift(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + #define M1 0xff00 + const __m128i mask = _mm_set_epi32(M1, M1, M1, M1); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + // SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4; + // SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4; + SWAP4_128_shift(0); items += 1 * 4; + } + while (items != lim); +} +*/ + + +#if defined(USE_SWAP_SSSE3) || defined(USE_SWAP_AVX2) + +#define SWAP_SHUF_REV_SEQ_2_VALS(v) (v)+1, (v) +#define SWAP_SHUF_REV_SEQ_4_VALS(v) (v)+3, (v)+2, (v)+1, (v) + +#define SWAP2_SHUF_MASK_16_BYTES \ + SWAP_SHUF_REV_SEQ_2_VALS (0 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (1 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (2 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (3 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (4 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (5 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (6 * 2), \ + SWAP_SHUF_REV_SEQ_2_VALS (7 * 2) + +#define SWAP4_SHUF_MASK_16_BYTES \ + SWAP_SHUF_REV_SEQ_4_VALS (0 * 4), \ + SWAP_SHUF_REV_SEQ_4_VALS (1 * 4), \ + SWAP_SHUF_REV_SEQ_4_VALS (2 * 4), \ + SWAP_SHUF_REV_SEQ_4_VALS (3 * 4) + +#if defined(USE_SWAP_AVX2) +/* if we use 256_BIT_INIT_MASK, each static array mask will be larger for 16 bytes */ +// #define SWAP_USE_256_BIT_INIT_MASK +#endif + +#if defined(SWAP_USE_256_BIT_INIT_MASK) && defined(USE_SWAP_AVX2) +#define SWAP_MASK_INIT_SIZE 32 +#else +#define SWAP_MASK_INIT_SIZE 16 +#endif + +MY_ALIGN(SWAP_MASK_INIT_SIZE) +static const Byte k_ShufMask_Swap2[] = +{ + SWAP2_SHUF_MASK_16_BYTES + #if SWAP_MASK_INIT_SIZE > 16 + , SWAP2_SHUF_MASK_16_BYTES + #endif +}; + +MY_ALIGN(SWAP_MASK_INIT_SIZE) +static const Byte k_ShufMask_Swap4[] = +{ + SWAP4_SHUF_MASK_16_BYTES + #if SWAP_MASK_INIT_SIZE > 16 + , SWAP4_SHUF_MASK_16_BYTES + #endif +}; + + +#ifdef USE_SWAP_SSSE3 + +#include // ssse3 + +#define SHUF_128(i) *(items + (i)) = \ + _mm_shuffle_epi8(*(items + (i)), mask); // SSSE3 + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_SSSE3 +SWAP_ATTRIB_SSSE3 +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +ShufBytes_128(void *items8, const void *lim8, const void *mask128_ptr) +{ + __m128i *items = (__m128i *)items8; + const __m128i *lim = (const __m128i *)lim8; + // const __m128i mask = _mm_set_epi8(SHUF_SWAP2_MASK_16_VALS); + // const __m128i mask = _mm_set_epi8(SHUF_SWAP4_MASK_16_VALS); + // const __m128i mask = _mm_load_si128((const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + // const __m128i mask = _mm_load_si128((const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + // const __m128i mask = *(const __m128i *)(const void *)&(k_ShufMask_Swap4[0]); + const __m128i mask = *(const __m128i *)mask128_ptr; + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SHUF_128(0) SHUF_128(1) items += 2; + SHUF_128(0) SHUF_128(1) items += 2; + } + while (items != lim); +} + +#endif // USE_SWAP_SSSE3 + + + +#ifdef USE_SWAP_AVX2 + +#include // avx, avx2 +#if defined(__clang__) +#include +#include +#endif + +#define SHUF_256(i) *(items + (i)) = \ + _mm256_shuffle_epi8(*(items + (i)), mask); // AVX2 + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_AVX2 +SWAP_ATTRIB_AVX2 +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr) +{ + __m256i *items = (__m256i *)items8; + const __m256i *lim = (const __m256i *)lim8; + /* + UNUSED_VAR(mask128_ptr) + __m256i mask = + for Swap4: _mm256_setr_epi8(SWAP4_SHUF_MASK_16_BYTES, SWAP4_SHUF_MASK_16_BYTES); + for Swap2: _mm256_setr_epi8(SWAP2_SHUF_MASK_16_BYTES, SWAP2_SHUF_MASK_16_BYTES); + */ + const __m256i mask = + #if SWAP_MASK_INIT_SIZE > 16 + *(const __m256i *)(const void *)mask128_ptr; + #else + /* msvc: broadcastsi128() version reserves the stack for no reason + msvc 19.29-: _mm256_insertf128_si256() / _mm256_set_m128i)) versions use non-avx movdqu xmm0,XMMWORD PTR [r8] + msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want + */ + // _mm256_broadcastsi128_si256(*mask128_ptr); +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000) + #define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1) +#else + #define MY_mm256_set_m128i _mm256_set_m128i +#endif + MY_mm256_set_m128i( + *(const __m128i *)mask128_ptr, + *(const __m128i *)mask128_ptr); + #endif + + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SHUF_256(0) SHUF_256(1) items += 2; + SHUF_256(0) SHUF_256(1) items += 2; + } + while (items != lim); +} + +#endif // USE_SWAP_AVX2 +#endif // USE_SWAP_SSSE3 || USE_SWAP_AVX2 +#endif // USE_SWAP_128 + + + +// compile message "NEON intrinsics not available with the soft-float ABI" +#elif defined(MY_CPU_ARM_OR_ARM64) \ + && defined(MY_CPU_LE) \ + && !defined(Z7_DISABLE_ARM_NEON) + + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 6) + #if defined(__ARM_FP) + #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \ + || defined(MY_CPU_ARM64) + #if defined(MY_CPU_ARM64) \ + || !defined(Z7_CLANG_VERSION) \ + || defined(__ARM_NEON) + #define USE_SWAP_128 + #ifdef MY_CPU_ARM64 + // #define SWAP_ATTRIB_NEON __attribute__((__target__(""))) + #else +#if defined(Z7_CLANG_VERSION) + // #define SWAP_ATTRIB_NEON __attribute__((__target__("neon"))) +#else + // #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))") + #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon"))) +#endif + #endif // MY_CPU_ARM64 + #endif // __ARM_NEON + #endif // __ARM_ARCH + #endif // __ARM_FP + + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1910) + #define USE_SWAP_128 + #endif + #endif + + #ifdef USE_SWAP_128 + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) + #include + #else + +/* +#if !defined(__ARM_NEON) +#if defined(Z7_GCC_VERSION) && (__GNUC__ < 5) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 90201) \ + || defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 100100) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#pragma message("#define __ARM_NEON 1") +// #define __ARM_NEON 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif +*/ + #include + #endif + #endif + +#ifndef USE_SWAP_128 + #define FORCE_SWAP_MODE +#else + +#ifdef MY_CPU_ARM64 + // for debug : comment it + #define FORCE_SWAP_MODE +#else + #define k_SwapBytes_Mode_NEON 1 +#endif +// typedef uint8x16_t v128; +#define SWAP2_128(i) *(uint8x16_t *) (void *)(items + (i) * 8) = \ + vrev16q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 8)); +#define SWAP4_128(i) *(uint8x16_t *) (void *)(items + (i) * 4) = \ + vrev32q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 4)); + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_NEON +SWAP_ATTRIB_NEON +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + SWAP2_128(0) SWAP2_128(1) items += 2 * 8; + } + while (items != lim); +} + +// Z7_NO_INLINE +static +#ifdef SWAP_ATTRIB_NEON +SWAP_ATTRIB_NEON +#endif +Z7_ATTRIB_NO_VECTORIZE +void +Z7_FASTCALL +SwapBytes4_128(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_128(0) SWAP4_128(1) items += 2 * 4; + SWAP4_128(0) SWAP4_128(1) items += 2 * 4; + } + while (items != lim); +} + +#endif // USE_SWAP_128 + +#else // MY_CPU_ARM_OR_ARM64 +#define FORCE_SWAP_MODE +#endif // MY_CPU_ARM_OR_ARM64 + + + + + + +#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_X86) + /* _byteswap_ushort() in MSVC x86 32-bit works via slow { mov dh, al; mov dl, ah } + So we use own versions of byteswap function */ + #if (_MSC_VER < 1400 ) // old MSVC-X86 without _rotr16() support + #define SWAP2_16(i) { UInt32 v = items[i]; v += (v << 16); v >>= 8; items[i] = (CSwapUInt16)v; } + #else // is new MSVC-X86 with fast _rotr16() + #include + #define SWAP2_16(i) { items[i] = _rotr16(items[i], 8); } + #endif +#else // is not MSVC-X86 + #define SWAP2_16(i) { CSwapUInt16 v = items[i]; items[i] = Z7_BSWAP16(v); } +#endif // MSVC-X86 + +#if defined(Z7_CPU_FAST_BSWAP_SUPPORTED) + #define SWAP4_32(i) { CSwapUInt32 v = items[i]; items[i] = Z7_BSWAP32(v); } +#else + #define SWAP4_32(i) \ + { UInt32 v = items[i]; \ + v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff); \ + v = rotlFixed(v, 16); \ + items[i] = v; } +#endif + + + + +#if defined(FORCE_SWAP_MODE) && defined(USE_SWAP_128) + #define DEFAULT_Swap2 SwapBytes2_128 + #if !defined(MY_CPU_X86_OR_AMD64) + #define DEFAULT_Swap4 SwapBytes4_128 + #endif +#endif + +#if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4) + +#define SWAP_BASE_FUNCS_PREFIXES \ +Z7_FORCE_INLINE \ +static \ +Z7_ATTRIB_NO_VECTOR \ +void Z7_FASTCALL + + +#if defined(MY_CPU_ARM_OR_ARM64) +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wlanguage-extension-token" +#endif +#endif + + +#ifdef MY_CPU_64BIT + +#if defined(MY_CPU_ARM64) \ + && defined(__ARM_ARCH) && (__ARM_ARCH >= 8) \ + && ( (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4))) + + #define SWAP2_64_VAR(v) asm ("rev16 %x0,%x0" : "+r" (v)); + #define SWAP4_64_VAR(v) asm ("rev32 %x0,%x0" : "+r" (v)); + +#else // is not ARM64-GNU + +#if !defined(MY_CPU_X86_OR_AMD64) || (k_SwapBytes_Mode_MIN == 0) || !defined(USE_SWAP_128) + #define SWAP2_64_VAR(v) \ + v = ( 0x00ff00ff00ff00ff & (v >> 8)) \ + + ((0x00ff00ff00ff00ff & v) << 8); + /* plus gives faster code in MSVC */ +#endif + +#ifdef Z7_CPU_FAST_BSWAP_SUPPORTED + #define SWAP4_64_VAR(v) \ + v = Z7_BSWAP64(v); \ + v = Z7_ROTL64(v, 32); +#else + #define SWAP4_64_VAR(v) \ + v = ( 0x000000ff000000ff & (v >> 24)) \ + + ((0x000000ff000000ff & v) << 24 ) \ + + ( 0x0000ff000000ff00 & (v >> 8)) \ + + ((0x0000ff000000ff00 & v) << 8 ) \ + ; +#endif + +#endif // ARM64-GNU + + +#ifdef SWAP2_64_VAR + +#define SWAP2_64(i) { \ + UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 4); \ + SWAP2_64_VAR(v) \ + *(UInt64 *)(void *)(items + (i) * 4) = v; } + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes2_64(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_64(0) SWAP2_64(1) items += 2 * 4; + SWAP2_64(0) SWAP2_64(1) items += 2 * 4; + } + while (items != lim); +} + + #define DEFAULT_Swap2 SwapBytes2_64 + #if !defined(FORCE_SWAP_MODE) + #define SWAP2_DEFAULT_MODE 0 + #endif +#else // !defined(SWAP2_64_VAR) + #define DEFAULT_Swap2 SwapBytes2_128 + #if !defined(FORCE_SWAP_MODE) + #define SWAP2_DEFAULT_MODE 1 + #endif +#endif // SWAP2_64_VAR + + +#define SWAP4_64(i) { \ + UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 2); \ + SWAP4_64_VAR(v) \ + *(UInt64 *)(void *)(items + (i) * 2) = v; } + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes4_64(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_64(0) SWAP4_64(1) items += 2 * 2; + SWAP4_64(0) SWAP4_64(1) items += 2 * 2; + } + while (items != lim); +} + +#define DEFAULT_Swap4 SwapBytes4_64 + +#else // is not 64BIT + + +#if defined(MY_CPU_ARM_OR_ARM64) \ + && defined(__ARM_ARCH) && (__ARM_ARCH >= 6) \ + && ( (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4))) + +#ifdef MY_CPU_64BIT + #define SWAP2_32_VAR(v) asm ("rev16 %w0,%w0" : "+r" (v)); +#else + #define SWAP2_32_VAR(v) asm ("rev16 %0,%0" : "+r" (v)); // for clang/gcc + // asm ("rev16 %r0,%r0" : "+r" (a)); // for gcc +#endif + +#elif defined(_MSC_VER) && (_MSC_VER < 1300) && defined(MY_CPU_X86) \ + || !defined(Z7_CPU_FAST_BSWAP_SUPPORTED) \ + || !defined(Z7_CPU_FAST_ROTATE_SUPPORTED) + // old msvc doesn't support _byteswap_ulong() + #define SWAP2_32_VAR(v) \ + v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff); + +#else // is not ARM and is not old-MSVC-X86 and fast BSWAP/ROTATE are supported + #define SWAP2_32_VAR(v) \ + v = Z7_BSWAP32(v); \ + v = rotlFixed(v, 16); + +#endif // GNU-ARM* + +#define SWAP2_32(i) { \ + UInt32 v = *(const UInt32 *)(const void *)(items + (i) * 2); \ + SWAP2_32_VAR(v); \ + *(UInt32 *)(void *)(items + (i) * 2) = v; } + + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes2_32(CSwapUInt16 *items, const CSwapUInt16 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP2_32(0) SWAP2_32(1) items += 2 * 2; + SWAP2_32(0) SWAP2_32(1) items += 2 * 2; + } + while (items != lim); +} + + +SWAP_BASE_FUNCS_PREFIXES +SwapBytes4_32(CSwapUInt32 *items, const CSwapUInt32 *lim) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + do + { + SWAP4_32(0) SWAP4_32(1) items += 2; + SWAP4_32(0) SWAP4_32(1) items += 2; + } + while (items != lim); +} + +#define DEFAULT_Swap2 SwapBytes2_32 +#define DEFAULT_Swap4 SwapBytes4_32 +#if !defined(FORCE_SWAP_MODE) + #define SWAP2_DEFAULT_MODE 0 +#endif + +#endif // MY_CPU_64BIT +#endif // if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4) + + + +#if !defined(FORCE_SWAP_MODE) +static unsigned g_SwapBytes_Mode; +#endif + +/* size of largest unrolled loop iteration: 128 bytes = 4 * 32 bytes (AVX). */ +#define SWAP_ITERATION_BLOCK_SIZE_MAX (1 << 7) + +// 32 bytes for (AVX) or 2 * 16-bytes for NEON. +#define SWAP_VECTOR_ALIGN_SIZE (1 << 5) + +Z7_NO_INLINE +void z7_SwapBytes2(CSwapUInt16 *items, size_t numItems) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--) + { + SWAP2_16(0) + items++; + } + { + const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt16) - 1; + size_t numItems2 = numItems; + CSwapUInt16 *lim; + numItems &= k_Align_Mask; + numItems2 &= ~(size_t)k_Align_Mask; + lim = items + numItems2; + if (numItems2 != 0) + { + #if !defined(FORCE_SWAP_MODE) + #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_SWAP_AVX2 + if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3) + ShufBytes_256((__m256i *)(void *)items, + (const __m256i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap2[0])); + else + #endif + #ifdef USE_SWAP_SSSE3 + if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3) + ShufBytes_128((__m128i *)(void *)items, + (const __m128i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap2[0])); + else + #endif + #endif // MY_CPU_X86_OR_AMD64 + #if SWAP2_DEFAULT_MODE == 0 + if (g_SwapBytes_Mode != 0) + SwapBytes2_128(items, lim); + else + #endif + #endif // FORCE_SWAP_MODE + DEFAULT_Swap2(items, lim); + } + items = lim; + } + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0; numItems--) + { + SWAP2_16(0) + items++; + } +} + + +Z7_NO_INLINE +void z7_SwapBytes4(CSwapUInt32 *items, size_t numItems) +{ + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--) + { + SWAP4_32(0) + items++; + } + { + const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt32) - 1; + size_t numItems2 = numItems; + CSwapUInt32 *lim; + numItems &= k_Align_Mask; + numItems2 &= ~(size_t)k_Align_Mask; + lim = items + numItems2; + if (numItems2 != 0) + { + #if !defined(FORCE_SWAP_MODE) + #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_SWAP_AVX2 + if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3) + ShufBytes_256((__m256i *)(void *)items, + (const __m256i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + else + #endif + #ifdef USE_SWAP_SSSE3 + if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3) + ShufBytes_128((__m128i *)(void *)items, + (const __m128i *)(const void *)lim, + (const __m128i *)(const void *)&(k_ShufMask_Swap4[0])); + else + #endif + #else // MY_CPU_X86_OR_AMD64 + + if (g_SwapBytes_Mode != 0) + SwapBytes4_128(items, lim); + else + #endif // MY_CPU_X86_OR_AMD64 + #endif // FORCE_SWAP_MODE + DEFAULT_Swap4(items, lim); + } + items = lim; + } + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0; numItems--) + { + SWAP4_32(0) + items++; + } +} + + +// #define SHOW_HW_STATUS + +#ifdef SHOW_HW_STATUS +#include +#define PRF(x) x +#else +#define PRF(x) +#endif + +void z7_SwapBytesPrepare(void) +{ +#ifndef FORCE_SWAP_MODE + unsigned mode = 0; // k_SwapBytes_Mode_BASE; + +#ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + // #pragma message ("=== SwapBytes NEON") + PRF(printf("\n=== SwapBytes NEON\n");) + mode = k_SwapBytes_Mode_NEON; + } + } +#else // MY_CPU_ARM_OR_ARM64 + { + #ifdef USE_SWAP_AVX2 + if (CPU_IsSupported_AVX2()) + { + // #pragma message ("=== SwapBytes AVX2") + PRF(printf("\n=== SwapBytes AVX2\n");) + mode = k_SwapBytes_Mode_AVX2; + } + else + #endif + #ifdef USE_SWAP_SSSE3 + if (CPU_IsSupported_SSSE3()) + { + // #pragma message ("=== SwapBytes SSSE3") + PRF(printf("\n=== SwapBytes SSSE3\n");) + mode = k_SwapBytes_Mode_SSSE3; + } + else + #endif + #if !defined(MY_CPU_AMD64) + if (CPU_IsSupported_SSE2()) + #endif + { + // #pragma message ("=== SwapBytes SSE2") + PRF(printf("\n=== SwapBytes SSE2\n");) + mode = k_SwapBytes_Mode_SSE2; + } + } +#endif // MY_CPU_ARM_OR_ARM64 + g_SwapBytes_Mode = mode; + // g_SwapBytes_Mode = 0; // for debug +#endif // FORCE_SWAP_MODE + PRF(printf("\n=== SwapBytesPrepare\n");) +} + +#undef PRF diff --git a/crnlib/lzma/SwapBytes.h b/crnlib/lzma/SwapBytes.h new file mode 100644 index 00000000..593a3bd6 --- /dev/null +++ b/crnlib/lzma/SwapBytes.h @@ -0,0 +1,17 @@ +/* SwapBytes.h -- Byte Swap conversion filter +2023-04-02 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_SWAP_BYTES_H +#define ZIP7_INC_SWAP_BYTES_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void z7_SwapBytes2(UInt16 *data, size_t numItems); +void z7_SwapBytes4(UInt32 *data, size_t numItems); +void z7_SwapBytesPrepare(void); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Threads.c b/crnlib/lzma/Threads.c new file mode 100644 index 00000000..a04c0058 --- /dev/null +++ b/crnlib/lzma/Threads.c @@ -0,0 +1,581 @@ +/* Threads.c -- multithreading library +2024-03-28 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#ifdef _WIN32 + +#ifndef USE_THREADS_CreateThread +#include +#endif + +#include "Threads.h" + +static WRes GetError(void) +{ + const DWORD res = GetLastError(); + return res ? (WRes)res : 1; +} + +static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); } +static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); } + +WRes HandlePtr_Close(HANDLE *p) +{ + if (*p != NULL) + { + if (!CloseHandle(*p)) + return GetError(); + *p = NULL; + } + return 0; +} + +WRes Handle_WaitObject(HANDLE h) +{ + DWORD dw = WaitForSingleObject(h, INFINITE); + /* + (dw) result: + WAIT_OBJECT_0 // 0 + WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space + WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space + WAIT_FAILED // 0xFFFFFFFF + */ + if (dw == WAIT_FAILED) + { + dw = GetLastError(); + if (dw == 0) + return WAIT_FAILED; + } + return (WRes)dw; +} + +#define Thread_Wait(p) Handle_WaitObject(*(p)) + +WRes Thread_Wait_Close(CThread *p) +{ + WRes res = Thread_Wait(p); + WRes res2 = Thread_Close(p); + return (res != 0 ? res : res2); +} + +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) +{ + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + + #ifdef USE_THREADS_CreateThread + + DWORD threadId; + *p = CreateThread(NULL, 0, func, param, 0, &threadId); + + #else + + unsigned threadId; + *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); + + #endif + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return HandleToWRes(*p); +} + + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + #ifdef USE_THREADS_CreateThread + + UNUSED_VAR(affinity) + return Thread_Create(p, func, param); + + #else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + { + // DWORD_PTR prevMask = + SetThreadAffinityMask(h, (DWORD_PTR)affinity); + /* + if (prevMask == 0) + { + // affinity change is non-critical error, so we can ignore it + // wres = GetError(); + } + */ + } + { + DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return wres; + + #endif +} + + +static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) +{ + *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL); + return HandleToWRes(*p); +} + +WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); } +WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); } + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); } + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore() + *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL); + return HandleToWRes(*p); +} + +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + // if (Semaphore_IsCreated(p)) + { + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + } + return Semaphore_Create(p, initCount, maxCount); +} + +static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount) + { return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); } +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num) + { return Semaphore_Release(p, (LONG)num, NULL); } +WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); } + +WRes CriticalSection_Init(CCriticalSection *p) +{ + /* InitializeCriticalSection() can raise exception: + Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception + Windows Vista+ : no exceptions */ + #ifdef _MSC_VER + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wlanguage-extension-token" + #endif + __try + #endif + { + InitializeCriticalSection(p); + /* InitializeCriticalSectionAndSpinCount(p, 0); */ + } + #ifdef _MSC_VER + __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; } + #endif + return 0; +} + + + + +#else // _WIN32 + +// ---------- POSIX ---------- + +#if defined(__linux__) && !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) +#ifndef Z7_AFFINITY_DISABLE +// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET +// clang < 3.6 : unknown warning group '-Wreserved-id-macro' +// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" +// clang >= 13 : do not give warning +#if !defined(_GNU_SOURCE) +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif // !defined(_GNU_SOURCE) +#endif // Z7_AFFINITY_DISABLE +#endif // __linux__ + +#include "Threads.h" + +#include +#include +#include +#ifdef Z7_AFFINITY_SUPPORTED +// #include +#endif + + +// #include +// #define PRF(p) p +#define PRF(p) +#define Print(s) PRF(printf("\n%s\n", s);) + +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) +{ + // new thread in Posix probably inherits affinity from parrent thread + Print("Thread_Create_With_CpuSet") + + pthread_attr_t attr; + int ret; + // int ret2; + + p->_created = 0; + + RINOK(pthread_attr_init(&attr)) + + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + if (!ret) + { + if (cpuSet) + { + // pthread_attr_setaffinity_np() is not supported for MUSL compile. + // so we check for __GLIBC__ here +#if defined(Z7_AFFINITY_SUPPORTED) && defined( __GLIBC__) + /* + printf("\n affinity :"); + unsigned i; + for (i = 0; i < sizeof(*cpuSet) && i < 8; i++) + { + Byte b = *((const Byte *)cpuSet + i); + char temp[32]; + #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) + temp[0] = GET_HEX_CHAR((b & 0xF)); + temp[1] = GET_HEX_CHAR((b >> 4)); + // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian + // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian + temp[2] = 0; + printf("%s", temp); + } + printf("\n"); + */ + + // ret2 = + pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; +#endif + } + + ret = pthread_create(&p->_tid, &attr, func, param); + + if (!ret) + { + p->_created = 1; + /* + if (cpuSet) + { + // ret2 = + pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + } + */ + } + } + // ret2 = + pthread_attr_destroy(&attr); + // if (ret2 != 0) ret = ret2; + return ret; +} + + +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) +{ + return Thread_Create_With_CpuSet(p, func, param, NULL); +} + + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + Print("Thread_Create_WithAffinity") + CCpuSet cs; + unsigned i; + CpuSet_Zero(&cs); + for (i = 0; i < sizeof(affinity) * 8; i++) + { + if (affinity == 0) + break; + if (affinity & 1) + { + CpuSet_Set(&cs, i); + } + affinity >>= 1; + } + return Thread_Create_With_CpuSet(p, func, param, &cs); +} + + +WRes Thread_Close(CThread *p) +{ + // Print("Thread_Close") + int ret; + if (!p->_created) + return 0; + + ret = pthread_detach(p->_tid); + p->_tid = 0; + p->_created = 0; + return ret; +} + + +WRes Thread_Wait_Close(CThread *p) +{ + // Print("Thread_Wait_Close") + void *thread_return; + int ret; + if (!p->_created) + return EINVAL; + + ret = pthread_join(p->_tid, &thread_return); + // probably we can't use that (_tid) after pthread_join(), so we close thread here + p->_created = 0; + p->_tid = 0; + return ret; +} + + + +static WRes Event_Create(CEvent *p, int manualReset, int signaled) +{ + RINOK(pthread_mutex_init(&p->_mutex, NULL)) + RINOK(pthread_cond_init(&p->_cond, NULL)) + p->_manual_reset = manualReset; + p->_state = (signaled ? True : False); + p->_created = 1; + return 0; +} + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) + { return Event_Create(p, True, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) + { return Event_Create(p, False, signaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) +// freebsd: +#pragma GCC diagnostic ignored "-Wthread-safety-analysis" +#endif + +WRes Event_Set(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)) + p->_state = True; + { + const int res1 = pthread_cond_broadcast(&p->_cond); + const int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); + } +} + +WRes Event_Reset(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)) + p->_state = False; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Wait(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)) + while (p->_state == False) + { + // ETIMEDOUT + // ret = + pthread_cond_wait(&p->_cond, &p->_mutex); + // if (ret != 0) break; + } + if (p->_manual_reset == False) + { + p->_state = False; + } + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Close(CEvent *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + RINOK(pthread_mutex_init(&p->_mutex, NULL)) + RINOK(pthread_cond_init(&p->_cond, NULL)) + p->_count = initCount; + p->_maxCount = maxCount; + p->_created = 1; + return 0; +} + + +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (Semaphore_IsCreated(p)) + { + /* + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + */ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + // return EINVAL; // for debug + p->_count = initCount; + p->_maxCount = maxCount; + return 0; + } + return Semaphore_Create(p, initCount, maxCount); +} + + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + int ret; + + if (releaseCount < 1) + return EINVAL; + + RINOK(pthread_mutex_lock(&p->_mutex)) + + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + ret = ERROR_TOO_MANY_POSTS; // EINVAL; + else + { + p->_count = newCount; + ret = pthread_cond_broadcast(&p->_cond); + } + RINOK(pthread_mutex_unlock(&p->_mutex)) + return ret; +} + +WRes Semaphore_Wait(CSemaphore *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)) + while (p->_count < 1) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + p->_count--; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Semaphore_Close(CSemaphore *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + + +WRes CriticalSection_Init(CCriticalSection *p) +{ + // Print("CriticalSection_Init") + if (!p) + return EINTR; + return pthread_mutex_init(&p->_mutex, NULL); +} + +void CriticalSection_Enter(CCriticalSection *p) +{ + // Print("CriticalSection_Enter") + if (p) + { + // int ret = + pthread_mutex_lock(&p->_mutex); + } +} + +void CriticalSection_Leave(CCriticalSection *p) +{ + // Print("CriticalSection_Leave") + if (p) + { + // int ret = + pthread_mutex_unlock(&p->_mutex); + } +} + +void CriticalSection_Delete(CCriticalSection *p) +{ + // Print("CriticalSection_Delete") + if (p) + { + // int ret = + pthread_mutex_destroy(&p->_mutex); + } +} + +LONG InterlockedIncrement(LONG volatile *addend) +{ + // Print("InterlockedIncrement") + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend + 1; + *addend = val; + return val; + #else + + #if defined(__clang__) && (__clang_major__ >= 8) + #pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst" + #endif + return __sync_add_and_fetch(addend, 1); + #endif +} + +LONG InterlockedDecrement(LONG volatile *addend) +{ + // Print("InterlockedDecrement") + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend - 1; + *addend = val; + return val; + #else + return __sync_sub_and_fetch(addend, 1); + #endif +} + +#endif // _WIN32 + +WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) +{ + if (Event_IsCreated(p)) + return Event_Reset(p); + return AutoResetEvent_CreateNotSignaled(p); +} + +#undef PRF +#undef Print diff --git a/crnlib/lzma/Threads.h b/crnlib/lzma/Threads.h new file mode 100644 index 00000000..2428e887 --- /dev/null +++ b/crnlib/lzma/Threads.h @@ -0,0 +1,250 @@ +/* Threads.h -- multithreading library +2024-03-28 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_THREADS_H +#define ZIP7_INC_THREADS_H + +#ifdef _WIN32 +#include "7zWindows.h" + +#else + +#include "Compiler.h" + +// #define Z7_AFFINITY_DISABLE +#if defined(__linux__) +#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) +#ifndef Z7_AFFINITY_DISABLE +#define Z7_AFFINITY_SUPPORTED +// #pragma message(" ==== Z7_AFFINITY_SUPPORTED") +#if !defined(_GNU_SOURCE) +// #pragma message(" ==== _GNU_SOURCE set") +// we need _GNU_SOURCE for cpu_set_t, if we compile for MUSL +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif +#endif +#endif +#endif + +#include + +#endif + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#ifdef _WIN32 + +WRes HandlePtr_Close(HANDLE *h); +WRes Handle_WaitObject(HANDLE h); + +typedef HANDLE CThread; + +#define Thread_CONSTRUCT(p) { *(p) = NULL; } +#define Thread_WasCreated(p) (*(p) != NULL) +#define Thread_Close(p) HandlePtr_Close(p) +// #define Thread_Wait(p) Handle_WaitObject(*(p)) + +#ifdef UNDER_CE + // if (USE_THREADS_CreateThread is defined), we use _beginthreadex() + // if (USE_THREADS_CreateThread is not definned), we use CreateThread() + #define USE_THREADS_CreateThread +#endif + +typedef + #ifdef USE_THREADS_CreateThread + DWORD + #else + unsigned + #endif + THREAD_FUNC_RET_TYPE; + +#define THREAD_FUNC_RET_ZERO 0 + +typedef DWORD_PTR CAffinityMask; +typedef DWORD_PTR CCpuSet; + +#define CpuSet_Zero(p) *(p) = (0) +#define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu)) + +#else // _WIN32 + +typedef struct +{ + pthread_t _tid; + int _created; +} CThread; + +#define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; } +#define Thread_WasCreated(p) ((p)->_created != 0) +WRes Thread_Close(CThread *p); +// #define Thread_Wait Thread_Wait_Close + +typedef void * THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_RET_ZERO NULL + + +typedef UInt64 CAffinityMask; + +#ifdef Z7_AFFINITY_SUPPORTED + +typedef cpu_set_t CCpuSet; +#define CpuSet_Zero(p) CPU_ZERO(p) +#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) +#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) + +#else + +typedef UInt64 CCpuSet; +#define CpuSet_Zero(p) *(p) = (0) +#define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu)) +#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) + +#endif + + +#endif // _WIN32 + + +#define THREAD_FUNC_CALL_TYPE Z7_STDCALL + +#if defined(_WIN32) && defined(__GNUC__) +/* GCC compiler for x86 32-bit uses the rule: + the stack is 16-byte aligned before CALL instruction for function calling. + But only root function main() contains instructions that + set 16-byte alignment for stack pointer. And another functions + just keep alignment, if it was set in some parent function. + + The problem: + if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(), + the root function of thread doesn't set 16-byte alignment. + And stack frames in all child functions also will be unaligned in that case. + + Here we set (force_align_arg_pointer) attribute for root function of new thread. + Do we need (force_align_arg_pointer) also for another systems? */ + + #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer)) + // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions +#else + #define THREAD_FUNC_ATTRIB_ALIGN_ARG +#endif + +#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + +typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param); +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity); +WRes Thread_Wait_Close(CThread *p); + +#ifdef _WIN32 +#define Thread_Create_With_CpuSet(p, func, param, cs) \ + Thread_Create_With_Affinity(p, func, param, *cs) +#else +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); +#endif + + +#ifdef _WIN32 + +typedef HANDLE CEvent; +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; +#define Event_Construct(p) *(p) = NULL +#define Event_IsCreated(p) (*(p) != NULL) +#define Event_Close(p) HandlePtr_Close(p) +#define Event_Wait(p) Handle_WaitObject(*(p)) +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); + +typedef HANDLE CSemaphore; +#define Semaphore_Construct(p) *(p) = NULL +#define Semaphore_IsCreated(p) (*(p) != NULL) +#define Semaphore_Close(p) HandlePtr_Close(p) +#define Semaphore_Wait(p) Handle_WaitObject(*(p)) +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +WRes Semaphore_Release1(CSemaphore *p); + +typedef CRITICAL_SECTION CCriticalSection; +WRes CriticalSection_Init(CCriticalSection *p); +#define CriticalSection_Delete(p) DeleteCriticalSection(p) +#define CriticalSection_Enter(p) EnterCriticalSection(p) +#define CriticalSection_Leave(p) LeaveCriticalSection(p) + + +#else // _WIN32 + +typedef struct +{ + int _created; + int _manual_reset; + int _state; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(p) (p)->_created = 0 +#define Event_IsCreated(p) ((p)->_created) + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); + +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes Event_Wait(CEvent *p); +WRes Event_Close(CEvent *p); + + +typedef struct +{ + int _created; + UInt32 _count; + UInt32 _maxCount; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->_created = 0 +#define Semaphore_IsCreated(p) ((p)->_created) + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + + +typedef struct +{ + pthread_mutex_t _mutex; +} CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +void CriticalSection_Delete(CCriticalSection *cs); +void CriticalSection_Enter(CCriticalSection *cs); +void CriticalSection_Leave(CCriticalSection *cs); + +LONG InterlockedIncrement(LONG volatile *addend); +LONG InterlockedDecrement(LONG volatile *addend); + +#endif // _WIN32 + +WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/Xz.c b/crnlib/lzma/Xz.c new file mode 100644 index 00000000..92bae029 --- /dev/null +++ b/crnlib/lzma/Xz.c @@ -0,0 +1,92 @@ +/* Xz.c - Xz +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "7zCrc.h" +#include "CpuArch.h" +#include "Xz.h" +#include "XzCrc64.h" + +const Byte XZ_SIG[XZ_SIG_SIZE] = { 0xFD, '7', 'z', 'X', 'Z', 0 }; +/* const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE] = { 'Y', 'Z' }; */ + +unsigned Xz_WriteVarInt(Byte *buf, UInt64 v) +{ + unsigned i = 0; + do + { + buf[i++] = (Byte)((v & 0x7F) | 0x80); + v >>= 7; + } + while (v != 0); + buf[(size_t)i - 1] &= 0x7F; + return i; +} + +void Xz_Construct(CXzStream *p) +{ + p->numBlocks = 0; + p->blocks = NULL; + p->flags = 0; +} + +void Xz_Free(CXzStream *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->blocks); + p->numBlocks = 0; + p->blocks = NULL; +} + +unsigned XzFlags_GetCheckSize(CXzStreamFlags f) +{ + unsigned t = XzFlags_GetCheckType(f); + return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3)); +} + +void XzCheck_Init(CXzCheck *p, unsigned mode) +{ + p->mode = mode; + switch (mode) + { + case XZ_CHECK_CRC32: p->crc = CRC_INIT_VAL; break; + case XZ_CHECK_CRC64: p->crc64 = CRC64_INIT_VAL; break; + case XZ_CHECK_SHA256: Sha256_Init(&p->sha); break; + default: break; + } +} + +void XzCheck_Update(CXzCheck *p, const void *data, size_t size) +{ + switch (p->mode) + { + case XZ_CHECK_CRC32: p->crc = CrcUpdate(p->crc, data, size); break; + case XZ_CHECK_CRC64: p->crc64 = Crc64Update(p->crc64, data, size); break; + case XZ_CHECK_SHA256: Sha256_Update(&p->sha, (const Byte *)data, size); break; + default: break; + } +} + +int XzCheck_Final(CXzCheck *p, Byte *digest) +{ + switch (p->mode) + { + case XZ_CHECK_CRC32: + SetUi32(digest, CRC_GET_DIGEST(p->crc)) + break; + case XZ_CHECK_CRC64: + { + int i; + UInt64 v = CRC64_GET_DIGEST(p->crc64); + for (i = 0; i < 8; i++, v >>= 8) + digest[i] = (Byte)(v & 0xFF); + break; + } + case XZ_CHECK_SHA256: + Sha256_Final(&p->sha, digest); + break; + default: + return 0; + } + return 1; +} diff --git a/crnlib/lzma/Xz.h b/crnlib/lzma/Xz.h new file mode 100644 index 00000000..34502e99 --- /dev/null +++ b/crnlib/lzma/Xz.h @@ -0,0 +1,536 @@ +/* Xz.h - Xz interface +2024-01-26 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_XZ_H +#define ZIP7_INC_XZ_H + +#include "Sha256.h" +#include "Delta.h" + +EXTERN_C_BEGIN + +#define XZ_ID_Subblock 1 +#define XZ_ID_Delta 3 +#define XZ_ID_X86 4 +#define XZ_ID_PPC 5 +#define XZ_ID_IA64 6 +#define XZ_ID_ARM 7 +#define XZ_ID_ARMT 8 +#define XZ_ID_SPARC 9 +#define XZ_ID_ARM64 0xa +#define XZ_ID_RISCV 0xb +#define XZ_ID_LZMA2 0x21 + +unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); +unsigned Xz_WriteVarInt(Byte *buf, UInt64 v); + +/* ---------- xz block ---------- */ + +#define XZ_BLOCK_HEADER_SIZE_MAX 1024 + +#define XZ_NUM_FILTERS_MAX 4 +#define XZ_BF_NUM_FILTERS_MASK 3 +#define XZ_BF_PACK_SIZE (1 << 6) +#define XZ_BF_UNPACK_SIZE (1 << 7) + +#define XZ_FILTER_PROPS_SIZE_MAX 20 + +typedef struct +{ + UInt64 id; + UInt32 propsSize; + Byte props[XZ_FILTER_PROPS_SIZE_MAX]; +} CXzFilter; + +typedef struct +{ + UInt64 packSize; + UInt64 unpackSize; + Byte flags; + CXzFilter filters[XZ_NUM_FILTERS_MAX]; +} CXzBlock; + +#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1) +#define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0) +#define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0) +#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) + +SRes XzBlock_Parse(CXzBlock *p, const Byte *header); +SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes); + +/* ---------- xz stream ---------- */ + +#define XZ_SIG_SIZE 6 +#define XZ_FOOTER_SIG_SIZE 2 + +extern const Byte XZ_SIG[XZ_SIG_SIZE]; + +/* +extern const Byte XZ_FOOTER_SIG[XZ_FOOTER_SIG_SIZE]; +*/ + +#define XZ_FOOTER_SIG_0 'Y' +#define XZ_FOOTER_SIG_1 'Z' + +#define XZ_STREAM_FLAGS_SIZE 2 +#define XZ_STREAM_CRC_SIZE 4 + +#define XZ_STREAM_HEADER_SIZE (XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE) +#define XZ_STREAM_FOOTER_SIZE (XZ_FOOTER_SIG_SIZE + XZ_STREAM_FLAGS_SIZE + XZ_STREAM_CRC_SIZE + 4) + +#define XZ_CHECK_MASK 0xF +#define XZ_CHECK_NO 0 +#define XZ_CHECK_CRC32 1 +#define XZ_CHECK_CRC64 4 +#define XZ_CHECK_SHA256 10 + +typedef struct +{ + unsigned mode; + UInt32 crc; + UInt64 crc64; + CSha256 sha; +} CXzCheck; + +void XzCheck_Init(CXzCheck *p, unsigned mode); +void XzCheck_Update(CXzCheck *p, const void *data, size_t size); +int XzCheck_Final(CXzCheck *p, Byte *digest); + +typedef UInt16 CXzStreamFlags; + +#define XzFlags_IsSupported(f) ((f) <= XZ_CHECK_MASK) +#define XzFlags_GetCheckType(f) ((f) & XZ_CHECK_MASK) +#define XzFlags_HasDataCrc32(f) (Xz_GetCheckType(f) == XZ_CHECK_CRC32) +unsigned XzFlags_GetCheckSize(CXzStreamFlags f); + +SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); +SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream); + +typedef struct +{ + UInt64 unpackSize; + UInt64 totalSize; +} CXzBlockSizes; + +typedef struct +{ + CXzStreamFlags flags; + // Byte _pad[6]; + size_t numBlocks; + CXzBlockSizes *blocks; + UInt64 startOffset; +} CXzStream; + +void Xz_Construct(CXzStream *p); +void Xz_Free(CXzStream *p, ISzAllocPtr alloc); + +#define XZ_SIZE_OVERFLOW ((UInt64)(Int64)-1) + +UInt64 Xz_GetUnpackSize(const CXzStream *p); +UInt64 Xz_GetPackSize(const CXzStream *p); + +typedef struct +{ + size_t num; + size_t numAllocated; + CXzStream *streams; +} CXzs; + +void Xzs_Construct(CXzs *p); +void Xzs_Free(CXzs *p, ISzAllocPtr alloc); +SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); + +UInt64 Xzs_GetNumBlocks(const CXzs *p); +UInt64 Xzs_GetUnpackSize(const CXzs *p); + + +// ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder + +typedef enum +{ + CODER_STATUS_NOT_SPECIFIED, /* use main error code instead */ + CODER_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + CODER_STATUS_NOT_FINISHED, /* stream was not finished */ + CODER_STATUS_NEEDS_MORE_INPUT /* you must provide more input bytes */ +} ECoderStatus; + + +// ECoderFinishMode values are identical to ELzmaFinishMode + +typedef enum +{ + CODER_FINISH_ANY, /* finish at any point */ + CODER_FINISH_END /* block must be finished at the end */ +} ECoderFinishMode; + + +typedef struct +{ + void *p; // state object; + void (*Free)(void *p, ISzAllocPtr alloc); + SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc); + void (*Init)(void *p); + SRes (*Code2)(void *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + int srcWasFinished, ECoderFinishMode finishMode, + // int *wasFinished, + ECoderStatus *status); + SizeT (*Filter)(void *p, Byte *data, SizeT size); +} IStateCoder; + + +typedef struct +{ + UInt32 methodId; + UInt32 delta; + UInt32 ip; + UInt32 X86_State; + Byte delta_State[DELTA_STATE_SIZE]; +} CXzBcFilterStateBase; + +typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size); + +SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, + Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc); + + +#define MIXCODER_NUM_FILTERS_MAX 4 + +typedef struct +{ + ISzAllocPtr alloc; + Byte *buf; + unsigned numCoders; + + Byte *outBuf; + size_t outBufSize; + size_t outWritten; // is equal to lzmaDecoder.dicPos (in outBuf mode) + BoolInt wasFinished; + SRes res; + ECoderStatus status; + // BoolInt SingleBufMode; + + int finished[MIXCODER_NUM_FILTERS_MAX - 1]; + size_t pos[MIXCODER_NUM_FILTERS_MAX - 1]; + size_t size[MIXCODER_NUM_FILTERS_MAX - 1]; + UInt64 ids[MIXCODER_NUM_FILTERS_MAX]; + SRes results[MIXCODER_NUM_FILTERS_MAX]; + IStateCoder coders[MIXCODER_NUM_FILTERS_MAX]; +} CMixCoder; + + +typedef enum +{ + XZ_STATE_STREAM_HEADER, + XZ_STATE_STREAM_INDEX, + XZ_STATE_STREAM_INDEX_CRC, + XZ_STATE_STREAM_FOOTER, + XZ_STATE_STREAM_PADDING, + XZ_STATE_BLOCK_HEADER, + XZ_STATE_BLOCK, + XZ_STATE_BLOCK_FOOTER +} EXzState; + + +typedef struct +{ + EXzState state; + unsigned pos; + unsigned alignPos; + unsigned indexPreSize; + + CXzStreamFlags streamFlags; + + unsigned blockHeaderSize; + UInt64 packSize; + UInt64 unpackSize; + + UInt64 numBlocks; // number of finished blocks in current stream + UInt64 indexSize; + UInt64 indexPos; + UInt64 padSize; + + UInt64 numStartedStreams; + UInt64 numFinishedStreams; + UInt64 numTotalBlocks; + + UInt32 crc; + CMixCoder decoder; + CXzBlock block; + CXzCheck check; + CSha256 sha; + + BoolInt parseMode; + BoolInt headerParsedOk; + BoolInt decodeToStreamSignature; + unsigned decodeOnlyOneBlock; + + Byte *outBuf; + size_t outBufSize; + size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked + + Byte shaDigest[SHA256_DIGEST_SIZE]; + Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; +} CXzUnpacker; + +/* alloc : aligned for cache line allocation is better */ +void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc); +void XzUnpacker_Init(CXzUnpacker *p); +void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize); +void XzUnpacker_Free(CXzUnpacker *p); + +/* + XzUnpacker + The sequence for decoding functions: + { + XzUnpacker_Construct() + [Decoding_Calls] + XzUnpacker_Free() + } + + [Decoding_Calls] + + There are 3 types of interfaces for [Decoding_Calls] calls: + + Interface-1 : Partial output buffers: + { + XzUnpacker_Init() + for() + { + XzUnpacker_Code(); + } + XzUnpacker_IsStreamWasFinished() + } + + Interface-2 : Direct output buffer: + Use it, if you know exact size of decoded data, and you need + whole xz unpacked data in one output buffer. + xz unpacker doesn't allocate additional buffer for lzma2 dictionary in that mode. + { + XzUnpacker_Init() + XzUnpacker_SetOutBufMode(); // to set output buffer and size + for() + { + XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code() + } + XzUnpacker_IsStreamWasFinished() + } + + Interface-3 : Direct output buffer : One call full decoding + It unpacks whole input buffer to output buffer in one call. + It uses Interface-2 internally. + { + XzUnpacker_CodeFull() + XzUnpacker_IsStreamWasFinished() + } +*/ + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + CODER_FINISH_ANY - use smallest number of input bytes + CODER_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + CODER_STATUS_NOT_FINISHED, + CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases: + 1) it needs more input data to finish current xz stream + 2) xz stream was finished successfully. But the decoder supports multiple + concatented xz streams. So it expects more input data for new xz streams. + Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully. + + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_DATA - Data error + SZ_ERROR_UNSUPPORTED - Unsupported method or method properties + SZ_ERROR_CRC - CRC error + // SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + + SZ_ERROR_NO_ARCHIVE - the error with xz Stream Header with one of the following reasons: + - xz Stream Signature failure + - CRC32 of xz Stream Header is failed + - The size of Stream padding is not multiple of four bytes. + It's possible to get that error, if xz stream was finished and the stream + contains some another data. In that case you can call XzUnpacker_GetExtraSize() + function to get real size of xz stream. +*/ + + +SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, int srcFinished, + ECoderFinishMode finishMode, ECoderStatus *status); + +SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, + ECoderFinishMode finishMode, ECoderStatus *status); + +/* +If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished() +after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code(). +*/ + +BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p); + +/* +XzUnpacker_GetExtraSize() returns then number of unconfirmed bytes, + if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state. +These bytes can be some data after xz archive, or +it can be start of new xz stream. + +Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of +xz stream in two cases, if XzUnpacker_Code() returns: + res == SZ_OK && status == CODER_STATUS_NEEDS_MORE_INPUT + res == SZ_ERROR_NO_ARCHIVE +*/ + +UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p); + + +/* + for random block decoding: + XzUnpacker_Init(); + set CXzUnpacker::streamFlags + XzUnpacker_PrepareToRandomBlockDecoding() + loop + { + XzUnpacker_Code() + XzUnpacker_IsBlockFinished() + } +*/ + +void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p); +BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p); + +#define XzUnpacker_GetPackSizeForIndex(p) ((p)->packSize + (p)->blockHeaderSize + XzFlags_GetCheckSize((p)->streamFlags)) + + + + + + +/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */ + +/* + if (CXzDecMtProps::numThreads > 1), the decoder can try to use + Multi-Threading. The decoder analyses xz block header, and if + there are pack size and unpack size values stored in xz block header, + the decoder reads compressed data of block to internal buffers, + and then it can start parallel decoding, if there are another blocks. + The decoder can switch back to Single-Thread decoding after some conditions. + + The sequence of calls for xz decoding with in/out Streams: + { + XzDecMt_Create() + XzDecMtProps_Init(XzDecMtProps) to set default values of properties + // then you can change some XzDecMtProps parameters with required values + // here you can set the number of threads and (memUseMax) - the maximum + Memory usage for multithreading decoding. + for() + { + XzDecMt_Decode() // one call per one file + } + XzDecMt_Destroy() + } +*/ + + +typedef struct +{ + size_t inBufSize_ST; // size of input buffer for Single-Thread decoding + size_t outStep_ST; // size of output buffer for Single-Thread decoding + BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. + + #ifndef Z7_ST + unsigned numThreads; // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding + size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created + size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. + // it's recommended to set (memUseMax) manually to value that is smaller of total size of RAM in computer. + #endif +} CXzDecMtProps; + +void XzDecMtProps_Init(CXzDecMtProps *p); + +typedef struct CXzDecMt CXzDecMt; +typedef CXzDecMt * CXzDecMtHandle; +// Z7_DECLARE_HANDLE(CXzDecMtHandle) + +/* + alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). + allocMid : for big allocations, aligned allocation is better +*/ + +CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); +void XzDecMt_Destroy(CXzDecMtHandle p); + + +typedef struct +{ + Byte UnpackSize_Defined; + Byte NumStreams_Defined; + Byte NumBlocks_Defined; + + Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream. + Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data + + UInt64 InSize; // pack size processed. That value doesn't include the data after + // end of xz stream, if that data was not correct + UInt64 OutSize; + + UInt64 NumStreams; + UInt64 NumBlocks; + + SRes DecodeRes; // the error code of xz streams data decoding + SRes ReadRes; // error code from ISeqInStream:Read() + SRes ProgressRes; // error code from ICompressProgress:Progress() + + SRes CombinedRes; // Combined result error code that shows main rusult + // = S_OK, if there is no error. + // but check also (DataAfterEnd) that can show additional minor errors. + + SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream + // = SZ_ERROR_PROGRESS, if error from ICompressProgress + // = SZ_ERROR_WRITE, if error from ISeqOutStream + // = SZ_ERROR_* codes for decoding +} CXzStatInfo; + +void XzStatInfo_Clear(CXzStatInfo *p); + +/* + +XzDecMt_Decode() +SRes: it's combined decoding result. It also is equal to stat->CombinedRes. + + SZ_OK - no error + check also output value in (stat->DataAfterEnd) + that can show additional possible error + + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_NO_ARCHIVE - is not xz archive + SZ_ERROR_ARCHIVE - Headers error + SZ_ERROR_DATA - Data Error + SZ_ERROR_UNSUPPORTED - Unsupported method or method properties + SZ_ERROR_CRC - CRC Error + SZ_ERROR_INPUT_EOF - it needs more input data + SZ_ERROR_WRITE - ISeqOutStream error + (SZ_ERROR_READ) - ISeqInStream errors + (SZ_ERROR_PROGRESS) - ICompressProgress errors + // SZ_ERROR_THREAD - error in multi-threading functions + MY_SRes_HRESULT_FROM_WRes(WRes_error) - error in multi-threading function +*/ + +SRes XzDecMt_Decode(CXzDecMtHandle p, + const CXzDecMtProps *props, + const UInt64 *outDataSize, // NULL means undefined + int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished + ISeqOutStreamPtr outStream, + // Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + // const Byte *inData, size_t inDataSize, + CXzStatInfo *stat, // out: decoding results and statistics + int *isMT, // out: 0 means that ST (Single-Thread) version was used + // 1 means that MT (Multi-Thread) version was used + ICompressProgressPtr progress); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/XzCrc64.c b/crnlib/lzma/XzCrc64.c new file mode 100644 index 00000000..a0023a6f --- /dev/null +++ b/crnlib/lzma/XzCrc64.c @@ -0,0 +1,140 @@ +/* XzCrc64.c -- CRC64 calculation +2023-12-08 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "XzCrc64.h" +#include "CpuArch.h" + +#define kCrc64Poly UINT64_CONST(0xC96C5795D7870F42) + +// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC64_DEBUG_BE +#ifdef Z7_CRC64_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +#ifdef Z7_CRC64_NUM_TABLES + #define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES +#else + #define Z7_CRC64_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC64_NUM_TABLES_USE < 1 + #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES +#endif + + +#if Z7_CRC64_NUM_TABLES_USE != 1 + +#ifndef MY_CPU_BE + #define FUNC_NAME_LE_2(s) XzCrc64UpdateT ## s + #define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s) + #define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC64_NUM_TABLES_USE) + UInt64 Z7_FASTCALL FUNC_NAME_LE (UInt64 v, const void *data, size_t size, const UInt64 *table); +#endif +#ifndef MY_CPU_LE + #define FUNC_NAME_BE_2(s) XzCrc64UpdateBeT ## s + #define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s) + #define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC64_NUM_TABLES_USE) + UInt64 Z7_FASTCALL FUNC_NAME_BE (UInt64 v, const void *data, size_t size, const UInt64 *table); +#endif + +#if defined(MY_CPU_LE) + #define FUNC_REF FUNC_NAME_LE +#elif defined(MY_CPU_BE) + #define FUNC_REF FUNC_NAME_BE +#else + #define FUNC_REF g_Crc64Update + static UInt64 (Z7_FASTCALL *FUNC_REF)(UInt64 v, const void *data, size_t size, const UInt64 *table); +#endif + +#endif + + +MY_ALIGN(64) +static UInt64 g_Crc64Table[256 * Z7_CRC64_NUM_TABLES_USE]; + + +UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size) +{ +#if Z7_CRC64_NUM_TABLES_USE == 1 + #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + const UInt64 *table = g_Crc64Table; + const Byte *p = (const Byte *)data; + const Byte *lim = p + size; + for (; p != lim; p++) + v = CRC64_UPDATE_BYTE_2(v, *p); + return v; + #undef CRC64_UPDATE_BYTE_2 +#else + return FUNC_REF (v, data, size, g_Crc64Table); +#endif +} + + +Z7_NO_INLINE +void Z7_FASTCALL Crc64GenerateTable(void) +{ + unsigned i; + for (i = 0; i < 256; i++) + { + UInt64 r = i; + unsigned j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrc64Poly & ((UInt64)0 - (r & 1))); + g_Crc64Table[i] = r; + } + +#if Z7_CRC64_NUM_TABLES_USE != 1 +#if 1 || 1 && defined(MY_CPU_X86) // low register count + for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i++) + { + const UInt64 r0 = g_Crc64Table[(size_t)i]; + g_Crc64Table[(size_t)i + 256] = g_Crc64Table[(Byte)r0] ^ (r0 >> 8); + } +#else + for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i += 2) + { + UInt64 r0 = g_Crc64Table[(size_t)(i) ]; + UInt64 r1 = g_Crc64Table[(size_t)(i) + 1]; + r0 = g_Crc64Table[(Byte)r0] ^ (r0 >> 8); + r1 = g_Crc64Table[(Byte)r1] ^ (r1 >> 8); + g_Crc64Table[(size_t)i + 256 ] = r0; + g_Crc64Table[(size_t)i + 256 + 1] = r1; + } +#endif + +#ifndef MY_CPU_LE + { +#ifndef MY_CPU_BE + UInt32 k = 1; + if (*(const Byte *)&k == 1) + FUNC_REF = FUNC_NAME_LE; + else +#endif + { +#ifndef MY_CPU_BE + FUNC_REF = FUNC_NAME_BE; +#endif + for (i = 0; i < 256 * Z7_CRC64_NUM_TABLES_USE; i++) + { + const UInt64 x = g_Crc64Table[i]; + g_Crc64Table[i] = Z7_BSWAP64(x); + } + } + } +#endif // ndef MY_CPU_LE +#endif // Z7_CRC64_NUM_TABLES_USE != 1 +} + +#undef kCrc64Poly +#undef Z7_CRC64_NUM_TABLES_USE +#undef FUNC_REF +#undef FUNC_NAME_LE_2 +#undef FUNC_NAME_LE_1 +#undef FUNC_NAME_LE +#undef FUNC_NAME_BE_2 +#undef FUNC_NAME_BE_1 +#undef FUNC_NAME_BE diff --git a/crnlib/lzma/XzCrc64.h b/crnlib/lzma/XzCrc64.h new file mode 100644 index 00000000..4d9e9f82 --- /dev/null +++ b/crnlib/lzma/XzCrc64.h @@ -0,0 +1,26 @@ +/* XzCrc64.h -- CRC64 calculation +2023-12-08 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_XZ_CRC64_H +#define ZIP7_INC_XZ_CRC64_H + +#include + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +// extern UInt64 g_Crc64Table[]; + +void Z7_FASTCALL Crc64GenerateTable(void); + +#define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF) +#define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL) +// #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size); +// UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/XzCrc64Opt.c b/crnlib/lzma/XzCrc64Opt.c new file mode 100644 index 00000000..c855dd59 --- /dev/null +++ b/crnlib/lzma/XzCrc64Opt.c @@ -0,0 +1,261 @@ +/* XzCrc64Opt.c -- CRC64 calculation (optimized functions) +2023-12-08 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1 + +// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu +// #define Z7_CRC64_DEBUG_BE +#ifdef Z7_CRC64_DEBUG_BE +#undef MY_CPU_LE +#define MY_CPU_BE +#endif + +#if defined(MY_CPU_64BIT) +#define Z7_CRC64_USE_64BIT +#endif + +// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c +#ifdef Z7_CRC64_NUM_TABLES +#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES +#else +#define Z7_CRC64_NUM_TABLES_USE 12 +#endif + +#if Z7_CRC64_NUM_TABLES_USE % 4 || \ + Z7_CRC64_NUM_TABLES_USE < 4 || \ + Z7_CRC64_NUM_TABLES_USE > 4 * 4 + #error Stop_Compiling_Bad_CRC64_NUM_TABLES +#endif + + +#ifndef MY_CPU_BE + +#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) + +#define Q64LE(n, d) \ + ( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \ + ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] ) + +#define R64(a) *((const UInt64 *)(const void *)p + (a)) + +#else + +#define Q32LE(n, d) \ + ( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] ) + +#define R32(a) *((const UInt32 *)(const void *)p + (a)) + +#endif + + +#define CRC64_FUNC_PRE_LE2(step) \ +UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) + +#define CRC64_FUNC_PRE_LE(step) \ + CRC64_FUNC_PRE_LE2(step); \ + CRC64_FUNC_PRE_LE2(step) + +CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE) +{ + const Byte *p = (const Byte *)data; + const Byte *lim; + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) + v = CRC64_UPDATE_BYTE_2(v, *p); + lim = p + size; + if (size >= Z7_CRC64_NUM_TABLES_USE) + { + lim -= Z7_CRC64_NUM_TABLES_USE; + do + { +#if Z7_CRC64_NUM_TABLES_USE == 4 + const UInt32 d = (UInt32)v ^ R32(0); + v = (v >> 32) ^ Q32LE(0, d); +#elif Z7_CRC64_NUM_TABLES_USE == 8 +#ifdef Z7_CRC64_USE_64BIT + v ^= R64(0); + v = Q64LE(0, v); +#else + UInt32 v0, v1; + v0 = (UInt32)v ^ R32(0); + v1 = (UInt32)(v >> 32) ^ R32(1); + v = Q32LE(1, v0) ^ Q32LE(0, v1); +#endif +#elif Z7_CRC64_NUM_TABLES_USE == 12 + UInt32 w; + UInt32 v0, v1; + v0 = (UInt32)v ^ R32(0); + v1 = (UInt32)(v >> 32) ^ R32(1); + w = R32(2); + v = Q32LE(0, w); + v ^= Q32LE(2, v0) ^ Q32LE(1, v1); +#elif Z7_CRC64_NUM_TABLES_USE == 16 +#ifdef Z7_CRC64_USE_64BIT + UInt64 w; + UInt64 x; + w = R64(1); x = Q64LE(0, w); + v ^= R64(0); v = x ^ Q64LE(1, v); +#else + UInt32 v0, v1; + UInt32 r0, r1; + v0 = (UInt32)v ^ R32(0); + v1 = (UInt32)(v >> 32) ^ R32(1); + r0 = R32(2); + r1 = R32(3); + v = Q32LE(1, r0) ^ Q32LE(0, r1); + v ^= Q32LE(3, v0) ^ Q32LE(2, v1); +#endif +#else +#error Stop_Compiling_Bad_CRC64_NUM_TABLES +#endif + p += Z7_CRC64_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC64_NUM_TABLES_USE; + } + for (; p < lim; p++) + v = CRC64_UPDATE_BYTE_2(v, *p); + return v; +} + +#undef CRC64_UPDATE_BYTE_2 +#undef R32 +#undef R64 +#undef Q32LE +#undef Q64LE +#undef CRC64_FUNC_PRE_LE +#undef CRC64_FUNC_PRE_LE2 + +#endif + + + + +#ifndef MY_CPU_LE + +#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8)) + +#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0) + +#define Q64BE(n, d) \ + ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \ + ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] ) + +#ifdef Z7_CRC64_DEBUG_BE + #define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a)) +#else + #define R64BE(a) *((const UInt64 *)(const void *)p + (a)) +#endif + +#else + +#define Q32BE(n, d) \ + ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \ + ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \ + ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] ) + +#ifdef Z7_CRC64_DEBUG_BE + #define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a)) +#else + #define R32BE(a) *((const UInt32 *)(const void *)p + (a)) +#endif + +#endif + +#define CRC64_FUNC_PRE_BE2(step) \ +UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table) + +#define CRC64_FUNC_PRE_BE(step) \ + CRC64_FUNC_PRE_BE2(step); \ + CRC64_FUNC_PRE_BE2(step) + +CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE) +{ + const Byte *p = (const Byte *)data; + const Byte *lim; + v = Z7_BSWAP64(v); + for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++) + v = CRC64_UPDATE_BYTE_2_BE(v, *p); + lim = p + size; + if (size >= Z7_CRC64_NUM_TABLES_USE) + { + lim -= Z7_CRC64_NUM_TABLES_USE; + do + { +#if Z7_CRC64_NUM_TABLES_USE == 4 + const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0); + v = (v << 32) ^ Q32BE(0, d); +#elif Z7_CRC64_NUM_TABLES_USE == 12 + const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); + const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); + const UInt32 w = R32BE(2); + v = Q32BE(0, w); + v ^= Q32BE(2, d1) ^ Q32BE(1, d0); + +#elif Z7_CRC64_NUM_TABLES_USE == 8 + #ifdef Z7_CRC64_USE_64BIT + v ^= R64BE(0); + v = Q64BE(0, v); + #else + const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); + const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); + v = Q32BE(1, d1) ^ Q32BE(0, d0); + #endif +#elif Z7_CRC64_NUM_TABLES_USE == 16 + #ifdef Z7_CRC64_USE_64BIT + const UInt64 w = R64BE(1); + v ^= R64BE(0); + v = Q64BE(0, w) ^ Q64BE(1, v); + #else + const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0); + const UInt32 d0 = (UInt32)(v ) ^ R32BE(1); + const UInt32 w1 = R32BE(2); + const UInt32 w0 = R32BE(3); + v = Q32BE(1, w1) ^ Q32BE(0, w0); + v ^= Q32BE(3, d1) ^ Q32BE(2, d0); + #endif +#elif +#error Stop_Compiling_Bad_CRC64_NUM_TABLES +#endif + p += Z7_CRC64_NUM_TABLES_USE; + } + while (p <= lim); + lim += Z7_CRC64_NUM_TABLES_USE; + } + for (; p < lim; p++) + v = CRC64_UPDATE_BYTE_2_BE(v, *p); + return Z7_BSWAP64(v); +} + +#undef CRC64_UPDATE_BYTE_2_BE +#undef R32BE +#undef R64BE +#undef Q32BE +#undef Q64BE +#undef CRC64_FUNC_PRE_BE +#undef CRC64_FUNC_PRE_BE2 + +#endif +#undef Z7_CRC64_NUM_TABLES_USE +#endif diff --git a/crnlib/lzma/XzDec.c b/crnlib/lzma/XzDec.c new file mode 100644 index 00000000..f4160eaa --- /dev/null +++ b/crnlib/lzma/XzDec.c @@ -0,0 +1,2877 @@ +/* XzDec.c -- Xz Decode +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +// #include + +// #define XZ_DUMP + +/* #define XZ_DUMP */ + +#ifdef XZ_DUMP +#include +#endif + +// #define SHOW_DEBUG_INFO + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif + +#define PRF_STR(s) PRF(printf("\n" s "\n")) +#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) + +#include +#include + +#include "7zCrc.h" +#include "Alloc.h" +#include "Bra.h" +#include "CpuArch.h" +#include "Delta.h" +#include "Lzma2Dec.h" + +// #define USE_SUBBLOCK + +#ifdef USE_SUBBLOCK +#include "Bcj3Dec.c" +#include "SbDec.h" +#endif + +#include "Xz.h" + +#define XZ_CHECK_SIZE_MAX 64 + +#define CODER_BUF_SIZE ((size_t)1 << 17) + +unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value) +{ + unsigned i, limit; + *value = 0; + limit = (maxSize > 9) ? 9 : (unsigned)maxSize; + + for (i = 0; i < limit;) + { + Byte b = p[i]; + *value |= (UInt64)(b & 0x7F) << (7 * i++); + if ((b & 0x80) == 0) + return (b == 0 && i != 1) ? 0 : i; + } + return 0; +} + + +/* ---------- XzBcFilterState ---------- */ + +#define BRA_BUF_SIZE (1 << 14) + +typedef struct +{ + size_t bufPos; + size_t bufConv; + size_t bufTotal; + Byte *buf; // must be aligned for 4 bytes + Xz_Func_BcFilterStateBase_Filter filter_func; + // int encodeMode; + CXzBcFilterStateBase base; + // Byte buf[BRA_BUF_SIZE]; +} CXzBcFilterState; + + +static void XzBcFilterState_Free(void *pp, ISzAllocPtr alloc) +{ + if (pp) + { + CXzBcFilterState *p = ((CXzBcFilterState *)pp); + ISzAlloc_Free(alloc, p->buf); + ISzAlloc_Free(alloc, pp); + } +} + + +static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) +{ + CXzBcFilterStateBase *p = &((CXzBcFilterState *)pp)->base; + UNUSED_VAR(alloc) + p->ip = 0; + if (p->methodId == XZ_ID_Delta) + { + if (propSize != 1) + return SZ_ERROR_UNSUPPORTED; + p->delta = (UInt32)props[0] + 1; + } + else + { + if (propSize == 4) + { + const UInt32 v = GetUi32(props); + switch (p->methodId) + { + case XZ_ID_PPC: + case XZ_ID_ARM: + case XZ_ID_SPARC: + case XZ_ID_ARM64: + if (v & 3) + return SZ_ERROR_UNSUPPORTED; + break; + case XZ_ID_ARMT: + case XZ_ID_RISCV: + if (v & 1) + return SZ_ERROR_UNSUPPORTED; + break; + case XZ_ID_IA64: + if (v & 0xf) + return SZ_ERROR_UNSUPPORTED; + break; + default: break; + } + p->ip = v; + } + else if (propSize != 0) + return SZ_ERROR_UNSUPPORTED; + } + return SZ_OK; +} + + +static void XzBcFilterState_Init(void *pp) +{ + CXzBcFilterState *p = ((CXzBcFilterState *)pp); + p->bufPos = p->bufConv = p->bufTotal = 0; + p->base.X86_State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL; + if (p->base.methodId == XZ_ID_Delta) + Delta_Init(p->base.delta_State); +} + + +static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] = +{ + Z7_BRANCH_CONV_DEC_2 (BranchConv_PPC), + Z7_BRANCH_CONV_DEC_2 (BranchConv_IA64), + Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM), + Z7_BRANCH_CONV_DEC_2 (BranchConv_ARMT), + Z7_BRANCH_CONV_DEC_2 (BranchConv_SPARC), + Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM64), + Z7_BRANCH_CONV_DEC_2 (BranchConv_RISCV) +}; + +static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size) +{ + switch (p->methodId) + { + case XZ_ID_Delta: + Delta_Decode(p->delta_State, p->delta, data, size); + break; + case XZ_ID_X86: + size = (SizeT)(z7_BranchConvSt_X86_Dec(data, size, p->ip, &p->X86_State) - data); + break; + default: + if (p->methodId >= XZ_ID_PPC) + { + const UInt32 i = p->methodId - XZ_ID_PPC; + if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Dec)) + size = (SizeT)(g_Funcs_BranchConv_RISC_Dec[i](data, size, p->ip) - data); + } + break; + } + p->ip += (UInt32)size; + return size; +} + + +static SizeT XzBcFilterState_Filter(void *pp, Byte *data, SizeT size) +{ + CXzBcFilterState *p = ((CXzBcFilterState *)pp); + return p->filter_func(&p->base, data, size); +} + + +static SRes XzBcFilterState_Code2(void *pp, + Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, int srcWasFinished, + ECoderFinishMode finishMode, + // int *wasFinished + ECoderStatus *status) +{ + CXzBcFilterState *p = ((CXzBcFilterState *)pp); + SizeT destRem = *destLen; + SizeT srcRem = *srcLen; + UNUSED_VAR(finishMode) + + *destLen = 0; + *srcLen = 0; + // *wasFinished = False; + *status = CODER_STATUS_NOT_FINISHED; + + while (destRem != 0) + { + { + size_t size = p->bufConv - p->bufPos; + if (size) + { + if (size > destRem) + size = destRem; + memcpy(dest, p->buf + p->bufPos, size); + p->bufPos += size; + *destLen += size; + dest += size; + destRem -= size; + continue; + } + } + + p->bufTotal -= p->bufPos; + memmove(p->buf, p->buf + p->bufPos, p->bufTotal); + p->bufPos = 0; + p->bufConv = 0; + { + size_t size = BRA_BUF_SIZE - p->bufTotal; + if (size > srcRem) + size = srcRem; + memcpy(p->buf + p->bufTotal, src, size); + *srcLen += size; + src += size; + srcRem -= size; + p->bufTotal += size; + } + if (p->bufTotal == 0) + break; + + p->bufConv = p->filter_func(&p->base, p->buf, p->bufTotal); + + if (p->bufConv == 0) + { + if (!srcWasFinished) + break; + p->bufConv = p->bufTotal; + } + } + + if (p->bufTotal == p->bufPos && srcRem == 0 && srcWasFinished) + { + *status = CODER_STATUS_FINISHED_WITH_MARK; + // *wasFinished = 1; + } + + return SZ_OK; +} + + +#define XZ_IS_SUPPORTED_FILTER_ID(id) \ + ((id) >= XZ_ID_Delta && (id) <= XZ_ID_RISCV) + +SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id, + Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc) +{ + CXzBcFilterState *decoder; + if (!XZ_IS_SUPPORTED_FILTER_ID(id)) + return SZ_ERROR_UNSUPPORTED; + decoder = (CXzBcFilterState *)p->p; + if (!decoder) + { + decoder = (CXzBcFilterState *)ISzAlloc_Alloc(alloc, sizeof(CXzBcFilterState)); + if (!decoder) + return SZ_ERROR_MEM; + decoder->buf = ISzAlloc_Alloc(alloc, BRA_BUF_SIZE); + if (!decoder->buf) + { + ISzAlloc_Free(alloc, decoder); + return SZ_ERROR_MEM; + } + p->p = decoder; + p->Free = XzBcFilterState_Free; + p->SetProps = XzBcFilterState_SetProps; + p->Init = XzBcFilterState_Init; + p->Code2 = XzBcFilterState_Code2; + p->Filter = XzBcFilterState_Filter; + decoder->filter_func = func; + } + decoder->base.methodId = (UInt32)id; + // decoder->encodeMode = encodeMode; + return SZ_OK; +} + + + +/* ---------- SbState ---------- */ + +#ifdef USE_SUBBLOCK + +static void SbState_Free(void *pp, ISzAllocPtr alloc) +{ + CSbDec *p = (CSbDec *)pp; + SbDec_Free(p); + ISzAlloc_Free(alloc, pp); +} + +static SRes SbState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) +{ + UNUSED_VAR(pp) + UNUSED_VAR(props) + UNUSED_VAR(alloc) + return (propSize == 0) ? SZ_OK : SZ_ERROR_UNSUPPORTED; +} + +static void SbState_Init(void *pp) +{ + SbDec_Init((CSbDec *)pp); +} + +static SRes SbState_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + int srcWasFinished, ECoderFinishMode finishMode, + // int *wasFinished + ECoderStatus *status) +{ + CSbDec *p = (CSbDec *)pp; + SRes res; + UNUSED_VAR(srcWasFinished) + p->dest = dest; + p->destLen = *destLen; + p->src = src; + p->srcLen = *srcLen; + p->finish = finishMode; /* change it */ + res = SbDec_Decode((CSbDec *)pp); + *destLen -= p->destLen; + *srcLen -= p->srcLen; + // *wasFinished = (*destLen == 0 && *srcLen == 0); /* change it */ + *status = (*destLen == 0 && *srcLen == 0) ? + CODER_STATUS_FINISHED_WITH_MARK : + CODER_STATUS_NOT_FINISHED; + return res; +} + +static SRes SbState_SetFromMethod(IStateCoder *p, ISzAllocPtr alloc) +{ + CSbDec *decoder = (CSbDec *)p->p; + if (!decoder) + { + decoder = (CSbDec *)ISzAlloc_Alloc(alloc, sizeof(CSbDec)); + if (!decoder) + return SZ_ERROR_MEM; + p->p = decoder; + p->Free = SbState_Free; + p->SetProps = SbState_SetProps; + p->Init = SbState_Init; + p->Code2 = SbState_Code2; + p->Filter = NULL; + } + SbDec_Construct(decoder); + SbDec_SetAlloc(decoder, alloc); + return SZ_OK; +} + +#endif + + + +/* ---------- Lzma2 ---------- */ + +typedef struct +{ + CLzma2Dec decoder; + BoolInt outBufMode; +} CLzma2Dec_Spec; + + +static void Lzma2State_Free(void *pp, ISzAllocPtr alloc) +{ + CLzma2Dec_Spec *p = (CLzma2Dec_Spec *)pp; + if (p->outBufMode) + Lzma2Dec_FreeProbs(&p->decoder, alloc); + else + Lzma2Dec_Free(&p->decoder, alloc); + ISzAlloc_Free(alloc, pp); +} + +static SRes Lzma2State_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) +{ + if (propSize != 1) + return SZ_ERROR_UNSUPPORTED; + { + CLzma2Dec_Spec *p = (CLzma2Dec_Spec *)pp; + if (p->outBufMode) + return Lzma2Dec_AllocateProbs(&p->decoder, props[0], alloc); + else + return Lzma2Dec_Allocate(&p->decoder, props[0], alloc); + } +} + +static void Lzma2State_Init(void *pp) +{ + Lzma2Dec_Init(&((CLzma2Dec_Spec *)pp)->decoder); +} + + +/* + if (outBufMode), then (dest) is not used. Use NULL. + Data is unpacked to (spec->decoder.decoder.dic) output buffer. +*/ + +static SRes Lzma2State_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + int srcWasFinished, ECoderFinishMode finishMode, + // int *wasFinished, + ECoderStatus *status) +{ + CLzma2Dec_Spec *spec = (CLzma2Dec_Spec *)pp; + ELzmaStatus status2; + /* ELzmaFinishMode fm = (finishMode == LZMA_FINISH_ANY) ? LZMA_FINISH_ANY : LZMA_FINISH_END; */ + SRes res; + UNUSED_VAR(srcWasFinished) + if (spec->outBufMode) + { + SizeT dicPos = spec->decoder.decoder.dicPos; + SizeT dicLimit = dicPos + *destLen; + res = Lzma2Dec_DecodeToDic(&spec->decoder, dicLimit, src, srcLen, (ELzmaFinishMode)finishMode, &status2); + *destLen = spec->decoder.decoder.dicPos - dicPos; + } + else + res = Lzma2Dec_DecodeToBuf(&spec->decoder, dest, destLen, src, srcLen, (ELzmaFinishMode)finishMode, &status2); + // *wasFinished = (status2 == LZMA_STATUS_FINISHED_WITH_MARK); + // ECoderStatus values are identical to ELzmaStatus values of LZMA2 decoder + *status = (ECoderStatus)status2; + return res; +} + + +static SRes Lzma2State_SetFromMethod(IStateCoder *p, Byte *outBuf, size_t outBufSize, ISzAllocPtr alloc) +{ + CLzma2Dec_Spec *spec = (CLzma2Dec_Spec *)p->p; + if (!spec) + { + spec = (CLzma2Dec_Spec *)ISzAlloc_Alloc(alloc, sizeof(CLzma2Dec_Spec)); + if (!spec) + return SZ_ERROR_MEM; + p->p = spec; + p->Free = Lzma2State_Free; + p->SetProps = Lzma2State_SetProps; + p->Init = Lzma2State_Init; + p->Code2 = Lzma2State_Code2; + p->Filter = NULL; + Lzma2Dec_CONSTRUCT(&spec->decoder) + } + spec->outBufMode = False; + if (outBuf) + { + spec->outBufMode = True; + spec->decoder.decoder.dic = outBuf; + spec->decoder.decoder.dicBufSize = outBufSize; + } + return SZ_OK; +} + + +static SRes Lzma2State_ResetOutBuf(IStateCoder *p, Byte *outBuf, size_t outBufSize) +{ + CLzma2Dec_Spec *spec = (CLzma2Dec_Spec *)p->p; + if ((spec->outBufMode && !outBuf) || (!spec->outBufMode && outBuf)) + return SZ_ERROR_FAIL; + if (outBuf) + { + spec->decoder.decoder.dic = outBuf; + spec->decoder.decoder.dicBufSize = outBufSize; + } + return SZ_OK; +} + + + +static void MixCoder_Construct(CMixCoder *p, ISzAllocPtr alloc) +{ + unsigned i; + p->alloc = alloc; + p->buf = NULL; + p->numCoders = 0; + + p->outBufSize = 0; + p->outBuf = NULL; + // p->SingleBufMode = False; + + for (i = 0; i < MIXCODER_NUM_FILTERS_MAX; i++) + p->coders[i].p = NULL; +} + + +static void MixCoder_Free(CMixCoder *p) +{ + unsigned i; + p->numCoders = 0; + for (i = 0; i < MIXCODER_NUM_FILTERS_MAX; i++) + { + IStateCoder *sc = &p->coders[i]; + if (sc->p) + { + sc->Free(sc->p, p->alloc); + sc->p = NULL; + } + } + if (p->buf) + { + ISzAlloc_Free(p->alloc, p->buf); + p->buf = NULL; /* 9.31: the BUG was fixed */ + } +} + +static void MixCoder_Init(CMixCoder *p) +{ + unsigned i; + for (i = 0; i < MIXCODER_NUM_FILTERS_MAX - 1; i++) + { + p->size[i] = 0; + p->pos[i] = 0; + p->finished[i] = 0; + } + for (i = 0; i < p->numCoders; i++) + { + IStateCoder *coder = &p->coders[i]; + coder->Init(coder->p); + p->results[i] = SZ_OK; + } + p->outWritten = 0; + p->wasFinished = False; + p->res = SZ_OK; + p->status = CODER_STATUS_NOT_SPECIFIED; +} + + +static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize) +{ + IStateCoder *sc = &p->coders[coderIndex]; + p->ids[coderIndex] = methodId; + if (methodId == XZ_ID_LZMA2) + return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc); +#ifdef USE_SUBBLOCK + if (methodId == XZ_ID_Subblock) + return SbState_SetFromMethod(sc, p->alloc); +#endif + if (coderIndex == 0) + return SZ_ERROR_UNSUPPORTED; + return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId, + XzBcFilterStateBase_Filter_Dec, p->alloc); +} + + +static SRes MixCoder_ResetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize) +{ + IStateCoder *sc = &p->coders[coderIndex]; + if (methodId == XZ_ID_LZMA2) + return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize); + return SZ_ERROR_UNSUPPORTED; +} + + + +/* + if (destFinish) - then unpack data block is finished at (*destLen) position, + and we can return data that were not processed by filter + +output (status) can be : + CODER_STATUS_NOT_FINISHED + CODER_STATUS_FINISHED_WITH_MARK + CODER_STATUS_NEEDS_MORE_INPUT - not implemented still +*/ + +static SRes MixCoder_Code(CMixCoder *p, + Byte *dest, SizeT *destLen, int destFinish, + const Byte *src, SizeT *srcLen, int srcWasFinished, + ECoderFinishMode finishMode) +{ + SizeT destLenOrig = *destLen; + SizeT srcLenOrig = *srcLen; + + *destLen = 0; + *srcLen = 0; + + if (p->wasFinished) + return p->res; + + p->status = CODER_STATUS_NOT_FINISHED; + + // if (p->SingleBufMode) + if (p->outBuf) + { + SRes res; + SizeT destLen2, srcLen2; + int wasFinished; + + PRF_STR("------- MixCoder Single ----------") + + srcLen2 = srcLenOrig; + destLen2 = destLenOrig; + + { + IStateCoder *coder = &p->coders[0]; + res = coder->Code2(coder->p, NULL, &destLen2, src, &srcLen2, srcWasFinished, finishMode, + // &wasFinished, + &p->status); + wasFinished = (p->status == CODER_STATUS_FINISHED_WITH_MARK); + } + + p->res = res; + + /* + if (wasFinished) + p->status = CODER_STATUS_FINISHED_WITH_MARK; + else + { + if (res == SZ_OK) + if (destLen2 != destLenOrig) + p->status = CODER_STATUS_NEEDS_MORE_INPUT; + } + */ + + + *srcLen = srcLen2; + src += srcLen2; + p->outWritten += destLen2; + + if (res != SZ_OK || srcWasFinished || wasFinished) + p->wasFinished = True; + + if (p->numCoders == 1) + *destLen = destLen2; + else if (p->wasFinished) + { + unsigned i; + size_t processed = p->outWritten; + + for (i = 1; i < p->numCoders; i++) + { + IStateCoder *coder = &p->coders[i]; + processed = coder->Filter(coder->p, p->outBuf, processed); + if (wasFinished || (destFinish && p->outWritten == destLenOrig)) + processed = p->outWritten; + PRF_STR_INT("filter", i) + } + *destLen = processed; + } + return res; + } + + PRF_STR("standard mix") + + if (p->numCoders != 1) + { + if (!p->buf) + { + p->buf = (Byte *)ISzAlloc_Alloc(p->alloc, CODER_BUF_SIZE * (MIXCODER_NUM_FILTERS_MAX - 1)); + if (!p->buf) + return SZ_ERROR_MEM; + } + + finishMode = CODER_FINISH_ANY; + } + + for (;;) + { + BoolInt processed = False; + BoolInt allFinished = True; + SRes resMain = SZ_OK; + unsigned i; + + p->status = CODER_STATUS_NOT_FINISHED; + /* + if (p->numCoders == 1 && *destLen == destLenOrig && finishMode == LZMA_FINISH_ANY) + break; + */ + + for (i = 0; i < p->numCoders; i++) + { + SRes res; + IStateCoder *coder = &p->coders[i]; + Byte *dest2; + SizeT destLen2, srcLen2; // destLen2_Orig; + const Byte *src2; + int srcFinished2; + int encodingWasFinished; + ECoderStatus status2; + + if (i == 0) + { + src2 = src; + srcLen2 = srcLenOrig - *srcLen; + srcFinished2 = srcWasFinished; + } + else + { + size_t k = i - 1; + src2 = p->buf + (CODER_BUF_SIZE * k) + p->pos[k]; + srcLen2 = p->size[k] - p->pos[k]; + srcFinished2 = p->finished[k]; + } + + if (i == p->numCoders - 1) + { + dest2 = dest; + destLen2 = destLenOrig - *destLen; + } + else + { + if (p->pos[i] != p->size[i]) + continue; + dest2 = p->buf + (CODER_BUF_SIZE * i); + destLen2 = CODER_BUF_SIZE; + } + + // destLen2_Orig = destLen2; + + if (p->results[i] != SZ_OK) + { + if (resMain == SZ_OK) + resMain = p->results[i]; + continue; + } + + res = coder->Code2(coder->p, + dest2, &destLen2, + src2, &srcLen2, srcFinished2, + finishMode, + // &encodingWasFinished, + &status2); + + if (res != SZ_OK) + { + p->results[i] = res; + if (resMain == SZ_OK) + resMain = res; + } + + encodingWasFinished = (status2 == CODER_STATUS_FINISHED_WITH_MARK); + + if (!encodingWasFinished) + { + allFinished = False; + if (p->numCoders == 1 && res == SZ_OK) + p->status = status2; + } + + if (i == 0) + { + *srcLen += srcLen2; + src += srcLen2; + } + else + p->pos[(size_t)i - 1] += srcLen2; + + if (i == p->numCoders - 1) + { + *destLen += destLen2; + dest += destLen2; + } + else + { + p->size[i] = destLen2; + p->pos[i] = 0; + p->finished[i] = encodingWasFinished; + } + + if (destLen2 != 0 || srcLen2 != 0) + processed = True; + } + + if (!processed) + { + if (allFinished) + p->status = CODER_STATUS_FINISHED_WITH_MARK; + return resMain; + } + } +} + + +SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf) +{ + *p = (CXzStreamFlags)GetBe16(buf + XZ_SIG_SIZE); + if (CrcCalc(buf + XZ_SIG_SIZE, XZ_STREAM_FLAGS_SIZE) != + GetUi32(buf + XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE)) + return SZ_ERROR_NO_ARCHIVE; + return XzFlags_IsSupported(*p) ? SZ_OK : SZ_ERROR_UNSUPPORTED; +} + +static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *buf) +{ + return indexSize == (((UInt64)GetUi32(buf + 4) + 1) << 2) + && GetUi32(buf) == CrcCalc(buf + 4, 6) + && flags == GetBe16(buf + 8) + && buf[10] == XZ_FOOTER_SIG_0 + && buf[11] == XZ_FOOTER_SIG_1; +} + +#define READ_VARINT_AND_CHECK(buf, pos, size, res) \ + { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ + if (s == 0) return SZ_ERROR_ARCHIVE; \ + pos += s; } + + +static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p) +{ + const unsigned numFilters = XzBlock_GetNumFilters(p) - 1; + unsigned i; + { + const CXzFilter *f = &p->filters[numFilters]; + if (f->id != XZ_ID_LZMA2 || f->propsSize != 1 || f->props[0] > 40) + return False; + } + + for (i = 0; i < numFilters; i++) + { + const CXzFilter *f = &p->filters[i]; + if (f->id == XZ_ID_Delta) + { + if (f->propsSize != 1) + return False; + } + else if (!XZ_IS_SUPPORTED_FILTER_ID(f->id) + || (f->propsSize != 0 && f->propsSize != 4)) + return False; + } + return True; +} + + +SRes XzBlock_Parse(CXzBlock *p, const Byte *header) +{ + unsigned pos; + unsigned numFilters, i; + unsigned headerSize = (unsigned)header[0] << 2; + + /* (headerSize != 0) : another code checks */ + + if (CrcCalc(header, headerSize) != GetUi32(header + headerSize)) + return SZ_ERROR_ARCHIVE; + + pos = 1; + p->flags = header[pos++]; + + p->packSize = (UInt64)(Int64)-1; + if (XzBlock_HasPackSize(p)) + { + READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize) + if (p->packSize == 0 || p->packSize + headerSize >= (UInt64)1 << 63) + return SZ_ERROR_ARCHIVE; + } + + p->unpackSize = (UInt64)(Int64)-1; + if (XzBlock_HasUnpackSize(p)) + { + READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize) + } + + numFilters = XzBlock_GetNumFilters(p); + for (i = 0; i < numFilters; i++) + { + CXzFilter *filter = p->filters + i; + UInt64 size; + READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id) + READ_VARINT_AND_CHECK(header, pos, headerSize, &size) + if (size > headerSize - pos || size > XZ_FILTER_PROPS_SIZE_MAX) + return SZ_ERROR_ARCHIVE; + filter->propsSize = (UInt32)size; + memcpy(filter->props, header + pos, (size_t)size); + pos += (unsigned)size; + + #ifdef XZ_DUMP + printf("\nf[%u] = %2X: ", i, (unsigned)filter->id); + { + unsigned i; + for (i = 0; i < size; i++) + printf(" %2X", filter->props[i]); + } + #endif + } + + if (XzBlock_HasUnsupportedFlags(p)) + return SZ_ERROR_UNSUPPORTED; + + while (pos < headerSize) + if (header[pos++] != 0) + return SZ_ERROR_ARCHIVE; + return SZ_OK; +} + + + + +static SRes XzDecMix_Init(CMixCoder *p, const CXzBlock *block, Byte *outBuf, size_t outBufSize) +{ + unsigned i; + BoolInt needReInit = True; + unsigned numFilters = XzBlock_GetNumFilters(block); + + if (numFilters == p->numCoders && ((p->outBuf && outBuf) || (!p->outBuf && !outBuf))) + { + needReInit = False; + for (i = 0; i < numFilters; i++) + if (p->ids[i] != block->filters[numFilters - 1 - i].id) + { + needReInit = True; + break; + } + } + + // p->SingleBufMode = (outBuf != NULL); + p->outBuf = outBuf; + p->outBufSize = outBufSize; + + // p->SingleBufMode = False; + // outBuf = NULL; + + if (needReInit) + { + MixCoder_Free(p); + for (i = 0; i < numFilters; i++) + { + RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize)) + } + p->numCoders = numFilters; + } + else + { + RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize)) + } + + for (i = 0; i < numFilters; i++) + { + const CXzFilter *f = &block->filters[numFilters - 1 - i]; + IStateCoder *sc = &p->coders[i]; + RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc)) + } + + MixCoder_Init(p); + return SZ_OK; +} + + + +void XzUnpacker_Init(CXzUnpacker *p) +{ + p->state = XZ_STATE_STREAM_HEADER; + p->pos = 0; + p->numStartedStreams = 0; + p->numFinishedStreams = 0; + p->numTotalBlocks = 0; + p->padSize = 0; + p->decodeOnlyOneBlock = 0; + + p->parseMode = False; + p->decodeToStreamSignature = False; + + // p->outBuf = NULL; + // p->outBufSize = 0; + p->outDataWritten = 0; +} + + +void XzUnpacker_SetOutBuf(CXzUnpacker *p, Byte *outBuf, size_t outBufSize) +{ + p->outBuf = outBuf; + p->outBufSize = outBufSize; +} + + +void XzUnpacker_Construct(CXzUnpacker *p, ISzAllocPtr alloc) +{ + MixCoder_Construct(&p->decoder, alloc); + p->outBuf = NULL; + p->outBufSize = 0; + XzUnpacker_Init(p); +} + + +void XzUnpacker_Free(CXzUnpacker *p) +{ + MixCoder_Free(&p->decoder); +} + + +void XzUnpacker_PrepareToRandomBlockDecoding(CXzUnpacker *p) +{ + p->indexSize = 0; + p->numBlocks = 0; + Sha256_Init(&p->sha); + p->state = XZ_STATE_BLOCK_HEADER; + p->pos = 0; + p->decodeOnlyOneBlock = 1; +} + + +static void XzUnpacker_UpdateIndex(CXzUnpacker *p, UInt64 packSize, UInt64 unpackSize) +{ + Byte temp[32]; + unsigned num = Xz_WriteVarInt(temp, packSize); + num += Xz_WriteVarInt(temp + num, unpackSize); + Sha256_Update(&p->sha, temp, num); + p->indexSize += num; + p->numBlocks++; +} + + + +SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, int srcFinished, + ECoderFinishMode finishMode, ECoderStatus *status) +{ + SizeT destLenOrig = *destLen; + SizeT srcLenOrig = *srcLen; + *destLen = 0; + *srcLen = 0; + *status = CODER_STATUS_NOT_SPECIFIED; + + for (;;) + { + SizeT srcRem; + + if (p->state == XZ_STATE_BLOCK) + { + SizeT destLen2 = destLenOrig - *destLen; + SizeT srcLen2 = srcLenOrig - *srcLen; + SRes res; + + ECoderFinishMode finishMode2 = finishMode; + BoolInt srcFinished2 = (BoolInt)srcFinished; + BoolInt destFinish = False; + + if (p->block.packSize != (UInt64)(Int64)-1) + { + UInt64 rem = p->block.packSize - p->packSize; + if (srcLen2 >= rem) + { + srcFinished2 = True; + srcLen2 = (SizeT)rem; + } + if (rem == 0 && p->block.unpackSize == p->unpackSize) + return SZ_ERROR_DATA; + } + + if (p->block.unpackSize != (UInt64)(Int64)-1) + { + UInt64 rem = p->block.unpackSize - p->unpackSize; + if (destLen2 >= rem) + { + destFinish = True; + finishMode2 = CODER_FINISH_END; + destLen2 = (SizeT)rem; + } + } + + /* + if (srcLen2 == 0 && destLen2 == 0) + { + *status = CODER_STATUS_NOT_FINISHED; + return SZ_OK; + } + */ + + { + res = MixCoder_Code(&p->decoder, + (p->outBuf ? NULL : dest), &destLen2, destFinish, + src, &srcLen2, srcFinished2, + finishMode2); + + *status = p->decoder.status; + XzCheck_Update(&p->check, (p->outBuf ? p->outBuf + p->outDataWritten : dest), destLen2); + if (!p->outBuf) + dest += destLen2; + p->outDataWritten += destLen2; + } + + (*srcLen) += srcLen2; + src += srcLen2; + p->packSize += srcLen2; + (*destLen) += destLen2; + p->unpackSize += destLen2; + + RINOK(res) + + if (*status != CODER_STATUS_FINISHED_WITH_MARK) + { + if (p->block.packSize == p->packSize + && *status == CODER_STATUS_NEEDS_MORE_INPUT) + { + PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT") + *status = CODER_STATUS_NOT_SPECIFIED; + return SZ_ERROR_DATA; + } + + return SZ_OK; + } + { + XzUnpacker_UpdateIndex(p, XzUnpacker_GetPackSizeForIndex(p), p->unpackSize); + p->state = XZ_STATE_BLOCK_FOOTER; + p->pos = 0; + p->alignPos = 0; + *status = CODER_STATUS_NOT_SPECIFIED; + + if ((p->block.packSize != (UInt64)(Int64)-1 && p->block.packSize != p->packSize) + || (p->block.unpackSize != (UInt64)(Int64)-1 && p->block.unpackSize != p->unpackSize)) + { + PRF_STR("ERROR: block.size mismatch") + return SZ_ERROR_DATA; + } + } + // continue; + } + + srcRem = srcLenOrig - *srcLen; + + // XZ_STATE_BLOCK_FOOTER can transit to XZ_STATE_BLOCK_HEADER without input bytes + if (srcRem == 0 && p->state != XZ_STATE_BLOCK_FOOTER) + { + *status = CODER_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + switch ((int)p->state) + { + case XZ_STATE_STREAM_HEADER: + { + if (p->pos < XZ_STREAM_HEADER_SIZE) + { + if (p->pos < XZ_SIG_SIZE && *src != XZ_SIG[p->pos]) + return SZ_ERROR_NO_ARCHIVE; + if (p->decodeToStreamSignature) + return SZ_OK; + p->buf[p->pos++] = *src++; + (*srcLen)++; + } + else + { + RINOK(Xz_ParseHeader(&p->streamFlags, p->buf)) + p->numStartedStreams++; + p->indexSize = 0; + p->numBlocks = 0; + Sha256_Init(&p->sha); + p->state = XZ_STATE_BLOCK_HEADER; + p->pos = 0; + } + break; + } + + case XZ_STATE_BLOCK_HEADER: + { + if (p->pos == 0) + { + p->buf[p->pos++] = *src++; + (*srcLen)++; + if (p->buf[0] == 0) + { + if (p->decodeOnlyOneBlock) + return SZ_ERROR_DATA; + p->indexPreSize = 1 + Xz_WriteVarInt(p->buf + 1, p->numBlocks); + p->indexPos = p->indexPreSize; + p->indexSize += p->indexPreSize; + Sha256_Final(&p->sha, p->shaDigest); + Sha256_Init(&p->sha); + p->crc = CrcUpdate(CRC_INIT_VAL, p->buf, p->indexPreSize); + p->state = XZ_STATE_STREAM_INDEX; + break; + } + p->blockHeaderSize = ((unsigned)p->buf[0] << 2) + 4; + break; + } + + if (p->pos != p->blockHeaderSize) + { + unsigned cur = p->blockHeaderSize - p->pos; + if (cur > srcRem) + cur = (unsigned)srcRem; + memcpy(p->buf + p->pos, src, cur); + p->pos += cur; + (*srcLen) += cur; + src += cur; + } + else + { + RINOK(XzBlock_Parse(&p->block, p->buf)) + if (!XzBlock_AreSupportedFilters(&p->block)) + return SZ_ERROR_UNSUPPORTED; + p->numTotalBlocks++; + p->state = XZ_STATE_BLOCK; + p->packSize = 0; + p->unpackSize = 0; + XzCheck_Init(&p->check, XzFlags_GetCheckType(p->streamFlags)); + if (p->parseMode) + { + p->headerParsedOk = True; + return SZ_OK; + } + RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize)) + } + break; + } + + case XZ_STATE_BLOCK_FOOTER: + { + if ((((unsigned)p->packSize + p->alignPos) & 3) != 0) + { + if (srcRem == 0) + { + *status = CODER_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + (*srcLen)++; + p->alignPos++; + if (*src++ != 0) + return SZ_ERROR_CRC; + } + else + { + const unsigned checkSize = XzFlags_GetCheckSize(p->streamFlags); + unsigned cur = checkSize - p->pos; + if (cur != 0) + { + if (srcRem == 0) + { + *status = CODER_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (cur > srcRem) + cur = (unsigned)srcRem; + memcpy(p->buf + p->pos, src, cur); + p->pos += cur; + (*srcLen) += cur; + src += cur; + if (checkSize != p->pos) + break; + } + { + Byte digest[XZ_CHECK_SIZE_MAX]; + p->state = XZ_STATE_BLOCK_HEADER; + p->pos = 0; + if (XzCheck_Final(&p->check, digest) && memcmp(digest, p->buf, checkSize) != 0) + return SZ_ERROR_CRC; + if (p->decodeOnlyOneBlock) + { + *status = CODER_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + } + } + break; + } + + case XZ_STATE_STREAM_INDEX: + { + if (p->pos < p->indexPreSize) + { + (*srcLen)++; + if (*src++ != p->buf[p->pos++]) + return SZ_ERROR_CRC; + } + else + { + if (p->indexPos < p->indexSize) + { + UInt64 cur = p->indexSize - p->indexPos; + if (srcRem > cur) + srcRem = (SizeT)cur; + p->crc = CrcUpdate(p->crc, src, srcRem); + Sha256_Update(&p->sha, src, srcRem); + (*srcLen) += srcRem; + src += srcRem; + p->indexPos += srcRem; + } + else if ((p->indexPos & 3) != 0) + { + Byte b = *src++; + p->crc = CRC_UPDATE_BYTE(p->crc, b); + (*srcLen)++; + p->indexPos++; + p->indexSize++; + if (b != 0) + return SZ_ERROR_CRC; + } + else + { + Byte digest[SHA256_DIGEST_SIZE]; + p->state = XZ_STATE_STREAM_INDEX_CRC; + p->indexSize += 4; + p->pos = 0; + Sha256_Final(&p->sha, digest); + if (memcmp(digest, p->shaDigest, SHA256_DIGEST_SIZE) != 0) + return SZ_ERROR_CRC; + } + } + break; + } + + case XZ_STATE_STREAM_INDEX_CRC: + { + if (p->pos < 4) + { + (*srcLen)++; + p->buf[p->pos++] = *src++; + } + else + { + const Byte *ptr = p->buf; + p->state = XZ_STATE_STREAM_FOOTER; + p->pos = 0; + if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr)) + return SZ_ERROR_CRC; + } + break; + } + + case XZ_STATE_STREAM_FOOTER: + { + unsigned cur = XZ_STREAM_FOOTER_SIZE - p->pos; + if (cur > srcRem) + cur = (unsigned)srcRem; + memcpy(p->buf + p->pos, src, cur); + p->pos += cur; + (*srcLen) += cur; + src += cur; + if (p->pos == XZ_STREAM_FOOTER_SIZE) + { + p->state = XZ_STATE_STREAM_PADDING; + p->numFinishedStreams++; + p->padSize = 0; + if (!Xz_CheckFooter(p->streamFlags, p->indexSize, p->buf)) + return SZ_ERROR_CRC; + } + break; + } + + case XZ_STATE_STREAM_PADDING: + { + if (*src != 0) + { + if (((UInt32)p->padSize & 3) != 0) + return SZ_ERROR_NO_ARCHIVE; + p->pos = 0; + p->state = XZ_STATE_STREAM_HEADER; + } + else + { + (*srcLen)++; + src++; + p->padSize++; + } + break; + } + + case XZ_STATE_BLOCK: break; /* to disable GCC warning */ + + default: return SZ_ERROR_FAIL; + } + } + /* + if (p->state == XZ_STATE_FINISHED) + *status = CODER_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + */ +} + + +SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, + ECoderFinishMode finishMode, ECoderStatus *status) +{ + XzUnpacker_Init(p); + XzUnpacker_SetOutBuf(p, dest, *destLen); + + return XzUnpacker_Code(p, + NULL, destLen, + src, srcLen, True, + finishMode, status); +} + + +BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p) +{ + return (p->state == XZ_STATE_BLOCK_HEADER) && (p->pos == 0); +} + +BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p) +{ + return (p->state == XZ_STATE_STREAM_PADDING) && (((UInt32)p->padSize & 3) == 0); +} + +UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p) +{ + UInt64 num = 0; + if (p->state == XZ_STATE_STREAM_PADDING) + num = p->padSize; + else if (p->state == XZ_STATE_STREAM_HEADER) + num = p->padSize + p->pos; + return num; +} + + + + + + + + + + + + + + + + + + + + + +#ifndef Z7_ST +#include "MtDec.h" +#endif + + +void XzDecMtProps_Init(CXzDecMtProps *p) +{ + p->inBufSize_ST = 1 << 18; + p->outStep_ST = 1 << 20; + p->ignoreErrors = False; + + #ifndef Z7_ST + p->numThreads = 1; + p->inBufSize_MT = 1 << 18; + p->memUseMax = sizeof(size_t) << 28; + #endif +} + + + +#ifndef Z7_ST + +/* ---------- CXzDecMtThread ---------- */ + +typedef struct +{ + Byte *outBuf; + size_t outBufSize; + size_t outPreSize; + size_t inPreSize; + size_t inPreHeaderSize; + size_t blockPackSize_for_Index; // including block header and checksum. + size_t blockPackTotal; // including stream header, block header and checksum. + size_t inCodeSize; + size_t outCodeSize; + ECoderStatus status; + SRes codeRes; + BoolInt skipMode; + // BoolInt finishedWithMark; + EMtDecParseState parseState; + BoolInt parsing_Truncated; + BoolInt atBlockHeader; + CXzStreamFlags streamFlags; + // UInt64 numFinishedStreams + UInt64 numStreams; + UInt64 numTotalBlocks; + UInt64 numBlocks; + + BoolInt dec_created; + CXzUnpacker dec; + + Byte mtPad[1 << 7]; +} CXzDecMtThread; + +#endif + + +/* ---------- CXzDecMt ---------- */ + +struct CXzDecMt +{ + CAlignOffsetAlloc alignOffsetAlloc; + ISzAllocPtr allocMid; + + CXzDecMtProps props; + size_t unpackBlockMaxSize; + + ISeqInStreamPtr inStream; + ISeqOutStreamPtr outStream; + ICompressProgressPtr progress; + + BoolInt finishMode; + BoolInt outSize_Defined; + UInt64 outSize; + + UInt64 outProcessed; + UInt64 inProcessed; + UInt64 readProcessed; + BoolInt readWasFinished; + SRes readRes; + SRes writeRes; + + Byte *outBuf; + size_t outBufSize; + Byte *inBuf; + size_t inBufSize; + + CXzUnpacker dec; + + ECoderStatus status; + SRes codeRes; + + #ifndef Z7_ST + BoolInt mainDecoderWasCalled; + // int statErrorDefined; + int finishedDecoderIndex; + + // global values that are used in Parse stage + CXzStreamFlags streamFlags; + // UInt64 numFinishedStreams + UInt64 numStreams; + UInt64 numTotalBlocks; + UInt64 numBlocks; + + // UInt64 numBadBlocks; + SRes mainErrorCode; // it's set to error code, if the size Code() output doesn't patch the size from Parsing stage + // it can be = SZ_ERROR_INPUT_EOF + // it can be = SZ_ERROR_DATA, in some another cases + BoolInt isBlockHeaderState_Parse; + BoolInt isBlockHeaderState_Write; + UInt64 outProcessed_Parse; + BoolInt parsing_Truncated; + + BoolInt mtc_WasConstructed; + CMtDec mtc; + CXzDecMtThread coders[MTDEC_THREADS_MAX]; + #endif +}; + + + +CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid) +{ + CXzDecMt *p = (CXzDecMt *)ISzAlloc_Alloc(alloc, sizeof(CXzDecMt)); + if (!p) + return NULL; + + AlignOffsetAlloc_CreateVTable(&p->alignOffsetAlloc); + p->alignOffsetAlloc.baseAlloc = alloc; + p->alignOffsetAlloc.numAlignBits = 7; + p->alignOffsetAlloc.offset = 0; + + p->allocMid = allocMid; + + p->outBuf = NULL; + p->outBufSize = 0; + p->inBuf = NULL; + p->inBufSize = 0; + + XzUnpacker_Construct(&p->dec, &p->alignOffsetAlloc.vt); + + p->unpackBlockMaxSize = 0; + + XzDecMtProps_Init(&p->props); + + #ifndef Z7_ST + p->mtc_WasConstructed = False; + { + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CXzDecMtThread *coder = &p->coders[i]; + coder->dec_created = False; + coder->outBuf = NULL; + coder->outBufSize = 0; + } + } + #endif + + return (CXzDecMtHandle)p; +} + + +#ifndef Z7_ST + +static void XzDecMt_FreeOutBufs(CXzDecMt *p) +{ + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CXzDecMtThread *coder = &p->coders[i]; + if (coder->outBuf) + { + ISzAlloc_Free(p->allocMid, coder->outBuf); + coder->outBuf = NULL; + coder->outBufSize = 0; + } + } + p->unpackBlockMaxSize = 0; +} + +#endif + + + +static void XzDecMt_FreeSt(CXzDecMt *p) +{ + XzUnpacker_Free(&p->dec); + + if (p->outBuf) + { + ISzAlloc_Free(p->allocMid, p->outBuf); + p->outBuf = NULL; + } + p->outBufSize = 0; + + if (p->inBuf) + { + ISzAlloc_Free(p->allocMid, p->inBuf); + p->inBuf = NULL; + } + p->inBufSize = 0; +} + + +// #define GET_CXzDecMt_p CXzDecMt *p = pp; + +void XzDecMt_Destroy(CXzDecMtHandle p) +{ + // GET_CXzDecMt_p + + XzDecMt_FreeSt(p); + + #ifndef Z7_ST + + if (p->mtc_WasConstructed) + { + MtDec_Destruct(&p->mtc); + p->mtc_WasConstructed = False; + } + { + unsigned i; + for (i = 0; i < MTDEC_THREADS_MAX; i++) + { + CXzDecMtThread *t = &p->coders[i]; + if (t->dec_created) + { + // we don't need to free dict here + XzUnpacker_Free(&t->dec); + t->dec_created = False; + } + } + } + XzDecMt_FreeOutBufs(p); + + #endif + + ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p); +} + + + +#ifndef Z7_ST + +static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc) +{ + CXzDecMt *me = (CXzDecMt *)obj; + CXzDecMtThread *coder = &me->coders[coderIndex]; + size_t srcSize = cc->srcSize; + + cc->srcSize = 0; + cc->outPos = 0; + cc->state = MTDEC_PARSE_CONTINUE; + + cc->canCreateNewThread = True; + + if (cc->startCall) + { + coder->outPreSize = 0; + coder->inPreSize = 0; + coder->inPreHeaderSize = 0; + coder->parseState = MTDEC_PARSE_CONTINUE; + coder->parsing_Truncated = False; + coder->skipMode = False; + coder->codeRes = SZ_OK; + coder->status = CODER_STATUS_NOT_SPECIFIED; + coder->inCodeSize = 0; + coder->outCodeSize = 0; + + coder->numStreams = me->numStreams; + coder->numTotalBlocks = me->numTotalBlocks; + coder->numBlocks = me->numBlocks; + + if (!coder->dec_created) + { + XzUnpacker_Construct(&coder->dec, &me->alignOffsetAlloc.vt); + coder->dec_created = True; + } + + XzUnpacker_Init(&coder->dec); + + if (me->isBlockHeaderState_Parse) + { + coder->dec.streamFlags = me->streamFlags; + coder->atBlockHeader = True; + XzUnpacker_PrepareToRandomBlockDecoding(&coder->dec); + } + else + { + coder->atBlockHeader = False; + me->isBlockHeaderState_Parse = True; + } + + coder->dec.numStartedStreams = me->numStreams; + coder->dec.numTotalBlocks = me->numTotalBlocks; + coder->dec.numBlocks = me->numBlocks; + } + + while (!coder->skipMode) + { + ECoderStatus status; + SRes res; + size_t srcSize2 = srcSize; + size_t destSize = (size_t)0 - 1; + + coder->dec.parseMode = True; + coder->dec.headerParsedOk = False; + + PRF_STR_INT("Parse", srcSize2) + + res = XzUnpacker_Code(&coder->dec, + NULL, &destSize, + cc->src, &srcSize2, cc->srcFinished, + CODER_FINISH_END, &status); + + // PRF(printf(" res = %d, srcSize2 = %d", res, (unsigned)srcSize2)); + + coder->codeRes = res; + coder->status = status; + cc->srcSize += srcSize2; + srcSize -= srcSize2; + coder->inPreHeaderSize += srcSize2; + coder->inPreSize = coder->inPreHeaderSize; + + if (res != SZ_OK) + { + cc->state = + coder->parseState = MTDEC_PARSE_END; + /* + if (res == SZ_ERROR_MEM) + return res; + return SZ_OK; + */ + return; // res; + } + + if (coder->dec.headerParsedOk) + { + const CXzBlock *block = &coder->dec.block; + if (XzBlock_HasUnpackSize(block) + // && block->unpackSize <= me->props.outBlockMax + && XzBlock_HasPackSize(block)) + { + { + if (block->unpackSize * 2 * me->mtc.numStartedThreads > me->props.memUseMax) + { + cc->state = MTDEC_PARSE_OVERFLOW; + return; // SZ_OK; + } + } + { + const UInt64 packSize = block->packSize; + const UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3); + const unsigned checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags); + const UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize; + // if (blockPackSum <= me->props.inBlockMax) + // unpackBlockMaxSize + { + coder->blockPackSize_for_Index = (size_t)(coder->dec.blockHeaderSize + packSize + checkSize); + coder->blockPackTotal = (size_t)blockPackSum; + coder->outPreSize = (size_t)block->unpackSize; + coder->streamFlags = coder->dec.streamFlags; + me->streamFlags = coder->dec.streamFlags; + coder->skipMode = True; + break; + } + } + } + } + else + // if (coder->inPreSize <= me->props.inBlockMax) + { + if (!cc->srcFinished) + return; // SZ_OK; + cc->state = + coder->parseState = MTDEC_PARSE_END; + return; // SZ_OK; + } + cc->state = MTDEC_PARSE_OVERFLOW; + return; // SZ_OK; + } + + // ---------- skipMode ---------- + { + UInt64 rem = coder->blockPackTotal - coder->inPreSize; + size_t cur = srcSize; + if (cur > rem) + cur = (size_t)rem; + cc->srcSize += cur; + coder->inPreSize += cur; + srcSize -= cur; + + if (coder->inPreSize == coder->blockPackTotal) + { + if (srcSize == 0) + { + if (!cc->srcFinished) + return; // SZ_OK; + cc->state = MTDEC_PARSE_END; + } + else if ((cc->src)[cc->srcSize] == 0) // we check control byte of next block + cc->state = MTDEC_PARSE_END; + else + { + cc->state = MTDEC_PARSE_NEW; + + { + size_t blockMax = me->unpackBlockMaxSize; + if (blockMax < coder->outPreSize) + blockMax = coder->outPreSize; + { + UInt64 required = (UInt64)blockMax * (me->mtc.numStartedThreads + 1) * 2; + if (me->props.memUseMax < required) + cc->canCreateNewThread = False; + } + } + + if (me->outSize_Defined) + { + // next block can be zero size + const UInt64 rem2 = me->outSize - me->outProcessed_Parse; + if (rem2 < coder->outPreSize) + { + coder->parsing_Truncated = True; + cc->state = MTDEC_PARSE_END; + } + me->outProcessed_Parse += coder->outPreSize; + } + } + } + else if (cc->srcFinished) + cc->state = MTDEC_PARSE_END; + else + return; // SZ_OK; + + coder->parseState = cc->state; + cc->outPos = coder->outPreSize; + + me->numStreams = coder->dec.numStartedStreams; + me->numTotalBlocks = coder->dec.numTotalBlocks; + me->numBlocks = coder->dec.numBlocks + 1; + return; // SZ_OK; + } +} + + +static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex) +{ + CXzDecMt *me = (CXzDecMt *)pp; + CXzDecMtThread *coder = &me->coders[coderIndex]; + Byte *dest; + + if (!coder->dec.headerParsedOk) + return SZ_OK; + + dest = coder->outBuf; + + if (!dest || coder->outBufSize < coder->outPreSize) + { + if (dest) + { + ISzAlloc_Free(me->allocMid, dest); + coder->outBuf = NULL; + coder->outBufSize = 0; + } + { + size_t outPreSize = coder->outPreSize; + if (outPreSize == 0) + outPreSize = 1; + dest = (Byte *)ISzAlloc_Alloc(me->allocMid, outPreSize); + } + if (!dest) + return SZ_ERROR_MEM; + coder->outBuf = dest; + coder->outBufSize = coder->outPreSize; + + if (coder->outBufSize > me->unpackBlockMaxSize) + me->unpackBlockMaxSize = coder->outBufSize; + } + + // return SZ_ERROR_MEM; + + XzUnpacker_SetOutBuf(&coder->dec, coder->outBuf, coder->outBufSize); + + { + SRes res = XzDecMix_Init(&coder->dec.decoder, &coder->dec.block, coder->outBuf, coder->outBufSize); + // res = SZ_ERROR_UNSUPPORTED; // to test + coder->codeRes = res; + if (res != SZ_OK) + { + // if (res == SZ_ERROR_MEM) return res; + if (me->props.ignoreErrors && res != SZ_ERROR_MEM) + return SZ_OK; + return res; + } + } + + return SZ_OK; +} + + +static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex, + const Byte *src, size_t srcSize, int srcFinished, + // int finished, int blockFinished, + UInt64 *inCodePos, UInt64 *outCodePos, int *stop) +{ + CXzDecMt *me = (CXzDecMt *)pp; + CXzDecMtThread *coder = &me->coders[coderIndex]; + + *inCodePos = coder->inCodeSize; + *outCodePos = coder->outCodeSize; + *stop = True; + + if (srcSize > coder->inPreSize - coder->inCodeSize) + return SZ_ERROR_FAIL; + + if (coder->inCodeSize < coder->inPreHeaderSize) + { + size_t step = coder->inPreHeaderSize - coder->inCodeSize; + if (step > srcSize) + step = srcSize; + src += step; + srcSize -= step; + coder->inCodeSize += step; + *inCodePos = coder->inCodeSize; + if (coder->inCodeSize < coder->inPreHeaderSize) + { + *stop = False; + return SZ_OK; + } + } + + if (!coder->dec.headerParsedOk) + return SZ_OK; + if (!coder->outBuf) + return SZ_OK; + + if (coder->codeRes == SZ_OK) + { + ECoderStatus status; + SRes res; + size_t srcProcessed = srcSize; + size_t outSizeCur = coder->outPreSize - coder->dec.outDataWritten; + + // PRF(printf("\nCallback_Code: Code %d %d\n", (unsigned)srcSize, (unsigned)outSizeCur)); + + res = XzUnpacker_Code(&coder->dec, + NULL, &outSizeCur, + src, &srcProcessed, srcFinished, + // coder->finishedWithMark ? CODER_FINISH_END : CODER_FINISH_ANY, + CODER_FINISH_END, + &status); + + // PRF(printf(" res = %d, srcSize2 = %d, outSizeCur = %d", res, (unsigned)srcProcessed, (unsigned)outSizeCur)); + + coder->codeRes = res; + coder->status = status; + coder->inCodeSize += srcProcessed; + coder->outCodeSize = coder->dec.outDataWritten; + *inCodePos = coder->inCodeSize; + *outCodePos = coder->outCodeSize; + + if (res == SZ_OK) + { + if (srcProcessed == srcSize) + *stop = False; + return SZ_OK; + } + } + + if (me->props.ignoreErrors && coder->codeRes != SZ_ERROR_MEM) + { + *inCodePos = coder->inPreSize; + *outCodePos = coder->outPreSize; + return SZ_OK; + } + return coder->codeRes; +} + + +#define XZDECMT_STREAM_WRITE_STEP (1 << 24) + +static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex, + BoolInt needWriteToStream, + const Byte *src, size_t srcSize, BoolInt isCross, + // int srcFinished, + BoolInt *needContinue, + BoolInt *canRecode) +{ + CXzDecMt *me = (CXzDecMt *)pp; + const CXzDecMtThread *coder = &me->coders[coderIndex]; + + // PRF(printf("\nWrite processed = %d srcSize = %d\n", (unsigned)me->mtc.inProcessed, (unsigned)srcSize)); + + *needContinue = False; + *canRecode = True; + + if (!needWriteToStream) + return SZ_OK; + + if (!coder->dec.headerParsedOk || !coder->outBuf) + { + if (me->finishedDecoderIndex < 0) + me->finishedDecoderIndex = (int)coderIndex; + return SZ_OK; + } + + if (me->finishedDecoderIndex >= 0) + return SZ_OK; + + me->mtc.inProcessed += coder->inCodeSize; + + *canRecode = False; + + { + SRes res; + size_t size = coder->outCodeSize; + Byte *data = coder->outBuf; + + // we use in me->dec: sha, numBlocks, indexSize + + if (!me->isBlockHeaderState_Write) + { + XzUnpacker_PrepareToRandomBlockDecoding(&me->dec); + me->dec.decodeOnlyOneBlock = False; + me->dec.numStartedStreams = coder->dec.numStartedStreams; + me->dec.streamFlags = coder->streamFlags; + + me->isBlockHeaderState_Write = True; + } + + me->dec.numTotalBlocks = coder->dec.numTotalBlocks; + XzUnpacker_UpdateIndex(&me->dec, coder->blockPackSize_for_Index, coder->outPreSize); + + if (coder->outPreSize != size) + { + if (me->props.ignoreErrors) + { + memset(data + size, 0, coder->outPreSize - size); + size = coder->outPreSize; + } + // me->numBadBlocks++; + if (me->mainErrorCode == SZ_OK) + { + if ((int)coder->status == LZMA_STATUS_NEEDS_MORE_INPUT) + me->mainErrorCode = SZ_ERROR_INPUT_EOF; + else + me->mainErrorCode = SZ_ERROR_DATA; + } + } + + if (me->writeRes != SZ_OK) + return me->writeRes; + + res = SZ_OK; + { + if (me->outSize_Defined) + { + const UInt64 rem = me->outSize - me->outProcessed; + if (size > rem) + size = (SizeT)rem; + } + + for (;;) + { + size_t cur = size; + size_t written; + if (cur > XZDECMT_STREAM_WRITE_STEP) + cur = XZDECMT_STREAM_WRITE_STEP; + + written = ISeqOutStream_Write(me->outStream, data, cur); + + // PRF(printf("\nWritten ask = %d written = %d\n", (unsigned)cur, (unsigned)written)); + + me->outProcessed += written; + if (written != cur) + { + me->writeRes = SZ_ERROR_WRITE; + res = me->writeRes; + break; + } + data += cur; + size -= cur; + // PRF_STR_INT("Written size =", size) + if (size == 0) + break; + res = MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0); + if (res != SZ_OK) + break; + } + } + + if (coder->codeRes != SZ_OK) + if (!me->props.ignoreErrors) + { + me->finishedDecoderIndex = (int)coderIndex; + return res; + } + + RINOK(res) + + if (coder->inPreSize != coder->inCodeSize + || coder->blockPackTotal != coder->inCodeSize) + { + me->finishedDecoderIndex = (int)coderIndex; + return SZ_OK; + } + + if (coder->parseState != MTDEC_PARSE_END) + { + *needContinue = True; + return SZ_OK; + } + } + + // (coder->state == MTDEC_PARSE_END) means that there are no other working threads + // so we can use mtc variables without lock + + PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed) + + me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; + { + CXzUnpacker *dec = &me->dec; + + PRF_STR_INT("PostSingle", srcSize) + + { + size_t srcProcessed = srcSize; + ECoderStatus status; + size_t outSizeCur = 0; + SRes res; + + // dec->decodeOnlyOneBlock = False; + dec->decodeToStreamSignature = True; + + me->mainDecoderWasCalled = True; + + if (coder->parsing_Truncated) + { + me->parsing_Truncated = True; + return SZ_OK; + } + + /* + We have processed all xz-blocks of stream, + And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where + (src) is a pointer to xz-Index structure. + We finish reading of current xz-Stream, including Zero padding after xz-Stream. + We exit, if we reach extra byte (first byte of new-Stream or another data). + But we don't update input stream pointer for that new extra byte. + If extra byte is not correct first byte of xz-signature, + we have SZ_ERROR_NO_ARCHIVE error here. + */ + + res = XzUnpacker_Code(dec, + NULL, &outSizeCur, + src, &srcProcessed, + me->mtc.readWasFinished, // srcFinished + CODER_FINISH_END, // CODER_FINISH_ANY, + &status); + + // res = SZ_ERROR_ARCHIVE; // for failure test + + me->status = status; + me->codeRes = res; + + if (isCross) + me->mtc.crossStart += srcProcessed; + + me->mtc.inProcessed += srcProcessed; + me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; + + srcSize -= srcProcessed; + src += srcProcessed; + + if (res != SZ_OK) + { + return SZ_OK; + // return res; + } + + if (dec->state == XZ_STATE_STREAM_HEADER) + { + *needContinue = True; + me->isBlockHeaderState_Parse = False; + me->isBlockHeaderState_Write = False; + + if (!isCross) + { + Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc); + if (!crossBuf) + return SZ_ERROR_MEM; + if (srcSize != 0) + memcpy(crossBuf, src, srcSize); + me->mtc.crossStart = 0; + me->mtc.crossEnd = srcSize; + } + + PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd) + + return SZ_OK; + } + + if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0) + { + return SZ_ERROR_FAIL; + } + + if (me->mtc.readWasFinished) + { + return SZ_OK; + } + } + + { + size_t inPos; + size_t inLim; + // const Byte *inData; + UInt64 inProgressPrev = me->mtc.inProcessed; + + // XzDecMt_Prepare_InBuf_ST(p); + Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc); + if (!crossBuf) + return SZ_ERROR_MEM; + + inPos = 0; + inLim = 0; + + // inData = crossBuf; + + for (;;) + { + SizeT inProcessed; + SizeT outProcessed; + ECoderStatus status; + SRes res; + + if (inPos == inLim) + { + if (!me->mtc.readWasFinished) + { + inPos = 0; + inLim = me->mtc.inBufSize; + me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim); + me->mtc.readProcessed += inLim; + if (inLim == 0 || me->mtc.readRes != SZ_OK) + me->mtc.readWasFinished = True; + } + } + + inProcessed = inLim - inPos; + outProcessed = 0; + + res = XzUnpacker_Code(dec, + NULL, &outProcessed, + crossBuf + inPos, &inProcessed, + (inProcessed == 0), // srcFinished + CODER_FINISH_END, &status); + + me->codeRes = res; + me->status = status; + inPos += inProcessed; + me->mtc.inProcessed += inProcessed; + me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; + + if (res != SZ_OK) + { + return SZ_OK; + // return res; + } + + if (dec->state == XZ_STATE_STREAM_HEADER) + { + *needContinue = True; + me->mtc.crossStart = inPos; + me->mtc.crossEnd = inLim; + me->isBlockHeaderState_Parse = False; + me->isBlockHeaderState_Write = False; + return SZ_OK; + } + + if (status != CODER_STATUS_NEEDS_MORE_INPUT) + return SZ_ERROR_FAIL; + + if (me->mtc.progress) + { + UInt64 inDelta = me->mtc.inProcessed - inProgressPrev; + if (inDelta >= (1 << 22)) + { + RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress)) + inProgressPrev = me->mtc.inProcessed; + } + } + if (me->mtc.readWasFinished) + return SZ_OK; + } + } + } +} + + +#endif + + + +void XzStatInfo_Clear(CXzStatInfo *p) +{ + p->InSize = 0; + p->OutSize = 0; + + p->NumStreams = 0; + p->NumBlocks = 0; + + p->UnpackSize_Defined = False; + + p->NumStreams_Defined = False; + p->NumBlocks_Defined = False; + + p->DataAfterEnd = False; + p->DecodingTruncated = False; + + p->DecodeRes = SZ_OK; + p->ReadRes = SZ_OK; + p->ProgressRes = SZ_OK; + + p->CombinedRes = SZ_OK; + p->CombinedRes_Type = SZ_OK; +} + + + +/* + XzDecMt_Decode_ST() can return SZ_OK or the following errors + - SZ_ERROR_MEM for memory allocation error + - error from XzUnpacker_Code() function + - SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case + - ICompressProgress::Progress() error, stat->CombinedRes_Type = SZ_ERROR_PROGRESS. + But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors. + ISeqInStream::Read() result is set to p->readRes. + also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. +*/ + +static SRes XzDecMt_Decode_ST(CXzDecMt *p + #ifndef Z7_ST + , BoolInt tMode + #endif + , CXzStatInfo *stat) +{ + size_t outPos; + size_t inPos, inLim; + const Byte *inData; + UInt64 inPrev, outPrev; + + CXzUnpacker *dec; + + #ifndef Z7_ST + if (tMode) + { + XzDecMt_FreeOutBufs(p); + tMode = (BoolInt)MtDec_PrepareRead(&p->mtc); + } + #endif + + if (!p->outBuf || p->outBufSize != p->props.outStep_ST) + { + ISzAlloc_Free(p->allocMid, p->outBuf); + p->outBufSize = 0; + p->outBuf = (Byte *)ISzAlloc_Alloc(p->allocMid, p->props.outStep_ST); + if (!p->outBuf) + return SZ_ERROR_MEM; + p->outBufSize = p->props.outStep_ST; + } + + if (!p->inBuf || p->inBufSize != p->props.inBufSize_ST) + { + ISzAlloc_Free(p->allocMid, p->inBuf); + p->inBufSize = 0; + p->inBuf = (Byte *)ISzAlloc_Alloc(p->allocMid, p->props.inBufSize_ST); + if (!p->inBuf) + return SZ_ERROR_MEM; + p->inBufSize = p->props.inBufSize_ST; + } + + dec = &p->dec; + dec->decodeToStreamSignature = False; + // dec->decodeOnlyOneBlock = False; + + XzUnpacker_SetOutBuf(dec, NULL, 0); + + inPrev = p->inProcessed; + outPrev = p->outProcessed; + + inPos = 0; + inLim = 0; + inData = NULL; + outPos = 0; + + for (;;) + { + SizeT outSize; + BoolInt finished; + ECoderFinishMode finishMode; + SizeT inProcessed; + ECoderStatus status; + SRes res; + + SizeT outProcessed; + + + + if (inPos == inLim) + { + #ifndef Z7_ST + if (tMode) + { + inData = MtDec_Read(&p->mtc, &inLim); + inPos = 0; + if (inData) + continue; + tMode = False; + inLim = 0; + } + #endif + + if (!p->readWasFinished) + { + inPos = 0; + inLim = p->inBufSize; + inData = p->inBuf; + p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim); + p->readProcessed += inLim; + if (inLim == 0 || p->readRes != SZ_OK) + p->readWasFinished = True; + } + } + + outSize = p->props.outStep_ST - outPos; + + finishMode = CODER_FINISH_ANY; + if (p->outSize_Defined) + { + const UInt64 rem = p->outSize - p->outProcessed; + if (outSize >= rem) + { + outSize = (SizeT)rem; + if (p->finishMode) + finishMode = CODER_FINISH_END; + } + } + + inProcessed = inLim - inPos; + outProcessed = outSize; + + res = XzUnpacker_Code(dec, p->outBuf + outPos, &outProcessed, + inData + inPos, &inProcessed, + (inPos == inLim), // srcFinished + finishMode, &status); + + p->codeRes = res; + p->status = status; + + inPos += inProcessed; + outPos += outProcessed; + p->inProcessed += inProcessed; + p->outProcessed += outProcessed; + + finished = ((inProcessed == 0 && outProcessed == 0) || res != SZ_OK); + + if (finished || outProcessed >= outSize) + if (outPos != 0) + { + const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos); + // p->outProcessed += written; // 21.01: BUG fixed + if (written != outPos) + { + stat->CombinedRes_Type = SZ_ERROR_WRITE; + return SZ_ERROR_WRITE; + } + outPos = 0; + } + + if (p->progress && res == SZ_OK) + { + if (p->inProcessed - inPrev >= (1 << 22) || + p->outProcessed - outPrev >= (1 << 22)) + { + res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed); + if (res != SZ_OK) + { + stat->CombinedRes_Type = SZ_ERROR_PROGRESS; + stat->ProgressRes = res; + return res; + } + inPrev = p->inProcessed; + outPrev = p->outProcessed; + } + } + + if (finished) + { + // p->codeRes is preliminary error from XzUnpacker_Code. + // and it can be corrected later as final result + // so we return SZ_OK here instead of (res); + return SZ_OK; + // return res; + } + } +} + + + +/* +XzStatInfo_SetStat() transforms + CXzUnpacker return code and status to combined CXzStatInfo results. + it can convert SZ_OK to SZ_ERROR_INPUT_EOF + it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1) +*/ + +static void XzStatInfo_SetStat(const CXzUnpacker *dec, + int finishMode, + // UInt64 readProcessed, + UInt64 inProcessed, + SRes res, // it's result from CXzUnpacker unpacker + ECoderStatus status, + BoolInt decodingTruncated, + CXzStatInfo *stat) +{ + UInt64 extraSize; + + stat->DecodingTruncated = (Byte)(decodingTruncated ? 1 : 0); + stat->InSize = inProcessed; + stat->NumStreams = dec->numStartedStreams; + stat->NumBlocks = dec->numTotalBlocks; + + stat->UnpackSize_Defined = True; + stat->NumStreams_Defined = True; + stat->NumBlocks_Defined = True; + + extraSize = XzUnpacker_GetExtraSize(dec); + + if (res == SZ_OK) + { + if (status == CODER_STATUS_NEEDS_MORE_INPUT) + { + // CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams + // any extra data is part of correct data + extraSize = 0; + // if xz stream was not finished, then we need more data + if (!XzUnpacker_IsStreamWasFinished(dec)) + res = SZ_ERROR_INPUT_EOF; + } + else + { + // CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding + // so he we have (status == CODER_STATUS_NOT_FINISHED) + // if (status != CODER_STATUS_FINISHED_WITH_MARK) + if (!decodingTruncated || finishMode) + res = SZ_ERROR_DATA; + } + } + else if (res == SZ_ERROR_NO_ARCHIVE) + { + /* + SZ_ERROR_NO_ARCHIVE is possible for 2 states: + XZ_STATE_STREAM_HEADER - if bad signature or bad CRC + XZ_STATE_STREAM_PADDING - if non-zero padding data + extraSize and inProcessed don't include "bad" byte + */ + // if (inProcessed == extraSize), there was no any good xz stream header, and we keep error + if (inProcessed != extraSize) // if there were good xz streams before error + { + // if (extraSize != 0 || readProcessed != inProcessed) + { + // he we suppose that all xz streams were finsihed OK, and we have + // some extra data after all streams + stat->DataAfterEnd = True; + res = SZ_OK; + } + } + } + + if (stat->DecodeRes == SZ_OK) + stat->DecodeRes = res; + + stat->InSize -= extraSize; +} + + + +SRes XzDecMt_Decode(CXzDecMtHandle p, + const CXzDecMtProps *props, + const UInt64 *outDataSize, int finishMode, + ISeqOutStreamPtr outStream, + // Byte *outBuf, size_t *outBufSize, + ISeqInStreamPtr inStream, + // const Byte *inData, size_t inDataSize, + CXzStatInfo *stat, + int *isMT, + ICompressProgressPtr progress) +{ + // GET_CXzDecMt_p + #ifndef Z7_ST + BoolInt tMode; + #endif + + XzStatInfo_Clear(stat); + + p->props = *props; + + p->inStream = inStream; + p->outStream = outStream; + p->progress = progress; + // p->stat = stat; + + p->outSize = 0; + p->outSize_Defined = False; + if (outDataSize) + { + p->outSize_Defined = True; + p->outSize = *outDataSize; + } + + p->finishMode = (BoolInt)finishMode; + + // p->outSize = 457; p->outSize_Defined = True; p->finishMode = False; // for test + + p->writeRes = SZ_OK; + p->outProcessed = 0; + p->inProcessed = 0; + p->readProcessed = 0; + p->readWasFinished = False; + p->readRes = SZ_OK; + + p->codeRes = SZ_OK; + p->status = CODER_STATUS_NOT_SPECIFIED; + + XzUnpacker_Init(&p->dec); + + *isMT = False; + + /* + p->outBuf = NULL; + p->outBufSize = 0; + if (!outStream) + { + p->outBuf = outBuf; + p->outBufSize = *outBufSize; + *outBufSize = 0; + } + */ + + + #ifndef Z7_ST + + p->isBlockHeaderState_Parse = False; + p->isBlockHeaderState_Write = False; + // p->numBadBlocks = 0; + p->mainErrorCode = SZ_OK; + p->mainDecoderWasCalled = False; + + tMode = False; + + if (p->props.numThreads > 1) + { + IMtDecCallback2 vt; + BoolInt needContinue; + SRes res; + // we just free ST buffers here + // but we still keep state variables, that was set in XzUnpacker_Init() + XzDecMt_FreeSt(p); + + p->outProcessed_Parse = 0; + p->parsing_Truncated = False; + + p->numStreams = 0; + p->numTotalBlocks = 0; + p->numBlocks = 0; + p->finishedDecoderIndex = -1; + + if (!p->mtc_WasConstructed) + { + p->mtc_WasConstructed = True; + MtDec_Construct(&p->mtc); + } + + p->mtc.mtCallback = &vt; + p->mtc.mtCallbackObject = p; + + p->mtc.progress = progress; + p->mtc.inStream = inStream; + p->mtc.alloc = &p->alignOffsetAlloc.vt; + // p->mtc.inData = inData; + // p->mtc.inDataSize = inDataSize; + p->mtc.inBufSize = p->props.inBufSize_MT; + // p->mtc.inBlockMax = p->props.inBlockMax; + p->mtc.numThreadsMax = p->props.numThreads; + + *isMT = True; + + vt.Parse = XzDecMt_Callback_Parse; + vt.PreCode = XzDecMt_Callback_PreCode; + vt.Code = XzDecMt_Callback_Code; + vt.Write = XzDecMt_Callback_Write; + + + res = MtDec_Code(&p->mtc); + + + stat->InSize = p->mtc.inProcessed; + + p->inProcessed = p->mtc.inProcessed; + p->readRes = p->mtc.readRes; + p->readWasFinished = p->mtc.readWasFinished; + p->readProcessed = p->mtc.readProcessed; + + tMode = True; + needContinue = False; + + if (res == SZ_OK) + { + if (p->mtc.mtProgress.res != SZ_OK) + { + res = p->mtc.mtProgress.res; + stat->ProgressRes = res; + stat->CombinedRes_Type = SZ_ERROR_PROGRESS; + } + else + needContinue = p->mtc.needContinue; + } + + if (!needContinue) + { + { + SRes codeRes; + BoolInt truncated = False; + ECoderStatus status; + const CXzUnpacker *dec; + + stat->OutSize = p->outProcessed; + + if (p->finishedDecoderIndex >= 0) + { + const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex]; + codeRes = coder->codeRes; + dec = &coder->dec; + status = coder->status; + } + else if (p->mainDecoderWasCalled) + { + codeRes = p->codeRes; + dec = &p->dec; + status = p->status; + truncated = p->parsing_Truncated; + } + else + return SZ_ERROR_FAIL; + + if (p->mainErrorCode != SZ_OK) + stat->DecodeRes = p->mainErrorCode; + + XzStatInfo_SetStat(dec, p->finishMode, + // p->mtc.readProcessed, + p->mtc.inProcessed, + codeRes, status, + truncated, + stat); + } + + if (res == SZ_OK) + { + stat->ReadRes = p->mtc.readRes; + + if (p->writeRes != SZ_OK) + { + res = p->writeRes; + stat->CombinedRes_Type = SZ_ERROR_WRITE; + } + else if (p->mtc.readRes != SZ_OK + // && p->mtc.inProcessed == p->mtc.readProcessed + && stat->DecodeRes == SZ_ERROR_INPUT_EOF) + { + res = p->mtc.readRes; + stat->CombinedRes_Type = SZ_ERROR_READ; + } + else if (stat->DecodeRes != SZ_OK) + res = stat->DecodeRes; + } + + stat->CombinedRes = res; + if (stat->CombinedRes_Type == SZ_OK) + stat->CombinedRes_Type = res; + return res; + } + + PRF_STR("----- decoding ST -----") + } + + #endif + + + *isMT = False; + + { + SRes res = XzDecMt_Decode_ST(p + #ifndef Z7_ST + , tMode + #endif + , stat + ); + + #ifndef Z7_ST + // we must set error code from MT decoding at first + if (p->mainErrorCode != SZ_OK) + stat->DecodeRes = p->mainErrorCode; + #endif + + XzStatInfo_SetStat(&p->dec, + p->finishMode, + // p->readProcessed, + p->inProcessed, + p->codeRes, p->status, + False, // truncated + stat); + + stat->ReadRes = p->readRes; + + if (res == SZ_OK) + { + if (p->readRes != SZ_OK + // && p->inProcessed == p->readProcessed + && stat->DecodeRes == SZ_ERROR_INPUT_EOF) + { + // we set read error as combined error, only if that error was the reason + // of decoding problem + res = p->readRes; + stat->CombinedRes_Type = SZ_ERROR_READ; + } + else if (stat->DecodeRes != SZ_OK) + res = stat->DecodeRes; + } + + stat->CombinedRes = res; + if (stat->CombinedRes_Type == SZ_OK) + stat->CombinedRes_Type = res; + return res; + } +} + +#undef PRF +#undef PRF_STR +#undef PRF_STR_INT_2 diff --git a/crnlib/lzma/XzEnc.c b/crnlib/lzma/XzEnc.c new file mode 100644 index 00000000..3bfa51cb --- /dev/null +++ b/crnlib/lzma/XzEnc.c @@ -0,0 +1,1368 @@ +/* XzEnc.c -- Xz Encode +2024-03-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include +#include + +#include "7zCrc.h" +#include "Bra.h" +#include "CpuArch.h" + +#ifdef USE_SUBBLOCK +#include "Bcj3Enc.c" +#include "SbFind.c" +#include "SbEnc.c" +#endif + +#include "XzEnc.h" + +// #define Z7_ST + +#ifndef Z7_ST +#include "MtCoder.h" +#else +#define MTCODER_THREADS_MAX 1 +#define MTCODER_BLOCKS_MAX 1 +#endif + +#define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3) + +#define XZ_CHECK_SIZE_MAX 64 +/* max pack size for LZMA2 block + pad4 + check_size: */ +#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + XZ_CHECK_SIZE_MAX) + +#define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize)) + + +// #define XzBlock_ClearFlags(p) (p)->flags = 0; +#define XzBlock_ClearFlags_SetNumFilters(p, n) (p)->flags = (Byte)((n) - 1); +#define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE; +#define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE; + + +static SRes WriteBytes(ISeqOutStreamPtr s, const void *buf, size_t size) +{ + return (ISeqOutStream_Write(s, buf, size) == size) ? SZ_OK : SZ_ERROR_WRITE; +} + +static SRes WriteBytes_UpdateCrc(ISeqOutStreamPtr s, const void *buf, size_t size, UInt32 *crc) +{ + *crc = CrcUpdate(*crc, buf, size); + return WriteBytes(s, buf, size); +} + + +static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStreamPtr s) +{ + UInt32 crc; + Byte header[XZ_STREAM_HEADER_SIZE]; + memcpy(header, XZ_SIG, XZ_SIG_SIZE); + header[XZ_SIG_SIZE] = (Byte)(f >> 8); + header[XZ_SIG_SIZE + 1] = (Byte)(f & 0xFF); + crc = CrcCalc(header + XZ_SIG_SIZE, XZ_STREAM_FLAGS_SIZE); + SetUi32(header + XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE, crc) + return WriteBytes(s, header, XZ_STREAM_HEADER_SIZE); +} + + +static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStreamPtr s) +{ + Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; + + unsigned pos = 1; + unsigned numFilters, i; + header[pos++] = p->flags; + + if (XzBlock_HasPackSize(p)) pos += Xz_WriteVarInt(header + pos, p->packSize); + if (XzBlock_HasUnpackSize(p)) pos += Xz_WriteVarInt(header + pos, p->unpackSize); + numFilters = XzBlock_GetNumFilters(p); + + for (i = 0; i < numFilters; i++) + { + const CXzFilter *f = &p->filters[i]; + pos += Xz_WriteVarInt(header + pos, f->id); + pos += Xz_WriteVarInt(header + pos, f->propsSize); + memcpy(header + pos, f->props, f->propsSize); + pos += f->propsSize; + } + + while ((pos & 3) != 0) + header[pos++] = 0; + + header[0] = (Byte)(pos >> 2); + SetUi32(header + pos, CrcCalc(header, pos)) + return WriteBytes(s, header, pos + 4); +} + + + + +typedef struct +{ + size_t numBlocks; + size_t size; + size_t allocated; + Byte *blocks; +} CXzEncIndex; + + +static void XzEncIndex_Construct(CXzEncIndex *p) +{ + p->numBlocks = 0; + p->size = 0; + p->allocated = 0; + p->blocks = NULL; +} + +static void XzEncIndex_Init(CXzEncIndex *p) +{ + p->numBlocks = 0; + p->size = 0; +} + +static void XzEncIndex_Free(CXzEncIndex *p, ISzAllocPtr alloc) +{ + if (p->blocks) + { + ISzAlloc_Free(alloc, p->blocks); + p->blocks = NULL; + } + p->numBlocks = 0; + p->size = 0; + p->allocated = 0; +} + + +static SRes XzEncIndex_ReAlloc(CXzEncIndex *p, size_t newSize, ISzAllocPtr alloc) +{ + Byte *blocks = (Byte *)ISzAlloc_Alloc(alloc, newSize); + if (!blocks) + return SZ_ERROR_MEM; + if (p->size != 0) + memcpy(blocks, p->blocks, p->size); + if (p->blocks) + ISzAlloc_Free(alloc, p->blocks); + p->blocks = blocks; + p->allocated = newSize; + return SZ_OK; +} + + +static SRes XzEncIndex_PreAlloc(CXzEncIndex *p, UInt64 numBlocks, UInt64 unpackSize, UInt64 totalSize, ISzAllocPtr alloc) +{ + UInt64 pos; + { + Byte buf[32]; + unsigned pos2 = Xz_WriteVarInt(buf, totalSize); + pos2 += Xz_WriteVarInt(buf + pos2, unpackSize); + pos = numBlocks * pos2; + } + + if (pos <= p->allocated - p->size) + return SZ_OK; + { + UInt64 newSize64 = p->size + pos; + size_t newSize = (size_t)newSize64; + if (newSize != newSize64) + return SZ_ERROR_MEM; + return XzEncIndex_ReAlloc(p, newSize, alloc); + } +} + + +static SRes XzEncIndex_AddIndexRecord(CXzEncIndex *p, UInt64 unpackSize, UInt64 totalSize, ISzAllocPtr alloc) +{ + Byte buf[32]; + unsigned pos = Xz_WriteVarInt(buf, totalSize); + pos += Xz_WriteVarInt(buf + pos, unpackSize); + + if (pos > p->allocated - p->size) + { + size_t newSize = p->allocated * 2 + 16 * 2; + if (newSize < p->size + pos) + return SZ_ERROR_MEM; + RINOK(XzEncIndex_ReAlloc(p, newSize, alloc)) + } + memcpy(p->blocks + p->size, buf, pos); + p->size += pos; + p->numBlocks++; + return SZ_OK; +} + + +static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, ISeqOutStreamPtr s) +{ + Byte buf[32]; + UInt64 globalPos; + UInt32 crc = CRC_INIT_VAL; + unsigned pos = 1 + Xz_WriteVarInt(buf + 1, p->numBlocks); + + globalPos = pos; + buf[0] = 0; + RINOK(WriteBytes_UpdateCrc(s, buf, pos, &crc)) + RINOK(WriteBytes_UpdateCrc(s, p->blocks, p->size, &crc)) + globalPos += p->size; + + pos = XZ_GET_PAD_SIZE(globalPos); + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + globalPos += pos; + + crc = CrcUpdate(crc, buf + 4 - pos, pos); + SetUi32(buf + 4, CRC_GET_DIGEST(crc)) + + SetUi32(buf + 8 + 4, (UInt32)(globalPos >> 2)) + buf[8 + 8] = (Byte)(flags >> 8); + buf[8 + 9] = (Byte)(flags & 0xFF); + SetUi32(buf + 8, CrcCalc(buf + 8 + 4, 6)) + buf[8 + 10] = XZ_FOOTER_SIG_0; + buf[8 + 11] = XZ_FOOTER_SIG_1; + + return WriteBytes(s, buf + 4 - pos, pos + 4 + 12); +} + + + +/* ---------- CSeqCheckInStream ---------- */ + +typedef struct +{ + ISeqInStream vt; + ISeqInStreamPtr realStream; + const Byte *data; + UInt64 limit; + UInt64 processed; + int realStreamFinished; + CXzCheck check; +} CSeqCheckInStream; + +static void SeqCheckInStream_Init(CSeqCheckInStream *p, unsigned checkMode) +{ + p->limit = (UInt64)(Int64)-1; + p->processed = 0; + p->realStreamFinished = 0; + XzCheck_Init(&p->check, checkMode); +} + +static void SeqCheckInStream_GetDigest(CSeqCheckInStream *p, Byte *digest) +{ + XzCheck_Final(&p->check, digest); +} + +static SRes SeqCheckInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqCheckInStream) + size_t size2 = *size; + SRes res = SZ_OK; + + if (p->limit != (UInt64)(Int64)-1) + { + UInt64 rem = p->limit - p->processed; + if (size2 > rem) + size2 = (size_t)rem; + } + if (size2 != 0) + { + if (p->realStream) + { + res = ISeqInStream_Read(p->realStream, data, &size2); + p->realStreamFinished = (size2 == 0) ? 1 : 0; + } + else + memcpy(data, p->data + (size_t)p->processed, size2); + XzCheck_Update(&p->check, data, size2); + p->processed += size2; + } + *size = size2; + return res; +} + + +/* ---------- CSeqSizeOutStream ---------- */ + +typedef struct +{ + ISeqOutStream vt; + ISeqOutStreamPtr realStream; + Byte *outBuf; + size_t outBufLimit; + UInt64 processed; +} CSeqSizeOutStream; + +static size_t SeqSizeOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqSizeOutStream) + if (p->realStream) + size = ISeqOutStream_Write(p->realStream, data, size); + else + { + if (size > p->outBufLimit - (size_t)p->processed) + return 0; + memcpy(p->outBuf + (size_t)p->processed, data, size); + } + p->processed += size; + return size; +} + + +/* ---------- CSeqInFilter ---------- */ + +#define FILTER_BUF_SIZE (1 << 20) + +typedef struct +{ + ISeqInStream vt; + ISeqInStreamPtr realStream; + IStateCoder StateCoder; + Byte *buf; + size_t curPos; + size_t endPos; + int srcWasFinished; +} CSeqInFilter; + + +static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] = +{ + Z7_BRANCH_CONV_ENC_2 (BranchConv_PPC), + Z7_BRANCH_CONV_ENC_2 (BranchConv_IA64), + Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM), + Z7_BRANCH_CONV_ENC_2 (BranchConv_ARMT), + Z7_BRANCH_CONV_ENC_2 (BranchConv_SPARC), + Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM64), + Z7_BRANCH_CONV_ENC_2 (BranchConv_RISCV) +}; + +static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size) +{ + switch (p->methodId) + { + case XZ_ID_Delta: + Delta_Encode(p->delta_State, p->delta, data, size); + break; + case XZ_ID_X86: + size = (SizeT)(z7_BranchConvSt_X86_Enc(data, size, p->ip, &p->X86_State) - data); + break; + default: + if (p->methodId >= XZ_ID_PPC) + { + const UInt32 i = p->methodId - XZ_ID_PPC; + if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Enc)) + size = (SizeT)(g_Funcs_BranchConv_RISC_Enc[i](data, size, p->ip) - data); + } + break; + } + p->ip += (UInt32)size; + return size; +} + + +static SRes SeqInFilter_Init(CSeqInFilter *p, const CXzFilter *props, ISzAllocPtr alloc) +{ + if (!p->buf) + { + p->buf = (Byte *)ISzAlloc_Alloc(alloc, FILTER_BUF_SIZE); + if (!p->buf) + return SZ_ERROR_MEM; + } + p->curPos = p->endPos = 0; + p->srcWasFinished = 0; + RINOK(Xz_StateCoder_Bc_SetFromMethod_Func(&p->StateCoder, props->id, XzBcFilterStateBase_Filter_Enc, alloc)) + RINOK(p->StateCoder.SetProps(p->StateCoder.p, props->props, props->propsSize, alloc)) + p->StateCoder.Init(p->StateCoder.p); + return SZ_OK; +} + + +static SRes SeqInFilter_Read(ISeqInStreamPtr pp, void *data, size_t *size) +{ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqInFilter) + const size_t sizeOriginal = *size; + if (sizeOriginal == 0) + return SZ_OK; + *size = 0; + + for (;;) + { + if (!p->srcWasFinished && p->curPos == p->endPos) + { + p->curPos = 0; + p->endPos = FILTER_BUF_SIZE; + RINOK(ISeqInStream_Read(p->realStream, p->buf, &p->endPos)) + if (p->endPos == 0) + p->srcWasFinished = 1; + } + { + SizeT srcLen = p->endPos - p->curPos; + ECoderStatus status; + SRes res; + *size = sizeOriginal; + res = p->StateCoder.Code2(p->StateCoder.p, + (Byte *)data, size, + p->buf + p->curPos, &srcLen, + p->srcWasFinished, CODER_FINISH_ANY, + &status); + p->curPos += srcLen; + if (*size != 0 || srcLen == 0 || res != SZ_OK) + return res; + } + } +} + +static void SeqInFilter_Construct(CSeqInFilter *p) +{ + p->buf = NULL; + p->StateCoder.p = NULL; + p->vt.Read = SeqInFilter_Read; +} + +static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc) +{ + if (p->StateCoder.p) + { + p->StateCoder.Free(p->StateCoder.p, alloc); + p->StateCoder.p = NULL; + } + if (p->buf) + { + ISzAlloc_Free(alloc, p->buf); + p->buf = NULL; + } +} + + +/* ---------- CSbEncInStream ---------- */ + +#ifdef USE_SUBBLOCK + +typedef struct +{ + ISeqInStream vt; + ISeqInStreamPtr inStream; + CSbEnc enc; +} CSbEncInStream; + +static SRes SbEncInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size) +{ + CSbEncInStream *p = Z7_CONTAINER_FROM_VTBL(pp, CSbEncInStream, vt); + size_t sizeOriginal = *size; + if (sizeOriginal == 0) + return SZ_OK; + + for (;;) + { + if (p->enc.needRead && !p->enc.readWasFinished) + { + size_t processed = p->enc.needReadSizeMax; + RINOK(p->inStream->Read(p->inStream, p->enc.buf + p->enc.readPos, &processed)) + p->enc.readPos += processed; + if (processed == 0) + { + p->enc.readWasFinished = True; + p->enc.isFinalFinished = True; + } + p->enc.needRead = False; + } + + *size = sizeOriginal; + RINOK(SbEnc_Read(&p->enc, data, size)) + if (*size != 0 || !p->enc.needRead) + return SZ_OK; + } +} + +void SbEncInStream_Construct(CSbEncInStream *p, ISzAllocPtr alloc) +{ + SbEnc_Construct(&p->enc, alloc); + p->vt.Read = SbEncInStream_Read; +} + +SRes SbEncInStream_Init(CSbEncInStream *p) +{ + return SbEnc_Init(&p->enc); +} + +void SbEncInStream_Free(CSbEncInStream *p) +{ + SbEnc_Free(&p->enc); +} + +#endif + + + +/* ---------- CXzProps ---------- */ + + +void XzFilterProps_Init(CXzFilterProps *p) +{ + p->id = 0; + p->delta = 0; + p->ip = 0; + p->ipDefined = False; +} + +void XzProps_Init(CXzProps *p) +{ + p->checkId = XZ_CHECK_CRC32; + p->blockSize = XZ_PROPS_BLOCK_SIZE_AUTO; + p->numBlockThreads_Reduced = -1; + p->numBlockThreads_Max = -1; + p->numTotalThreads = -1; + p->reduceSize = (UInt64)(Int64)-1; + p->forceWriteSizesInHeader = 0; + // p->forceWriteSizesInHeader = 1; + + XzFilterProps_Init(&p->filterProps); + Lzma2EncProps_Init(&p->lzma2Props); +} + + +static void XzEncProps_Normalize_Fixed(CXzProps *p) +{ + UInt64 fileSize; + int t1, t1n, t2, t2r, t3; + { + CLzma2EncProps tp = p->lzma2Props; + if (tp.numTotalThreads <= 0) + tp.numTotalThreads = p->numTotalThreads; + Lzma2EncProps_Normalize(&tp); + t1n = tp.numTotalThreads; + } + + t1 = p->lzma2Props.numTotalThreads; + t2 = p->numBlockThreads_Max; + t3 = p->numTotalThreads; + + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; + + if (t3 <= 0) + { + if (t2 <= 0) + t2 = 1; + t3 = t1n * t2; + } + else if (t2 <= 0) + { + t2 = t3 / t1n; + if (t2 == 0) + { + t1 = 1; + t2 = t3; + } + if (t2 > MTCODER_THREADS_MAX) + t2 = MTCODER_THREADS_MAX; + } + else if (t1 <= 0) + { + t1 = t3 / t2; + if (t1 == 0) + t1 = 1; + } + else + t3 = t1n * t2; + + p->lzma2Props.numTotalThreads = t1; + + t2r = t2; + + fileSize = p->reduceSize; + + if ((p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1)) + p->lzma2Props.lzmaProps.reduceSize = p->blockSize; + + Lzma2EncProps_Normalize(&p->lzma2Props); + + t1 = p->lzma2Props.numTotalThreads; + + { + if (t2 > 1 && fileSize != (UInt64)(Int64)-1) + { + UInt64 numBlocks = fileSize / p->blockSize; + if (numBlocks * p->blockSize != fileSize) + numBlocks++; + if (numBlocks < (unsigned)t2) + { + t2r = (int)numBlocks; + if (t2r == 0) + t2r = 1; + t3 = t1 * t2r; + } + } + } + + p->numBlockThreads_Max = t2; + p->numBlockThreads_Reduced = t2r; + p->numTotalThreads = t3; +} + + +static void XzProps_Normalize(CXzProps *p) +{ + /* we normalize xzProps properties, but we normalize only some of CXzProps::lzma2Props properties. + Lzma2Enc_SetProps() will normalize lzma2Props later. */ + + if (p->blockSize == XZ_PROPS_BLOCK_SIZE_SOLID) + { + p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; + p->numBlockThreads_Reduced = 1; + p->numBlockThreads_Max = 1; + if (p->lzma2Props.numTotalThreads <= 0) + p->lzma2Props.numTotalThreads = p->numTotalThreads; + return; + } + else + { + CLzma2EncProps *lzma2 = &p->lzma2Props; + if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) + { + // xz-auto + p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; + + if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) + { + // if (xz-auto && lzma2-solid) - we use solid for both + p->blockSize = XZ_PROPS_BLOCK_SIZE_SOLID; + p->numBlockThreads_Reduced = 1; + p->numBlockThreads_Max = 1; + if (p->lzma2Props.numTotalThreads <= 0) + p->lzma2Props.numTotalThreads = p->numTotalThreads; + } + else + { + // if (xz-auto && (lzma2-auto || lzma2-fixed_) + // we calculate block size for lzma2 and use that block size for xz, lzma2 uses single-chunk per block + CLzma2EncProps tp = p->lzma2Props; + if (tp.numTotalThreads <= 0) + tp.numTotalThreads = p->numTotalThreads; + + Lzma2EncProps_Normalize(&tp); + + p->blockSize = tp.blockSize; // fixed or solid + p->numBlockThreads_Reduced = tp.numBlockThreads_Reduced; + p->numBlockThreads_Max = tp.numBlockThreads_Max; + if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) + lzma2->blockSize = tp.blockSize; // fixed or solid, LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + if (lzma2->lzmaProps.reduceSize > tp.blockSize && tp.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) + lzma2->lzmaProps.reduceSize = tp.blockSize; + lzma2->numBlockThreads_Reduced = 1; + lzma2->numBlockThreads_Max = 1; + return; + } + } + else + { + // xz-fixed + // we can use xz::reduceSize or xz::blockSize as base for lzmaProps::reduceSize + + p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; + { + UInt64 r = p->reduceSize; + if (r > p->blockSize || r == (UInt64)(Int64)-1) + r = p->blockSize; + lzma2->lzmaProps.reduceSize = r; + } + if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO) + lzma2->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID; + else if (lzma2->blockSize > p->blockSize && lzma2->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID) + lzma2->blockSize = p->blockSize; + + XzEncProps_Normalize_Fixed(p); + } + } +} + + +/* ---------- CLzma2WithFilters ---------- */ + +typedef struct +{ + CLzma2EncHandle lzma2; + CSeqInFilter filter; + + #ifdef USE_SUBBLOCK + CSbEncInStream sb; + #endif +} CLzma2WithFilters; + + +static void Lzma2WithFilters_Construct(CLzma2WithFilters *p) +{ + p->lzma2 = NULL; + SeqInFilter_Construct(&p->filter); + + #ifdef USE_SUBBLOCK + SbEncInStream_Construct(&p->sb, alloc); + #endif +} + + +static SRes Lzma2WithFilters_Create(CLzma2WithFilters *p, ISzAllocPtr alloc, ISzAllocPtr bigAlloc) +{ + if (!p->lzma2) + { + p->lzma2 = Lzma2Enc_Create(alloc, bigAlloc); + if (!p->lzma2) + return SZ_ERROR_MEM; + } + return SZ_OK; +} + + +static void Lzma2WithFilters_Free(CLzma2WithFilters *p, ISzAllocPtr alloc) +{ + #ifdef USE_SUBBLOCK + SbEncInStream_Free(&p->sb); + #endif + + SeqInFilter_Free(&p->filter, alloc); + if (p->lzma2) + { + Lzma2Enc_Destroy(p->lzma2); + p->lzma2 = NULL; + } +} + + +typedef struct +{ + UInt64 unpackSize; + UInt64 totalSize; + size_t headerSize; +} CXzEncBlockInfo; + + +static SRes Xz_CompressBlock( + CLzma2WithFilters *lzmaf, + + ISeqOutStreamPtr outStream, + Byte *outBufHeader, + Byte *outBufData, size_t outBufDataLimit, + + ISeqInStreamPtr inStream, + // UInt64 expectedSize, + const Byte *inBuf, // used if (!inStream) + size_t inBufSize, // used if (!inStream), it's block size, props->blockSize is ignored + + const CXzProps *props, + ICompressProgressPtr progress, + int *inStreamFinished, /* only for inStream version */ + CXzEncBlockInfo *blockSizes, + ISzAllocPtr alloc, + ISzAllocPtr allocBig) +{ + CSeqCheckInStream checkInStream; + CSeqSizeOutStream seqSizeOutStream; + CXzBlock block; + unsigned filterIndex = 0; + CXzFilter *filter = NULL; + const CXzFilterProps *fp = &props->filterProps; + if (fp->id == 0) + fp = NULL; + + *inStreamFinished = False; + + RINOK(Lzma2WithFilters_Create(lzmaf, alloc, allocBig)) + + RINOK(Lzma2Enc_SetProps(lzmaf->lzma2, &props->lzma2Props)) + + // XzBlock_ClearFlags(&block) + XzBlock_ClearFlags_SetNumFilters(&block, 1 + (fp ? 1 : 0)) + + if (fp) + { + filter = &block.filters[filterIndex++]; + filter->id = fp->id; + filter->propsSize = 0; + + if (fp->id == XZ_ID_Delta) + { + filter->props[0] = (Byte)(fp->delta - 1); + filter->propsSize = 1; + } + else if (fp->ipDefined) + { + Byte *ptr = filter->props; + SetUi32(ptr, fp->ip) + filter->propsSize = 4; + } + } + + { + CXzFilter *f = &block.filters[filterIndex++]; + f->id = XZ_ID_LZMA2; + f->propsSize = 1; + f->props[0] = Lzma2Enc_WriteProperties(lzmaf->lzma2); + } + + seqSizeOutStream.vt.Write = SeqSizeOutStream_Write; + seqSizeOutStream.realStream = outStream; + seqSizeOutStream.outBuf = outBufData; + seqSizeOutStream.outBufLimit = outBufDataLimit; + seqSizeOutStream.processed = 0; + + /* + if (expectedSize != (UInt64)(Int64)-1) + { + block.unpackSize = expectedSize; + if (props->blockSize != (UInt64)(Int64)-1) + if (expectedSize > props->blockSize) + block.unpackSize = props->blockSize; + XzBlock_SetHasUnpackSize(&block) + } + */ + + if (outStream) + { + RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)) + } + + checkInStream.vt.Read = SeqCheckInStream_Read; + SeqCheckInStream_Init(&checkInStream, props->checkId); + + checkInStream.realStream = inStream; + checkInStream.data = inBuf; + checkInStream.limit = props->blockSize; + if (!inStream) + checkInStream.limit = inBufSize; + + if (fp) + { + #ifdef USE_SUBBLOCK + if (fp->id == XZ_ID_Subblock) + { + lzmaf->sb.inStream = &checkInStream.vt; + RINOK(SbEncInStream_Init(&lzmaf->sb)) + } + else + #endif + { + lzmaf->filter.realStream = &checkInStream.vt; + RINOK(SeqInFilter_Init(&lzmaf->filter, filter, alloc)) + } + } + + { + SRes res; + Byte *outBuf = NULL; + size_t outSize = 0; + BoolInt useStream = (fp || inStream); + // useStream = True; + + if (!useStream) + { + XzCheck_Update(&checkInStream.check, inBuf, inBufSize); + checkInStream.processed = inBufSize; + } + + if (!outStream) + { + outBuf = seqSizeOutStream.outBuf; // + (size_t)seqSizeOutStream.processed; + outSize = seqSizeOutStream.outBufLimit; // - (size_t)seqSizeOutStream.processed; + } + + res = Lzma2Enc_Encode2(lzmaf->lzma2, + outBuf ? NULL : &seqSizeOutStream.vt, + outBuf, + outBuf ? &outSize : NULL, + + useStream ? + (fp ? + ( + #ifdef USE_SUBBLOCK + (fp->id == XZ_ID_Subblock) ? &lzmaf->sb.vt: + #endif + &lzmaf->filter.vt) : + &checkInStream.vt) : NULL, + + useStream ? NULL : inBuf, + useStream ? 0 : inBufSize, + + progress); + + if (outBuf) + seqSizeOutStream.processed += outSize; + + RINOK(res) + blockSizes->unpackSize = checkInStream.processed; + } + { + Byte buf[4 + XZ_CHECK_SIZE_MAX]; + const unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed); + const UInt64 packSize = seqSizeOutStream.processed; + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + + SeqCheckInStream_GetDigest(&checkInStream, buf + 4); + RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), + padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))) + + blockSizes->totalSize = seqSizeOutStream.processed - padSize; + + if (!outStream) + { + seqSizeOutStream.outBuf = outBufHeader; + seqSizeOutStream.outBufLimit = XZ_BLOCK_HEADER_SIZE_MAX; + seqSizeOutStream.processed = 0; + + block.unpackSize = blockSizes->unpackSize; + XzBlock_SetHasUnpackSize(&block) + + block.packSize = packSize; + XzBlock_SetHasPackSize(&block) + + RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)) + + blockSizes->headerSize = (size_t)seqSizeOutStream.processed; + blockSizes->totalSize += seqSizeOutStream.processed; + } + } + + if (inStream) + *inStreamFinished = checkInStream.realStreamFinished; + else + { + *inStreamFinished = False; + if (checkInStream.processed != inBufSize) + return SZ_ERROR_FAIL; + } + + return SZ_OK; +} + + + +typedef struct +{ + ICompressProgress vt; + ICompressProgressPtr progress; + UInt64 inOffset; + UInt64 outOffset; +} CCompressProgress_XzEncOffset; + + +static SRes CompressProgress_XzEncOffset_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize) +{ + const CCompressProgress_XzEncOffset *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CCompressProgress_XzEncOffset, vt); + inSize += p->inOffset; + outSize += p->outOffset; + return ICompressProgress_Progress(p->progress, inSize, outSize); +} + + + + +struct CXzEnc +{ + ISzAllocPtr alloc; + ISzAllocPtr allocBig; + + CXzProps xzProps; + UInt64 expectedDataSize; + + CXzEncIndex xzIndex; + + CLzma2WithFilters lzmaf_Items[MTCODER_THREADS_MAX]; + + size_t outBufSize; /* size of allocated outBufs[i] */ + Byte *outBufs[MTCODER_BLOCKS_MAX]; + + #ifndef Z7_ST + unsigned checkType; + ISeqOutStreamPtr outStream; + BoolInt mtCoder_WasConstructed; + CMtCoder mtCoder; + CXzEncBlockInfo EncBlocks[MTCODER_BLOCKS_MAX]; + #endif +}; + + +static void XzEnc_Construct(CXzEnc *p) +{ + unsigned i; + + XzEncIndex_Construct(&p->xzIndex); + + for (i = 0; i < MTCODER_THREADS_MAX; i++) + Lzma2WithFilters_Construct(&p->lzmaf_Items[i]); + + #ifndef Z7_ST + p->mtCoder_WasConstructed = False; + { + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + p->outBufs[i] = NULL; + p->outBufSize = 0; + } + #endif +} + + +static void XzEnc_FreeOutBufs(CXzEnc *p) +{ + unsigned i; + for (i = 0; i < MTCODER_BLOCKS_MAX; i++) + if (p->outBufs[i]) + { + ISzAlloc_Free(p->alloc, p->outBufs[i]); + p->outBufs[i] = NULL; + } + p->outBufSize = 0; +} + + +static void XzEnc_Free(CXzEnc *p, ISzAllocPtr alloc) +{ + unsigned i; + + XzEncIndex_Free(&p->xzIndex, alloc); + + for (i = 0; i < MTCODER_THREADS_MAX; i++) + Lzma2WithFilters_Free(&p->lzmaf_Items[i], alloc); + + #ifndef Z7_ST + if (p->mtCoder_WasConstructed) + { + MtCoder_Destruct(&p->mtCoder); + p->mtCoder_WasConstructed = False; + } + XzEnc_FreeOutBufs(p); + #endif +} + + +CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CXzEnc *p = (CXzEnc *)ISzAlloc_Alloc(alloc, sizeof(CXzEnc)); + if (!p) + return NULL; + XzEnc_Construct(p); + XzProps_Init(&p->xzProps); + XzProps_Normalize(&p->xzProps); + p->expectedDataSize = (UInt64)(Int64)-1; + p->alloc = alloc; + p->allocBig = allocBig; + return (CXzEncHandle)p; +} + +// #define GET_CXzEnc_p CXzEnc *p = (CXzEnc *)(void *)pp; + +void XzEnc_Destroy(CXzEncHandle p) +{ + // GET_CXzEnc_p + XzEnc_Free(p, p->alloc); + ISzAlloc_Free(p->alloc, p); +} + + +SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props) +{ + // GET_CXzEnc_p + p->xzProps = *props; + XzProps_Normalize(&p->xzProps); + return SZ_OK; +} + + +void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize) +{ + // GET_CXzEnc_p + p->expectedDataSize = expectedDataSiize; +} + + + + +#ifndef Z7_ST + +static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex, + const Byte *src, size_t srcSize, int finished) +{ + CXzEnc *me = (CXzEnc *)pp; + SRes res; + CMtProgressThunk progressThunk; + Byte *dest; + UNUSED_VAR(finished) + { + CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; + bInfo->totalSize = 0; + bInfo->unpackSize = 0; + bInfo->headerSize = 0; + // v23.02: we don't compress empty blocks + // also we must ignore that empty block in XzEnc_MtCallback_Write() + if (srcSize == 0) + return SZ_OK; + } + dest = me->outBufs[outBufIndex]; + if (!dest) + { + dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize); + if (!dest) + return SZ_ERROR_MEM; + me->outBufs[outBufIndex] = dest; + } + + MtProgressThunk_CreateVTable(&progressThunk); + progressThunk.mtProgress = &me->mtCoder.mtProgress; + MtProgressThunk_INIT(&progressThunk) + + { + CXzEncBlockInfo blockSizes; + int inStreamFinished; + + res = Xz_CompressBlock( + &me->lzmaf_Items[coderIndex], + + NULL, + dest, + dest + XZ_BLOCK_HEADER_SIZE_MAX, me->outBufSize - XZ_BLOCK_HEADER_SIZE_MAX, + + NULL, + // srcSize, // expectedSize + src, srcSize, + + &me->xzProps, + &progressThunk.vt, + &inStreamFinished, + &blockSizes, + me->alloc, + me->allocBig); + + if (res == SZ_OK) + me->EncBlocks[outBufIndex] = blockSizes; + + return res; + } +} + + +static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex) +{ + CXzEnc *me = (CXzEnc *)pp; + const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; + // v23.02: we don't write empty blocks + // note: if (bInfo->unpackSize == 0) then there is no compressed data of block + if (bInfo->unpackSize == 0) + return SZ_OK; + { + const Byte *data = me->outBufs[outBufIndex]; + RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)) + { + const UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize); + RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)) + } + return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc); + } +} + +#endif + + + +SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress) +{ + // GET_CXzEnc_p + + const CXzProps *props = &p->xzProps; + + XzEncIndex_Init(&p->xzIndex); + { + UInt64 numBlocks = 1; + UInt64 blockSize = props->blockSize; + + if (blockSize != XZ_PROPS_BLOCK_SIZE_SOLID + && props->reduceSize != (UInt64)(Int64)-1) + { + numBlocks = props->reduceSize / blockSize; + if (numBlocks * blockSize != props->reduceSize) + numBlocks++; + } + else + blockSize = (UInt64)1 << 62; + + RINOK(XzEncIndex_PreAlloc(&p->xzIndex, numBlocks, blockSize, XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(blockSize), p->alloc)) + } + + RINOK(Xz_WriteHeader((CXzStreamFlags)props->checkId, outStream)) + + + #ifndef Z7_ST + if (props->numBlockThreads_Reduced > 1) + { + IMtCoderCallback2 vt; + + if (!p->mtCoder_WasConstructed) + { + p->mtCoder_WasConstructed = True; + MtCoder_Construct(&p->mtCoder); + } + + vt.Code = XzEnc_MtCallback_Code; + vt.Write = XzEnc_MtCallback_Write; + + p->checkType = props->checkId; + p->xzProps = *props; + + p->outStream = outStream; + + p->mtCoder.allocBig = p->allocBig; + p->mtCoder.progress = progress; + p->mtCoder.inStream = inStream; + p->mtCoder.inData = NULL; + p->mtCoder.inDataSize = 0; + p->mtCoder.mtCallback = &vt; + p->mtCoder.mtCallbackObject = p; + + if ( props->blockSize == XZ_PROPS_BLOCK_SIZE_SOLID + || props->blockSize == XZ_PROPS_BLOCK_SIZE_AUTO) + return SZ_ERROR_FAIL; + + p->mtCoder.blockSize = (size_t)props->blockSize; + if (p->mtCoder.blockSize != props->blockSize) + return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */ + + { + size_t destBlockSize = XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(p->mtCoder.blockSize); + if (destBlockSize < p->mtCoder.blockSize) + return SZ_ERROR_PARAM; + if (p->outBufSize != destBlockSize) + XzEnc_FreeOutBufs(p); + p->outBufSize = destBlockSize; + } + + p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; + p->mtCoder.expectedDataSize = p->expectedDataSize; + + RINOK(MtCoder_Code(&p->mtCoder)) + } + else + #endif + { + int writeStartSizes; + CCompressProgress_XzEncOffset progress2; + Byte *bufData = NULL; + size_t bufSize = 0; + + progress2.vt.Progress = CompressProgress_XzEncOffset_Progress; + progress2.inOffset = 0; + progress2.outOffset = 0; + progress2.progress = progress; + + writeStartSizes = 0; + + if (props->blockSize != XZ_PROPS_BLOCK_SIZE_SOLID) + { + writeStartSizes = (props->forceWriteSizesInHeader > 0); + + if (writeStartSizes) + { + size_t t2; + size_t t = (size_t)props->blockSize; + if (t != props->blockSize) + return SZ_ERROR_PARAM; + t = XZ_GET_MAX_BLOCK_PACK_SIZE(t); + if (t < props->blockSize) + return SZ_ERROR_PARAM; + t2 = XZ_BLOCK_HEADER_SIZE_MAX + t; + if (!p->outBufs[0] || t2 != p->outBufSize) + { + XzEnc_FreeOutBufs(p); + p->outBufs[0] = (Byte *)ISzAlloc_Alloc(p->alloc, t2); + if (!p->outBufs[0]) + return SZ_ERROR_MEM; + p->outBufSize = t2; + } + bufData = p->outBufs[0] + XZ_BLOCK_HEADER_SIZE_MAX; + bufSize = t; + } + } + + for (;;) + { + CXzEncBlockInfo blockSizes; + int inStreamFinished; + + /* + UInt64 rem = (UInt64)(Int64)-1; + if (props->reduceSize != (UInt64)(Int64)-1 + && props->reduceSize >= progress2.inOffset) + rem = props->reduceSize - progress2.inOffset; + */ + + blockSizes.headerSize = 0; // for GCC + + RINOK(Xz_CompressBlock( + &p->lzmaf_Items[0], + + writeStartSizes ? NULL : outStream, + writeStartSizes ? p->outBufs[0] : NULL, + bufData, bufSize, + + inStream, + // rem, + NULL, 0, + + props, + progress ? &progress2.vt : NULL, + &inStreamFinished, + &blockSizes, + p->alloc, + p->allocBig)) + + { + UInt64 totalPackFull = blockSizes.totalSize + XZ_GET_PAD_SIZE(blockSizes.totalSize); + + if (writeStartSizes) + { + RINOK(WriteBytes(outStream, p->outBufs[0], blockSizes.headerSize)) + RINOK(WriteBytes(outStream, bufData, (size_t)totalPackFull - blockSizes.headerSize)) + } + + RINOK(XzEncIndex_AddIndexRecord(&p->xzIndex, blockSizes.unpackSize, blockSizes.totalSize, p->alloc)) + + progress2.inOffset += blockSizes.unpackSize; + progress2.outOffset += totalPackFull; + } + + if (inStreamFinished) + break; + } + } + + return XzEncIndex_WriteFooter(&p->xzIndex, (CXzStreamFlags)props->checkId, outStream); +} + + +#include "Alloc.h" + +SRes Xz_Encode(ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + const CXzProps *props, ICompressProgressPtr progress) +{ + SRes res; + CXzEncHandle xz = XzEnc_Create(&g_Alloc, &g_BigAlloc); + if (!xz) + return SZ_ERROR_MEM; + res = XzEnc_SetProps(xz, props); + if (res == SZ_OK) + res = XzEnc_Encode(xz, outStream, inStream, progress); + XzEnc_Destroy(xz); + return res; +} + + +SRes Xz_EncodeEmpty(ISeqOutStreamPtr outStream) +{ + SRes res; + CXzEncIndex xzIndex; + XzEncIndex_Construct(&xzIndex); + res = Xz_WriteHeader((CXzStreamFlags)0, outStream); + if (res == SZ_OK) + res = XzEncIndex_WriteFooter(&xzIndex, (CXzStreamFlags)0, outStream); + XzEncIndex_Free(&xzIndex, NULL); // g_Alloc + return res; +} diff --git a/crnlib/lzma/XzEnc.h b/crnlib/lzma/XzEnc.h new file mode 100644 index 00000000..31026f7e --- /dev/null +++ b/crnlib/lzma/XzEnc.h @@ -0,0 +1,61 @@ +/* XzEnc.h -- Xz Encode +2023-04-13 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_XZ_ENC_H +#define ZIP7_INC_XZ_ENC_H + +#include "Lzma2Enc.h" + +#include "Xz.h" + +EXTERN_C_BEGIN + + +#define XZ_PROPS_BLOCK_SIZE_AUTO LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO +#define XZ_PROPS_BLOCK_SIZE_SOLID LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID + + +typedef struct +{ + UInt32 id; + UInt32 delta; + UInt32 ip; + int ipDefined; +} CXzFilterProps; + +void XzFilterProps_Init(CXzFilterProps *p); + + +typedef struct +{ + CLzma2EncProps lzma2Props; + CXzFilterProps filterProps; + unsigned checkId; + UInt64 blockSize; + int numBlockThreads_Reduced; + int numBlockThreads_Max; + int numTotalThreads; + int forceWriteSizesInHeader; + UInt64 reduceSize; +} CXzProps; + +void XzProps_Init(CXzProps *p); + +typedef struct CXzEnc CXzEnc; +typedef CXzEnc * CXzEncHandle; +// Z7_DECLARE_HANDLE(CXzEncHandle) + +CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig); +void XzEnc_Destroy(CXzEncHandle p); +SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props); +void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize); +SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress); + +SRes Xz_Encode(ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + const CXzProps *props, ICompressProgressPtr progress); + +SRes Xz_EncodeEmpty(ISeqOutStreamPtr outStream); + +EXTERN_C_END + +#endif diff --git a/crnlib/lzma/XzIn.c b/crnlib/lzma/XzIn.c new file mode 100644 index 00000000..49470a72 --- /dev/null +++ b/crnlib/lzma/XzIn.c @@ -0,0 +1,340 @@ +/* XzIn.c - Xz input +2023-09-07 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "7zCrc.h" +#include "CpuArch.h" +#include "Xz.h" + +/* +#define XZ_FOOTER_SIG_CHECK(p) (memcmp((p), XZ_FOOTER_SIG, XZ_FOOTER_SIG_SIZE) == 0) +*/ +#define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1) + + +SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream) +{ + Byte sig[XZ_STREAM_HEADER_SIZE]; + size_t processedSize = XZ_STREAM_HEADER_SIZE; + RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize)) + if (processedSize != XZ_STREAM_HEADER_SIZE + || memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0) + return SZ_ERROR_NO_ARCHIVE; + return Xz_ParseHeader(p, sig); +} + +#define READ_VARINT_AND_CHECK(buf, pos, size, res) \ + { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ + if (s == 0) return SZ_ERROR_ARCHIVE; \ + pos += s; } + +SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes) +{ + Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; + unsigned headerSize; + *headerSizeRes = 0; + RINOK(SeqInStream_ReadByte(inStream, &header[0])) + headerSize = header[0]; + if (headerSize == 0) + { + *headerSizeRes = 1; + *isIndex = True; + return SZ_OK; + } + + *isIndex = False; + headerSize = (headerSize << 2) + 4; + *headerSizeRes = (UInt32)headerSize; + { + size_t processedSize = headerSize - 1; + RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize)) + if (processedSize != headerSize - 1) + return SZ_ERROR_INPUT_EOF; + } + return XzBlock_Parse(p, header); +} + +#define ADD_SIZE_CHECK(size, val) \ + { const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; } + +UInt64 Xz_GetUnpackSize(const CXzStream *p) +{ + UInt64 size = 0; + size_t i; + for (i = 0; i < p->numBlocks; i++) + { + ADD_SIZE_CHECK(size, p->blocks[i].unpackSize) + } + return size; +} + +UInt64 Xz_GetPackSize(const CXzStream *p) +{ + UInt64 size = 0; + size_t i; + for (i = 0; i < p->numBlocks; i++) + { + ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3) + } + return size; +} + +/* +SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream) +{ + return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f)); +} +*/ + +static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc) +{ + size_t numBlocks, pos = 1; + UInt32 crc; + + if (size < 5 || buf[0] != 0) + return SZ_ERROR_ARCHIVE; + + size -= 4; + crc = CrcCalc(buf, size); + if (crc != GetUi32(buf + size)) + return SZ_ERROR_ARCHIVE; + + { + UInt64 numBlocks64; + READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64) + numBlocks = (size_t)numBlocks64; + if (numBlocks != numBlocks64 || numBlocks * 2 > size) + return SZ_ERROR_ARCHIVE; + } + + Xz_Free(p, alloc); + if (numBlocks != 0) + { + size_t i; + p->numBlocks = numBlocks; + p->blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks); + if (!p->blocks) + return SZ_ERROR_MEM; + for (i = 0; i < numBlocks; i++) + { + CXzBlockSizes *block = &p->blocks[i]; + READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize) + READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize) + if (block->totalSize == 0) + return SZ_ERROR_ARCHIVE; + } + } + while ((pos & 3) != 0) + if (buf[pos++] != 0) + return SZ_ERROR_ARCHIVE; + return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; +} + +static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc) +{ + SRes res; + size_t size; + Byte *buf; + if (indexSize > ((UInt32)1 << 31)) + return SZ_ERROR_UNSUPPORTED; + size = (size_t)indexSize; + if (size != indexSize) + return SZ_ERROR_UNSUPPORTED; + buf = (Byte *)ISzAlloc_Alloc(alloc, size); + if (!buf) + return SZ_ERROR_MEM; + res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED); + if (res == SZ_OK) + res = Xz_ReadIndex2(p, buf, size, alloc); + ISzAlloc_Free(alloc, buf); + return res; +} + +static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size) +{ + RINOK(LookInStream_SeekTo(stream, offset)) + return LookInStream_Read(stream, buf, size); + /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */ +} + +static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc) +{ + UInt64 indexSize; + Byte buf[XZ_STREAM_FOOTER_SIZE]; + UInt64 pos = (UInt64)*startOffset; + + if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE) + return SZ_ERROR_NO_ARCHIVE; + + pos -= XZ_STREAM_FOOTER_SIZE; + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) + + if (!XZ_FOOTER_SIG_CHECK(buf + 10)) + { + UInt32 total = 0; + pos += XZ_STREAM_FOOTER_SIZE; + + for (;;) + { + size_t i; + #define TEMP_BUF_SIZE (1 << 10) + Byte temp[TEMP_BUF_SIZE]; + + i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos; + pos -= i; + RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i)) + total += (UInt32)i; + for (; i != 0; i--) + if (temp[i - 1] != 0) + break; + if (i != 0) + { + if ((i & 3) != 0) + return SZ_ERROR_NO_ARCHIVE; + pos += i; + break; + } + if (pos < XZ_STREAM_FOOTER_SIZE || total > (1 << 16)) + return SZ_ERROR_NO_ARCHIVE; + } + + if (pos < XZ_STREAM_FOOTER_SIZE) + return SZ_ERROR_NO_ARCHIVE; + pos -= XZ_STREAM_FOOTER_SIZE; + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) + if (!XZ_FOOTER_SIG_CHECK(buf + 10)) + return SZ_ERROR_NO_ARCHIVE; + } + + p->flags = (CXzStreamFlags)GetBe16(buf + 8); + + if (!XzFlags_IsSupported(p->flags)) + return SZ_ERROR_UNSUPPORTED; + + { + /* to eliminate GCC 6.3 warning: + dereferencing type-punned pointer will break strict-aliasing rules */ + const Byte *buf_ptr = buf; + if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6)) + return SZ_ERROR_ARCHIVE; + } + + indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2; + + if (pos < indexSize) + return SZ_ERROR_ARCHIVE; + + pos -= indexSize; + RINOK(LookInStream_SeekTo(stream, pos)) + RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)) + + { + UInt64 totalSize = Xz_GetPackSize(p); + if (totalSize == XZ_SIZE_OVERFLOW + || totalSize >= ((UInt64)1 << 63) + || pos < totalSize + XZ_STREAM_HEADER_SIZE) + return SZ_ERROR_ARCHIVE; + pos -= (totalSize + XZ_STREAM_HEADER_SIZE); + RINOK(LookInStream_SeekTo(stream, pos)) + *startOffset = (Int64)pos; + } + { + CXzStreamFlags headerFlags; + CSecToRead secToRead; + SecToRead_CreateVTable(&secToRead); + secToRead.realStream = stream; + + RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt)) + return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE; + } +} + + +/* ---------- Xz Streams ---------- */ + +void Xzs_Construct(CXzs *p) +{ + p->num = p->numAllocated = 0; + p->streams = 0; +} + +void Xzs_Free(CXzs *p, ISzAllocPtr alloc) +{ + size_t i; + for (i = 0; i < p->num; i++) + Xz_Free(&p->streams[i], alloc); + ISzAlloc_Free(alloc, p->streams); + p->num = p->numAllocated = 0; + p->streams = 0; +} + +UInt64 Xzs_GetNumBlocks(const CXzs *p) +{ + UInt64 num = 0; + size_t i; + for (i = 0; i < p->num; i++) + num += p->streams[i].numBlocks; + return num; +} + +UInt64 Xzs_GetUnpackSize(const CXzs *p) +{ + UInt64 size = 0; + size_t i; + for (i = 0; i < p->num; i++) + { + ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i])) + } + return size; +} + +/* +UInt64 Xzs_GetPackSize(const CXzs *p) +{ + UInt64 size = 0; + size_t i; + for (i = 0; i < p->num; i++) + { + ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i])) + } + return size; +} +*/ + +SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc) +{ + Int64 endOffset = 0; + RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END)) + *startOffset = endOffset; + for (;;) + { + CXzStream st; + SRes res; + Xz_Construct(&st); + res = Xz_ReadBackward(&st, stream, startOffset, alloc); + st.startOffset = (UInt64)*startOffset; + RINOK(res) + if (p->num == p->numAllocated) + { + const size_t newNum = p->num + p->num / 4 + 1; + void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); + if (!data) + return SZ_ERROR_MEM; + p->numAllocated = newNum; + if (p->num != 0) + memcpy(data, p->streams, p->num * sizeof(CXzStream)); + ISzAlloc_Free(alloc, p->streams); + p->streams = (CXzStream *)data; + } + p->streams[p->num++] = st; + if (*startOffset == 0) + break; + RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)) + if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) + return SZ_ERROR_PROGRESS; + } + return SZ_OK; +} diff --git a/crnlib/lzma_7zBuf.cpp b/crnlib/lzma_7zBuf.cpp deleted file mode 100644 index 4051954f..00000000 --- a/crnlib/lzma_7zBuf.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* 7zBuf.c -- Byte Buffer -2008-03-28 -Igor Pavlov -Public domain */ -#include "crn_core.h" - -#include "lzma_7zBuf.h" - -namespace crnlib { - -void Buf_Init(CBuf* p) { - p->data = 0; - p->size = 0; -} - -int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc) { - p->size = 0; - if (size == 0) { - p->data = 0; - return 1; - } - p->data = (Byte*)alloc->Alloc(alloc, size); - if (p->data != 0) { - p->size = size; - return 1; - } - return 0; -} - -void Buf_Free(CBuf* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->data); - p->data = 0; - p->size = 0; -} -} diff --git a/crnlib/lzma_7zBuf.h b/crnlib/lzma_7zBuf.h deleted file mode 100644 index cf58f34f..00000000 --- a/crnlib/lzma_7zBuf.h +++ /dev/null @@ -1,34 +0,0 @@ -/* 7zBuf.h -- Byte Buffer -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __7Z_BUF_H -#define __7Z_BUF_H - -#include "lzma_Types.h" - -namespace crnlib { - -typedef struct -{ - Byte* data; - size_t size; -} CBuf; - -void Buf_Init(CBuf* p); -int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc); -void Buf_Free(CBuf* p, ISzAlloc* alloc); - -typedef struct -{ - Byte* data; - size_t size; - size_t pos; -} CDynBuf; - -void DynBuf_Construct(CDynBuf* p); -void DynBuf_SeekToBeg(CDynBuf* p); -int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc); -void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc); -} - -#endif diff --git a/crnlib/lzma_7zBuf2.cpp b/crnlib/lzma_7zBuf2.cpp deleted file mode 100644 index f57596d9..00000000 --- a/crnlib/lzma_7zBuf2.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* 7zBuf2.c -- Byte Buffer -2008-10-04 : Igor Pavlov : Public domain */ - -#include "crn_core.h" -#include -#include "lzma_7zBuf.h" - -namespace crnlib { - -void DynBuf_Construct(CDynBuf* p) { - p->data = 0; - p->size = 0; - p->pos = 0; -} - -void DynBuf_SeekToBeg(CDynBuf* p) { - p->pos = 0; -} - -int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc) { - if (size > p->size - p->pos) { - size_t newSize = p->pos + size; - Byte* data; - newSize += newSize / 4; - data = (Byte*)alloc->Alloc(alloc, newSize); - if (data == 0) - return 0; - p->size = newSize; - memcpy(data, p->data, p->pos); - alloc->Free(alloc, p->data); - p->data = data; - } - memcpy(p->data + p->pos, buf, size); - p->pos += size; - return 1; -} - -void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->data); - p->data = 0; - p->size = 0; - p->pos = 0; -} -} diff --git a/crnlib/lzma_7zCrc.cpp b/crnlib/lzma_7zCrc.cpp deleted file mode 100644 index 53411060..00000000 --- a/crnlib/lzma_7zCrc.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* 7zCrc.c -- CRC32 calculation -2008-08-05 -Igor Pavlov -Public domain */ -#include "crn_core.h" - -#include "lzma_7zCrc.h" - -namespace crnlib { - -#define kCrcPoly 0xEDB88320 -UInt32 g_CrcTable[256]; - -void MY_FAST_CALL CrcGenerateTable(void) { - UInt32 i; - for (i = 0; i < 256; i++) { - UInt32 r = i; - int j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); - g_CrcTable[i] = r; - } -} - -UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void* data, size_t size) { - const Byte* p = (const Byte*)data; - for (; size > 0; size--, p++) - v = CRC_UPDATE_BYTE(v, *p); - return v; -} - -UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size) { - return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; -} -} diff --git a/crnlib/lzma_7zCrc.h b/crnlib/lzma_7zCrc.h deleted file mode 100644 index dc528de1..00000000 --- a/crnlib/lzma_7zCrc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* 7zCrc.h -- CRC32 calculation -2008-03-13 -Igor Pavlov -Public domain */ - -#ifndef __7Z_CRC_H -#define __7Z_CRC_H - -#include - -#include "lzma_Types.h" - -namespace crnlib { - -extern UInt32 g_CrcTable[]; - -void MY_FAST_CALL CrcGenerateTable(void); - -#define CRC_INIT_VAL 0xFFFFFFFF -#define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) -#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) - -UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void* data, size_t size); -UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size); -} - -#endif diff --git a/crnlib/lzma_7zFile.cpp b/crnlib/lzma_7zFile.cpp deleted file mode 100644 index c7eb8458..00000000 --- a/crnlib/lzma_7zFile.cpp +++ /dev/null @@ -1,257 +0,0 @@ -/* 7zFile.c -- File IO -2008-11-22 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_7zFile.h" - -#ifndef USE_WINDOWS_FILE - -#include - -#endif - -#ifdef USE_WINDOWS_FILE - -/* - ReadFile and WriteFile functions in Windows have BUG: - If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) - from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES - (Insufficient system resources exist to complete the requested service). - Probably in some version of Windows there are problems with other sizes: - for 32 MB (maybe also for 16 MB). - And message can be "Network connection was lost" -*/ - -#define kChunkSizeMax (1 << 22) - -#endif - -namespace crnlib { - -void File_Construct(CSzFile* p) { -#ifdef USE_WINDOWS_FILE - p->handle = INVALID_HANDLE_VALUE; -#else - p->file = NULL; -#endif -} - -static WRes File_Open(CSzFile* p, const char* name, int writeMode) { -#ifdef USE_WINDOWS_FILE - p->handle = CreateFileA(name, - writeMode ? GENERIC_WRITE : GENERIC_READ, - FILE_SHARE_READ, NULL, - writeMode ? CREATE_ALWAYS : OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); -#else - p->file = fopen(name, writeMode ? "wb+" : "rb"); - return (p->file != 0) ? 0 : errno; -#endif -} - -WRes InFile_Open(CSzFile* p, const char* name) { - return File_Open(p, name, 0); -} -WRes OutFile_Open(CSzFile* p, const char* name) { - return File_Open(p, name, 1); -} - -WRes File_Close(CSzFile* p) { -#ifdef USE_WINDOWS_FILE - if (p->handle != INVALID_HANDLE_VALUE) { - if (!CloseHandle(p->handle)) - return GetLastError(); - p->handle = INVALID_HANDLE_VALUE; - } -#else - if (p->file != NULL) { - int res = fclose(p->file); - if (res != 0) - return res; - p->file = NULL; - } -#endif - return 0; -} - -WRes File_Read(CSzFile* p, void* data, size_t* size) { - size_t originalSize = *size; - if (originalSize == 0) - return 0; - -#ifdef USE_WINDOWS_FILE - - *size = 0; - do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; - DWORD processed = 0; - BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); - data = (void*)((Byte*)data + processed); - originalSize -= processed; - *size += processed; - if (!res) - return GetLastError(); - if (processed == 0) - break; - } while (originalSize > 0); - return 0; - -#else - - *size = fread(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - -#endif -} - -WRes File_Write(CSzFile* p, const void* data, size_t* size) { - size_t originalSize = *size; - if (originalSize == 0) - return 0; - -#ifdef USE_WINDOWS_FILE - - *size = 0; - do { - DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; - DWORD processed = 0; - BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); - data = (void*)((Byte*)data + processed); - originalSize -= processed; - *size += processed; - if (!res) - return GetLastError(); - if (processed == 0) - break; - } while (originalSize > 0); - return 0; - -#else - - *size = fwrite(data, 1, originalSize, p->file); - if (*size == originalSize) - return 0; - return ferror(p->file); - -#endif -} - -WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin) { -#ifdef USE_WINDOWS_FILE - - LARGE_INTEGER value; - DWORD moveMethod; - value.LowPart = (DWORD)*pos; - value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ - switch (origin) { - case SZ_SEEK_SET: - moveMethod = FILE_BEGIN; - break; - case SZ_SEEK_CUR: - moveMethod = FILE_CURRENT; - break; - case SZ_SEEK_END: - moveMethod = FILE_END; - break; - default: - return ERROR_INVALID_PARAMETER; - } - value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); - if (value.LowPart == 0xFFFFFFFF) { - WRes res = GetLastError(); - if (res != NO_ERROR) - return res; - } - *pos = ((Int64)value.HighPart << 32) | value.LowPart; - return 0; - -#else - - int moveMethod; - int res; - switch (origin) { - case SZ_SEEK_SET: - moveMethod = SEEK_SET; - break; - case SZ_SEEK_CUR: - moveMethod = SEEK_CUR; - break; - case SZ_SEEK_END: - moveMethod = SEEK_END; - break; - default: - return 1; - } - res = fseek(p->file, (long)*pos, moveMethod); - *pos = ftell(p->file); - return res; - -#endif -} - -WRes File_GetLength(CSzFile* p, UInt64* length) { -#ifdef USE_WINDOWS_FILE - - DWORD sizeHigh; - DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); - if (sizeLow == 0xFFFFFFFF) { - DWORD res = GetLastError(); - if (res != NO_ERROR) - return res; - } - *length = (((UInt64)sizeHigh) << 32) + sizeLow; - return 0; - -#else - - long pos = ftell(p->file); - int res = fseek(p->file, 0, SEEK_END); - *length = ftell(p->file); - fseek(p->file, pos, SEEK_SET); - return res; - -#endif -} - -/* ---------- FileSeqInStream ---------- */ - -static SRes FileSeqInStream_Read(void* pp, void* buf, size_t* size) { - CFileSeqInStream* p = (CFileSeqInStream*)pp; - return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; -} - -void FileSeqInStream_CreateVTable(CFileSeqInStream* p) { - p->s.Read = FileSeqInStream_Read; -} - -/* ---------- FileInStream ---------- */ - -static SRes FileInStream_Read(void* pp, void* buf, size_t* size) { - CFileInStream* p = (CFileInStream*)pp; - return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; -} - -static SRes FileInStream_Seek(void* pp, Int64* pos, ESzSeek origin) { - CFileInStream* p = (CFileInStream*)pp; - return File_Seek(&p->file, pos, origin); -} - -void FileInStream_CreateVTable(CFileInStream* p) { - p->s.Read = FileInStream_Read; - p->s.Seek = FileInStream_Seek; -} - -/* ---------- FileOutStream ---------- */ - -static size_t FileOutStream_Write(void* pp, const void* data, size_t size) { - CFileOutStream* p = (CFileOutStream*)pp; - File_Write(&p->file, data, &size); - return size; -} - -void FileOutStream_CreateVTable(CFileOutStream* p) { - p->s.Write = FileOutStream_Write; -} -} diff --git a/crnlib/lzma_7zFile.h b/crnlib/lzma_7zFile.h deleted file mode 100644 index 16d10fdc..00000000 --- a/crnlib/lzma_7zFile.h +++ /dev/null @@ -1,73 +0,0 @@ -/* 7zFile.h -- File IO -2008-11-22 : Igor Pavlov : Public domain */ - -#ifndef __7Z_FILE_H -#define __7Z_FILE_H - -#ifdef _WIN32 -#define USE_WINDOWS_FILE -#endif - -#ifdef USE_WINDOWS_FILE -#include -#else -#include -#endif - -#include "lzma_Types.h" - -namespace crnlib { - -/* ---------- File ---------- */ - -typedef struct -{ -#ifdef USE_WINDOWS_FILE - HANDLE handle; -#else - FILE* file; -#endif -} CSzFile; - -void File_Construct(CSzFile* p); -WRes InFile_Open(CSzFile* p, const char* name); -WRes OutFile_Open(CSzFile* p, const char* name); -WRes File_Close(CSzFile* p); - -/* reads max(*size, remain file's size) bytes */ -WRes File_Read(CSzFile* p, void* data, size_t* size); - -/* writes *size bytes */ -WRes File_Write(CSzFile* p, const void* data, size_t* size); - -WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin); -WRes File_GetLength(CSzFile* p, UInt64* length); - -/* ---------- FileInStream ---------- */ - -typedef struct -{ - ISeqInStream s; - CSzFile file; -} CFileSeqInStream; - -void FileSeqInStream_CreateVTable(CFileSeqInStream* p); - -typedef struct -{ - ISeekInStream s; - CSzFile file; -} CFileInStream; - -void FileInStream_CreateVTable(CFileInStream* p); - -typedef struct -{ - ISeqOutStream s; - CSzFile file; -} CFileOutStream; - -void FileOutStream_CreateVTable(CFileOutStream* p); -} - -#endif diff --git a/crnlib/lzma_7zStream.cpp b/crnlib/lzma_7zStream.cpp deleted file mode 100644 index 1fae1efa..00000000 --- a/crnlib/lzma_7zStream.cpp +++ /dev/null @@ -1,148 +0,0 @@ -/* 7zStream.c -- 7z Stream functions -2008-11-23 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include - -#include "lzma_Types.h" - -namespace crnlib { - -SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType) { - while (size != 0) { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void*)((Byte*)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size) { - return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf) { - size_t processed = 1; - RINOK(stream->Read(stream, buf, &processed)); - return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; -} - -SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset) { - Int64 t = offset; - return stream->Seek(stream, &t, SZ_SEEK_SET); -} - -SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size) { - void* lookBuf; - if (*size == 0) - return SZ_OK; - RINOK(stream->Look(stream, &lookBuf, size)); - memcpy(buf, lookBuf, *size); - return stream->Skip(stream, *size); -} - -SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType) { - while (size != 0) { - size_t processed = size; - RINOK(stream->Read(stream, buf, &processed)); - if (processed == 0) - return errorType; - buf = (void*)((Byte*)buf + processed); - size -= processed; - } - return SZ_OK; -} - -SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size) { - return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); -} - -static SRes LookToRead_Look_Lookahead(void* pp, void** buf, size_t* size) { - SRes res = SZ_OK; - CLookToRead* p = (CLookToRead*)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) { - p->pos = 0; - size2 = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, &size2); - p->size = size2; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Look_Exact(void* pp, void** buf, size_t* size) { - SRes res = SZ_OK; - CLookToRead* p = (CLookToRead*)pp; - size_t size2 = p->size - p->pos; - if (size2 == 0 && *size > 0) { - p->pos = 0; - if (*size > LookToRead_BUF_SIZE) - *size = LookToRead_BUF_SIZE; - res = p->realStream->Read(p->realStream, p->buf, size); - size2 = p->size = *size; - } - if (size2 < *size) - *size = size2; - *buf = p->buf + p->pos; - return res; -} - -static SRes LookToRead_Skip(void* pp, size_t offset) { - CLookToRead* p = (CLookToRead*)pp; - p->pos += offset; - return SZ_OK; -} - -static SRes LookToRead_Read(void* pp, void* buf, size_t* size) { - CLookToRead* p = (CLookToRead*)pp; - size_t rem = p->size - p->pos; - if (rem == 0) - return p->realStream->Read(p->realStream, buf, size); - if (rem > *size) - rem = *size; - memcpy(buf, p->buf + p->pos, rem); - p->pos += rem; - *size = rem; - return SZ_OK; -} - -static SRes LookToRead_Seek(void* pp, Int64* pos, ESzSeek origin) { - CLookToRead* p = (CLookToRead*)pp; - p->pos = p->size = 0; - return p->realStream->Seek(p->realStream, pos, origin); -} - -void LookToRead_CreateVTable(CLookToRead* p, int lookahead) { - p->s.Look = lookahead ? LookToRead_Look_Lookahead : LookToRead_Look_Exact; - p->s.Skip = LookToRead_Skip; - p->s.Read = LookToRead_Read; - p->s.Seek = LookToRead_Seek; -} - -void LookToRead_Init(CLookToRead* p) { - p->pos = p->size = 0; -} - -static SRes SecToLook_Read(void* pp, void* buf, size_t* size) { - CSecToLook* p = (CSecToLook*)pp; - return LookInStream_LookRead(p->realStream, buf, size); -} - -void SecToLook_CreateVTable(CSecToLook* p) { - p->s.Read = SecToLook_Read; -} - -static SRes SecToRead_Read(void* pp, void* buf, size_t* size) { - CSecToRead* p = (CSecToRead*)pp; - return p->realStream->Read(p->realStream, buf, size); -} - -void SecToRead_CreateVTable(CSecToRead* p) { - p->s.Read = SecToRead_Read; -} -} diff --git a/crnlib/lzma_7zVersion.h b/crnlib/lzma_7zVersion.h deleted file mode 100644 index 595dec5f..00000000 --- a/crnlib/lzma_7zVersion.h +++ /dev/null @@ -1,7 +0,0 @@ -#define MY_VER_MAJOR 4 -#define MY_VER_MINOR 63 -#define MY_VER_BUILD 0 -#define MY_VERSION "4.63" -#define MY_DATE "2008-12-31" -#define MY_COPYRIGHT ": Igor Pavlov : Public domain" -#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE diff --git a/crnlib/lzma_Alloc.cpp b/crnlib/lzma_Alloc.cpp deleted file mode 100644 index 5de1f4f3..00000000 --- a/crnlib/lzma_Alloc.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* Alloc.c -- Memory allocation functions -2008-09-24 -Igor Pavlov -Public domain */ -#include "crn_core.h" -#ifdef _WIN32 -#include -#endif -#include - -#include "lzma_Alloc.h" - -namespace crnlib { - -/* #define _SZ_ALLOC_DEBUG */ - -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG -#include -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; -#endif - -void* MyAlloc(size_t size) { - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - { - void* p = crnlib::crnlib_malloc(size); - fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); - return p; - } -#else - return crnlib::crnlib_malloc(size); -#endif -} - -void MyFree(void* address) { -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); -#endif - crnlib::crnlib_free(address); -} - -#ifdef _WIN32 - -void* MidAlloc(size_t size) { - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); -#endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); -} - -void MidFree(void* address) { -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); -#endif - if (address == 0) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES -#endif - -#ifdef _7ZIP_LARGE_PAGES -SIZE_T g_LargePageSize = 0; -typedef SIZE_T(WINAPI* GetLargePageMinimumP)(); -#endif - -void SetLargePageSize() { -#ifdef _7ZIP_LARGE_PAGES - SIZE_T size = 0; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (largePageMinimum == 0) - return; - size = largePageMinimum(); - if (size == 0 || (size & (size - 1)) != 0) - return; - g_LargePageSize = size; -#endif -} - -void* BigAlloc(size_t size) { - if (size == 0) - return 0; -#ifdef _SZ_ALLOC_DEBUG - fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); -#endif - -#ifdef _7ZIP_LARGE_PAGES - if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) { - void* res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), - MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - if (res != 0) - return res; - } -#endif - return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); -} - -void BigFree(void* address) { -#ifdef _SZ_ALLOC_DEBUG - if (address != 0) - fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); -#endif - - if (address == 0) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#endif -} diff --git a/crnlib/lzma_Alloc.h b/crnlib/lzma_Alloc.h deleted file mode 100644 index 296813de..00000000 --- a/crnlib/lzma_Alloc.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Alloc.h -- Memory allocation functions -2008-03-13 -Igor Pavlov -Public domain */ - -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H - -#include - -namespace crnlib { - -void* MyAlloc(size_t size); -void MyFree(void* address); - -#ifdef _WIN32 - -void SetLargePageSize(); - -void* MidAlloc(size_t size); -void MidFree(void* address); -void* BigAlloc(size_t size); -void BigFree(void* address); - -#else - -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) - -#endif -} - -#endif diff --git a/crnlib/lzma_Bcj2.cpp b/crnlib/lzma_Bcj2.cpp deleted file mode 100644 index e1d84771..00000000 --- a/crnlib/lzma_Bcj2.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* Bcj2.c -- Converter for x86 code (BCJ2) -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_Bcj2.h" - -namespace crnlib { - -#ifdef _LZMA_PROB32 -#define CProb UInt32 -#else -#define CProb UInt16 -#endif - -#define IsJcc(b0, b1) ((b0) == 0x0F && ((b1)&0xF0) == 0x80) -#define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1)) - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_READ_BYTE (*buffer++) -#define RC_TEST \ - { \ - if (buffer == bufferLim) \ - return SZ_ERROR_DATA; \ - } -#define RC_INIT2 \ - code = 0; \ - range = 0xFFFFFFFF; \ - { \ - int i; \ - for (i = 0; i < 5; i++) { \ - RC_TEST; \ - code = (code << 8) | RC_READ_BYTE; \ - } \ - } - -#define NORMALIZE \ - if (range < kTopValue) { \ - RC_TEST; \ - range <<= 8; \ - code = (code << 8) | RC_READ_BYTE; \ - } - -#define IF_BIT_0(p) \ - ttt = *(p); \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0(p) \ - range = bound; \ - *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); \ - NORMALIZE; -#define UPDATE_1(p) \ - range -= bound; \ - code -= bound; \ - *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); \ - NORMALIZE; - -int Bcj2_Decode( - const Byte* buf0, SizeT size0, - const Byte* buf1, SizeT size1, - const Byte* buf2, SizeT size2, - const Byte* buf3, SizeT size3, - Byte* outBuf, SizeT outSize) { - CProb p[256 + 2]; - SizeT inPos = 0, outPos = 0; - - const Byte *buffer, *bufferLim; - UInt32 range, code; - Byte prevByte = 0; - - unsigned int i; - for (i = 0; i < sizeof(p) / sizeof(p[0]); i++) - p[i] = kBitModelTotal >> 1; - - buffer = buf3; - bufferLim = buffer + size3; - RC_INIT2 - - if (outSize == 0) - return SZ_OK; - - for (;;) { - Byte b; - CProb* prob; - UInt32 bound; - UInt32 ttt; - - SizeT limit = size0 - inPos; - if (outSize - outPos < limit) - limit = outSize - outPos; - while (limit != 0) { - Byte b = buf0[inPos]; - outBuf[outPos++] = b; - if (IsJ(prevByte, b)) - break; - inPos++; - prevByte = b; - limit--; - } - - if (limit == 0 || outPos == outSize) - break; - - b = buf0[inPos++]; - - if (b == 0xE8) - prob = p + prevByte; - else if (b == 0xE9) - prob = p + 256; - else - prob = p + 257; - - IF_BIT_0(prob) { - UPDATE_0(prob) - prevByte = b; - } - else { - UInt32 dest; - const Byte* v; - UPDATE_1(prob) - if (b == 0xE8) { - v = buf1; - if (size1 < 4) - return SZ_ERROR_DATA; - buf1 += 4; - size1 -= 4; - } else { - v = buf2; - if (size2 < 4) - return SZ_ERROR_DATA; - buf2 += 4; - size2 -= 4; - } - dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) | - ((UInt32)v[2] << 8) | ((UInt32)v[3])) - - ((UInt32)outPos + 4); - outBuf[outPos++] = (Byte)dest; - if (outPos == outSize) - break; - outBuf[outPos++] = (Byte)(dest >> 8); - if (outPos == outSize) - break; - outBuf[outPos++] = (Byte)(dest >> 16); - if (outPos == outSize) - break; - outBuf[outPos++] = prevByte = (Byte)(dest >> 24); - } - } - return (outPos == outSize) ? SZ_OK : SZ_ERROR_DATA; -} -} diff --git a/crnlib/lzma_Bcj2.h b/crnlib/lzma_Bcj2.h deleted file mode 100644 index 760ff9ed..00000000 --- a/crnlib/lzma_Bcj2.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Bcj2.h -- Converter for x86 code (BCJ2) -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __BCJ2_H -#define __BCJ2_H - -#include "lzma_Types.h" - -namespace crnlib { - -/* -Conditions: - outSize <= FullOutputSize, - where FullOutputSize is full size of output stream of x86_2 filter. - -If buf0 overlaps outBuf, there are two required conditions: - 1) (buf0 >= outBuf) - 2) (buf0 + size0 >= outBuf + FullOutputSize). - -Returns: - SZ_OK - SZ_ERROR_DATA - Data error -*/ - -int Bcj2_Decode( - const Byte* buf0, SizeT size0, - const Byte* buf1, SizeT size1, - const Byte* buf2, SizeT size2, - const Byte* buf3, SizeT size3, - Byte* outBuf, SizeT outSize); -} - -#endif diff --git a/crnlib/lzma_Bra.cpp b/crnlib/lzma_Bra.cpp deleted file mode 100644 index b19b5005..00000000 --- a/crnlib/lzma_Bra.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Bra.c -- Converters for RISC code -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_Bra.h" - -namespace crnlib { - -SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 4) - return 0; - size -= 4; - ip += 8; - for (i = 0; i <= size; i += 4) { - if (data[i + 3] == 0xEB) { - UInt32 dest; - UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]); - src <<= 2; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - dest >>= 2; - data[i + 2] = (Byte)(dest >> 16); - data[i + 1] = (Byte)(dest >> 8); - data[i + 0] = (Byte)dest; - } - } - return i; -} - -SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 4) - return 0; - size -= 4; - ip += 4; - for (i = 0; i <= size; i += 2) { - if ((data[i + 1] & 0xF8) == 0xF0 && - (data[i + 3] & 0xF8) == 0xF8) { - UInt32 dest; - UInt32 src = - (((UInt32)data[i + 1] & 0x7) << 19) | - ((UInt32)data[i + 0] << 11) | - (((UInt32)data[i + 3] & 0x7) << 8) | - (data[i + 2]); - - src <<= 1; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - dest >>= 1; - - data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7)); - data[i + 0] = (Byte)(dest >> 11); - data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7)); - data[i + 2] = (Byte)dest; - i += 2; - } - } - return i; -} - -SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 4) - return 0; - size -= 4; - for (i = 0; i <= size; i += 4) { - if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1) { - UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) | - ((UInt32)data[i + 1] << 16) | - ((UInt32)data[i + 2] << 8) | - ((UInt32)data[i + 3] & (~3)); - - UInt32 dest; - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - data[i + 0] = (Byte)(0x48 | ((dest >> 24) & 0x3)); - data[i + 1] = (Byte)(dest >> 16); - data[i + 2] = (Byte)(dest >> 8); - data[i + 3] &= 0x3; - data[i + 3] |= dest; - } - } - return i; -} - -SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - UInt32 i; - if (size < 4) - return 0; - size -= 4; - for (i = 0; i <= size; i += 4) { - if (((data[i] == 0x40) && ((data[i + 1] & 0xC0) == 0x00)) || - ((data[i] == 0x7F) && ((data[i + 1] & 0xC0) == 0xC0))) { - UInt32 src = - ((UInt32)data[i + 0] << 24) | - ((UInt32)data[i + 1] << 16) | - ((UInt32)data[i + 2] << 8) | - ((UInt32)data[i + 3]); - UInt32 dest; - - src <<= 2; - if (encoding) - dest = ip + i + src; - else - dest = src - (ip + i); - dest >>= 2; - - dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000; - - data[i + 0] = (Byte)(dest >> 24); - data[i + 1] = (Byte)(dest >> 16); - data[i + 2] = (Byte)(dest >> 8); - data[i + 3] = (Byte)dest; - } - } - return i; -} -} diff --git a/crnlib/lzma_Bra.h b/crnlib/lzma_Bra.h deleted file mode 100644 index 88c5db21..00000000 --- a/crnlib/lzma_Bra.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Bra.h -- Branch converters for executables -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __BRA_H -#define __BRA_H - -#include "lzma_Types.h" - -namespace crnlib { - -/* -These functions convert relative addresses to absolute addresses -in CALL instructions to increase the compression ratio. - - In: - data - data buffer - size - size of data - ip - current virtual Instruction Pinter (IP) value - state - state variable for x86 converter - encoding - 0 (for decoding), 1 (for encoding) - - Out: - state - state variable for x86 converter - - Returns: - The number of processed bytes. If you call these functions with multiple calls, - you must start next call with first byte after block of processed bytes. - - Type Endian Alignment LookAhead - - x86 little 1 4 - ARMT little 2 2 - ARM little 4 0 - PPC big 4 0 - SPARC big 4 0 - IA64 little 16 0 - - size must be >= Alignment + LookAhead, if it's not last block. - If (size < Alignment + LookAhead), converter returns 0. - - Example: - - UInt32 ip = 0; - for () - { - ; size must be >= Alignment + LookAhead, if it's not last block - SizeT processed = Convert(data, size, ip, 1); - data += processed; - size -= processed; - ip += processed; - } -*/ - -#define x86_Convert_Init(state) \ - { state = 0; } -SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding); -SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); -} - -#endif diff --git a/crnlib/lzma_Bra86.cpp b/crnlib/lzma_Bra86.cpp deleted file mode 100644 index 96a87f6f..00000000 --- a/crnlib/lzma_Bra86.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/* Bra86.c -- Converter for x86 code (BCJ) -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_Bra.h" - -namespace crnlib { - -#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) - -const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0}; -const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3}; - -SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding) { - SizeT bufferPos = 0, prevPosT; - UInt32 prevMask = *state & 0x7; - if (size < 5) - return 0; - ip += 5; - prevPosT = (SizeT)0 - 1; - - for (;;) { - Byte* p = data + bufferPos; - Byte* limit = data + size - 4; - for (; p < limit; p++) - if ((*p & 0xFE) == 0xE8) - break; - bufferPos = (SizeT)(p - data); - if (p >= limit) - break; - prevPosT = bufferPos - prevPosT; - if (prevPosT > 3) - prevMask = 0; - else { - prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; - if (prevMask != 0) { - Byte b = p[4 - kMaskToBitNumber[prevMask]]; - if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) { - prevPosT = bufferPos; - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; - continue; - } - } - } - prevPosT = bufferPos; - - if (Test86MSByte(p[4])) { - UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); - UInt32 dest; - for (;;) { - Byte b; - int index; - if (encoding) - dest = (ip + (UInt32)bufferPos) + src; - else - dest = src - (ip + (UInt32)bufferPos); - if (prevMask == 0) - break; - index = kMaskToBitNumber[prevMask] * 8; - b = (Byte)(dest >> (24 - index)); - if (!Test86MSByte(b)) - break; - src = dest ^ ((1 << (32 - index)) - 1); - } - p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); - p[3] = (Byte)(dest >> 16); - p[2] = (Byte)(dest >> 8); - p[1] = (Byte)dest; - bufferPos += 5; - } else { - prevMask = ((prevMask << 1) & 0x7) | 1; - bufferPos++; - } - } - prevPosT = bufferPos - prevPosT; - *state = ((prevPosT > 3) ? 0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); - return bufferPos; -} -} diff --git a/crnlib/lzma_BraIA64.cpp b/crnlib/lzma_BraIA64.cpp deleted file mode 100644 index f2ca07a7..00000000 --- a/crnlib/lzma_BraIA64.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* BraIA64.c -- Converter for IA-64 code -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_Bra.h" - -namespace crnlib { - -static const Byte kBranchTable[32] = - { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 6, 6, 0, 0, 7, 7, - 4, 4, 0, 0, 4, 4, 0, 0}; - -SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { - SizeT i; - if (size < 16) - return 0; - size -= 16; - for (i = 0; i <= size; i += 16) { - UInt32 instrTemplate = data[i] & 0x1F; - UInt32 mask = kBranchTable[instrTemplate]; - UInt32 bitPos = 5; - int slot; - for (slot = 0; slot < 3; slot++, bitPos += 41) { - UInt32 bytePos, bitRes; - UInt64 instruction, instNorm; - int j; - if (((mask >> slot) & 1) == 0) - continue; - bytePos = (bitPos >> 3); - bitRes = bitPos & 0x7; - instruction = 0; - for (j = 0; j < 6; j++) - instruction += (UInt64)data[i + j + bytePos] << (8 * j); - - instNorm = instruction >> bitRes; - if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0) { - UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF); - UInt32 dest; - src |= ((UInt32)(instNorm >> 36) & 1) << 20; - - src <<= 4; - - if (encoding) - dest = ip + (UInt32)i + src; - else - dest = src - (ip + (UInt32)i); - - dest >>= 4; - - instNorm &= ~((UInt64)(0x8FFFFF) << 13); - instNorm |= ((UInt64)(dest & 0xFFFFF) << 13); - instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20)); - - instruction &= (1 << bitRes) - 1; - instruction |= (instNorm << bitRes); - for (j = 0; j < 6; j++) - data[i + j + bytePos] = (Byte)(instruction >> (8 * j)); - } - } - } - return i; -} -} diff --git a/crnlib/lzma_CpuArch.h b/crnlib/lzma_CpuArch.h deleted file mode 100644 index d993f40e..00000000 --- a/crnlib/lzma_CpuArch.h +++ /dev/null @@ -1,72 +0,0 @@ -/* CpuArch.h -2008-08-05 -Igor Pavlov -Public domain */ - -#ifndef __CPUARCH_H -#define __CPUARCH_H - -/* -LITTLE_ENDIAN_UNALIGN means: - 1) CPU is LITTLE_ENDIAN - 2) it's allowed to make unaligned memory accesses -if LITTLE_ENDIAN_UNALIGN is not defined, it means that we don't know -about these properties of platform. -*/ - -#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386__) || defined(__x86_64__) -#define LITTLE_ENDIAN_UNALIGN -#endif - -#ifdef LITTLE_ENDIAN_UNALIGN - -#define GetUi16(p) (*(const UInt16*)(p)) -#define GetUi32(p) (*(const UInt32*)(p)) -#define GetUi64(p) (*(const UInt64*)(p)) -#define SetUi32(p, d) *(UInt32*)(p) = (d); - -#else - -#define GetUi16(p) (((const Byte*)(p))[0] | ((UInt16)((const Byte*)(p))[1] << 8)) - -#define GetUi32(p) ( \ - ((const Byte*)(p))[0] | \ - ((UInt32)((const Byte*)(p))[1] << 8) | \ - ((UInt32)((const Byte*)(p))[2] << 16) | \ - ((UInt32)((const Byte*)(p))[3] << 24)) - -#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte*)(p)) + 4) << 32)) - -#define SetUi32(p, d) \ - { \ - UInt32 _x_ = (d); \ - ((Byte*)(p))[0] = (Byte)_x_; \ - ((Byte*)(p))[1] = (Byte)(_x_ >> 8); \ - ((Byte*)(p))[2] = (Byte)(_x_ >> 16); \ - ((Byte*)(p))[3] = (Byte)(_x_ >> 24); \ - } - -#endif - -#if defined(LITTLE_ENDIAN_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) - -#pragma intrinsic(_byteswap_ulong) -#pragma intrinsic(_byteswap_uint64) -#define GetBe32(p) _byteswap_ulong(*(const UInt32*)(const Byte*)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64*)(const Byte*)(p)) - -#else - -#define GetBe32(p) ( \ - ((UInt32)((const Byte*)(p))[0] << 24) | \ - ((UInt32)((const Byte*)(p))[1] << 16) | \ - ((UInt32)((const Byte*)(p))[2] << 8) | \ - ((const Byte*)(p))[3]) - -#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte*)(p)) + 4)) - -#endif - -#define GetBe16(p) (((UInt16)((const Byte*)(p))[0] << 8) | ((const Byte*)(p))[1]) - -#endif diff --git a/crnlib/lzma_LzFind.cpp b/crnlib/lzma_LzFind.cpp deleted file mode 100644 index d3a8736d..00000000 --- a/crnlib/lzma_LzFind.cpp +++ /dev/null @@ -1,682 +0,0 @@ -/* LzFind.c -- Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include - -#include "lzma_LzFind.h" -#include "lzma_LzHash.h" - -namespace crnlib { - -#define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(kNormalizeStepMin - 1)) -#define kMaxHistorySize ((UInt32)3 << 30) - -#define kStartMaxLen 3 - -static void LzInWindow_Free(CMatchFinder* p, ISzAlloc* alloc) { - if (!p->directInput) { - alloc->Free(alloc, p->bufferBase); - p->bufferBase = 0; - } -} - -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ - -static int LzInWindow_Create(CMatchFinder* p, UInt32 keepSizeReserv, ISzAlloc* alloc) { - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) { - p->blockSize = blockSize; - return 1; - } - if (p->bufferBase == 0 || p->blockSize != blockSize) { - LzInWindow_Free(p, alloc); - p->blockSize = blockSize; - p->bufferBase = (Byte*)alloc->Alloc(alloc, (size_t)blockSize); - } - return (p->bufferBase != 0); -} - -Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p) { - return p->buffer; -} -Byte MatchFinder_GetIndexByte(CMatchFinder* p, Int32 index) { - return p->buffer[index]; -} - -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder* p) { - return p->streamPos - p->pos; -} - -void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue) { - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} - -static void MatchFinder_ReadBlock(CMatchFinder* p) { - if (p->streamEndWasReached || p->result != SZ_OK) - return; - for (;;) { - Byte* dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); - if (size == 0) - return; - p->result = p->stream->Read(p->stream, dest, &size); - if (p->result != SZ_OK) - return; - if (size == 0) { - p->streamEndWasReached = 1; - return; - } - p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) - return; - } -} - -void MatchFinder_MoveBlock(CMatchFinder* p) { - memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); - p->buffer = p->bufferBase + p->keepSizeBefore; -} - -int MatchFinder_NeedMove(CMatchFinder* p) { - /* if (p->streamEndWasReached) return 0; */ - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); -} - -void MatchFinder_ReadIfRequired(CMatchFinder* p) { - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder* p) { - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_SetDefaultSettings(CMatchFinder* p) { - p->cutValue = 32; - p->btMode = 1; - p->numHashBytes = 4; - /* p->skipModeBits = 0; */ - p->directInput = 0; - p->bigHash = 0; -} - -#define kCrcPoly 0xEDB88320 - -void MatchFinder_Construct(CMatchFinder* p) { - UInt32 i; - p->bufferBase = 0; - p->directInput = 0; - p->hash = 0; - MatchFinder_SetDefaultSettings(p); - - for (i = 0; i < 256; i++) { - UInt32 r = i; - int j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); - p->crc[i] = r; - } -} - -static void MatchFinder_FreeThisClassMemory(CMatchFinder* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->hash); - p->hash = 0; -} - -void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc) { - MatchFinder_FreeThisClassMemory(p, alloc); - LzInWindow_Free(p, alloc); -} - -static CLzRef* AllocRefs(UInt32 num, ISzAlloc* alloc) { - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); - if (sizeInBytes / sizeof(CLzRef) != num) - return 0; - return (CLzRef*)alloc->Alloc(alloc, sizeInBytes); -} - -int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc* alloc) { - UInt32 sizeReserv; - if (historySize > kMaxHistorySize) { - MatchFinder_Free(p, alloc); - return 0; - } - sizeReserv = historySize >> 1; - if (historySize > ((UInt32)2 << 30)) - sizeReserv = historySize >> 2; - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); - - p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - if (LzInWindow_Create(p, sizeReserv, alloc)) { - UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1; - UInt32 hs; - p->matchMaxLen = matchMaxLen; - { - p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else { - hs = historySize - 1; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - hs >>= 1; - /* hs >>= p->skipModeBits; */ - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - } - } - p->hashMask = hs; - hs++; - if (p->numHashBytes > 2) - p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) - p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) - p->fixedHashSize += kHash4Size; - hs += p->fixedHashSize; - } - - { - UInt32 prevSize = p->hashSizeSum + p->numSons; - UInt32 newSize; - p->historySize = historySize; - p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; - p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize); - newSize = p->hashSizeSum + p->numSons; - if (p->hash != 0 && prevSize == newSize) - return 1; - MatchFinder_FreeThisClassMemory(p, alloc); - p->hash = AllocRefs(newSize, alloc); - if (p->hash != 0) { - p->son = p->hash + p->hashSizeSum; - return 1; - } - } - } - MatchFinder_Free(p, alloc); - return 0; -} - -static void MatchFinder_SetLimits(CMatchFinder* p) { - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - if (limit2 <= p->keepSizeAfter) { - if (limit2 > 0) - limit2 = 1; - } else - limit2 -= p->keepSizeAfter; - if (limit2 < limit) - limit = limit2; - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; -} - -void MatchFinder_Init(CMatchFinder* p) { - UInt32 i; - for (i = 0; i < p->hashSizeSum; i++) - p->hash[i] = kEmptyHashValue; - p->cyclicBufferPos = 0; - p->buffer = p->bufferBase; - p->pos = p->streamPos = p->cyclicBufferSize; - p->result = SZ_OK; - p->streamEndWasReached = 0; - MatchFinder_ReadBlock(p); - MatchFinder_SetLimits(p); -} - -static UInt32 MatchFinder_GetSubValue(CMatchFinder* p) { - return (p->pos - p->historySize - 1) & kNormalizeMask; -} - -void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems) { - UInt32 i; - for (i = 0; i < numItems; i++) { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; - } -} - -static void MatchFinder_Normalize(CMatchFinder* p) { - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); - MatchFinder_ReduceOffsets(p, subValue); -} - -static void MatchFinder_CheckLimits(CMatchFinder* p) { - if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); - if (p->cyclicBufferPos == p->cyclicBufferSize) - p->cyclicBufferPos = 0; - MatchFinder_SetLimits(p); -} - -static UInt32* Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32* distances, UInt32 maxLen) { - son[_cyclicBufferPos] = curMatch; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; - { - const Byte* pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - if (pb[maxLen] == cur[maxLen] && *pb == *cur) { - UInt32 len = 0; - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) - return distances; - } - } - } - } -} - -UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32* distances, UInt32 maxLen) { - CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef* ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } - { - CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte* pb = cur - delta; - UInt32 len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return distances; - } - } - } - if (pb[len] < cur[len]) { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } else { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { - CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef* ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } - { - CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte* pb = cur - delta; - UInt32 len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) { - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - { - if (len == lenLimit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return; - } - } - } - if (pb[len] < cur[len]) { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } else { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -#define MOVE_POS \ - ++p->cyclicBufferPos; \ - p->buffer++; \ - if (++p->pos == p->posLimit) \ - MatchFinder_CheckLimits(p); - -#define MOVE_POS_RET MOVE_POS return offset; - -static void MatchFinder_MovePos(CMatchFinder* p) { - MOVE_POS; -} - -#define GET_MATCHES_HEADER2(minLen, ret_op) \ - UInt32 lenLimit; \ - UInt32 hashValue; \ - const Byte* cur; \ - UInt32 curMatch; \ - lenLimit = p->lenLimit; \ - { \ - if (lenLimit < minLen) { \ - MatchFinder_MovePos(p); \ - ret_op; \ - } \ - } \ - cur = p->buffer; - -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) - -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue - -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, maxLen) - \ - distances); \ - MOVE_POS_RET; - -#define SKIP_FOOTER \ - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); \ - MOVE_POS; - -static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 offset; - GET_MATCHES_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) -} - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) -} - -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 hash2Value, delta2, maxLen, offset; - GET_MATCHES_HEADER(3) - - HASH3_CALC; - - delta2 = p->pos - p->hash[hash2Value]; - curMatch = p->hash[kFix3HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; - - maxLen = 2; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[0] = maxLen; - distances[1] = delta2 - 1; - offset = 2; - if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - delta2 = p->pos - p->hash[hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; - } - if (offset != 0) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[offset - 2] = maxLen; - if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - if (maxLen < 3) - maxLen = 3; - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - delta2 = p->pos - p->hash[hash2Value]; - delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; - curMatch = p->hash[kFix4HashSize + hashValue]; - - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - - maxLen = 1; - offset = 0; - if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { - distances[0] = maxLen = 2; - distances[1] = delta2 - 1; - offset = 2; - } - if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { - maxLen = 3; - distances[offset + 1] = delta3 - 1; - offset += 2; - delta2 = delta3; - } - if (offset != 0) { - for (; maxLen != lenLimit; maxLen++) - if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) - break; - distances[offset - 2] = maxLen; - if (maxLen == lenLimit) { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - if (maxLen < 3) - maxLen = 3; - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - - (distances)); - MOVE_POS_RET -} - -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { - UInt32 offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - - (distances)); - MOVE_POS_RET -} - -static void Bt2_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - SKIP_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -static void Bt3_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - UInt32 hash2Value; - SKIP_HEADER(3) - HASH3_CALC; - curMatch = p->hash[kFix3HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -static void Bt4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - UInt32 hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = p->pos; - p->hash[kFix4HashSize + hashValue] = p->pos; - SKIP_FOOTER - } while (--num != 0); -} - -static void Hc4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - UInt32 hash2Value, hash3Value; - SKIP_HEADER(4) - HASH4_CALC; - curMatch = p->hash[kFix4HashSize + hashValue]; - p->hash[hash2Value] = - p->hash[kFix3HashSize + hash3Value] = - p->hash[kFix4HashSize + hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } while (--num != 0); -} - -void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { - do { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hashValue]; - p->hash[hashValue] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } while (--num != 0); -} - -void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable) { - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; - if (!p->btMode) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; - } else if (p->numHashBytes == 2) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; - } else if (p->numHashBytes == 3) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; - } else { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; - } -} -} diff --git a/crnlib/lzma_LzFind.h b/crnlib/lzma_LzFind.h deleted file mode 100644 index 0cf86745..00000000 --- a/crnlib/lzma_LzFind.h +++ /dev/null @@ -1,108 +0,0 @@ -/* LzFind.h -- Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZFIND_H -#define __LZFIND_H - -#include "lzma_Types.h" - -namespace crnlib { - -typedef UInt32 CLzRef; - -typedef struct _CMatchFinder { - Byte* buffer; - UInt32 pos; - UInt32 posLimit; - UInt32 streamPos; - UInt32 lenLimit; - - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ - - UInt32 matchMaxLen; - CLzRef* hash; - CLzRef* son; - UInt32 hashMask; - UInt32 cutValue; - - Byte* bufferBase; - ISeqInStream* stream; - int streamEndWasReached; - - UInt32 blockSize; - UInt32 keepSizeBefore; - UInt32 keepSizeAfter; - - UInt32 numHashBytes; - int directInput; - int btMode; - /* int skipModeBits; */ - int bigHash; - UInt32 historySize; - UInt32 fixedHashSize; - UInt32 hashSizeSum; - UInt32 numSons; - SRes result; - UInt32 crc[256]; -} CMatchFinder; - -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) -#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) - -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) - -int MatchFinder_NeedMove(CMatchFinder* p); -Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p); -void MatchFinder_MoveBlock(CMatchFinder* p); -void MatchFinder_ReadIfRequired(CMatchFinder* p); - -void MatchFinder_Construct(CMatchFinder* p); - -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB -*/ -int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAlloc* alloc); -void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc); -void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems); -void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue); - -UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* buffer, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32* distances, UInt32 maxLen); - -/* -Conditions: - Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. - Mf_GetPointerToCurrentPos_Func's result must be used only before any other function -*/ - -typedef void (*Mf_Init_Func)(void* object); -typedef Byte (*Mf_GetIndexByte_Func)(void* object, Int32 index); -typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void* object); -typedef const Byte* (*Mf_GetPointerToCurrentPos_Func)(void* object); -typedef UInt32 (*Mf_GetMatches_Func)(void* object, UInt32* distances); -typedef void (*Mf_Skip_Func)(void* object, UInt32); - -typedef struct _IMatchFinder { - Mf_Init_Func Init; - Mf_GetIndexByte_Func GetIndexByte; - Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; - Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; - Mf_GetMatches_Func GetMatches; - Mf_Skip_Func Skip; -} IMatchFinder; - -void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable); - -void MatchFinder_Init(CMatchFinder* p); -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); -void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); -void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); -} - -#endif diff --git a/crnlib/lzma_LzFindMt.cpp b/crnlib/lzma_LzFindMt.cpp deleted file mode 100644 index 7d47ee90..00000000 --- a/crnlib/lzma_LzFindMt.cpp +++ /dev/null @@ -1,756 +0,0 @@ -/* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_LzHash.h" - -#include "lzma_LzFindMt.h" - -namespace crnlib { - -void MtSync_Construct(CMtSync* p) { - p->wasCreated = False; - p->csWasInitialized = False; - p->csWasEntered = False; - Thread_Construct(&p->thread); - Event_Construct(&p->canStart); - Event_Construct(&p->wasStarted); - Event_Construct(&p->wasStopped); - Semaphore_Construct(&p->freeSemaphore); - Semaphore_Construct(&p->filledSemaphore); -} - -void MtSync_GetNextBlock(CMtSync* p) { - if (p->needStart) { - p->numProcessedBlocks = 1; - p->needStart = False; - p->stopWriting = False; - p->exit = False; - Event_Reset(&p->wasStarted); - Event_Reset(&p->wasStopped); - - Event_Set(&p->canStart); - Event_Wait(&p->wasStarted); - } else { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - p->numProcessedBlocks++; - Semaphore_Release1(&p->freeSemaphore); - } - Semaphore_Wait(&p->filledSemaphore); - CriticalSection_Enter(&p->cs); - p->csWasEntered = True; -} - -/* MtSync_StopWriting must be called if Writing was started */ - -void MtSync_StopWriting(CMtSync* p) { - UInt32 myNumBlocks = p->numProcessedBlocks; - if (!Thread_WasCreated(&p->thread) || p->needStart) - return; - p->stopWriting = True; - if (p->csWasEntered) { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - } - Semaphore_Release1(&p->freeSemaphore); - - Event_Wait(&p->wasStopped); - - while (myNumBlocks++ != p->numProcessedBlocks) { - Semaphore_Wait(&p->filledSemaphore); - Semaphore_Release1(&p->freeSemaphore); - } - p->needStart = True; -} - -void MtSync_Destruct(CMtSync* p) { - if (Thread_WasCreated(&p->thread)) { - MtSync_StopWriting(p); - p->exit = True; - if (p->needStart) - Event_Set(&p->canStart); - Thread_Wait(&p->thread); - Thread_Close(&p->thread); - } - if (p->csWasInitialized) { - CriticalSection_Delete(&p->cs); - p->csWasInitialized = False; - } - - Event_Close(&p->canStart); - Event_Close(&p->wasStarted); - Event_Close(&p->wasStopped); - Semaphore_Close(&p->freeSemaphore); - Semaphore_Close(&p->filledSemaphore); - - p->wasCreated = False; -} - -#define RINOK_THREAD(x) \ - { \ - if ((x) != 0) \ - return SZ_ERROR_THREAD; \ - } - -static SRes MtSync_Create2(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { - if (p->wasCreated) - return SZ_OK; - - RINOK_THREAD(CriticalSection_Init(&p->cs)); - p->csWasInitialized = True; - - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); - - RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); - RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); - - p->needStart = True; - - RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); - p->wasCreated = True; - return SZ_OK; -} - -static SRes MtSync_Create(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { - SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); - if (res != SZ_OK) - MtSync_Destruct(p); - return res; -} - -void MtSync_Init(CMtSync* p) { - p->needStart = True; -} - -#define kMtMaxValForNormalize 0xFFFFFFFF - -#define DEF_GetHeads2(name, v, action) \ - \ -static void GetHeads##name(const Byte* p, UInt32 pos, \ -UInt32* hash, \ - UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc) \ -{ \ - action; \ - for (; numHeads != 0; numHeads--) { \ - \ -const UInt32 value = (v); \ - p++; \ - *heads++ = pos - hash[value]; \ - hash[value] = pos++; \ - } \ - } - -#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) - -DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc;) - DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) - DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) - DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) - //DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) - - void HashThreadFunc(CMatchFinderMt* mt) { - CMtSync* p = &mt->hashSync; - for (;;) { - UInt32 numProcessedBlocks = 0; - Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); - for (;;) { - if (p->exit) - return; - if (p->stopWriting) { - p->numProcessedBlocks = numProcessedBlocks; - Event_Set(&p->wasStopped); - break; - } - - { - CMatchFinder* mf = mt->MatchFinder; - if (MatchFinder_NeedMove(mf)) { - CriticalSection_Enter(&mt->btSync.cs); - CriticalSection_Enter(&mt->hashSync.cs); - { - const Byte* beforePtr = MatchFinder_GetPointerToCurrentPos(mf); - const Byte* afterPtr; - MatchFinder_MoveBlock(mf); - afterPtr = MatchFinder_GetPointerToCurrentPos(mf); - mt->pointerToCurPos -= beforePtr - afterPtr; - mt->buffer -= beforePtr - afterPtr; - } - CriticalSection_Leave(&mt->btSync.cs); - CriticalSection_Leave(&mt->hashSync.cs); - continue; - } - - Semaphore_Wait(&p->freeSemaphore); - - MatchFinder_ReadIfRequired(mf); - if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) { - UInt32 subValue = (mf->pos - mf->historySize - 1); - MatchFinder_ReduceOffsets(mf, subValue); - MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); - } - { - UInt32* heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; - UInt32 num = mf->streamPos - mf->pos; - heads[0] = 2; - heads[1] = num; - if (num >= mf->numHashBytes) { - num = num - mf->numHashBytes + 1; - if (num > kMtHashBlockSize - 2) - num = kMtHashBlockSize - 2; - mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); - heads[0] += num; - } - mf->pos += num; - mf->buffer += num; - } - } - - Semaphore_Release1(&p->filledSemaphore); - } - } -} - -void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt* p) { - MtSync_GetNextBlock(&p->hashSync); - p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; - p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; - p->hashNumAvail = p->hashBuf[p->hashBufPos++]; -} - -#define kEmptyHashValue 0 - -/* #define MFMT_GM_INLINE */ - -#ifdef MFMT_GM_INLINE - -#define NO_INLINE MY_FAST_CALL - -Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte* cur, CLzRef* son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32* _distances, UInt32 _maxLen, const UInt32* hash, Int32 limit, UInt32 size, UInt32* posRes) { - do { - UInt32* distances = _distances + 1; - UInt32 curMatch = pos - *hash++; - - CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; - CLzRef* ptr1 = son + (_cyclicBufferPos << 1); - UInt32 len0 = 0, len1 = 0; - UInt32 cutValue = _cutValue; - UInt32 maxLen = _maxLen; - for (;;) { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) { - *ptr0 = *ptr1 = kEmptyHashValue; - break; - } - { - CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte* pb = cur - delta; - UInt32 len = (len0 < len1 ? len0 : len1); - if (pb[len] == cur[len]) { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) { - *distances++ = maxLen = len; - *distances++ = delta - 1; - if (len == lenLimit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - break; - } - } - } - if (pb[len] < cur[len]) { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } else { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } - pos++; - _cyclicBufferPos++; - cur++; - { - UInt32 num = (UInt32)(distances - _distances); - *_distances = num - 1; - _distances += num; - limit -= num; - } - } while (limit > 0 && --size != 0); - *posRes = pos; - return limit; -} - -#endif - -void BtGetMatches(CMatchFinderMt* p, UInt32* distances) { - UInt32 numProcessed = 0; - UInt32 curPos = 2; - UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); - distances[1] = p->hashNumAvail; - while (curPos < limit) { - if (p->hashBufPos == p->hashBufPosLimit) { - MatchFinderMt_GetNextBlock_Hash(p); - distances[1] = numProcessed + p->hashNumAvail; - if (p->hashNumAvail >= p->numHashBytes) - continue; - for (; p->hashNumAvail != 0; p->hashNumAvail--) - distances[curPos++] = 0; - break; - } - { - UInt32 size = p->hashBufPosLimit - p->hashBufPos; - UInt32 lenLimit = p->matchMaxLen; - UInt32 pos = p->pos; - UInt32 cyclicBufferPos = p->cyclicBufferPos; - if (lenLimit >= p->hashNumAvail) - lenLimit = p->hashNumAvail; - { - UInt32 size2 = p->hashNumAvail - lenLimit + 1; - if (size2 < size) - size = size2; - size2 = p->cyclicBufferSize - cyclicBufferPos; - if (size2 < size) - size = size2; - } -#ifndef MFMT_GM_INLINE - while (curPos < limit && size-- != 0) { - UInt32* startDistances = distances + curPos; - UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], - pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - startDistances + 1, p->numHashBytes - 1) - - startDistances); - *startDistances = num - 1; - curPos += num; - cyclicBufferPos++; - pos++; - p->buffer++; - } -#else - { - UInt32 posRes; - curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos), size, &posRes); - p->hashBufPos += posRes - pos; - cyclicBufferPos += posRes - pos; - p->buffer += posRes - pos; - pos = posRes; - } -#endif - - numProcessed += pos - p->pos; - p->hashNumAvail -= pos - p->pos; - p->pos = pos; - if (cyclicBufferPos == p->cyclicBufferSize) - cyclicBufferPos = 0; - p->cyclicBufferPos = cyclicBufferPos; - } - } - distances[0] = curPos; -} - -void BtFillBlock(CMatchFinderMt* p, UInt32 globalBlockIndex) { - CMtSync* sync = &p->hashSync; - if (!sync->needStart) { - CriticalSection_Enter(&sync->cs); - sync->csWasEntered = True; - } - - BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); - - if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) { - UInt32 subValue = p->pos - p->cyclicBufferSize; - MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); - p->pos -= subValue; - } - - if (!sync->needStart) { - CriticalSection_Leave(&sync->cs); - sync->csWasEntered = False; - } -} - -void BtThreadFunc(CMatchFinderMt* mt) { - CMtSync* p = &mt->btSync; - for (;;) { - UInt32 blockIndex = 0; - Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); - for (;;) { - if (p->exit) - return; - if (p->stopWriting) { - p->numProcessedBlocks = blockIndex; - MtSync_StopWriting(&mt->hashSync); - Event_Set(&p->wasStopped); - break; - } - Semaphore_Wait(&p->freeSemaphore); - BtFillBlock(mt, blockIndex++); - Semaphore_Release1(&p->filledSemaphore); - } - } -} - -void MatchFinderMt_Construct(CMatchFinderMt* p) { - p->hashBuf = 0; - MtSync_Construct(&p->hashSync); - MtSync_Construct(&p->btSync); -} - -void MatchFinderMt_FreeMem(CMatchFinderMt* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->hashBuf); - p->hashBuf = 0; -} - -void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc) { - MtSync_Destruct(&p->hashSync); - MtSync_Destruct(&p->btSync); - MatchFinderMt_FreeMem(p, alloc); -} - -#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) -#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) - -static unsigned MY_STD_CALL HashThreadFunc2(void* p) { - HashThreadFunc((CMatchFinderMt*)p); - return 0; -} -static unsigned MY_STD_CALL BtThreadFunc2(void* p) { - Byte allocaDummy[0x180]; - (void)allocaDummy; - int i = 0; - for (i = 0; i < 16; i++) - allocaDummy[i] = (Byte)i; - BtThreadFunc((CMatchFinderMt*)p); - return 0; -} - -SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc) { - CMatchFinder* mf = p->MatchFinder; - p->historySize = historySize; - if (kMtBtBlockSize <= matchMaxLen * 4) - return SZ_ERROR_PARAM; - if (p->hashBuf == 0) { - p->hashBuf = (UInt32*)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); - if (p->hashBuf == 0) - return SZ_ERROR_MEM; - p->btBuf = p->hashBuf + kHashBufferSize; - } - keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); - keepAddBufferAfter += kMtHashBlockSize; - if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) - return SZ_ERROR_MEM; - - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); - return SZ_OK; -} - -/* Call it after ReleaseStream / SetStream */ -void MatchFinderMt_Init(CMatchFinderMt* p) { - CMatchFinder* mf = p->MatchFinder; - p->btBufPos = p->btBufPosLimit = 0; - p->hashBufPos = p->hashBufPosLimit = 0; - MatchFinder_Init(mf); - p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); - p->btNumAvailBytes = 0; - p->lzPos = p->historySize + 1; - - p->hash = mf->hash; - p->fixedHashSize = mf->fixedHashSize; - p->crc = mf->crc; - - p->son = mf->son; - p->matchMaxLen = mf->matchMaxLen; - p->numHashBytes = mf->numHashBytes; - p->pos = mf->pos; - p->buffer = mf->buffer; - p->cyclicBufferPos = mf->cyclicBufferPos; - p->cyclicBufferSize = mf->cyclicBufferSize; - p->cutValue = mf->cutValue; -} - -/* ReleaseStream is required to finish multithreading */ -void MatchFinderMt_ReleaseStream(CMatchFinderMt* p) { - MtSync_StopWriting(&p->btSync); - /* p->MatchFinder->ReleaseStream(); */ -} - -void MatchFinderMt_Normalize(CMatchFinderMt* p) { - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; -} - -void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt* p) { - UInt32 blockIndex; - MtSync_GetNextBlock(&p->btSync); - blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; - p->btBufPosLimit += p->btBuf[p->btBufPos++]; - p->btNumAvailBytes = p->btBuf[p->btBufPos++]; - if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) - MatchFinderMt_Normalize(p); -} - -const Byte* MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt* p) { - return p->pointerToCurPos; -} - -#define GET_NEXT_BLOCK_IF_REQUIRED \ - if (p->btBufPos == p->btBufPosLimit) \ - MatchFinderMt_GetNextBlock_Bt(p); - -UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt* p) { - GET_NEXT_BLOCK_IF_REQUIRED; - return p->btNumAvailBytes; -} - -Byte MatchFinderMt_GetIndexByte(CMatchFinderMt* p, Int32 index) { - return p->pointerToCurPos[index]; -} - -UInt32* MixMatches2(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { - UInt32 hash2Value, curMatch2; - UInt32* hash = p->hash; - const Byte* cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH2_CALC - - curMatch2 = hash[hash2Value]; - hash[hash2Value] = lzPos; - - if (curMatch2 >= matchMinPos) - if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { - *distances++ = 2; - *distances++ = lzPos - curMatch2 - 1; - } - return distances; -} - -UInt32* MixMatches3(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { - UInt32 hash2Value, hash3Value, curMatch2, curMatch3; - UInt32* hash = p->hash; - const Byte* cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH3_CALC - - curMatch2 = hash[hash2Value]; - curMatch3 = hash[kFix3HashSize + hash3Value]; - - hash[hash2Value] = - hash[kFix3HashSize + hash3Value] = - lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) { - distances[0] = 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { - *distances++ = 3; - *distances++ = lzPos - curMatch3 - 1; - } - return distances; -} - -/* -UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH4_CALC - - curMatch2 = hash[ hash2Value]; - curMatch3 = hash[kFix3HashSize + hash3Value]; - curMatch4 = hash[kFix4HashSize + hash4Value]; - - hash[ hash2Value] = - hash[kFix3HashSize + hash3Value] = - hash[kFix4HashSize + hash4Value] = - lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) - { - distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch3 - 1; - if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) - { - distances[0] = 4; - return distances + 2; - } - distances[0] = 3; - distances += 2; - } - - if (curMatch4 >= matchMinPos) - if ( - cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && - cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] - ) - { - *distances++ = 4; - *distances++ = lzPos - curMatch4 - 1; - } - return distances; -} -*/ - -#define INCREASE_LZ_POS \ - p->lzPos++; \ - p->pointerToCurPos++; - -UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt* p, UInt32* distances) { - const UInt32* btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - p->btNumAvailBytes--; - { - UInt32 i; - for (i = 0; i < len; i += 2) { - *distances++ = *btBuf++; - *distances++ = *btBuf++; - } - } - INCREASE_LZ_POS - return len; -} - -UInt32 MatchFinderMt_GetMatches(CMatchFinderMt* p, UInt32* distances) { - const UInt32* btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - - if (len == 0) { - if (p->btNumAvailBytes-- >= 4) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); - } else { - /* Condition: there are matches in btBuf with length < p->numHashBytes */ - UInt32* distances2; - p->btNumAvailBytes--; - distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); - do { - *distances2++ = *btBuf++; - *distances2++ = *btBuf++; - } while ((len -= 2) != 0); - len = (UInt32)(distances2 - (distances)); - } - INCREASE_LZ_POS - return len; -} - -#define SKIP_HEADER2 \ - do { \ - GET_NEXT_BLOCK_IF_REQUIRED -#define SKIP_HEADER(n) \ - SKIP_HEADER2 if (p->btNumAvailBytes-- >= (n)) { \ - const Byte* cur = p->pointerToCurPos; \ - UInt32* hash = p->hash; -#define SKIP_FOOTER \ - } \ - INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; \ - } \ - while (--num != 0) \ - ; - -void MatchFinderMt0_Skip(CMatchFinderMt* p, UInt32 num) { - SKIP_HEADER2 { - p->btNumAvailBytes--; - SKIP_FOOTER - } - - void MatchFinderMt2_Skip(CMatchFinderMt * p, UInt32 num) { - SKIP_HEADER(2) - UInt32 hash2Value; - MT_HASH2_CALC - hash[hash2Value] = p->lzPos; - SKIP_FOOTER - } - - void MatchFinderMt3_Skip(CMatchFinderMt * p, UInt32 num) { - SKIP_HEADER(3) - UInt32 hash2Value, hash3Value; - MT_HASH3_CALC - hash[kFix3HashSize + hash3Value] = - hash[hash2Value] = - p->lzPos; - SKIP_FOOTER - } - - /* -void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) -{ - SKIP_HEADER(4) - UInt32 hash2Value, hash3Value, hash4Value; - MT_HASH4_CALC - hash[kFix4HashSize + hash4Value] = - hash[kFix3HashSize + hash3Value] = - hash[ hash2Value] = - p->lzPos; - SKIP_FOOTER -} -*/ - - void MatchFinderMt_CreateVTable(CMatchFinderMt * p, IMatchFinder * vTable) { - vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; - vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; - switch (p->MatchFinder->numHashBytes) { - case 2: - p->GetHeadsFunc = GetHeads2; - p->MixMatchesFunc = (Mf_Mix_Matches)0; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; - break; - case 3: - p->GetHeadsFunc = GetHeads3; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; - break; - default: - /* case 4: */ - p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; - /* p->GetHeadsFunc = GetHeads4; */ - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; - break; - /* - default: - p->GetHeadsFunc = GetHeads5; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; - break; - */ - } - } -} diff --git a/crnlib/lzma_LzFindMt.h b/crnlib/lzma_LzFindMt.h deleted file mode 100644 index faf92232..00000000 --- a/crnlib/lzma_LzFindMt.h +++ /dev/null @@ -1,98 +0,0 @@ -/* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZFINDMT_H -#define __LZFINDMT_H - -#include "lzma_Threads.h" -#include "lzma_LzFind.h" - -namespace crnlib { - -#define kMtHashBlockSize (1 << 13) -#define kMtHashNumBlocks (1 << 3) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) - -#define kMtBtBlockSize (1 << 14) -#define kMtBtNumBlocks (1 << 6) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) - -typedef struct _CMtSync { - Bool wasCreated; - Bool needStart; - Bool exit; - Bool stopWriting; - - CThread thread; - CAutoResetEvent canStart; - CAutoResetEvent wasStarted; - CAutoResetEvent wasStopped; - CSemaphore freeSemaphore; - CSemaphore filledSemaphore; - Bool csWasInitialized; - Bool csWasEntered; - CCriticalSection cs; - UInt32 numProcessedBlocks; -} CMtSync; - -typedef UInt32* (*Mf_Mix_Matches)(void* p, UInt32 matchMinPos, UInt32* distances); - -/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ -#define kMtCacheLineDummy 128 - -typedef void (*Mf_GetHeads)(const Byte* buffer, UInt32 pos, - UInt32* hash, UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc); - -typedef struct _CMatchFinderMt { - /* LZ */ - const Byte* pointerToCurPos; - UInt32* btBuf; - UInt32 btBufPos; - UInt32 btBufPosLimit; - UInt32 lzPos; - UInt32 btNumAvailBytes; - - UInt32* hash; - UInt32 fixedHashSize; - UInt32 historySize; - const UInt32* crc; - - Mf_Mix_Matches MixMatchesFunc; - - /* LZ + BT */ - CMtSync btSync; - Byte btDummy[kMtCacheLineDummy]; - - /* BT */ - UInt32* hashBuf; - UInt32 hashBufPos; - UInt32 hashBufPosLimit; - UInt32 hashNumAvail; - - CLzRef* son; - UInt32 matchMaxLen; - UInt32 numHashBytes; - UInt32 pos; - Byte* buffer; - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be historySize + 1 */ - UInt32 cutValue; - - /* BT + Hash */ - CMtSync hashSync; - /* Byte hashDummy[kMtCacheLineDummy]; */ - - /* Hash */ - Mf_GetHeads GetHeadsFunc; - CMatchFinder* MatchFinder; -} CMatchFinderMt; - -void MatchFinderMt_Construct(CMatchFinderMt* p); -void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc); -SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc); -void MatchFinderMt_CreateVTable(CMatchFinderMt* p, IMatchFinder* vTable); -void MatchFinderMt_ReleaseStream(CMatchFinderMt* p); -} - -#endif diff --git a/crnlib/lzma_LzHash.h b/crnlib/lzma_LzHash.h deleted file mode 100644 index 420b6246..00000000 --- a/crnlib/lzma_LzHash.h +++ /dev/null @@ -1,63 +0,0 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZHASH_H -#define __LZHASH_H - -#define kHash2Size (1 << 10) -#define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) - -#define kFix3HashSize (kHash2Size) -#define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); - -#define HASH3_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; \ - } - -#define HASH4_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; \ - } - -#define HASH5_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ - hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ - hash4Value &= (kHash4Size - 1); \ - } - -/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - -#define MT_HASH2_CALC \ - hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - } - -#define MT_HASH4_CALC \ - { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - hash2Value = temp & (kHash2Size - 1); \ - hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ - hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); \ - } - -#endif diff --git a/crnlib/lzma_LzmaDec.cpp b/crnlib/lzma_LzmaDec.cpp deleted file mode 100644 index 378d6707..00000000 --- a/crnlib/lzma_LzmaDec.cpp +++ /dev/null @@ -1,946 +0,0 @@ -/* LzmaDec.c -- LZMA Decoder -2008-11-06 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include "lzma_LzmaDec.h" - -#include - -namespace crnlib { - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_INIT_SIZE 5 - -#define NORMALIZE \ - if (range < kTopValue) { \ - range <<= 8; \ - code = (code << 8) | (*buf++); \ - } - -#define IF_BIT_0(p) \ - ttt = *(p); \ - NORMALIZE; \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0(p) \ - range = bound; \ - *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define UPDATE_1(p) \ - range -= bound; \ - code -= bound; \ - *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); -#define GET_BIT2(p, i, A0, A1) \ - IF_BIT_0(p) { \ - UPDATE_0(p); \ - i = (i + i); \ - A0; \ - } \ - else { \ - UPDATE_1(p); \ - i = (i + i) + 1; \ - A1; \ - } -#define GET_BIT(p, i) GET_BIT2(p, i, ;, ;) - -#define TREE_GET_BIT(probs, i) \ - { GET_BIT((probs + i), i); } -#define TREE_DECODE(probs, limit, i) \ - { \ - i = 1; \ - do { \ - TREE_GET_BIT(probs, i); \ - } while (i < limit); \ - i -= limit; \ - } - -/* #define _LZMA_SIZE_OPT */ - -#ifdef _LZMA_SIZE_OPT -#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) -#else -#define TREE_6_DECODE(probs, i) \ - { \ - i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - i -= 0x40; \ - } -#endif - -#define NORMALIZE_CHECK \ - if (range < kTopValue) { \ - if (buf >= bufLimit) \ - return DUMMY_ERROR; \ - range <<= 8; \ - code = (code << 8) | (*buf++); \ - } - -#define IF_BIT_0_CHECK(p) \ - ttt = *(p); \ - NORMALIZE_CHECK; \ - bound = (range >> kNumBitModelTotalBits) * ttt; \ - if (code < bound) -#define UPDATE_0_CHECK range = bound; -#define UPDATE_1_CHECK \ - range -= bound; \ - code -= bound; -#define GET_BIT2_CHECK(p, i, A0, A1) \ - IF_BIT_0_CHECK(p) { \ - UPDATE_0_CHECK; \ - i = (i + i); \ - A0; \ - } \ - else { \ - UPDATE_1_CHECK; \ - i = (i + i) + 1; \ - A1; \ - } -#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ;, ;) -#define TREE_DECODE_CHECK(probs, limit, i) \ - { \ - i = 1; \ - do { \ - GET_BIT_CHECK(probs + i, i) \ - } while (i < limit); \ - i -= limit; \ - } - -#define kNumPosBitsMax 4 -#define kNumPosStatesMax (1 << kNumPosBitsMax) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) -#define kNumLenProbs (LenHigh + kLenNumHighSymbols) - -#define kNumStates 12 -#define kNumLitStates 7 - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#define kNumPosSlotBits 6 -#define kNumLenToPosStates 4 - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) - -#define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) -#define IsRepG0 (IsRep + kNumStates) -#define IsRepG1 (IsRepG0 + kNumStates) -#define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) - -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 768 - -#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) - -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG -#endif - - static const Byte kLiteralNextStates[kNumStates * 2] = - { - 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, - 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; - -#define LZMA_DIC_MIN (1 << 12) - -/* First LZMA-symbol is always decoded. -And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization -Out: - Result: - SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker - = kMatchSpecLenStart + 2 : State Init Marker -*/ - -static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec* p, SizeT limit, const Byte* bufLimit) { - CLzmaProb* probs = p->probs; - - unsigned state = p->state; - UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; - unsigned lc = p->prop.lc; - - Byte* dic = p->dic; - SizeT dicBufSize = p->dicBufSize; - SizeT dicPos = p->dicPos; - - UInt32 processedPos = p->processedPos; - UInt32 checkDicSize = p->checkDicSize; - unsigned len = 0; - - const Byte* buf = p->buf; - UInt32 range = p->range; - UInt32 code = p->code; - - do { - CLzmaProb* prob; - UInt32 bound; - unsigned ttt; - unsigned posState = processedPos & pbMask; - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) { - unsigned symbol; - UPDATE_0(prob); - prob = probs + Literal; - if (checkDicSize != 0 || processedPos != 0) - prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + - (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); - - if (state < kNumLitStates) { - symbol = 1; - do { - GET_BIT(prob + symbol, symbol) - } while (symbol < 0x100); - } else { - unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - unsigned offs = 0x100; - symbol = 1; - do { - unsigned bit; - CLzmaProb* probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) - } while (symbol < 0x100); - } - dic[dicPos++] = (Byte)symbol; - processedPos++; - - state = kLiteralNextStates[state]; - /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */ - continue; - } - else { - UPDATE_1(prob); - prob = probs + IsRep + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - state += kNumStates; - prob = probs + LenCoder; - } - else { - UPDATE_1(prob); - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - prob = probs + IsRepG0 + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0(prob) { - UPDATE_0(prob); - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - processedPos++; - state = state < kNumLitStates ? 9 : 11; - continue; - } - UPDATE_1(prob); - } - else { - UInt32 distance; - UPDATE_1(prob); - prob = probs + IsRepG1 + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - distance = rep1; - } - else { - UPDATE_1(prob); - prob = probs + IsRepG2 + state; - IF_BIT_0(prob) { - UPDATE_0(prob); - distance = rep2; - } - else { - UPDATE_1(prob); - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < kNumLitStates ? 8 : 11; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb* probLen = prob + LenChoice; - IF_BIT_0(probLen) { - UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = (1 << kLenNumLowBits); - } - else { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) { - UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = (1 << kLenNumMidBits); - } - else { - UPDATE_1(probLen); - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = (1 << kLenNumHighBits); - } - } - TREE_DECODE(probLen, limit, len); - len += offset; - } - - if (state >= kNumStates) { - UInt32 distance; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); - if (distance >= kStartPosModelIndex) { - unsigned posSlot = (unsigned)distance; - int numDirectBits = (int)(((distance >> 1) - 1)); - distance = (2 | (distance & 1)); - if (posSlot < kEndPosModelIndex) { - distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; - { - UInt32 mask = 1; - unsigned i = 1; - do { - GET_BIT2(prob + i, i, ;, distance |= mask); - mask <<= 1; - } while (--numDirectBits != 0); - } - } else { - numDirectBits -= kNumAlignBits; - do { - NORMALIZE - range >>= 1; - - { - UInt32 t; - code -= range; - t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ - distance = (distance << 1) + (t + 1); - code += range & t; - } - /* - distance <<= 1; - if (code >= range) - { - code -= range; - distance |= 1; - } - */ - } while (--numDirectBits != 0); - prob = probs + Align; - distance <<= kNumAlignBits; - { - unsigned i = 1; - GET_BIT2(prob + i, i, ;, distance |= 1); - GET_BIT2(prob + i, i, ;, distance |= 2); - GET_BIT2(prob + i, i, ;, distance |= 4); - GET_BIT2(prob + i, i, ;, distance |= 8); - } - if (distance == (UInt32)0xFFFFFFFF) { - len += kMatchSpecLenStart; - state -= kNumStates; - break; - } - } - } - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance + 1; - if (checkDicSize == 0) { - if (distance >= processedPos) - return SZ_ERROR_DATA; - } else if (distance >= checkDicSize) - return SZ_ERROR_DATA; - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; - /* state = kLiteralNextStates[state]; */ - } - - len += kMatchMinLen; - - if (limit == dicPos) - return SZ_ERROR_DATA; - { - SizeT rem = limit - dicPos; - unsigned curLen = ((rem < len) ? (unsigned)rem : len); - SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); - - processedPos += curLen; - - len -= curLen; - if (pos + curLen <= dicBufSize) { - Byte* dest = dic + dicPos; - ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; - const Byte* lim = dest + curLen; - dicPos += curLen; - do - *(dest) = (Byte) * (dest + src); - while (++dest != lim); - } else { - do { - dic[dicPos++] = dic[pos]; - if (++pos == dicBufSize) - pos = 0; - } while (--curLen != 0); - } - } - } - } while (dicPos < limit && buf < bufLimit); - NORMALIZE; - p->buf = buf; - p->range = range; - p->code = code; - p->remainLen = len; - p->dicPos = dicPos; - p->processedPos = processedPos; - p->reps[0] = rep0; - p->reps[1] = rep1; - p->reps[2] = rep2; - p->reps[3] = rep3; - p->state = state; - - return SZ_OK; -} - -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec* p, SizeT limit) { - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) { - Byte* dic = p->dic; - SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = p->remainLen; - UInt32 rep0 = p->reps[0]; - if (limit - dicPos < len) - len = (unsigned)(limit - dicPos); - - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) - p->checkDicSize = p->prop.dicSize; - - p->processedPos += len; - p->remainLen -= len; - while (len-- != 0) { - dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; - dicPos++; - } - p->dicPos = dicPos; - } -} - -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec* p, SizeT limit, const Byte* bufLimit) { - do { - SizeT limit2 = limit; - if (p->checkDicSize == 0) { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - } - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); - if (p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - LzmaDec_WriteRem(p, limit); - } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - if (p->remainLen > kMatchSpecLenStart) { - p->remainLen = kMatchSpecLenStart; - } - return 0; -} - -typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ - DUMMY_LIT, - DUMMY_MATCH, - DUMMY_REP -} ELzmaDummy; - -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec* p, const Byte* buf, SizeT inSize) { - UInt32 range = p->range; - UInt32 code = p->code; - const Byte* bufLimit = buf + inSize; - CLzmaProb* probs = p->probs; - unsigned state = p->state; - ELzmaDummy res; - - { - CLzmaProb* prob; - UInt32 bound; - unsigned ttt; - unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); - - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK - - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - - prob = probs + Literal; - if (p->checkDicSize != 0 || p->processedPos != 0) - prob += (LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); - - if (state < kNumLitStates) { - unsigned symbol = 1; - do { - GET_BIT_CHECK(prob + symbol, symbol) - } while (symbol < 0x100); - } else { - unsigned matchByte = p->dic[p->dicPos - p->reps[0] + - ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; - unsigned offs = 0x100; - unsigned symbol = 1; - do { - unsigned bit; - CLzmaProb* probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) - } while (symbol < 0x100); - } - res = DUMMY_LIT; - } - else { - unsigned len; - UPDATE_1_CHECK; - - prob = probs + IsRep + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - state = 0; - prob = probs + LenCoder; - res = DUMMY_MATCH; - } - else { - UPDATE_1_CHECK; - res = DUMMY_REP; - prob = probs + IsRepG0 + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; - } - else { - UPDATE_1_CHECK; - } - } - else { - UPDATE_1_CHECK; - prob = probs + IsRepG1 + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - } - else { - UPDATE_1_CHECK; - prob = probs + IsRepG2 + state; - IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; - } - else { - UPDATE_1_CHECK; - } - } - } - state = kNumStates; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - CLzmaProb* probLen = prob + LenChoice; - IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); - offset = 0; - limit = 1 << kLenNumLowBits; - } - else { - UPDATE_1_CHECK; - probLen = prob + LenChoice2; - IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); - offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; - } - else { - UPDATE_1_CHECK; - probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; - limit = 1 << kLenNumHighBits; - } - } - TREE_DECODE_CHECK(probLen, limit, len); - len += offset; - } - - if (state < 4) { - unsigned posSlot; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); - if (posSlot >= kStartPosModelIndex) { - int numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - - if (posSlot < kEndPosModelIndex) { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; - } else { - numDirectBits -= kNumAlignBits; - do { - NORMALIZE_CHECK - range >>= 1; - code -= range & (((code - range) >> 31) - 1); - /* if (code >= range) code -= range; */ - } while (--numDirectBits != 0); - prob = probs + Align; - numDirectBits = kNumAlignBits; - } - { - unsigned i = 1; - do { - GET_BIT_CHECK(prob + i, i); - } while (--numDirectBits != 0); - } - } - } - } - } - NORMALIZE_CHECK; - return res; -} - -static void LzmaDec_InitRc(CLzmaDec* p, const Byte* data) { - p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); - p->range = 0xFFFFFFFF; - p->needFlush = 0; -} - -void LzmaDec_InitDicAndState(CLzmaDec* p, Bool initDic, Bool initState) { - p->needFlush = 1; - p->remainLen = 0; - p->tempBufSize = 0; - - if (initDic) { - p->processedPos = 0; - p->checkDicSize = 0; - p->needInitState = 1; - } - if (initState) - p->needInitState = 1; -} - -void LzmaDec_Init(CLzmaDec* p) { - p->dicPos = 0; - LzmaDec_InitDicAndState(p, True, True); -} - -static void LzmaDec_InitStateReal(CLzmaDec* p) { - UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); - UInt32 i; - CLzmaProb* probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} - -SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, const Byte* src, SizeT* srcLen, - ELzmaFinishMode finishMode, ELzmaStatus* status) { - SizeT inSize = *srcLen; - (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); - - *status = LZMA_STATUS_NOT_SPECIFIED; - - while (p->remainLen != kMatchSpecLenStart) { - int checkEndMarkNow; - - if (p->needFlush != 0) { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - - LzmaDec_InitRc(p, p->tempBuf); - p->tempBufSize = 0; - } - - checkEndMarkNow = 0; - if (p->dicPos >= dicLimit) { - if (p->remainLen == 0 && p->code == 0) { - *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; - return SZ_OK; - } - if (finishMode == LZMA_FINISH_ANY) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - checkEndMarkNow = 1; - } - - if (p->needInitState) - LzmaDec_InitStateReal(p); - - if (p->tempBufSize == 0) { - SizeT processed; - const Byte* bufLimit; - if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; - (*srcLen) += inSize; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - bufLimit = src; - } else - bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; - p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; - } else { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); - if (dummyRes == DUMMY_ERROR) { - (*srcLen) += lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - } - p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); - (*srcLen) += lookAhead; - src += lookAhead; - inSize -= lookAhead; - p->tempBufSize = 0; - } - } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; -} - -SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) { - SizeT outSize = *destLen; - SizeT inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) { - SizeT inSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->dicPos == p->dicBufSize) - p->dicPos = 0; - dicPos = p->dicPos; - if (outSize > p->dicBufSize - dicPos) { - outSizeCur = p->dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } else { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); - src += inSizeCur; - inSize -= inSizeCur; - *srcLen += inSizeCur; - outSizeCur = p->dicPos - dicPos; - memcpy(dest, p->dic + dicPos, outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->probs); - p->probs = 0; -} - -static void LzmaDec_FreeDict(CLzmaDec* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->dic); - p->dic = 0; -} - -void LzmaDec_Free(CLzmaDec* p, ISzAlloc* alloc) { - LzmaDec_FreeProbs(p, alloc); - LzmaDec_FreeDict(p, alloc); -} - -SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size) { - UInt32 dicSize; - Byte d; - - if (size < LZMA_PROPS_SIZE) - return SZ_ERROR_UNSUPPORTED; - else - dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); - - if (dicSize < LZMA_DIC_MIN) - dicSize = LZMA_DIC_MIN; - p->dicSize = dicSize; - - d = data[0]; - if (d >= (9 * 5 * 5)) - return SZ_ERROR_UNSUPPORTED; - - p->lc = d % 9; - d /= 9; - p->pb = d / 5; - p->lp = d % 5; - - return SZ_OK; -} - -static SRes LzmaDec_AllocateProbs2(CLzmaDec* p, const CLzmaProps* propNew, ISzAlloc* alloc) { - UInt32 numProbs = LzmaProps_GetNumProbs(propNew); - if (p->probs == 0 || numProbs != p->numProbs) { - LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb*)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; - if (p->probs == 0) - return SZ_ERROR_MEM; - } - return SZ_OK; -} - -SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { - CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDec_Allocate(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { - CLzmaProps propNew; - SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - dicBufSize = propNew.dicSize; - if (p->dic == 0 || dicBufSize != p->dicBufSize) { - LzmaDec_FreeDict(p, alloc); - p->dic = (Byte*)alloc->Alloc(alloc, dicBufSize); - if (p->dic == 0) { - LzmaDec_FreeProbs(p, alloc); - return SZ_ERROR_MEM; - } - } - p->dicBufSize = dicBufSize; - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, - const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus* status, ISzAlloc* alloc) { - CLzmaDec p; - SRes res; - SizeT inSize = *srcLen; - SizeT outSize = *destLen; - *srcLen = *destLen = 0; - if (inSize < RC_INIT_SIZE) - return SZ_ERROR_INPUT_EOF; - - LzmaDec_Construct(&p); - res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); - if (res != 0) - return res; - p.dic = dest; - p.dicBufSize = outSize; - - LzmaDec_Init(&p); - - *srcLen = inSize; - res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - - if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) - res = SZ_ERROR_INPUT_EOF; - - (*destLen) = p.dicPos; - LzmaDec_FreeProbs(&p, alloc); - return res; -} -} diff --git a/crnlib/lzma_LzmaEnc.cpp b/crnlib/lzma_LzmaEnc.cpp deleted file mode 100644 index 1477a388..00000000 --- a/crnlib/lzma_LzmaEnc.cpp +++ /dev/null @@ -1,2085 +0,0 @@ -/* LzmaEnc.c -- LZMA Encoder -2008-10-04 : Igor Pavlov : Public domain */ -#include "crn_core.h" -#include - -/* #define SHOW_STAT */ -/* #define SHOW_STAT2 */ - -#if defined(SHOW_STAT) || defined(SHOW_STAT2) -#include -#endif - -#include "lzma_LzmaEnc.h" - -#include "lzma_LzFind.h" -#ifdef COMPRESS_MF_MT -#include "lzma_LzFindMt.h" -#endif - -namespace crnlib { - -#ifdef SHOW_STAT -static int ttt = 0; -#endif - -#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) - -#define kBlockSize (9 << 10) -#define kUnpackBlockSize (1 << 18) -#define kMatchArraySize (1 << 21) -#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) - -#define kNumMaxDirectBits (31) - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 -#define kProbInitValue (kBitModelTotal >> 1) - -#define kNumMoveReducingBits 4 -#define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) - -void LzmaEncProps_Init(CLzmaEncProps* p) { - p->level = 5; - p->dictSize = p->mc = 0; - p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; - p->writeEndMark = 0; -} - -void LzmaEncProps_Normalize(CLzmaEncProps* p) { - int level = p->level; - if (level < 0) - level = 5; - p->level = level; - if (p->dictSize == 0) - p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26))); - if (p->lc < 0) - p->lc = 3; - if (p->lp < 0) - p->lp = 0; - if (p->pb < 0) - p->pb = 2; - if (p->algo < 0) - p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) - p->fb = (level < 7 ? 32 : 64); - if (p->btMode < 0) - p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) - p->numHashBytes = 4; - if (p->mc == 0) - p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); - if (p->numThreads < 0) - p->numThreads = ((p->btMode && p->algo) ? 2 : 1); -} - -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2) { - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - return props.dictSize; -} - -/* #define LZMA_LOG_BSR */ -/* Define it for Intel's CPU */ - -#ifdef LZMA_LOG_BSR - -#define kDicLogSizeMaxCompress 30 - -#define BSR2_RET(pos, res) \ - { \ - unsigned long i; \ - _BitScanReverse(&i, (pos)); \ - res = (i + i) + ((pos >> (i - 1)) & 1); \ - } - -UInt32 GetPosSlot1(UInt32 pos) { - UInt32 res; - BSR2_RET(pos, res); - return res; -} -#define GetPosSlot2(pos, res) \ - { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) \ - { \ - if (pos < 2) \ - res = pos; \ - else \ - BSR2_RET(pos, res); \ - } - -#else - -#define kNumLogBits (9 + (int)sizeof(size_t) / 2) -#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) - -void LzmaEnc_FastPosInit(Byte* g_FastPos) { - int c = 2, slotFast; - g_FastPos[0] = 0; - g_FastPos[1] = 1; - - for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) { - UInt32 k = (1 << ((slotFast >> 1) - 1)); - UInt32 j; - for (j = 0; j < k; j++, c++) - g_FastPos[c] = (Byte)slotFast; - } -} - -#define BSR2_RET(pos, res) \ - { \ - UInt32 i = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ - res = p->g_FastPos[pos >> i] + (i * 2); \ - } -/* -#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ - p->g_FastPos[pos >> 6] + 12 : \ - p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } -*/ - -#define GetPosSlot1(pos) p->g_FastPos[pos] -#define GetPosSlot2(pos, res) \ - { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) \ - { \ - if (pos < kNumFullDistances) \ - res = p->g_FastPos[pos]; \ - else \ - BSR2_RET(pos, res); \ - } - -#endif - -#define LZMA_NUM_REPS 4 - -typedef unsigned CState; - -typedef struct _COptimal { - UInt32 price; - - CState state; - int prev1IsChar; - int prev2; - - UInt32 posPrev2; - UInt32 backPrev2; - - UInt32 posPrev; - UInt32 backPrev; - UInt32 backs[LZMA_NUM_REPS]; -} COptimal; - -#define kNumOpts (1 << 12) - -#define kNumLenToPosStates 4 -#define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 -#define kDicLogSizeMax 32 -#define kDistTableSizeMax (kDicLogSizeMax * 2) - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) -#define kAlignMask (kAlignTableSize - 1) - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) - -#define kNumFullDistances (1 << (kEndPosModelIndex / 2)) - -#ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 -#else -#define CLzmaProb UInt16 -#endif - -#define LZMA_PB_MAX 4 -#define LZMA_LC_MAX 8 -#define LZMA_LP_MAX 4 - -#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) - -#define LZMA_MATCH_LEN_MIN 2 -#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) - -#define kNumStates 12 - -typedef struct -{ - CLzmaProb choice; - CLzmaProb choice2; - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; - CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; - CLzmaProb high[kLenNumHighSymbols]; -} CLenEnc; - -typedef struct -{ - CLenEnc p; - UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - UInt32 tableSize; - UInt32 counters[LZMA_NUM_PB_STATES_MAX]; -} CLenPriceEnc; - -typedef struct _CRangeEnc { - UInt32 range; - Byte cache; - UInt64 low; - UInt64 cacheSize; - Byte* buf; - Byte* bufLim; - Byte* bufBase; - ISeqOutStream* outStream; - UInt64 processed; - SRes res; -} CRangeEnc; - -typedef struct _CSeqInStreamBuf { - ISeqInStream funcTable; - const Byte* data; - SizeT rem; -} CSeqInStreamBuf; - -static SRes MyRead(void* pp, void* data, size_t* size) { - size_t curSize = *size; - CSeqInStreamBuf* p = (CSeqInStreamBuf*)pp; - if (p->rem < curSize) - curSize = p->rem; - memcpy(data, p->data, curSize); - p->rem -= curSize; - p->data += curSize; - *size = curSize; - return SZ_OK; -} - -typedef struct -{ - CLzmaProb* litProbs; - - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; -} CSaveState; - -typedef struct _CLzmaEnc { - IMatchFinder matchFinder; - void* matchFinderObj; - -#ifdef COMPRESS_MF_MT - Bool mtMode; - CMatchFinderMt matchFinderMt; -#endif - - CMatchFinder matchFinderBase; - -#ifdef COMPRESS_MF_MT - Byte pad[128]; -#endif - - UInt32 optimumEndIndex; - UInt32 optimumCurrentIndex; - - UInt32 longestMatchLength; - UInt32 numPairs; - UInt32 numAvail; - COptimal opt[kNumOpts]; - -#ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; -#endif - - UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; - UInt32 numFastBytes; - UInt32 additionalOffset; - UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; - - UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; - UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - UInt32 alignPrices[kAlignTableSize]; - UInt32 alignPriceCount; - - UInt32 distTableSize; - - unsigned lc, lp, pb; - unsigned lpMask, pbMask; - - CLzmaProb* litProbs; - - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - unsigned lclp; - - Bool fastMode; - - CRangeEnc rc; - - Bool writeEndMark; - UInt64 nowPos64; - UInt32 matchPriceCount; - Bool finished; - Bool multiThread; - - SRes result; - UInt32 dictSize; - UInt32 matchFinderCycles; - - ISeqInStream* inStream; - CSeqInStreamBuf seqBufInStream; - - CSaveState saveState; -} CLzmaEnc; - -void LzmaEnc_SaveState(CLzmaEncHandle pp) { - CLzmaEnc* p = (CLzmaEnc*)pp; - CSaveState* dest = &p->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; - dest->state = p->state; - - for (i = 0; i < kNumStates; i++) { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb)); -} - -void LzmaEnc_RestoreState(CLzmaEncHandle pp) { - CLzmaEnc* dest = (CLzmaEnc*)pp; - const CSaveState* p = &dest->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; - dest->state = p->state; - - for (i = 0; i < kNumStates; i++) { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); - memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps* props2) { - CLzmaEnc* p = (CLzmaEnc*)pp; - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - - if (props.lc > LZMA_LC_MAX || - props.lp > LZMA_LP_MAX || - props.pb > LZMA_PB_MAX || - props.dictSize > (1U << kDicLogSizeMaxCompress) || - props.dictSize > (1 << 30)) - return SZ_ERROR_PARAM; - p->dictSize = props.dictSize; - p->matchFinderCycles = props.mc; - { - unsigned fb = props.fb; - if (fb < 5) - fb = 5; - if (fb > LZMA_MATCH_LEN_MAX) - fb = LZMA_MATCH_LEN_MAX; - p->numFastBytes = fb; - } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; - p->fastMode = (props.algo == 0); - p->matchFinderBase.btMode = props.btMode; - { - UInt32 numHashBytes = 4; - if (props.btMode) { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; - } - p->matchFinderBase.numHashBytes = numHashBytes; - } - - p->matchFinderBase.cutValue = props.mc; - - p->writeEndMark = props.writeEndMark; - -#ifdef COMPRESS_MF_MT - /* - if (newMultiThread != _multiThread) - { - ReleaseMatchFinder(); - _multiThread = newMultiThread; - } - */ - p->multiThread = (props.numThreads > 1); -#endif - - return SZ_OK; -} - -static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const int kShortRepNextStates[kNumStates] = {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; - -#define IsCharState(s) ((s) < 7) - -#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len)-2 : kNumLenToPosStates - 1) - -#define kInfinityPrice (1 << 30) - -static void RangeEnc_Construct(CRangeEnc* p) { - p->outStream = 0; - p->bufBase = 0; -} - -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) - -#define RC_BUF_SIZE (1 << 16) -static int RangeEnc_Alloc(CRangeEnc* p, ISzAlloc* alloc) { - if (p->bufBase == 0) { - p->bufBase = (Byte*)alloc->Alloc(alloc, RC_BUF_SIZE); - if (p->bufBase == 0) - return 0; - p->bufLim = p->bufBase + RC_BUF_SIZE; - } - return 1; -} - -static void RangeEnc_Free(CRangeEnc* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->bufBase); - p->bufBase = 0; -} - -static void RangeEnc_Init(CRangeEnc* p) { - /* Stream.Init(); */ - p->low = 0; - p->range = 0xFFFFFFFF; - p->cacheSize = 1; - p->cache = 0; - - p->buf = p->bufBase; - - p->processed = 0; - p->res = SZ_OK; -} - -static void RangeEnc_FlushStream(CRangeEnc* p) { - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != p->outStream->Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; - p->processed += num; - p->buf = p->bufBase; -} - -static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc* p) { - if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) { - Byte temp = p->cache; - do { - Byte* buf = p->buf; - *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); - p->buf = buf; - if (buf == p->bufLim) - RangeEnc_FlushStream(p); - temp = 0xFF; - } while (--p->cacheSize != 0); - p->cache = (Byte)((UInt32)p->low >> 24); - } - p->cacheSize++; - p->low = (UInt32)p->low << 8; -} - -static void RangeEnc_FlushData(CRangeEnc* p) { - int i; - for (i = 0; i < 5; i++) - RangeEnc_ShiftLow(p); -} - -static void RangeEnc_EncodeDirectBits(CRangeEnc* p, UInt32 value, int numBits) { - do { - p->range >>= 1; - p->low += p->range & (0 - ((value >> --numBits) & 1)); - if (p->range < kTopValue) { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } - } while (numBits != 0); -} - -static void RangeEnc_EncodeBit(CRangeEnc* p, CLzmaProb* prob, UInt32 symbol) { - UInt32 ttt = *prob; - UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; - if (symbol == 0) { - p->range = newBound; - ttt += (kBitModelTotal - ttt) >> kNumMoveBits; - } else { - p->low += newBound; - p->range -= newBound; - ttt -= ttt >> kNumMoveBits; - } - *prob = (CLzmaProb)ttt; - if (p->range < kTopValue) { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } -} - -static void LitEnc_Encode(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol) { - symbol |= 0x100; - do { - RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); - symbol <<= 1; - } while (symbol < 0x10000); -} - -static void LitEnc_EncodeMatched(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol, UInt32 matchByte) { - UInt32 offs = 0x100; - symbol |= 0x100; - do { - matchByte <<= 1; - RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } while (symbol < 0x10000); -} - -void LzmaEnc_InitPriceTables(UInt32* ProbPrices) { - UInt32 i; - for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) { - const int kCyclesBits = kNumBitPriceShiftBits; - UInt32 w = i; - UInt32 bitCount = 0; - int j; - for (j = 0; j < kCyclesBits; j++) { - w = w * w; - bitCount <<= 1; - while (w >= ((UInt32)1 << 16)) { - w >>= 1; - bitCount++; - } - } - ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); - } -} - -#define GET_PRICE(prob, symbol) \ - p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICEa(prob, symbol) \ - ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -static UInt32 LitEnc_GetPrice(const CLzmaProb* probs, UInt32 symbol, UInt32* ProbPrices) { - UInt32 price = 0; - symbol |= 0x100; - do { - price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); - symbol <<= 1; - } while (symbol < 0x10000); - return price; -} - -static UInt32 LitEnc_GetPriceMatched(const CLzmaProb* probs, UInt32 symbol, UInt32 matchByte, UInt32* ProbPrices) { - UInt32 price = 0; - UInt32 offs = 0x100; - symbol |= 0x100; - do { - matchByte <<= 1; - price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); - symbol <<= 1; - offs &= ~(matchByte ^ symbol); - } while (symbol < 0x10000); - return price; -} - -static void RcTree_Encode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0;) { - UInt32 bit; - i--; - bit = (symbol >> i) & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - } -} - -static void RcTree_ReverseEncode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { - UInt32 m = 1; - int i; - for (i = 0; i < numBitLevels; i++) { - UInt32 bit = symbol & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - symbol >>= 1; - } -} - -static UInt32 RcTree_GetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { - UInt32 price = 0; - symbol |= (1 << numBitLevels); - while (symbol != 1) { - price += GET_PRICEa(probs[symbol >> 1], symbol & 1); - symbol >>= 1; - } - return price; -} - -static UInt32 RcTree_ReverseGetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { - UInt32 price = 0; - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0; i--) { - UInt32 bit = symbol & 1; - symbol >>= 1; - price += GET_PRICEa(probs[m], bit); - m = (m << 1) | bit; - } - return price; -} - -static void LenEnc_Init(CLenEnc* p) { - unsigned i; - p->choice = p->choice2 = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) - p->low[i] = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) - p->mid[i] = kProbInitValue; - for (i = 0; i < kLenNumHighSymbols; i++) - p->high[i] = kProbInitValue; -} - -static void LenEnc_Encode(CLenEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState) { - if (symbol < kLenNumLowSymbols) { - RangeEnc_EncodeBit(rc, &p->choice, 0); - RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); - } else { - RangeEnc_EncodeBit(rc, &p->choice, 1); - if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) { - RangeEnc_EncodeBit(rc, &p->choice2, 0); - RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); - } else { - RangeEnc_EncodeBit(rc, &p->choice2, 1); - RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); - } - } -} - -static void LenEnc_SetPrices(CLenEnc* p, UInt32 posState, UInt32 numSymbols, UInt32* prices, UInt32* ProbPrices) { - UInt32 a0 = GET_PRICE_0a(p->choice); - UInt32 a1 = GET_PRICE_1a(p->choice); - UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); - UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); - UInt32 i = 0; - for (i = 0; i < kLenNumLowSymbols; i++) { - if (i >= numSymbols) - return; - prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); - } - for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) { - if (i >= numSymbols) - return; - prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); - } - for (; i < numSymbols; i++) - prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); -} - -static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc* p, UInt32 posState, UInt32* ProbPrices) { - LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); - p->counters[posState] = p->tableSize; -} - -static void LenPriceEnc_UpdateTables(CLenPriceEnc* p, UInt32 numPosStates, UInt32* ProbPrices) { - UInt32 posState; - for (posState = 0; posState < numPosStates; posState++) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - -static void LenEnc_Encode2(CLenPriceEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32* ProbPrices) { - LenEnc_Encode(&p->p, rc, symbol, posState); - if (updatePrice) - if (--p->counters[posState] == 0) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - -static void MovePos(CLzmaEnc* p, UInt32 num) { -#ifdef SHOW_STAT - ttt += num; - printf("\n MovePos %d", num); -#endif - if (num != 0) { - p->additionalOffset += num; - p->matchFinder.Skip(p->matchFinderObj, num); - } -} - -static UInt32 ReadMatchDistances(CLzmaEnc* p, UInt32* numDistancePairsRes) { - UInt32 lenRes = 0, numPairs; - p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); -#ifdef SHOW_STAT - printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); - ttt++; - { - UInt32 i; - for (i = 0; i < numPairs; i += 2) - printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); - } -#endif - if (numPairs > 0) { - lenRes = p->matches[numPairs - 2]; - if (lenRes == p->numFastBytes) { - const Byte* pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - UInt32 distance = p->matches[numPairs - 1] + 1; - UInt32 numAvail = p->numAvail; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - { - const Byte* pby2 = pby - distance; - for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++) - ; - } - } - } - p->additionalOffset++; - *numDistancePairsRes = numPairs; - return lenRes; -} - -#define MakeAsChar(p) \ - (p)->backPrev = (UInt32)(-1); \ - (p)->prev1IsChar = False; -#define MakeAsShortRep(p) \ - (p)->backPrev = 0; \ - (p)->prev1IsChar = False; -#define IsShortRep(p) ((p)->backPrev == 0) - -static UInt32 GetRepLen1Price(CLzmaEnc* p, UInt32 state, UInt32 posState) { - return GET_PRICE_0(p->isRepG0[state]) + - GET_PRICE_0(p->isRep0Long[state][posState]); -} - -static UInt32 GetPureRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 state, UInt32 posState) { - UInt32 price; - if (repIndex == 0) { - price = GET_PRICE_0(p->isRepG0[state]); - price += GET_PRICE_1(p->isRep0Long[state][posState]); - } else { - price = GET_PRICE_1(p->isRepG0[state]); - if (repIndex == 1) - price += GET_PRICE_0(p->isRepG1[state]); - else { - price += GET_PRICE_1(p->isRepG1[state]); - price += GET_PRICE(p->isRepG2[state], repIndex - 2); - } - } - return price; -} - -static UInt32 GetRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) { - return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + - GetPureRepPrice(p, repIndex, state, posState); -} - -static UInt32 Backward(CLzmaEnc* p, UInt32* backRes, UInt32 cur) { - UInt32 posMem = p->opt[cur].posPrev; - UInt32 backMem = p->opt[cur].backPrev; - p->optimumEndIndex = cur; - do { - if (p->opt[cur].prev1IsChar) { - MakeAsChar(&p->opt[posMem]) - p->opt[posMem] - .posPrev = posMem - 1; - if (p->opt[cur].prev2) { - p->opt[posMem - 1].prev1IsChar = False; - p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; - p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; - } - } - { - UInt32 posPrev = posMem; - UInt32 backCur = backMem; - - backMem = p->opt[posPrev].backPrev; - posMem = p->opt[posPrev].posPrev; - - p->opt[posPrev].backPrev = backCur; - p->opt[posPrev].posPrev = cur; - cur = posPrev; - } - } while (cur != 0); - *backRes = p->opt[0].backPrev; - p->optimumCurrentIndex = p->opt[0].posPrev; - return p->optimumCurrentIndex; -} - -#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos)&p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300) - -static UInt32 GetOptimum(CLzmaEnc* p, UInt32 position, UInt32* backRes) { - UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; - UInt32 matchPrice, repMatchPrice, normalMatchPrice; - UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; - UInt32* matches; - const Byte* data; - Byte curByte, matchByte; - if (p->optimumEndIndex != p->optimumCurrentIndex) { - const COptimal* opt = &p->opt[p->optimumCurrentIndex]; - UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; - *backRes = opt->backPrev; - p->optimumCurrentIndex = opt->posPrev; - return lenRes; - } - p->optimumCurrentIndex = p->optimumEndIndex = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) { - *backRes = (UInt32)(-1); - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 lenTest; - const Byte* data2; - reps[i] = p->reps[i]; - data2 = data - (reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) { - repLens[i] = 0; - continue; - } - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) - ; - repLens[i] = lenTest; - if (lenTest > repLens[repMaxIndex]) - repMaxIndex = i; - } - if (repLens[repMaxIndex] >= p->numFastBytes) { - UInt32 lenRes; - *backRes = repMaxIndex; - lenRes = repLens[repMaxIndex]; - MovePos(p, lenRes - 1); - return lenRes; - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) { - *backRes = (UInt32)-1; - return 1; - } - - p->opt[0].state = (CState)p->state; - - posState = (position & p->pbMask); - - { - const CLzmaProb* probs = LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsCharState(p->state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAsChar(&p->opt[1]); - - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); - - if (matchByte == curByte) { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) { - p->opt[1].price = shortRepPrice; - MakeAsShortRep(&p->opt[1]); - } - } - lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); - - if (lenEnd < 2) { - *backRes = p->opt[1].backPrev; - return 1; - } - - p->opt[1].posPrev = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->opt[0].backs[i] = reps[i]; - - len = lenEnd; - do - p->opt[len--].price = kInfinityPrice; - while (len >= 2); - - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 repLen = repLens[i]; - UInt32 price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); - do { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; - COptimal* opt = &p->opt[repLen]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = i; - opt->prev1IsChar = False; - } - } while (--repLen >= 2); - } - - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - - len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); - if (len <= mainLen) { - UInt32 offs = 0; - while (len > matches[offs]) - offs += 2; - for (;; len++) { - COptimal* opt; - UInt32 distance = matches[offs + 1]; - - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(len); - if (distance < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][distance]; - else { - UInt32 slot; - GetPosSlot2(distance, slot); - curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; - } - opt = &p->opt[len]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = distance + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - if (len == matches[offs]) { - offs += 2; - if (offs == numPairs) - break; - } - } - } - - cur = 0; - -#ifdef SHOW_STAT2 - if (position >= 0) { - unsigned i; - printf("\n pos = %4X", position); - for (i = cur; i <= lenEnd; i++) - printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); - } -#endif - - for (;;) { - UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; - UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; - Bool nextIsChar; - Byte curByte, matchByte; - const Byte* data; - COptimal* curOpt; - COptimal* nextOpt; - - cur++; - if (cur == lenEnd) - return Backward(p, backRes, cur); - - newLen = ReadMatchDistances(p, &numPairs); - if (newLen >= p->numFastBytes) { - p->numPairs = numPairs; - p->longestMatchLength = newLen; - return Backward(p, backRes, cur); - } - position++; - curOpt = &p->opt[cur]; - posPrev = curOpt->posPrev; - if (curOpt->prev1IsChar) { - posPrev--; - if (curOpt->prev2) { - state = p->opt[curOpt->posPrev2].state; - if (curOpt->backPrev2 < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } else - state = p->opt[posPrev].state; - state = kLiteralNextStates[state]; - } else - state = p->opt[posPrev].state; - if (posPrev == cur - 1) { - if (IsShortRep(curOpt)) - state = kShortRepNextStates[state]; - else - state = kLiteralNextStates[state]; - } else { - UInt32 pos; - const COptimal* prevOpt; - if (curOpt->prev1IsChar && curOpt->prev2) { - posPrev = curOpt->posPrev2; - pos = curOpt->backPrev2; - state = kRepNextStates[state]; - } else { - pos = curOpt->backPrev; - if (pos < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - prevOpt = &p->opt[posPrev]; - if (pos < LZMA_NUM_REPS) { - UInt32 i; - reps[0] = prevOpt->backs[pos]; - for (i = 1; i <= pos; i++) - reps[i] = prevOpt->backs[i - 1]; - for (; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i]; - } else { - UInt32 i; - reps[0] = (pos - LZMA_NUM_REPS); - for (i = 1; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i - 1]; - } - } - curOpt->state = (CState)state; - - curOpt->backs[0] = reps[0]; - curOpt->backs[1] = reps[1]; - curOpt->backs[2] = reps[2]; - curOpt->backs[3] = reps[3]; - - curPrice = curOpt->price; - nextIsChar = False; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - posState = (position & p->pbMask); - - curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); - { - const CLzmaProb* probs = LIT_PROBS(position, *(data - 1)); - curAnd1Price += - (!IsCharState(state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - nextOpt = &p->opt[cur + 1]; - - if (curAnd1Price < nextOpt->price) { - nextOpt->price = curAnd1Price; - nextOpt->posPrev = cur; - MakeAsChar(nextOpt); - nextIsChar = True; - } - - matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - - if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); - if (shortRepPrice <= nextOpt->price) { - nextOpt->price = shortRepPrice; - nextOpt->posPrev = cur; - MakeAsShortRep(nextOpt); - nextIsChar = True; - } - } - numAvailFull = p->numAvail; - { - UInt32 temp = kNumOpts - 1 - cur; - if (temp < numAvailFull) - numAvailFull = temp; - } - - if (numAvailFull < 2) - continue; - numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - - if (!nextIsChar && matchByte != curByte) /* speed optimization */ - { - /* try Literal + rep0 */ - UInt32 temp; - UInt32 lenTest2; - const Byte* data2 = data - (reps[0] + 1); - UInt32 limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; - - for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++) - ; - lenTest2 = temp - 1; - if (lenTest2 >= 2) { - UInt32 state2 = kLiteralNextStates[state]; - UInt32 posStateNext = (position + 1) & p->pbMask; - UInt32 nextRepMatchPrice = curAnd1Price + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 curAndLenPrice; - COptimal* opt; - UInt32 offset = cur + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = False; - } - } - } - } - - startLen = 2; /* speed optimization */ - { - UInt32 repIndex; - for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) { - UInt32 lenTest; - UInt32 lenTestTemp; - UInt32 price; - const Byte* data2 = data - (reps[repIndex] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++) - ; - while (lenEnd < cur + lenTest) - p->opt[++lenEnd].price = kInfinityPrice; - lenTestTemp = lenTest; - price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); - do { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; - COptimal* opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = repIndex; - opt->prev1IsChar = False; - } - } while (--lenTest >= 2); - lenTest = lenTestTemp; - - if (repIndex == 0) - startLen = lenTest + 1; - - /* if (_maxMode) */ - { - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; - UInt32 nextRepMatchPrice; - if (limit > numAvailFull) - limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) - ; - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) { - UInt32 state2 = kRepNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = - price + p->repLenEnc.prices[posState][lenTest - 2] + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (position + lenTest + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 curAndLenPrice; - COptimal* opt; - UInt32 offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = repIndex; - } - } - } - } - } - } - /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ - if (newLen > numAvail) { - newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2) - ; - matches[numPairs] = newLen; - numPairs += 2; - } - if (newLen >= startLen) { - UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - UInt32 offs, curBack, posSlot; - UInt32 lenTest; - while (lenEnd < cur + newLen) - p->opt[++lenEnd].price = kInfinityPrice; - - offs = 0; - while (startLen > matches[offs]) - offs += 2; - curBack = matches[offs + 1]; - GetPosSlot2(curBack, posSlot); - for (lenTest = /*2*/ startLen;; lenTest++) { - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(lenTest); - COptimal* opt; - if (curBack < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; - else - curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; - - opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = curBack + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - - if (/*_maxMode && */ lenTest == matches[offs]) { - /* Try Match + Literal + Rep0 */ - const Byte* data2 = data - (curBack + 1); - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; - UInt32 nextRepMatchPrice; - if (limit > numAvailFull) - limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++) - ; - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) { - UInt32 state2 = kMatchNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = curAndLenPrice + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (posStateNext + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ - { - UInt32 offset = cur + lenTest + 1 + lenTest2; - UInt32 curAndLenPrice; - COptimal* opt; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = curBack + LZMA_NUM_REPS; - } - } - } - offs += 2; - if (offs == numPairs) - break; - curBack = matches[offs + 1]; - if (curBack >= kNumFullDistances) - GetPosSlot2(curBack, posSlot); - } - } - } - } -} - -#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) - -static UInt32 GetOptimumFast(CLzmaEnc* p, UInt32* backRes) { - UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; - const Byte* data; - const UInt32* matches; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - *backRes = (UInt32)-1; - if (numAvail < 2) - return 1; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - - repLen = repIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len; - const Byte* data2 = data - (p->reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (len = 2; len < numAvail && data[len] == data2[len]; len++) - ; - if (len >= p->numFastBytes) { - *backRes = i; - MovePos(p, len - 1); - return len; - } - if (len > repLen) { - repIndex = i; - repLen = len; - } - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) { - *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - - mainDist = 0; /* for GCC */ - if (mainLen >= 2) { - mainDist = matches[numPairs - 1]; - while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) { - if (!ChangePair(matches[numPairs - 3], mainDist)) - break; - numPairs -= 2; - mainLen = matches[numPairs - 2]; - mainDist = matches[numPairs - 1]; - } - if (mainLen == 2 && mainDist >= 0x80) - mainLen = 1; - } - - if (repLen >= 2 && ((repLen + 1 >= mainLen) || - (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || - (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) { - *backRes = repIndex; - MovePos(p, repLen - 1); - return repLen; - } - - if (mainLen < 2 || numAvail <= 2) - return 1; - - p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); - if (p->longestMatchLength >= 2) { - UInt32 newDistance = matches[p->numPairs - 1]; - if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || - (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || - (p->longestMatchLength > mainLen + 1) || - (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) - return 1; - } - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len, limit; - const Byte* data2 = data - (p->reps[i] + 1); - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - limit = mainLen - 1; - for (len = 2; len < limit && data[len] == data2[len]; len++) - ; - if (len >= limit) - return 1; - } - *backRes = mainDist + LZMA_NUM_REPS; - MovePos(p, mainLen - 2); - return mainLen; -} - -static void WriteEndMarker(CLzmaEnc* p, UInt32 posState) { - UInt32 len; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - len = LZMA_MATCH_LEN_MIN; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); - RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); -} - -static SRes CheckErrors(CLzmaEnc* p) { - if (p->result != SZ_OK) - return p->result; - if (p->rc.res != SZ_OK) - p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) - p->result = SZ_ERROR_READ; - if (p->result != SZ_OK) - p->finished = True; - return p->result; -} - -static SRes Flush(CLzmaEnc* p, UInt32 nowPos) { - /* ReleaseMFStream(); */ - p->finished = True; - if (p->writeEndMark) - WriteEndMarker(p, nowPos & p->pbMask); - RangeEnc_FlushData(&p->rc); - RangeEnc_FlushStream(&p->rc); - return CheckErrors(p); -} - -static void FillAlignPrices(CLzmaEnc* p) { - UInt32 i; - for (i = 0; i < kAlignTableSize; i++) - p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); - p->alignPriceCount = 0; -} - -static void FillDistancesPrices(CLzmaEnc* p) { - UInt32 tempPrices[kNumFullDistances]; - UInt32 i, lenToPosState; - for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { - UInt32 posSlot = GetPosSlot1(i); - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); - } - - for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { - UInt32 posSlot; - const CLzmaProb* encoder = p->posSlotEncoder[lenToPosState]; - UInt32* posSlotPrices = p->posSlotPrices[lenToPosState]; - for (posSlot = 0; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); - for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); - - { - UInt32* distancesPrices = p->distancesPrices[lenToPosState]; - UInt32 i; - for (i = 0; i < kStartPosModelIndex; i++) - distancesPrices[i] = posSlotPrices[i]; - for (; i < kNumFullDistances; i++) - distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; - } - } - p->matchPriceCount = 0; -} - -void LzmaEnc_Construct(CLzmaEnc* p) { - RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); -#ifdef COMPRESS_MF_MT - MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; -#endif - - { - CLzmaEncProps props; - LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); - } - -#ifndef LZMA_LOG_BSR - LzmaEnc_FastPosInit(p->g_FastPos); -#endif - - LzmaEnc_InitPriceTables(p->ProbPrices); - p->litProbs = 0; - p->saveState.litProbs = 0; -} - -CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc) { - void* p; - p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); - if (p != 0) - LzmaEnc_Construct((CLzmaEnc*)p); - return p; -} - -void LzmaEnc_FreeLits(CLzmaEnc* p, ISzAlloc* alloc) { - alloc->Free(alloc, p->litProbs); - alloc->Free(alloc, p->saveState.litProbs); - p->litProbs = 0; - p->saveState.litProbs = 0; -} - -void LzmaEnc_Destruct(CLzmaEnc* p, ISzAlloc* alloc, ISzAlloc* allocBig) { -#ifdef COMPRESS_MF_MT - MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); -#endif - MatchFinder_Free(&p->matchFinderBase, allocBig); - LzmaEnc_FreeLits(p, alloc); - RangeEnc_Free(&p->rc, alloc); -} - -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig) { - LzmaEnc_Destruct((CLzmaEnc*)p, alloc, allocBig); - alloc->Free(alloc, p); -} - -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc* p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) { - UInt32 nowPos32, startPos32; - if (p->inStream != 0) { - p->matchFinderBase.stream = p->inStream; - p->matchFinder.Init(p->matchFinderObj); - p->inStream = 0; - } - - if (p->finished) - return p->result; - RINOK(CheckErrors(p)); - - nowPos32 = (UInt32)p->nowPos64; - startPos32 = nowPos32; - - if (p->nowPos64 == 0) { - UInt32 numPairs; - Byte curByte; - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - return Flush(p, nowPos32); - ReadMatchDistances(p, &numPairs); - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); - p->state = kLiteralNextStates[p->state]; - curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); - LitEnc_Encode(&p->rc, p->litProbs, curByte); - p->additionalOffset--; - nowPos32++; - } - - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) - for (;;) { - UInt32 pos, len, posState; - - if (p->fastMode) - len = GetOptimumFast(p, &pos); - else - len = GetOptimum(p, nowPos32, &pos); - -#ifdef SHOW_STAT2 - printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); -#endif - - posState = nowPos32 & p->pbMask; - if (len == 1 && pos == (UInt32)-1) { - Byte curByte; - CLzmaProb* probs; - const Byte* data; - - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - curByte = *data; - probs = LIT_PROBS(nowPos32, *(data - 1)); - if (IsCharState(p->state)) - LitEnc_Encode(&p->rc, probs, curByte); - else - LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); - p->state = kLiteralNextStates[p->state]; - } else { - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - if (pos < LZMA_NUM_REPS) { - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); - if (pos == 0) { - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); - RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); - } else { - UInt32 distance = p->reps[pos]; - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); - if (pos == 1) - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); - else { - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); - if (pos == 3) - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - } - p->reps[1] = p->reps[0]; - p->reps[0] = distance; - } - if (len == 1) - p->state = kShortRepNextStates[p->state]; - else { - LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - p->state = kRepNextStates[p->state]; - } - } else { - UInt32 posSlot; - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); - p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - pos -= LZMA_NUM_REPS; - GetPosSlot(pos, posSlot); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); - - if (posSlot >= kStartPosModelIndex) { - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - UInt32 posReduced = pos - base; - - if (posSlot < kEndPosModelIndex) - RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); - else { - RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - p->alignPriceCount++; - } - } - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - p->reps[1] = p->reps[0]; - p->reps[0] = pos; - p->matchPriceCount++; - } - } - p->additionalOffset -= len; - nowPos32 += len; - if (p->additionalOffset == 0) { - UInt32 processed; - if (!p->fastMode) { - if (p->matchPriceCount >= (1 << 7)) - FillDistancesPrices(p); - if (p->alignPriceCount >= kAlignTableSize) - FillAlignPrices(p); - } - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - break; - processed = nowPos32 - startPos32; - if (useLimits) { - if (processed + kNumOpts + 300 >= maxUnpackSize || - RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) - break; - } else if (processed >= (1 << 15)) { - p->nowPos64 += nowPos32 - startPos32; - return CheckErrors(p); - } - } - } - p->nowPos64 += nowPos32 - startPos32; - return Flush(p, nowPos32); -} - -#define kBigHashDicLimit ((UInt32)1 << 24) - -static SRes LzmaEnc_Alloc(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { - UInt32 beforeSize = kNumOpts; - if (!RangeEnc_Alloc(&p->rc, alloc)) - return SZ_ERROR_MEM; -#ifdef COMPRESS_MF_MT - Bool btMode = (p->matchFinderBase.btMode != 0); - p->mtMode = (p->multiThread && !p->fastMode && btMode); -#endif - - { - unsigned lclp = p->lc + p->lp; - if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) { - LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); - if (p->litProbs == 0 || p->saveState.litProbs == 0) { - LzmaEnc_FreeLits(p, alloc); - return SZ_ERROR_MEM; - } - p->lclp = lclp; - } - } - - p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); - - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; - -#ifdef COMPRESS_MF_MT - if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); - p->matchFinderObj = &p->matchFinderMt; - MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); - } else -#endif - { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) - return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); - } - return SZ_OK; -} - -void LzmaEnc_Init(CLzmaEnc* p) { - UInt32 i; - p->state = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->reps[i] = 0; - - RangeEnc_Init(&p->rc); - - for (i = 0; i < kNumStates; i++) { - UInt32 j; - for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { - p->isMatch[i][j] = kProbInitValue; - p->isRep0Long[i][j] = kProbInitValue; - } - p->isRep[i] = kProbInitValue; - p->isRepG0[i] = kProbInitValue; - p->isRepG1[i] = kProbInitValue; - p->isRepG2[i] = kProbInitValue; - } - - { - UInt32 num = 0x300 << (p->lp + p->lc); - for (i = 0; i < num; i++) - p->litProbs[i] = kProbInitValue; - } - - { - for (i = 0; i < kNumLenToPosStates; i++) { - CLzmaProb* probs = p->posSlotEncoder[i]; - UInt32 j; - for (j = 0; j < (1 << kNumPosSlotBits); j++) - probs[j] = kProbInitValue; - } - } - { - for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) - p->posEncoders[i] = kProbInitValue; - } - - LenEnc_Init(&p->lenEnc.p); - LenEnc_Init(&p->repLenEnc.p); - - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - - p->optimumEndIndex = 0; - p->optimumCurrentIndex = 0; - p->additionalOffset = 0; - - p->pbMask = (1 << p->pb) - 1; - p->lpMask = (1 << p->lp) - 1; -} - -void LzmaEnc_InitPrices(CLzmaEnc* p) { - if (!p->fastMode) { - FillDistancesPrices(p); - FillAlignPrices(p); - } - - p->lenEnc.tableSize = - p->repLenEnc.tableSize = - p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); -} - -static SRes LzmaEnc_AllocAndInit(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { - UInt32 i; - for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) - if (p->dictSize <= ((UInt32)1 << i)) - break; - p->distTableSize = i * 2; - - p->finished = False; - p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - p->nowPos64 = 0; - return SZ_OK; -} - -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream* inStream, ISeqOutStream* outStream, - ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - p->inStream = inStream; - p->rc.outStream = outStream; - return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); -} - -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream* inStream, UInt32 keepWindowSize, - ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - p->inStream = inStream; - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -static void LzmaEnc_SetInputBuf(CLzmaEnc* p, const Byte* src, SizeT srcLen) { - p->seqBufInStream.funcTable.Read = MyRead; - p->seqBufInStream.data = src; - p->seqBufInStream.rem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte* src, SizeT srcLen, - UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->inStream = &p->seqBufInStream.funcTable; - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -void LzmaEnc_Finish(CLzmaEncHandle pp) { -#ifdef COMPRESS_MF_MT - CLzmaEnc* p = (CLzmaEnc*)pp; - if (p->mtMode) - MatchFinderMt_ReleaseStream(&p->matchFinderMt); -#else - (void)pp; -#endif -} - -typedef struct _CSeqOutStreamBuf { - ISeqOutStream funcTable; - Byte* data; - SizeT rem; - Bool overflow; -} CSeqOutStreamBuf; - -static size_t MyWrite(void* pp, const void* data, size_t size) { - CSeqOutStreamBuf* p = (CSeqOutStreamBuf*)pp; - if (p->rem < size) { - size = p->rem; - p->overflow = True; - } - memcpy(p->data, data, size); - p->rem -= size; - p->data += size; - return size; -} - -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { - const CLzmaEnc* p = (CLzmaEnc*)pp; - return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); -} - -const Byte* LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { - const CLzmaEnc* p = (CLzmaEnc*)pp; - return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; -} - -#if 0 // Unused function producing a dangling-pointer warning. -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, - Byte* dest, size_t* destLen, UInt32 desiredPackSize, UInt32* unpackSize) { - CLzmaEnc* p = (CLzmaEnc*)pp; - UInt64 nowPos64; - SRes res; - CSeqOutStreamBuf outStream; - - outStream.funcTable.Write = MyWrite; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = False; - p->finished = False; - p->result = SZ_OK; - - if (reInit) - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - nowPos64 = p->nowPos64; - RangeEnc_Init(&p->rc); - p->rc.outStream = &outStream.funcTable; - - res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); - - *unpackSize = (UInt32)(p->nowPos64 - nowPos64); - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - - return res; -} -#endif - -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream* outStream, ISeqInStream* inStream, ICompressProgress* progress, - ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)pp; - SRes res = SZ_OK; - -#ifdef COMPRESS_MF_MT - Byte allocaDummy[0x300]; - (void)allocaDummy; - int i = 0; - for (i = 0; i < 16; i++) - allocaDummy[i] = (Byte)i; -#endif - - RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig)); - - for (;;) { - res = LzmaEnc_CodeOneBlock(p, False, 0, 0); - if (res != SZ_OK || p->finished != 0) - break; - if (progress != 0) { - res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); - if (res != SZ_OK) { - res = SZ_ERROR_PROGRESS; - break; - } - } - } - LzmaEnc_Finish(pp); - return res; -} - -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte* props, SizeT* size) { - CLzmaEnc* p = (CLzmaEnc*)pp; - int i; - UInt32 dictSize = p->dictSize; - if (*size < LZMA_PROPS_SIZE) - return SZ_ERROR_PARAM; - *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - for (i = 11; i <= 30; i++) { - if (dictSize <= ((UInt32)2 << i)) { - dictSize = (2 << i); - break; - } - if (dictSize <= ((UInt32)3 << i)) { - dictSize = (3 << i); - break; - } - } - - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; -} - -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { - SRes res; - CLzmaEnc* p = (CLzmaEnc*)pp; - - CSeqOutStreamBuf outStream; - - LzmaEnc_SetInputBuf(p, src, srcLen); - - outStream.funcTable.Write = MyWrite; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = writeEndMark; - res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable, - progress, alloc, allocBig); - - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - return res; -} - -SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, - ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { - CLzmaEnc* p = (CLzmaEnc*)LzmaEnc_Create(alloc); - SRes res; - if (p == 0) - return SZ_ERROR_MEM; - - res = LzmaEnc_SetProps(p, props); - if (res == SZ_OK) { - res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); - if (res == SZ_OK) - res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, - writeEndMark, progress, alloc, allocBig); - } - - LzmaEnc_Destroy(p, alloc, allocBig); - return res; -} -} diff --git a/crnlib/lzma_LzmaEnc.h b/crnlib/lzma_LzmaEnc.h deleted file mode 100644 index 44f3e99e..00000000 --- a/crnlib/lzma_LzmaEnc.h +++ /dev/null @@ -1,73 +0,0 @@ -/* LzmaEnc.h -- LZMA Encoder -2008-10-04 : Igor Pavlov : Public domain */ - -#ifndef __LZMAENC_H -#define __LZMAENC_H - -#include "lzma_Types.h" - -namespace crnlib { - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaEncProps { - int level; /* 0 <= level <= 9 */ - UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (1 << 30) for 64-bit version - default = (1 << 24) */ - int lc; /* 0 <= lc <= 8, default = 3 */ - int lp; /* 0 <= lp <= 4, default = 0 */ - int pb; /* 0 <= pb <= 4, default = 2 */ - int algo; /* 0 - fast, 1 - normal, default = 1 */ - int fb; /* 5 <= fb <= 273, default = 32 */ - int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ - int numHashBytes; /* 2, 3 or 4, default = 4 */ - UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ - unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ - int numThreads; /* 1 or 2, default = 2 */ -} CLzmaEncProps; - -void LzmaEncProps_Init(CLzmaEncProps* p); -void LzmaEncProps_Normalize(CLzmaEncProps* p); -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2); - -/* ---------- CLzmaEncHandle Interface ---------- */ - -/* LzmaEnc_* functions can return the following exit codes: -Returns: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - Write callback error. - SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -typedef void* CLzmaEncHandle; - -CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig); -SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps* props); -SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte* properties, SizeT* size); -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream* outStream, ISeqInStream* inStream, - ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); -SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); - -/* ---------- One Call Interface ---------- */ - -/* LzmaEncode -Return code: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater - SZ_ERROR_OUTPUT_EOF - output buffer overflow - SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) -*/ - -SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, - const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, - ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); -} - -#endif diff --git a/crnlib/lzma_LzmaLib.cpp b/crnlib/lzma_LzmaLib.cpp deleted file mode 100644 index 50f6ad19..00000000 --- a/crnlib/lzma_LzmaLib.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* LzmaLib.c -- LZMA library wrapper -2008-08-05 -Igor Pavlov -Public domain */ -#include "crn_core.h" -#include "lzma_LzmaEnc.h" -#include "lzma_LzmaDec.h" -#include "lzma_Alloc.h" -#include "lzma_LzmaLib.h" - -namespace crnlib { - -static void* SzAlloc(void* /* p */, size_t size) { - return MyAlloc(size); -} -static void SzFree(void* /* p */, void* address) { - MyFree(address); -} -static ISzAlloc g_Alloc = {SzAlloc, SzFree}; - -MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, - unsigned char* outProps, size_t* outPropsSize, - int level, /* 0 <= level <= 9, default = 5 */ - unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ - int lc, /* 0 <= lc <= 8, default = 3 */ - int lp, /* 0 <= lp <= 4, default = 0 */ - int pb, /* 0 <= pb <= 4, default = 2 */ - int fb, /* 5 <= fb <= 273, default = 32 */ - int numThreads /* 1 or 2, default = 2 */ - ) { - CLzmaEncProps props; - LzmaEncProps_Init(&props); - props.level = level; - props.dictSize = dictSize; - props.lc = lc; - props.lp = lp; - props.pb = pb; - props.fb = fb; - props.numThreads = numThreads; - - return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, - NULL, &g_Alloc, &g_Alloc); -} - -MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, - const unsigned char* props, size_t propsSize) { - ELzmaStatus status; - return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); -} -} diff --git a/crnlib/lzma_MyVersion.h b/crnlib/lzma_MyVersion.h deleted file mode 100644 index 07e27917..00000000 --- a/crnlib/lzma_MyVersion.h +++ /dev/null @@ -1,8 +0,0 @@ -#define MY_VER_MAJOR 4 -#define MY_VER_MINOR 63 -#define MY_VER_BUILD 0 -#define MY_VERSION "4.63" -#define MY_7ZIP_VERSION "7-Zip 4.63" -#define MY_DATE "2008-12-31" -#define MY_COPYRIGHT "Copyright (c) 1999-2008 Igor Pavlov" -#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " " MY_DATE diff --git a/crnlib/lzma_Threads.cpp b/crnlib/lzma_Threads.cpp deleted file mode 100644 index aae6d4ff..00000000 --- a/crnlib/lzma_Threads.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* Threads.c -- multithreading library -2008-08-05 -Igor Pavlov -Public domain */ -#include "crn_core.h" -#include "lzma_Threads.h" -#include - -namespace crnlib { - -static WRes GetError() { - DWORD res = GetLastError(); - return (res) ? (WRes)(res) : 1; -} - -WRes HandleToWRes(HANDLE h) { - return (h != 0) ? 0 : GetError(); -} -WRes BOOLToWRes(BOOL v) { - return v ? 0 : GetError(); -} - -static WRes MyCloseHandle(HANDLE* h) { - if (*h != NULL) - if (!CloseHandle(*h)) - return GetError(); - *h = NULL; - return 0; -} - -WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter) { - unsigned threadId; /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ - thread->handle = - /* CreateThread(0, 0, startAddress, parameter, 0, &threadId); */ - (HANDLE)_beginthreadex(NULL, 0, startAddress, parameter, 0, &threadId); - /* maybe we must use errno here, but probably GetLastError() is also OK. */ - return HandleToWRes(thread->handle); -} - -WRes WaitObject(HANDLE h) { - return (WRes)WaitForSingleObject(h, INFINITE); -} - -WRes Thread_Wait(CThread* thread) { - if (thread->handle == NULL) - return 1; - return WaitObject(thread->handle); -} - -WRes Thread_Close(CThread* thread) { - return MyCloseHandle(&thread->handle); -} - -WRes Event_Create(CEvent* p, BOOL manualReset, int initialSignaled) { - p->handle = CreateEvent(NULL, manualReset, (initialSignaled ? TRUE : FALSE), NULL); - return HandleToWRes(p->handle); -} - -WRes ManualResetEvent_Create(CManualResetEvent* p, int initialSignaled) { - return Event_Create(p, TRUE, initialSignaled); -} -WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* p) { - return ManualResetEvent_Create(p, 0); -} - -WRes AutoResetEvent_Create(CAutoResetEvent* p, int initialSignaled) { - return Event_Create(p, FALSE, initialSignaled); -} -WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* p) { - return AutoResetEvent_Create(p, 0); -} - -WRes Event_Set(CEvent* p) { - return BOOLToWRes(SetEvent(p->handle)); -} -WRes Event_Reset(CEvent* p) { - return BOOLToWRes(ResetEvent(p->handle)); -} -WRes Event_Wait(CEvent* p) { - return WaitObject(p->handle); -} -WRes Event_Close(CEvent* p) { - return MyCloseHandle(&p->handle); -} - -WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount) { - p->handle = CreateSemaphore(NULL, (LONG)initiallyCount, (LONG)maxCount, NULL); - return HandleToWRes(p->handle); -} - -WRes Semaphore_Release(CSemaphore* p, LONG releaseCount, LONG* previousCount) { - return BOOLToWRes(ReleaseSemaphore(p->handle, releaseCount, previousCount)); -} -WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 releaseCount) { - return Semaphore_Release(p, (LONG)releaseCount, NULL); -} -WRes Semaphore_Release1(CSemaphore* p) { - return Semaphore_ReleaseN(p, 1); -} - -WRes Semaphore_Wait(CSemaphore* p) { - return WaitObject(p->handle); -} -WRes Semaphore_Close(CSemaphore* p) { - return MyCloseHandle(&p->handle); -} - -WRes CriticalSection_Init(CCriticalSection* p) { -#ifdef _MSC_VER - /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ - __try { - InitializeCriticalSection(p); - /* InitializeCriticalSectionAndSpinCount(p, 0); */ - } __except (EXCEPTION_EXECUTE_HANDLER) { - return 1; - } -#else - InitializeCriticalSection(p); -#endif - return 0; -} -} diff --git a/crnlib/lzma_Threads.h b/crnlib/lzma_Threads.h deleted file mode 100644 index e747d818..00000000 --- a/crnlib/lzma_Threads.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Threads.h -- multithreading library -2008-11-22 : Igor Pavlov : Public domain */ - -#ifndef __7Z_THRESDS_H -#define __7Z_THRESDS_H - -#include "lzma_Types.h" - -namespace crnlib { - -typedef struct _CThread { - HANDLE handle; -} CThread; - -#define Thread_Construct(thread) (thread)->handle = NULL -#define Thread_WasCreated(thread) ((thread)->handle != NULL) - -typedef unsigned THREAD_FUNC_RET_TYPE; -#define THREAD_FUNC_CALL_TYPE MY_STD_CALL -#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE - -WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter); -WRes Thread_Wait(CThread* thread); -WRes Thread_Close(CThread* thread); - -typedef struct _CEvent { - HANDLE handle; -} CEvent; - -typedef CEvent CAutoResetEvent; -typedef CEvent CManualResetEvent; - -#define Event_Construct(event) (event)->handle = NULL -#define Event_IsCreated(event) ((event)->handle != NULL) - -WRes ManualResetEvent_Create(CManualResetEvent* event, int initialSignaled); -WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* event); -WRes AutoResetEvent_Create(CAutoResetEvent* event, int initialSignaled); -WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* event); -WRes Event_Set(CEvent* event); -WRes Event_Reset(CEvent* event); -WRes Event_Wait(CEvent* event); -WRes Event_Close(CEvent* event); - -typedef struct _CSemaphore { - HANDLE handle; -} CSemaphore; - -#define Semaphore_Construct(p) (p)->handle = NULL - -WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount); -WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 num); -WRes Semaphore_Release1(CSemaphore* p); -WRes Semaphore_Wait(CSemaphore* p); -WRes Semaphore_Close(CSemaphore* p); - -typedef CRITICAL_SECTION CCriticalSection; - -WRes CriticalSection_Init(CCriticalSection* p); -#define CriticalSection_Delete(p) DeleteCriticalSection(p) -#define CriticalSection_Enter(p) EnterCriticalSection(p) -#define CriticalSection_Leave(p) LeaveCriticalSection(p) -} - -#endif diff --git a/crnlib/lzma_Types.h b/crnlib/lzma_Types.h deleted file mode 100644 index 7ff869dd..00000000 --- a/crnlib/lzma_Types.h +++ /dev/null @@ -1,214 +0,0 @@ -/* Types.h -- Basic types -2008-11-23 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -#include - -#if defined(_WIN32) -#include -#define COMPRESS_MF_MT -#endif - -namespace crnlib { - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - -#ifdef _WIN32 -typedef DWORD WRes; -#else -typedef int WRes; -#endif - -#ifndef RINOK -#define RINOK(x) \ - { \ - int __result__ = (x); \ - if (__result__ != 0) \ - return __result__; \ - } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - -typedef int Bool; -#define True 1 -#define False 0 - -#if defined(_WIN32) - -#if defined(_MSC_VER) && _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_CDECL __cdecl -#define MY_STD_CALL __stdcall -#define MY_FAST_CALL MY_NO_INLINE __fastcall - -#else - -#define MY_CDECL -#define MY_STD_CALL -#define MY_FAST_CALL - -#endif - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct -{ - SRes (*Read)(void* p, void* buf, size_t* size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) < input(*size)) is allowed */ -} ISeqInStream; - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size); -SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf); - -typedef struct -{ - size_t (*Write)(void* p, const void* buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -} ISeqOutStream; - -typedef enum { - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - -typedef struct -{ - SRes (*Read)(void* p, void* buf, size_t* size); /* same as ISeqInStream::Read */ - SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); -} ISeekInStream; - -typedef struct -{ - SRes (*Look)(void* p, void** buf, size_t* size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(void* p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(void* p, void* buf, size_t* size); - /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); -} ILookInStream; - -SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size); -SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType); -SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size); - -#define LookToRead_BUF_SIZE (1 << 14) - -typedef struct -{ - ILookInStream s; - ISeekInStream* realStream; - size_t pos; - size_t size; - Byte buf[LookToRead_BUF_SIZE]; -} CLookToRead; - -void LookToRead_CreateVTable(CLookToRead* p, int lookahead); -void LookToRead_Init(CLookToRead* p); - -typedef struct -{ - ISeqInStream s; - ILookInStream* realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook* p); - -typedef struct -{ - ISeqInStream s; - ILookInStream* realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead* p); - -typedef struct -{ - SRes (*Progress)(void* p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. */ -} ICompressProgress; - -typedef struct -{ - void* (*Alloc)(void* p, size_t size); - void (*Free)(void* p, void* address); /* address can be 0 */ -} ISzAlloc; - -#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) -#define IAlloc_Free(p, a) (p)->Free((p), a) -} - -#endif