From 41153071a002296c65c20958e9fb921a8f82edf0 Mon Sep 17 00:00:00 2001
From: Yann Collet <yann.collet.73@gmail.com>
Date: Tue, 21 Dec 2021 08:52:50 -0800
Subject: [PATCH 01/52] updated manual

---
 doc/zstd_manual.html | 130 +++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 3d62f595281..1080614ea6b 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1083,7 +1083,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 </b></pre><BR>
 <a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
 
-<pre><b>unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+<pre><b>ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
 </b><p>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
   `srcSize` must be the _exact_ size of this series
        (i.e. there should be a frame boundary at `src + srcSize`)
@@ -1106,7 +1106,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
             however it does mean that all frame data must be present and valid. 
 </p></pre><BR>
 
-<pre><b>unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+<pre><b>ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
 </b><p>  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
   `srcSize` must be the _exact_ size of this series
        (i.e. there should be a frame boundary at `src + srcSize`)
@@ -1121,7 +1121,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 </b><p>  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
  @return : size of the Frame Header,
            or an error code (if srcSize is too small) 
@@ -1148,7 +1148,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
 </b><p> Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
  by merging them into into the literals of the next sequence.
 
@@ -1161,7 +1161,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
                       const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                       const void* src, size_t srcSize);
 </b><p> Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
@@ -1193,7 +1193,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize, unsigned magicVariant);
 </b><p> Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
 
@@ -1230,10 +1230,10 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 
 <a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
 
-<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
-size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
-size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-size_t ZSTD_estimateDCtxSize(void);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
 </b><p>  These functions make it possible to estimate memory usage
   of a future {D,C}Ctx, before its creation.
 
@@ -1258,11 +1258,11 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_estimateCStreamSize(int compressionLevel);
-size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
-size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-size_t ZSTD_estimateDStreamSize(size_t windowSize);
-size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
 </b><p>  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
   It will also consider src size to be arbitrarily "large", which is worst case.
   If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
@@ -1277,17 +1277,17 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
          In this case, get total size by adding ZSTD_estimate?DictSize 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
-size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
-size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
 </b><p>  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
   ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
   Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
  
 </p></pre><BR>
 
-<pre><b>ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
-ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    </b>/**< same as ZSTD_initStaticCCtx() */<b>
+<pre><b>ZSTDLIB_STATIC_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    </b>/**< same as ZSTD_initStaticCCtx() */<b>
 </b><p>  Initialize an object using a pre-allocated fixed-size buffer.
   workspace: The memory area to emplace the object into.
              Provided pointer *must be 8-bytes aligned*.
@@ -1310,7 +1310,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    </b>/**< same as ZSTD_initStaticDCtx() */<b>
+<pre><b>ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    </b>/**< same as ZSTD_initStaticDCtx() */<b>
 </b></pre><BR>
 <pre><b>typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
@@ -1327,9 +1327,9 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 </p></pre><BR>
 
 <pre><b>typedef struct POOL_ctx_s ZSTD_threadPool;
-ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
-void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  </b>/* accept NULL pointer */<b>
-size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
+ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  </b>/* accept NULL pointer */<b>
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
 </b><p>  These prototypes make it possible to share a thread pool among multiple compression contexts.
   This can limit resources for applications with multiple threads where each one uses
   a threaded compression mode (via ZSTD_c_nbWorkers parameter).
@@ -1343,7 +1343,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 
 <a name="Chapter17"></a><h2>Advanced compression functions</h2><pre></pre>
 
-<pre><b>ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+<pre><b>ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 </b><p>  Create a digested dictionary for compression
   Dictionary content is just referenced, not duplicated.
   As a consequence, `dictBuffer` **must** outlive CDict,
@@ -1351,22 +1351,22 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
   note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef 
 </p></pre><BR>
 
-<pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 </b><p> @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  `estimatedSrcSize` value is optional, select 0 if not known 
 </p></pre><BR>
 
-<pre><b>ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 </b><p>  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
   All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 </b><p>  Ensure param values remain within authorized range.
  @return 0 on success, or an error code (can be checked with ZSTD_isError()) 
 </p></pre><BR>
 
-<pre><b>ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
 </b><p>  optimize params for a given `srcSize` and `dictSize`.
  `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
  `dictSize` must be `0` when there is no dictionary.
@@ -1396,31 +1396,31 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
   This prototype will generate compilation warnings. 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 </b><p>  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
   It saves some memory, but also requires that `dict` outlives its usage within `cctx` 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 </b><p>  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
   how to load the dictionary (by copy ? by reference ?)
   and how to interpret it (automatic ? force raw mode ? full mode only ?) 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 </b><p>  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
   how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
 </b><p>  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
   and store it into int* value.
  @return : 0, or an error code (which can be tested with ZSTD_isError()).
  
 </p></pre><BR>
 
-<pre><b>ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  </b>/* accept NULL pointer */<b>
+<pre><b>ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  </b>/* accept NULL pointer */<b>
 </b><p>  Quick howto :
   - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
   - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
@@ -1439,24 +1439,24 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
 </b><p>  Reset params to default values.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
 </b><p>  Initializes the compression parameters of cctxParams according to
   compression level. All other parameters are reset to their default values.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
 </b><p>  Initializes the compression and frame parameters of cctxParams according to
   params. All other parameters are reset to their default values.
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
 </b><p>  Similar to ZSTD_CCtx_setParameter.
   Set one compression parameter, selected by enum ZSTD_cParameter.
   Parameters must be applied to a ZSTD_CCtx using
@@ -1466,14 +1466,14 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 </b><p> Similar to ZSTD_CCtx_getParameter.
  Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
  @result : 0, or an error code (which can be tested with ZSTD_isError()).
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
         ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
 </b><p>  Apply a set of ZSTD_CCtx_params to the compression context.
   This can be done even after compression is started,
@@ -1483,7 +1483,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_compressStream2_simpleArgs (
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs (
                 ZSTD_CCtx* cctx,
                 void* dst, size_t dstCapacity, size_t* dstPos,
           const void* src, size_t srcSize, size_t* srcPos,
@@ -1497,40 +1497,40 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
 
 <a name="Chapter18"></a><h2>Advanced decompression functions</h2><pre></pre>
 
-<pre><b>unsigned ZSTD_isFrame(const void* buffer, size_t size);
+<pre><b>ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
 </b><p>  Tells if the content of `buffer` starts with a valid Frame Identifier.
   Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
   Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
   Note 3 : Skippable Frame Identifiers are considered valid. 
 </p></pre><BR>
 
-<pre><b>ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
 </b><p>  Create a digested dictionary, ready to start decompression operation without startup delay.
   Dictionary content is referenced, and therefore stays in dictBuffer.
   It is important that dictBuffer outlives DDict,
   it must remain read accessible throughout the lifetime of DDict 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 </b><p>  Same as ZSTD_DCtx_loadDictionary(),
   but references `dict` content instead of copying it into `dctx`.
   This saves memory if `dict` remains around.,
   However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 </b><p>  Same as ZSTD_DCtx_loadDictionary(),
   but gives direct control over
   how to load the dictionary (by copy ? by reference ?)
   and how to interpret it (automatic ? force raw mode ? full mode only ?). 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 </b><p>  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
   how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
 </b><p>  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
   This protects a decoder context from reserving too much memory for itself (potential attack scenario).
   This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
@@ -1539,7 +1539,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
 </b><p>  Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
   and store it into int* value.
  @return : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -1555,7 +1555,7 @@ <h3>Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
  @return : 0, or an error code (which can be tested using ZSTD_isError()). 
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_decompressStream_simpleArgs (
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs (
                 ZSTD_DCtx* dctx,
                 void* dst, size_t dstCapacity, size_t* dstPos,
           const void* src, size_t srcSize, size_t* srcPos);
@@ -1689,7 +1689,7 @@ <h3>Advanced Streaming compression functions</h3><pre></pre><b><pre></pre></b><B
     unsigned nbActiveWorkers;      </b>/* MT only : nb of workers actively compressing at probe time */<b>
 } ZSTD_frameProgression;
 </b></pre><BR>
-<pre><b>size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
 </b><p>  Tell how many bytes are ready to be flushed immediately.
   Useful for multithreading scenarios (nbWorkers >= 1).
   Probe the oldest active job, defined as oldest job not yet entirely flushed,
@@ -1705,7 +1705,7 @@ <h3>Advanced Streaming compression functions</h3><pre></pre><b><pre></pre></b><B
 </p></pre><BR>
 
 <h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></pre></b><BR>
-<pre><b>size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
 </b><p>
      ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
      ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
@@ -1715,7 +1715,7 @@ <h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></pre></b>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
 </b><p>
      ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
      ZSTD_DCtx_refDDict(zds, ddict);
@@ -1725,7 +1725,7 @@ <h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></pre></b>
  
 </p></pre><BR>
 
-<pre><b>size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 </b><p>
      ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
 
@@ -1769,10 +1769,10 @@ <h3>Advanced Streaming decompression functions</h3><pre></pre><b><pre></pre></b>
   `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
 <BR></pre>
 
-<h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
-size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
+<h3>Buffer-less streaming compression functions</h3><pre></pre><b><pre>ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); </b>/**< note: fails if cdict==NULL */<b>
+ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); </b>/**<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </pre></b><BR>
 <pre><b>size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); </b>/**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </b></pre><BR>
@@ -1858,12 +1858,12 @@ <h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef
     unsigned checksumFlag;
 } ZSTD_frameHeader;
 </pre></b><BR>
-<pre><b>size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
+<pre><b>ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input */<b>
 </b>/*! ZSTD_getFrameHeader_advanced() :<b>
  *  same as ZSTD_getFrameHeader(),
  *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
-size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
-size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  </b>/**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */<b>
 </b><p>  decode Frame Header, or requires larger `srcSize`.
  @return : 0, `zfhPtr` is correctly filled,
           >0, `srcSize` is too small, value is wanted `srcSize` amount,
@@ -1899,10 +1899,10 @@ <h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef
         Use ZSTD_insertBlock() for such a case.
 </p></pre><BR>
 
-<h3>Raw zstd block functions</h3><pre></pre><b><pre>size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
-size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  </b>/**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */<b>
+<h3>Raw zstd block functions</h3><pre></pre><b><pre>ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_STATIC_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  </b>/**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */<b>
 </pre></b><BR>
 </html>
 </body>

From 14a0eaf73ba84ab8eabf9ab44b278a13cc0f0b6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D9=85=D9=87=D8=AF=D9=8A=20=D8=B4=D9=8A=D9=86=D9=88=D9=86?=
 =?UTF-8?q?=20=28Mehdi=20Chinoune=29?=
 <79349457+MehdiChinoune@users.noreply.github.com>
Date: Wed, 22 Dec 2021 10:08:49 +0100
Subject: [PATCH 02/52] Fix zstd-static output name with MINGW/Clang

---
 build/cmake/lib/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 7ba46933024..ec8480d5dbf 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -106,7 +106,7 @@ if (MSVC)
 endif ()
 
 # With MSVC static library needs to be renamed to avoid conflict with import library
-if (MSVC OR (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
+if (MSVC OR (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT MINGW))
     set(STATIC_LIBRARY_BASE_NAME zstd_static)
 else ()
     set(STATIC_LIBRARY_BASE_NAME zstd)

From 29e44bc5547f88ab5c1942d2514c2d524100b71c Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Wed, 22 Dec 2021 20:17:16 -0500
Subject: [PATCH 03/52] meson: fix MSVC support

Regression from commit a5f2c45528032ed20c33e0f8cd2c163a800a0017. It is
not possible to unconditionally add the asm sources, since not all
compilers understand the .s file extension.

Specifically for meson, only compilers inheriting from the GNU mixin
will allow a .s file at configure time.

zstd doesn't support asm for MSVC for the same basic reason; if/when
Windows asm support is added, it would involve preprocessing with nasm,
most likely.
---
 build/meson/lib/meson.build | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build
index 1f4f8c25db3..2a8c55fbf52 100644
--- a/build/meson/lib/meson.build
+++ b/build/meson/lib/meson.build
@@ -37,7 +37,6 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
   join_paths(zstd_rootdir, 'lib/compress/zstd_opt.c'),
   join_paths(zstd_rootdir, 'lib/compress/zstd_ldm.c'),
   join_paths(zstd_rootdir, 'lib/decompress/huf_decompress.c'),
-  join_paths(zstd_rootdir, 'lib/decompress/huf_decompress_amd64.S'),
   join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress.c'),
   join_paths(zstd_rootdir, 'lib/decompress/zstd_decompress_block.c'),
   join_paths(zstd_rootdir, 'lib/decompress/zstd_ddict.c'),
@@ -46,6 +45,12 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
   join_paths(zstd_rootdir, 'lib/dictBuilder/divsufsort.c'),
   join_paths(zstd_rootdir, 'lib/dictBuilder/zdict.c')]
 
+# really we need anything that defines __GNUC__ as that is what ZSTD_ASM_SUPPORTED is gated on
+# but these are the two compilers that are supported in tree and actually handle this correctly
+if [compiler_gcc, compiler_clang].contains(cc_id)
+  libzstd_sources += join_paths(zstd_rootdir, 'lib/decompress/huf_decompress_amd64.S')
+endif
+
 # Explicit define legacy support
 add_project_arguments('-DZSTD_LEGACY_SUPPORT=@0@'.format(legacy_level),
   language: 'c')

From b77fcac61fadf665f7522dd0c2e44b373eb7d57d Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 23 Dec 2021 12:03:33 -0800
Subject: [PATCH 04/52] change ZSTD_storeSeq() interface to accept matchLength

instead of mlBase.

This removes the need to do `- MINMATCH` at every call site.

The new interface contract is checked with an `assert()`.
---
 lib/compress/zstd_compress.c          |  4 ++--
 lib/compress/zstd_compress_internal.h | 21 ++++++++++++---------
 lib/compress/zstd_double_fast.c       | 20 ++++++++++----------
 lib/compress/zstd_fast.c              | 18 +++++++++---------
 lib/compress/zstd_lazy.c              | 10 +++++-----
 lib/compress/zstd_ldm.c               |  2 +-
 lib/compress/zstd_opt.c               |  2 +-
 tests/decodecorpus.c                  |  2 +-
 8 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 34f8e970f1a..24a740850db 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -5816,7 +5816,7 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
         }
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
         ip += matchLength + litLength;
     }
     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
@@ -5942,7 +5942,7 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
         ip += matchLength + litLength;
     }
     DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 7360ee8e221..ca63903fe46 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -601,13 +601,13 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
 }
 
 /*! ZSTD_storeSeq() :
- *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
  *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
- *  `mlBase` : matchLength - MINMATCH
+ *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
 */
 HINT_INLINE UNUSED_ATTR
-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t matchLength)
 {
     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
     BYTE const* const litEnd = literals + litLength;
@@ -616,7 +616,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+               pos, (U32)litLength, (U32)matchLength, (U32)offCode);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -650,12 +650,15 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     seqStorePtr->sequences[0].offset = offCode + 1;
 
     /* match Length */
-    if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
-        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
-        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    assert(matchLength >= MINMATCH);
+    {   size_t const mlBase = matchLength - MINMATCH;
+        if (mlBase>0xFFFF) {
+            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
+            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+        }
+        seqStorePtr->sequences[0].matchLength = (U16)mlBase;
     }
-    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
 
     seqStorePtr->sequences++;
 }
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index b9393b6a4e0..c9f7b9d746d 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -131,7 +131,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
             if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                 ip++;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
                 goto _match_stored;
             }
 
@@ -217,7 +217,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
             hashLong[hl1] = (U32)(ip1 - base);
         }
 
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
 _match_stored:
         /* match found */
@@ -243,7 +243,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength);
                 ip += rLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -328,7 +328,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
             goto _match_stored;
         }
 
@@ -419,7 +419,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         offset_2 = offset_1;
         offset_1 = offset;
 
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
 _match_stored:
         /* match found */
@@ -448,7 +448,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
@@ -585,7 +585,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -596,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -621,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -653,7 +653,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index 2bae2f70e0f..29c00866ef5 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -282,7 +282,7 @@ ZSTD_compressBlock_fast_noDict_generic(
     /* Count the forward length. */
     mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
 
-    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength - MINMATCH);
+    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
 
     ip0 += mLength;
     anchor = ip0;
@@ -306,7 +306,7 @@ ZSTD_compressBlock_fast_noDict_generic(
                 { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                 ip0 += rLength;
-                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength);
                 anchor = ip0;
                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
     }   }   }
@@ -439,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
         } else if ( (matchIndex <= prefixStartIndex) ) {
             size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
             U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -459,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 } /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
             }
         } else if (MEM_read32(match) != MEM_read32(ip)) {
             /* it's not a match, and we're not going to check the dictionary */
@@ -474,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
         }
 
         /* match found */
@@ -499,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
@@ -598,7 +598,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength);
             ip += rLength;
             anchor = ip;
         } else {
@@ -614,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset_2 = offset_1; offset_1 = offset;  /* update offset history */
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
                 ip += mLength;
                 anchor = ip;
         }   }
@@ -633,7 +633,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2);
                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index c40473cad7d..83831c67345 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1668,7 +1668,7 @@ ZSTD_compressBlock_lazy_generic(
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -1685,7 +1685,7 @@ ZSTD_compressBlock_lazy_generic(
                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                     offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
                     ip += matchLength;
                     anchor = ip;
                     continue;
@@ -1700,7 +1700,7 @@ ZSTD_compressBlock_lazy_generic(
                 /* store sequence */
                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -2010,7 +2010,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -2028,7 +2028,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 19b99f27815..1e7e626d2f7 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -710,7 +710,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             /* Store the sequence */
             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                           sequence.offset + ZSTD_REP_MOVE,
-                          sequence.matchLength - MINMATCH);
+                          sequence.matchLength);
             ip += sequence.matchLength;
         }
     }
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 7e5eb8b5764..2356fa2ae11 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1286,7 +1286,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
                     assert(anchor + llen <= iend);
                     ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
                     anchor += advance;
                     ip = anchor;
             }   }
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 4f4db85753b..75d89f52c0c 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -759,7 +759,7 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
         }
         /* use libzstd sequence handling */
         ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
-                      offsetCode, matchLen - MINMATCH);
+                      offsetCode, matchLen);
 
         literalsSize -= literalLen;
         excessMatch -= (matchLen - MIN_SEQ_LEN);

From e145b58cfdf94a300295be022d4b41d6ca70d8f1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 23 Dec 2021 13:39:46 -0800
Subject: [PATCH 05/52] changed seqDef.matchLength into seqDef.mlBase

since this is effectively what is stored in this field (== matchLength - MINMATCH).
This makes it clearer what needs to be done when reading from / writing to this field.
---
 lib/common/zstd_internal.h             | 6 +++---
 lib/compress/zstd_compress.c           | 6 +++---
 lib/compress/zstd_compress_internal.h  | 2 +-
 lib/compress/zstd_compress_sequences.c | 6 +++---
 tests/decodecorpus.c                   | 4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 1c053b4120f..0fba5cc1f56 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -285,9 +285,9 @@ typedef enum {
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;         /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
+    U32 offset;    /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
     U16 litLength;
-    U16 matchLength;
+    U16 mlBase;    /* mlBase == matchLength - MINMATCH */
 } seqDef;
 
 /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
@@ -329,7 +329,7 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
 {
     ZSTD_sequenceLength seqLen;
     seqLen.litLength = seq->litLength;
-    seqLen.matchLength = seq->matchLength + MINMATCH;
+    seqLen.matchLength = seq->mlBase + MINMATCH;
     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
             seqLen.litLength += 0xFFFF;
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 24a740850db..14595669107 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2396,7 +2396,7 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
     assert(nbSeq <= seqStorePtr->maxNbSeq);
     for (u=0; u<nbSeq; u++) {
         U32 const llv = sequences[u].litLength;
-        U32 const mlv = sequences[u].matchLength;
+        U32 const mlv = sequences[u].mlBase;
         llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
         ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
@@ -2912,7 +2912,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
     for (i = 0; i < seqStoreSeqSize; ++i) {
         U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
         outSeqs[i].litLength = seqStoreSeqs[i].litLength;
-        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
+        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
         outSeqs[i].rep = 0;
 
         if (i == seqStore->longLengthPos) {
@@ -3385,7 +3385,7 @@ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
     size_t i;
     for (i = 0; i < nbSeqs; ++i) {
         seqDef seq = seqStore->sequencesStart[i];
-        matchBytes += seq.matchLength + MINMATCH;
+        matchBytes += seq.mlBase + MINMATCH;
         if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
             matchBytes += 0x10000;
         }
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index ca63903fe46..87536b38c8d 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -657,7 +657,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
             seqStorePtr->longLengthType = ZSTD_llt_matchLength;
             seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
         }
-        seqStorePtr->sequences[0].matchLength = (U16)mlBase;
+        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
     }
 
     seqStorePtr->sequences++;
diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c
index fa31e6e946f..8d1439045d3 100644
--- a/lib/compress/zstd_compress_sequences.c
+++ b/lib/compress/zstd_compress_sequences.c
@@ -313,7 +313,7 @@ ZSTD_encodeSequences_body(
     FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
     if (longOffsets) {
         U32 const ofBits = ofCodeTable[nbSeq-1];
@@ -339,7 +339,7 @@ ZSTD_encodeSequences_body(
             U32  const mlBits = ML_bits[mlCode];
             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                         (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].mlBase + MINMATCH,
                         (unsigned)sequences[n].offset);
                                                                             /* 32b*/  /* 64b*/
                                                                             /* (7)*/  /* (7)*/
@@ -351,7 +351,7 @@ ZSTD_encodeSequences_body(
                 BIT_flushBits(&blockStream);                                /* (7)*/
             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
             if (longOffsets) {
                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 75d89f52c0c..2343ce6391c 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -949,7 +949,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
         FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
         BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
         if (MEM_32bits()) BIT_flushBits(&blockStream);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
         if (MEM_32bits()) BIT_flushBits(&blockStream);
         BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
         BIT_flushBits(&blockStream);
@@ -971,7 +971,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
                     BIT_flushBits(&blockStream);                                /* (7)*/
                 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-                BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+                BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
                 BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
                 BIT_flushBits(&blockStream);                                    /* (7)*/

From 75525fcb9f4c7ed1bb39d10aa43ce529c950d208 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 23 Dec 2021 17:43:12 -0800
Subject: [PATCH 06/52] library optimization flag can be selected on command
 line again

`CFLAGS=-O0 make`
will now use `-O0` instead of enforcing `-O3`
which used to be the behavior before introduction of `libzstd.mk`.

This should result in faster tests,
since a few tests depend on this capability for faster roundtrips.
---
 lib/libzstd.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/libzstd.mk b/lib/libzstd.mk
index af12daffe12..c04957c6fae 100644
--- a/lib/libzstd.mk
+++ b/lib/libzstd.mk
@@ -79,7 +79,7 @@ endif
   CFLAGS += -fno-stack-protector -fomit-frame-pointer -fno-ident \
             -DDYNAMIC_BMI2=0 -DNDEBUG
 else
-  CFLAGS += -O3
+  CFLAGS ?= -O3
 endif
 
 DEBUGLEVEL ?= 0

From aeff1283311b6be38c5efc24c019e0f72ce60046 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 23 Dec 2021 17:56:08 -0800
Subject: [PATCH 07/52] change seqDef.offset into seqDef.offBase

to better reflect the value stored in this field.
---
 lib/common/zstd_internal.h              |  2 +-
 lib/compress/zstd_compress.c            | 18 +++++++++---------
 lib/compress/zstd_compress_internal.h   |  2 +-
 lib/compress/zstd_compress_sequences.c  | 14 +++++++-------
 lib/compress/zstd_compress_superblock.c |  2 +-
 lib/dictBuilder/zdict.c                 |  4 ++--
 tests/decodecorpus.c                    |  4 ++--
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 0fba5cc1f56..32ef0b43eb4 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -285,7 +285,7 @@ typedef enum {
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;    /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
+    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
     U16 litLength;
     U16 mlBase;    /* mlBase == matchLength - MINMATCH */
 } seqDef;
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 14595669107..0635c343767 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2398,7 +2398,7 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         U32 const llv = sequences[u].litLength;
         U32 const mlv = sequences[u].mlBase;
         llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
-        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
     }
     if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
@@ -2910,7 +2910,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
     assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
     ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
     for (i = 0; i < seqStoreSeqSize; ++i) {
-        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
+        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
         outSeqs[i].litLength = seqStoreSeqs[i].litLength;
         outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
         outSeqs[i].rep = 0;
@@ -2923,9 +2923,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
             }
         }
 
-        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
+        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
             /* Derive the correct offset corresponding to a repcode */
-            outSeqs[i].rep = seqStoreSeqs[i].offset;
+            outSeqs[i].rep = seqStoreSeqs[i].offBase;
             if (outSeqs[i].litLength != 0) {
                 rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
             } else {
@@ -2940,7 +2940,7 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
         /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
            so we provide seqStoreSeqs[i].offset - 1 */
         updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
-                                         seqStoreSeqs[i].offset - 1,
+                                         seqStoreSeqs[i].offBase - 1,
                                          seqStoreSeqs[i].litLength == 0);
         literalsRead += outSeqs[i].litLength;
     }
@@ -3461,8 +3461,8 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
     for (; idx < nbSeq; ++idx) {
         seqDef* const seq = seqStore->sequencesStart + idx;
         U32 const ll0 = (seq->litLength == 0);
-        U32 offCode = seq->offset - 1;
-        assert(seq->offset > 0);
+        U32 offCode = seq->offBase - 1;
+        assert(seq->offBase > 0);
         if (offCode <= ZSTD_REP_MOVE) {
             U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
             U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
@@ -3471,13 +3471,13 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
              * repcode history.
              */
             if (dRawOffset != cRawOffset) {
-                seq->offset = cRawOffset + ZSTD_REP_NUM;
+                seq->offBase = cRawOffset + ZSTD_REP_NUM;
             }
         }
         /* Compression repcode history is always updated with values directly from the unmodified seqStore.
          * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
          */
-        *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0);
+        *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offBase - 1, ll0);
         *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
     }
 }
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 87536b38c8d..1bc221b6dab 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -647,7 +647,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
-    seqStorePtr->sequences[0].offset = offCode + 1;
+    seqStorePtr->sequences[0].offBase = offCode + 1;
 
     /* match Length */
     assert(matchLength >= MINMATCH);
diff --git a/lib/compress/zstd_compress_sequences.c b/lib/compress/zstd_compress_sequences.c
index 8d1439045d3..f1e40af2ea0 100644
--- a/lib/compress/zstd_compress_sequences.c
+++ b/lib/compress/zstd_compress_sequences.c
@@ -319,13 +319,13 @@ ZSTD_encodeSequences_body(
         U32 const ofBits = ofCodeTable[nbSeq-1];
         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
         if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
             BIT_flushBits(&blockStream);
         }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
                     ofBits - extraBits);
     } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
     }
     BIT_flushBits(&blockStream);
 
@@ -340,7 +340,7 @@ ZSTD_encodeSequences_body(
             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                         (unsigned)sequences[n].litLength,
                         (unsigned)sequences[n].mlBase + MINMATCH,
-                        (unsigned)sequences[n].offset);
+                        (unsigned)sequences[n].offBase);
                                                                             /* 32b*/  /* 64b*/
                                                                             /* (7)*/  /* (7)*/
             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
@@ -356,13 +356,13 @@ ZSTD_encodeSequences_body(
             if (longOffsets) {
                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                 if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
                     BIT_flushBits(&blockStream);                            /* (7)*/
                 }
-                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
                             ofBits - extraBits);                            /* 31 */
             } else {
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);     /* 31 */
             }
             BIT_flushBits(&blockStream);                                    /* (7)*/
             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 67e1abb11e6..d270a1c6a9c 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -539,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             repcodes_t rep;
             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
             for (seq = sstart; seq < sp; ++seq) {
-                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+                rep = ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
             }
             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
         }
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index 8b8b381edd9..006aba7c9c4 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -682,8 +682,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
 
             if (nbSeq >= 2) { /* rep offsets */
                 const seqDef* const seq = seqStorePtr->sequencesStart;
-                U32 offset1 = seq[0].offset - 3;
-                U32 offset2 = seq[1].offset - 3;
+                U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
+                U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
                 if (offset1 >= MAXREPOFFSET) offset1 = 0;
                 if (offset2 >= MAXREPOFFSET) offset2 = 0;
                 repOffsets[offset1] += 3;
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 2343ce6391c..2ba2a7d5ca0 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -951,7 +951,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
         if (MEM_32bits()) BIT_flushBits(&blockStream);
         BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
         if (MEM_32bits()) BIT_flushBits(&blockStream);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
         BIT_flushBits(&blockStream);
 
         {   size_t n;
@@ -973,7 +973,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
                 BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
+                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);         /* 31 */
                 BIT_flushBits(&blockStream);                                    /* (7)*/
         }   }
 

From 1aed962216373a6683ff6f26e4ae0ff8fa62f4e4 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 23 Dec 2021 21:58:08 -0800
Subject: [PATCH 08/52] introduce macros STORE_OFFSET() and STORE_REPCODE()

this meant to abstract the sumtype representation required
to transfert `offcode` to `ZSTD_storeSeq()`.

Unfortunately, the sumtype numeric representation is currently a leaky abstraction
that has permeated many other parts of the code,
especially within `zstd_lazy.c` and also within `zstd_opt.c` and `zstd_compress.c`.

While this PR makes a good job a transfering a large nb of call sites
to using the new macros, there are still a few sites where this transformation is more complex,
or where the numeric representation itself it used "as is".

One of the problematics area is the decision to use the numeric format of the sumtype
within the match finders of `zstd_lazy`.

This commit doesn't change the behavior, it only introduces and employes the macros,
but eventually the resulting code remains identical.

At target, if the numeric representation of the sumtype can be completely abstracted
and no other part of the code depends on it,
it will be possible to move it towards something slightly more efficient.
---
 lib/compress/zstd_compress.c          | 124 ++++++++++++++------------
 lib/compress/zstd_compress_internal.h |  27 ++++--
 lib/compress/zstd_double_fast.c       |  20 ++---
 lib/compress/zstd_fast.c              |  20 ++---
 lib/compress/zstd_lazy.c              |  26 +++---
 lib/compress/zstd_ldm.c               |   2 +-
 lib/compress/zstd_opt.c               |  41 +++++----
 tests/decodecorpus.c                  |  35 ++++----
 8 files changed, 158 insertions(+), 137 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 0635c343767..e3199515ae0 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -3449,11 +3449,16 @@ static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32
 
 /**
  * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
- * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within
- * the seqStore that may be invalid.
+ * due to emission of RLE/raw blocks that disturb the offset history,
+ * and replaces any repcodes within the seqStore that may be invalid.
  *
- * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in
- * accordance with the seqStore.
+ * dRepcodes are updated as would be on the decompression side.
+ * cRepcodes are updated exactly in accordance with the seqStore.
+ *
+ * Note : this function assumes seq->offBase respects the following numbering scheme :
+ *        0 : invalid
+ *        1-3 : repcode 1-3
+ *        4+ : real_offset+3
  */
 static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
                                           seqStore_t* const seqStore, U32 const nbSeq) {
@@ -3461,9 +3466,9 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
     for (; idx < nbSeq; ++idx) {
         seqDef* const seq = seqStore->sequencesStart + idx;
         U32 const ll0 = (seq->litLength == 0);
-        U32 offCode = seq->offBase - 1;
+        U32 const offCode = seq->offBase - 1;
         assert(seq->offBase > 0);
-        if (offCode <= ZSTD_REP_MOVE) {
+        if (offCode < ZSTD_REP_NUM) {
             U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
             U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
             /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
@@ -5738,39 +5743,39 @@ typedef struct {
     size_t posInSrc;        /* Number of bytes given by sequences provided so far */
 } ZSTD_sequencePosition;
 
-/* Returns a ZSTD error code if sequence is not valid */
-static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
-                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
-    size_t offsetBound;
-    U32 windowSize = 1 << windowLog;
+/* ZSTD_validateSequence() :
+ * @offCode : is presumed to follow format required by ZSTD_storeSeq()
+ * @returns a ZSTD error code if sequence is not valid
+ */
+static size_t
+ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                      size_t posInSrc, U32 windowLog, size_t dictSize)
+{
+    U32 const windowSize = 1 << windowLog;
     /* posInSrc represents the amount of data the the decoder would decode up to this point.
      * As long as the amount of data decoded is less than or equal to window size, offsets may be
      * larger than the total length of output decoded in order to reference the dict, even larger than
      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
      */
-    offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
-    RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
-    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
+    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
     return 0;
 }
 
 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
-static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
-    U32 offCode = rawOffset + ZSTD_REP_MOVE;
-    U32 repCode = 0;
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
+{
+    U32 offCode = STORE_OFFSET(rawOffset);
 
     if (!ll0 && rawOffset == rep[0]) {
-        repCode = 1;
+        offCode = STORE_REPCODE_1;
     } else if (rawOffset == rep[1]) {
-        repCode = 2 - ll0;
+        offCode = STORE_REPCODE(2 - ll0);
     } else if (rawOffset == rep[2]) {
-        repCode = 3 - ll0;
+        offCode = STORE_REPCODE(3 - ll0);
     } else if (ll0 && rawOffset == rep[0] - 1) {
-        repCode = 3;
-    }
-    if (repCode) {
-        /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
-        offCode = repCode - 1;
+        offCode = STORE_REPCODE_3;
     }
     return offCode;
 }
@@ -5778,18 +5783,17 @@ static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32
 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
  * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
  */
-static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                                             const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                                             const void* src, size_t blockSize) {
+static size_t
+ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+                                              ZSTD_sequencePosition* seqPos,
+                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                        const void* src, size_t blockSize)
+{
     U32 idx = seqPos->idx;
     BYTE const* ip = (BYTE const*)(src);
     const BYTE* const iend = ip + blockSize;
     repcodes_t updatedRepcodes;
     U32 dictSize;
-    U32 litLength;
-    U32 matchLength;
-    U32 ll0;
-    U32 offCode;
 
     if (cctx->cdict) {
         dictSize = (U32)cctx->cdict->dictContentSize;
@@ -5800,18 +5804,17 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
     }
     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
     for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
-        litLength = inSeqs[idx].litLength;
-        matchLength = inSeqs[idx].matchLength;
-        ll0 = litLength == 0;
-        offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        U32 const litLength = inSeqs[idx].litLength;
+        U32 const ll0 = (litLength == 0);
+        U32 const matchLength = inSeqs[idx].matchLength;
+        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
         updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
 
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize,
-                                                cctx->appliedParams.cParams.minMatch),
+                                                cctx->appliedParams.cParams.windowLog, dictSize),
                                                 "Sequence validation failed");
         }
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
@@ -5843,9 +5846,11 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
  * avoid splitting a match, or to avoid splitting a match such that it would produce a match
  * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
  */
-static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                                       const void* src, size_t blockSize) {
+static size_t
+ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                   const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                   const void* src, size_t blockSize)
+{
     U32 idx = seqPos->idx;
     U32 startPosInSequence = seqPos->posInSequence;
     U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
@@ -5855,10 +5860,6 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
     repcodes_t updatedRepcodes;
     U32 bytesAdjustment = 0;
     U32 finalMatchSplit = 0;
-    U32 litLength;
-    U32 matchLength;
-    U32 rawOffset;
-    U32 offCode;
 
     if (cctx->cdict) {
         dictSize = cctx->cdict->dictContentSize;
@@ -5872,9 +5873,10 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
     while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
         const ZSTD_Sequence currSeq = inSeqs[idx];
-        litLength = currSeq.litLength;
-        matchLength = currSeq.matchLength;
-        rawOffset = currSeq.offset;
+        U32 litLength = currSeq.litLength;
+        U32 matchLength = currSeq.matchLength;
+        U32 const rawOffset = currSeq.offset;
+        U32 offCode;
 
         /* Modify the sequence depending on where endPosInSequence lies */
         if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
@@ -5927,7 +5929,7 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
             }
         }
         /* Check if this offset can be represented with a repcode */
-        {   U32 ll0 = (litLength == 0);
+        {   U32 const ll0 = (litLength == 0);
             offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
             updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
         }
@@ -5935,8 +5937,7 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize,
-                                                   cctx->appliedParams.cParams.minMatch),
+                                                   cctx->appliedParams.cParams.windowLog, dictSize),
                                                    "Sequence validation failed");
         }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
@@ -5967,7 +5968,8 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
 typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                        const void* src, size_t blockSize);
-static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
+{
     ZSTD_sequenceCopier sequenceCopier = NULL;
     assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
     if (mode == ZSTD_sf_explicitBlockDelimiters) {
@@ -5981,12 +5983,15 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
 
 /* Compress, block-by-block, all of the sequences given.
  *
- * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
+ * Returns the cumulative size of all compressed blocks (including their headers),
+ * otherwise a ZSTD error.
  */
-static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-                                              void* dst, size_t dstCapacity,
-                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                                              const void* src, size_t srcSize) {
+static size_t
+ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                          const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                          const void* src, size_t srcSize)
+{
     size_t cSize = 0;
     U32 lastBlock;
     size_t blockSize;
@@ -5996,7 +6001,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
 
     BYTE const* ip = (BYTE const*)src;
     BYTE* op = (BYTE*)dst;
-    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+    ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
 
     DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
     /* Special case: empty frame */
@@ -6096,7 +6101,8 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
 
 size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                               const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                              const void* src, size_t srcSize) {
+                              const void* src, size_t srcSize)
+{
     BYTE* op = (BYTE*)dst;
     size_t cSize = 0;
     size_t compressedBlocksSize = 0;
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 1bc221b6dab..270354e44fa 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -501,15 +501,20 @@ typedef struct repcodes_s {
     U32 rep[3];
 } repcodes_t;
 
-MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+
+/* ZSTD_updateRep() :
+ * @offcode : expects a scale where 0,1,2 represent repcodes 1-3, and 2+ represents real_offset+2
+ */
+MEM_STATIC repcodes_t
+ZSTD_updateRep(U32 const rep[3], U32 const offcode, U32 const ll0)
 {
     repcodes_t newReps;
-    if (offset >= ZSTD_REP_NUM) {  /* full offset */
+    if (offcode >= ZSTD_REP_NUM) {  /* full offset */
         newReps.rep[2] = rep[1];
         newReps.rep[1] = rep[0];
-        newReps.rep[0] = offset - ZSTD_REP_MOVE;
+        newReps.rep[0] = offcode - ZSTD_REP_MOVE;
     } else {   /* repcode */
-        U32 const repCode = offset + ll0;
+        U32 const repCode = offcode + ll0;
         if (repCode > 0) {  /* note : if repCode==0, no change */
             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
             newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
@@ -600,14 +605,22 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
     while (ip < iend) *op++ = *ip++;
 }
 
+#define STORE_REPCODE_1 STORE_REPCODE(1)
+#define STORE_REPCODE_2 STORE_REPCODE(2)
+#define STORE_REPCODE_3 STORE_REPCODE(3)
+#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
+#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
+
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
- *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  @offBase_minus1 : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *                    Users should not specify the encoded value directly,
+ *                    instead use macros STORE_REPCODE_X and STORE_OFFSET().
  *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
 */
 HINT_INLINE UNUSED_ATTR
-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t matchLength)
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offBase_minus1, size_t matchLength)
 {
     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
     BYTE const* const litEnd = literals + litLength;
@@ -647,7 +660,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
-    seqStorePtr->sequences[0].offBase = offCode + 1;
+    seqStorePtr->sequences[0].offBase = offBase_minus1 + 1;
 
     /* match Length */
     assert(matchLength >= MINMATCH);
diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
index c9f7b9d746d..76933dea262 100644
--- a/lib/compress/zstd_double_fast.c
+++ b/lib/compress/zstd_double_fast.c
@@ -131,7 +131,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
             if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
                 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
                 ip++;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
                 goto _match_stored;
             }
 
@@ -217,7 +217,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
             hashLong[hl1] = (U32)(ip1 - base);
         }
 
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
 _match_stored:
         /* match found */
@@ -243,7 +243,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
                 ip += rLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -328,7 +328,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
             goto _match_stored;
         }
 
@@ -419,7 +419,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         offset_2 = offset_1;
         offset_1 = offset;
 
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
 _match_stored:
         /* match found */
@@ -448,7 +448,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
@@ -585,7 +585,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -596,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -621,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -653,7 +653,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index 29c00866ef5..802fc315798 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -180,7 +180,7 @@ ZSTD_compressBlock_fast_noDict_generic(
             mLength = ip0[-1] == match0[-1];
             ip0 -= mLength;
             match0 -= mLength;
-            offcode = 0;
+            offcode = STORE_REPCODE_1;
             mLength += 4;
             goto _match;
         }
@@ -267,7 +267,7 @@ ZSTD_compressBlock_fast_noDict_generic(
     match0 = base + idx;
     rep_offset2 = rep_offset1;
     rep_offset1 = (U32)(ip0-match0);
-    offcode = rep_offset1 + ZSTD_REP_MOVE;
+    offcode = STORE_OFFSET(rep_offset1);
     mLength = 4;
 
     /* Count the backwards match length. */
@@ -306,7 +306,7 @@ ZSTD_compressBlock_fast_noDict_generic(
                 { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                 ip0 += rLength;
-                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength);
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
                 anchor = ip0;
                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
     }   }   }
@@ -439,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         } else if ( (matchIndex <= prefixStartIndex) ) {
             size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
             U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -459,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 } /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
             }
         } else if (MEM_read32(match) != MEM_read32(ip)) {
             /* it's not a match, and we're not going to check the dictionary */
@@ -474,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         }
 
         /* match found */
@@ -499,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
@@ -598,7 +598,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
             ip += rLength;
             anchor = ip;
         } else {
@@ -614,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset_2 = offset_1; offset_1 = offset;  /* update offset history */
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
                 ip += mLength;
                 anchor = ip;
         }   }
@@ -633,7 +633,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2);
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 83831c67345..cf450a38efa 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -197,8 +197,8 @@ ZSTD_DUBT_findBetterDictMatch (
             U32 matchIndex = dictMatchIndex + dictIndexDelta;
             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
-                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
-                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
             }
             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -328,7 +328,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                 if (matchLength > matchEndIdx - matchIndex)
                     matchEndIdx = matchIndex + (U32)matchLength;
                 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
-                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                    bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                     if (dictMode == ZSTD_dictMatchState) {
                         nbCompares = 0; /* in addition to avoiding checking any
@@ -561,7 +561,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
         /* save best solution */
         if (currentMl > ml) {
             ml = currentMl;
-            *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+            *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
             if (ip+currentMl == iLimit) {
                 /* best possible, avoids read overflow on next attempt */
                 return ml;
@@ -598,7 +598,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
             /* save best solution */
             if (currentMl > ml) {
                 ml = currentMl;
-                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
             }
         }
@@ -703,7 +703,7 @@ size_t ZSTD_HcFindBestMatch(
         /* save best solution */
         if (currentMl > ml) {
             ml = currentMl;
-            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+            *offsetPtr = STORE_OFFSET(curr - matchIndex);
             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         }
 
@@ -738,7 +738,8 @@ size_t ZSTD_HcFindBestMatch(
             /* save best solution */
             if (currentMl > ml) {
                 ml = currentMl;
-                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                assert(curr > matchIndex + dmsIndexDelta);
+                *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
             }
 
@@ -1244,7 +1245,7 @@ size_t ZSTD_RowFindBestMatch(
             /* Save best solution */
             if (currentMl > ml) {
                 ml = currentMl;
-                *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+                *offsetPtr = STORE_OFFSET(curr - matchIndex);
                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
             }
         }
@@ -1292,7 +1293,8 @@ size_t ZSTD_RowFindBestMatch(
 
                 if (currentMl > ml) {
                     ml = currentMl;
-                    *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                    assert(curr > matchIndex + dmsIndexDelta);
+                    *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
                     if (ip+currentMl == iLimit) break;
                 }
             }
@@ -1685,7 +1687,7 @@ ZSTD_compressBlock_lazy_generic(
                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                     offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                     ip += matchLength;
                     anchor = ip;
                     continue;
@@ -1700,7 +1702,7 @@ ZSTD_compressBlock_lazy_generic(
                 /* store sequence */
                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -2028,7 +2030,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 1e7e626d2f7..68762a2874f 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -709,7 +709,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             rep[0] = sequence.offset;
             /* Store the sequence */
             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
-                          sequence.offset + ZSTD_REP_MOVE,
+                          STORE_OFFSET(sequence.offset),
                           sequence.matchLength);
             ip += sequence.matchLength;
         }
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 2356fa2ae11..34725bcd8b9 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -280,15 +280,17 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
 /* ZSTD_getMatchPrice() :
  * Provides the cost of the match part (offset + matchLength) of a sequence
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
- * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+ * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
+ * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
+ */
 FORCE_INLINE_TEMPLATE U32
-ZSTD_getMatchPrice(U32 const offset,
+ZSTD_getMatchPrice(U32 const offcode,
                    U32 const matchLength,
              const optState_t* const optPtr,
                    int const optLevel)
 {
     U32 price;
-    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const offCode = ZSTD_highbit32(offcode+1);
     U32 const mlBase = matchLength - MINMATCH;
     assert(matchLength >= MINMATCH);
 
@@ -631,7 +633,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                             repCode, ll0, repOffset, repLen);
                 bestLength = repLen;
-                matches[mnum].off = repCode - ll0;
+                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                 matches[mnum].len = (U32)repLen;
                 mnum++;
                 if ( (repLen > sufficient_len)
@@ -660,7 +662,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 bestLength = mlen;
                 assert(curr > matchIndex3);
                 assert(mnum==0);  /* no prior solution */
-                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                matches[0].off = STORE_OFFSET(curr - matchIndex3);
                 matches[0].len = (U32)mlen;
                 mnum = 1;
                 if ( (mlen > sufficient_len) |
@@ -694,12 +696,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
         if (matchLength > bestLength) {
             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
-                    (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
             assert(matchEndIdx > matchIndex);
             if (matchLength > matchEndIdx - matchIndex)
                 matchEndIdx = matchIndex + (U32)matchLength;
             bestLength = matchLength;
-            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
             matches[mnum].len = (U32)matchLength;
             mnum++;
             if ( (matchLength > ZSTD_OPT_NUM)
@@ -742,11 +744,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
             if (matchLength > bestLength) {
                 matchIndex = dictMatchIndex + dmsIndexDelta;
                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
-                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
                 if (matchLength > matchEndIdx - matchIndex)
                     matchEndIdx = matchIndex + (U32)matchLength;
                 bestLength = matchLength;
-                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
                 matches[mnum].len = (U32)matchLength;
                 mnum++;
                 if ( (matchLength > ZSTD_OPT_NUM)
@@ -938,11 +940,12 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
  * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
  */
 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
-    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
+                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+{
+    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
-    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
-    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
+    U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
 
     /* Ensure that current block position is not outside of the match */
     if (currPosInBlock < optLdm->startPosInBlock
@@ -1075,14 +1078,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
-                U32 const maxOffset = matches[nbMatches-1].off;
+                U32 const maxOffcode = matches[nbMatches-1].off;
                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
-                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
 
                 if (maxML > sufficient_len) {
                     lastSequence.litlen = litlen;
                     lastSequence.mlen = maxML;
-                    lastSequence.off = maxOffset;
+                    lastSequence.off = maxOffcode;
                     DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                 maxML, sufficient_len);
                     cur = 0;
@@ -1099,15 +1102,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                    U32 const offset = matches[matchNb].off;
+                    U32 const offcode = matches[matchNb].off;
                     U32 const end = matches[matchNb].len;
                     for ( ; pos <= end ; pos++ ) {
-                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
                         U32 const sequencePrice = literalsPrice + matchPrice;
                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                     pos, ZSTD_fCost(sequencePrice));
                         opt[pos].mlen = pos;
-                        opt[pos].off = offset;
+                        opt[pos].off = offcode;
                         opt[pos].litlen = litlen;
                         opt[pos].price = (int)sequencePrice;
                 }   }
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 2ba2a7d5ca0..ab59110985f 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -633,17 +633,16 @@ static inline void initSeqStore(seqStore_t *seqStore) {
 }
 
 /* Randomly generate sequence commands */
-static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
-                                size_t contentSize, size_t literalsSize, dictInfo info)
+static U32
+generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
+                  size_t contentSize, size_t literalsSize, dictInfo info)
 {
     /* The total length of all the matches */
     size_t const remainingMatch = contentSize - literalsSize;
     size_t excessMatch = 0;
     U32 numSequences = 0;
-
     U32 i;
 
-
     const BYTE* literals = LITERAL_BUFFER;
     BYTE* srcPtr = frame->src;
 
@@ -703,32 +702,31 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
                             lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
                             offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
                         }
-                        {
-                            U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
+                        {   U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
                             matchLen = MIN(matchLen, matchLenBound);
                         }
                     }
                 }
-                offsetCode = offset + ZSTD_REP_MOVE;
+                offsetCode = STORE_OFFSET(offset);
                 repIndex = 2;
             } else {
                 /* do a repeat offset */
-                offsetCode = RAND(seed) % 3;
+                U32 const randomRepIndex = RAND(seed) % 3;
+                offsetCode = STORE_REPCODE(randomRepIndex + 1);  /* expects values between 1 & 3 */
                 if (literalLen > 0) {
-                    offset = frame->stats.rep[offsetCode];
-                    repIndex = offsetCode;
+                    offset = frame->stats.rep[randomRepIndex];
+                    repIndex = randomRepIndex;
                 } else {
-                    /* special case */
-                    offset = offsetCode == 2 ? frame->stats.rep[0] - 1
-                                           : frame->stats.rep[offsetCode + 1];
-                    repIndex = MIN(2, offsetCode + 1);
+                    /* special case : literalLen == 0 */
+                    offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1
+                                           : frame->stats.rep[randomRepIndex + 1];
+                    repIndex = MIN(2, randomRepIndex + 1);
                 }
             }
         } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
 
-        {
+        {   BYTE* const dictEnd = info.dictContent + info.dictContentSize;
             size_t j;
-            BYTE* const dictEnd = info.dictContent + info.dictContentSize;
             for (j = 0; j < matchLen; j++) {
                 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
                     /* copy from dictionary instead of literals */
@@ -739,8 +737,7 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
                     *srcPtr = *(srcPtr-offset);
                 }
                 srcPtr++;
-            }
-        }
+        }   }
 
         {   int r;
             for (r = repIndex; r > 0; r--) {
@@ -754,7 +751,7 @@ static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
         DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
                      (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
         DISPLAYLEVEL(6, "\n");
-        if (offsetCode < 3) {
+        if (offsetCode < ZSTD_REP_NUM) {  /* expects @offsetCode to use 0-2 to represents repCodes */
             DISPLAYLEVEL(7, "        repeat offset: %d\n", (int)repIndex);
         }
         /* use libzstd sequence handling */

From 666372c7bf345ef8a238ca1ffbe985ba4a9aedf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Marciniak?= <sunwire+git@gmail.com>
Date: Fri, 24 Dec 2021 15:05:26 +0100
Subject: [PATCH 09/52] Fix tar test cases

---
 tests/playTests.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index a772b61ff7c..d5ee3155ef1 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1359,21 +1359,21 @@ zstd -d tmp.tzst
 rm -f tmp.tar tmp.tzst
 
 if [ $GZIPMODE -eq 1 ]; then
-    tar -c tmp | gzip > tmp.tgz
+    tar -f - -c tmp | gzip > tmp.tgz
     zstd -d tmp.tgz
     [ -e tmp.tar ] || die ".tgz failed to decompress to .tar!"
     rm -f tmp.tar tmp.tgz
 fi
 
 if [ $LZMAMODE -eq 1 ]; then
-    tar -c tmp | zstd --format=xz > tmp.txz
+    tar -f - -c tmp | zstd --format=xz > tmp.txz
     zstd -d tmp.txz
     [ -e tmp.tar ] || die ".txz failed to decompress to .tar!"
     rm -f tmp.tar tmp.txz
 fi
 
 if [ $LZ4MODE -eq 1 ]; then
-    tar -c tmp | zstd --format=lz4 > tmp.tlz4
+    tar -f - -c tmp | zstd --format=lz4 > tmp.tlz4
     zstd -d tmp.tlz4
     [ -e tmp.tar ] || die ".tlz4 failed to decompress to .tar!"
     rm -f tmp.tar tmp.tlz4

From 148ff1577452e1ceb722ff4394007656112d8a41 Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Mon, 27 Dec 2021 02:21:21 -0500
Subject: [PATCH 10/52] Fixup MSVC source file inclusion for cmake builds

---
 build/cmake/lib/CMakeLists.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index ec8480d5dbf..612f667f9c0 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -22,7 +22,11 @@ include_directories(${LIBRARY_DIR} ${LIBRARY_DIR}/common)
 
 file(GLOB CommonSources ${LIBRARY_DIR}/common/*.c)
 file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c)
-file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c ${LIBRARY_DIR}/decompress/*.S)
+if (MSVC)
+    file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
+else ()
+    file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c ${LIBRARY_DIR}/decompress/*.S)
+endif ()
 file(GLOB DictBuilderSources ${LIBRARY_DIR}/dictBuilder/*.c)
 
 set(Sources

From 2068889146a8c41947bd57b41c639b9f5ab1b73c Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 06:59:07 -0800
Subject: [PATCH 11/52] created STORED_*() macros

to act on values stored / expressed in the sumtype numeric representation required by `storedSeq()`.

This makes it possible to abstract away this representation by using the macros to extract these values.

First user : ZSTD_updateRep() .
---
 lib/compress/zstd_compress_internal.h | 72 +++++++++++++++------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 270354e44fa..dbf2c2c26bc 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -497,36 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
 }
 
-typedef struct repcodes_s {
-    U32 rep[3];
-} repcodes_t;
-
-
-/* ZSTD_updateRep() :
- * @offcode : expects a scale where 0,1,2 represent repcodes 1-3, and 2+ represents real_offset+2
- */
-MEM_STATIC repcodes_t
-ZSTD_updateRep(U32 const rep[3], U32 const offcode, U32 const ll0)
-{
-    repcodes_t newReps;
-    if (offcode >= ZSTD_REP_NUM) {  /* full offset */
-        newReps.rep[2] = rep[1];
-        newReps.rep[1] = rep[0];
-        newReps.rep[0] = offcode - ZSTD_REP_MOVE;
-    } else {   /* repcode */
-        U32 const repCode = offcode + ll0;
-        if (repCode > 0) {  /* note : if repCode==0, no change */
-            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-            newReps.rep[1] = rep[0];
-            newReps.rep[0] = currentOffset;
-        } else {   /* repCode == 0 */
-            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
-        }
-    }
-    return newReps;
-}
-
 /* ZSTD_cParam_withinBounds:
  * @return 1 if value is within cParam bounds,
  * 0 otherwise */
@@ -609,7 +579,11 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
 #define STORE_REPCODE_2 STORE_REPCODE(2)
 #define STORE_REPCODE_3 STORE_REPCODE(3)
 #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
-#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
+#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
+#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
+#define STORED_IS_REPCODE(o) ((o) < ZSTD_REP_NUM)
+#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
+#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
 
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
@@ -619,8 +593,11 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
  *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
 */
-HINT_INLINE UNUSED_ATTR
-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offBase_minus1, size_t matchLength)
+HINT_INLINE UNUSED_ATTR void
+ZSTD_storeSeq(seqStore_t* seqStorePtr,
+              size_t litLength, const BYTE* literals, const BYTE* litLimit,
+              U32 offBase_minus1,
+              size_t matchLength)
 {
     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
     BYTE const* const litEnd = literals + litLength;
@@ -676,6 +653,35 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     seqStorePtr->sequences++;
 }
 
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+/* ZSTD_updateRep() :
+ * @offcode : sum-type, with same numeric representation as ZSTD_storeSeq()
+ */
+MEM_STATIC repcodes_t
+ZSTD_updateRep(U32 const rep[3], U32 const offBase_minus1, U32 const ll0)
+{
+    repcodes_t newReps;
+    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
+        newReps.rep[2] = rep[1];
+        newReps.rep[1] = rep[0];
+        newReps.rep[0] = STORED_OFFSET(offBase_minus1);
+    } else {   /* repcode */
+        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
+
 
 /*-*************************************
 *  Match length counter

From 7ee35bad6b9e20c8f01e96c5e106cab7bd4d45ec Mon Sep 17 00:00:00 2001
From: Frank Wessels <fwessels@xs4all.nl>
Date: Tue, 28 Dec 2021 09:04:28 -0800
Subject: [PATCH 12/52] Fix mini typo

---
 examples/streaming_compression_thread_pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/streaming_compression_thread_pool.c b/examples/streaming_compression_thread_pool.c
index 471ca863136..21cb3d54999 100644
--- a/examples/streaming_compression_thread_pool.c
+++ b/examples/streaming_compression_thread_pool.c
@@ -60,7 +60,7 @@ static void *compressFile_orDie(void *data)
     CHECK_ZSTD( ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) );
     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbThreads);
 
-    /* This loop read from the input file, compresses that entire chunk,
+    /* This loop reads from the input file, compresses that entire chunk,
      * and writes all output produced to the output file.
      */
     size_t const toRead = buffInSize;

From 435f5a2e6d7aa8f0ad581c2da688f8a7f1e3e8cd Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 09:55:31 -0800
Subject: [PATCH 13/52] fixed regression test assert

optLdm->offset might be == 0 in invalid case.
Only use STORE_OFFSET() after validating it's a correct case.
---
 lib/compress/zstd_compress_internal.h |  4 ++-
 lib/compress/zstd_ldm.c               |  4 ++-
 lib/compress/zstd_opt.c               | 43 ++++++++++++++++-----------
 tests/fuzz/.gitignore                 |  1 +
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index dbf2c2c26bc..f0c4215ead4 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -565,7 +565,9 @@ MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxPa
  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
  *  large copies.
  */
-static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+static void
+ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
+{
     assert(iend > ilimit_w);
     if (ip <= ilimit_w) {
         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 68762a2874f..f662b2546e0 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -579,7 +579,9 @@ size_t ZSTD_ldm_generateSequences(
     return 0;
 }
 
-void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+void
+ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
+{
     while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
         rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
         if (srcSize <= seq->litLength) {
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 34725bcd8b9..34972486bac 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -837,7 +837,8 @@ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
     }
 
-static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
+static ZSTD_getAllMatchesFn
+ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
 {
     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
@@ -856,16 +857,18 @@ static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const*
 
 /* Struct containing info needed to make decision about ldm inclusion */
 typedef struct {
-    rawSeqStore_t seqStore;         /* External match candidates store for this block */
-    U32 startPosInBlock;            /* Start position of the current match candidate */
-    U32 endPosInBlock;              /* End position of the current match candidate */
-    U32 offset;                     /* Offset of the match candidate */
+    rawSeqStore_t seqStore;   /* External match candidates store for this block */
+    U32 startPosInBlock;      /* Start position of the current match candidate */
+    U32 endPosInBlock;        /* End position of the current match candidate */
+    U32 offset;               /* Offset of the match candidate */
 } ZSTD_optLdm_t;
 
 /* ZSTD_optLdm_skipRawSeqStoreBytes():
- * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+ * Moves forward in @rawSeqStore by @nbBytes,
+ * which will update the fields 'pos' and 'posInSequence'.
  */
-static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
+{
     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
@@ -886,8 +889,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
  * Calculates the beginning and end of the next match in the current block.
  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
  */
-static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
-                                                   U32 blockBytesRemaining) {
+static void
+ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                       U32 blockBytesRemaining)
+{
     rawSeq currSeq;
     U32 currBlockEndPos;
     U32 literalsBytesRemaining;
@@ -899,8 +904,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
         optLdm->endPosInBlock = UINT_MAX;
         return;
     }
-    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
-       based on ldmSeqStore->posInSequence */
+    /* Calculate appropriate bytes left in matchLength and litLength
+     * after adjusting based on ldmSeqStore->posInSequence */
     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
     currBlockEndPos = currPosInBlock + blockBytesRemaining;
@@ -936,11 +941,12 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
 }
 
 /* ZSTD_optLdm_maybeAddMatch():
- * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
- * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
+ * Adds a match if it's long enough,
+ * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
+ * into 'matches'. Maintains the correct ordering of 'matches'.
  */
 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
 {
     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
@@ -966,8 +972,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
 /* ZSTD_optLdm_processMatchCandidate():
  * Wrapper function to update ldm seq store and call ldm functions as necessary.
  */
-static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
-                                              U32 currPosInBlock, U32 remainingBytes) {
+static void
+ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
+                                  ZSTD_match_t* matches, U32* nbMatches,
+                                  U32 currPosInBlock, U32 remainingBytes)
+{
     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
         return;
     }
@@ -978,7 +987,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
              * at the end of a match from the ldm seq store, and will often be some bytes
              * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
              */
-            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
         }
         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index 02c2f10beb1..28f2b8da4db 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -17,6 +17,7 @@ decompress_dstSize_tooSmall
 fse_read_ncount
 sequence_compression_api
 seekable_roundtrip
+huf_decompress
 huf_round_trip
 fuzz-*.log
 rt_lib_*

From b7630a474b5e330e07dc743e2a2d7cb26f457a7a Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 10:59:47 -0800
Subject: [PATCH 14/52] abstracted usage of offBase sumtype within zstd_lazy.c

---
 lib/compress/zstd_lazy.c | 16 ++++++++--------
 lib/compress/zstd_opt.c  |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index cf450a38efa..eae9a994bcb 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -218,7 +218,7 @@ ZSTD_DUBT_findBetterDictMatch (
     }
 
     if (bestLength >= MINMATCH) {
-        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+        U32 const mIndex = curr - STORED_OFFSET(*offsetPtr); (void)mIndex;
         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                     curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
     }
@@ -368,7 +368,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
         assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
         if (bestLength >= MINMATCH) {
-            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            U32 const mIndex = curr - STORED_OFFSET(*offsetPtr); (void)mIndex;
             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                         curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
         }
@@ -1655,17 +1655,17 @@ ZSTD_compressBlock_lazy_generic(
         /* catch up */
         if (offset) {
             if (dictMode == ZSTD_noDict) {
-                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
-                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
+                while ( ((start > anchor) & (start - STORED_OFFSET(offset) > prefixLowest))
+                     && (start[-1] == (start-STORED_OFFSET(offset))[-1]) )  /* only search for offset within prefix */
                     { start--; matchLength++; }
             }
             if (isDxS) {
-                U32 const matchIndex = (U32)((size_t)(start-base) - (offset - ZSTD_REP_MOVE));
+                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offset));
                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
             }
-            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offset);
         }
         /* store sequence */
 _storeSequence:
@@ -2002,11 +2002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 
         /* catch up */
         if (offset) {
-            U32 const matchIndex = (U32)((size_t)(start-base) - (offset - ZSTD_REP_MOVE));
+            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offset));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offset);
         }
 
         /* store sequence */
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 34972486bac..3d821b290a4 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -951,7 +951,6 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
-    U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
 
     /* Ensure that current block position is not outside of the match */
     if (currPosInBlock < optLdm->startPosInBlock
@@ -961,6 +960,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
     }
 
     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
                  candidateOffCode, candidateMatchLength, currPosInBlock);
         matches[*nbMatches].len = candidateMatchLength;

From 321583ccf508e300d68f6ea3e6fcf9adb13d2a47 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 11:38:21 -0800
Subject: [PATCH 15/52] fixed minor typecast warnings

---
 lib/compress/zstd_lazy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index eae9a994bcb..6d2bc77130f 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -218,7 +218,7 @@ ZSTD_DUBT_findBetterDictMatch (
     }
 
     if (bestLength >= MINMATCH) {
-        U32 const mIndex = curr - STORED_OFFSET(*offsetPtr); (void)mIndex;
+        U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                     curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
     }
@@ -368,7 +368,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
         assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
         if (bestLength >= MINMATCH) {
-            U32 const mIndex = curr - STORED_OFFSET(*offsetPtr); (void)mIndex;
+            U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                         curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
         }

From 6fa640ef70d01489e2a4a6228f4e439b712f7d68 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 11:46:15 -0800
Subject: [PATCH 16/52] separate newRep() from updateRep()

the new contracts seems to make more sense :
updateRep() updates an array of repeat offsets _in place_,
while newRep() generates a new structure with the updated repeat-offset array.

Most callers are actually expecting the in-place variant,
and a limited sub-section, in `zstd_opt.c` mainly, prefer `newRep()`.
---
 lib/compress/zstd_compress.c            | 14 ++++-----
 lib/compress/zstd_compress_internal.h   | 38 +++++++++++++++----------
 lib/compress/zstd_compress_superblock.c |  2 +-
 lib/compress/zstd_opt.c                 |  4 +--
 4 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index e3199515ae0..0b1543f722b 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -2939,9 +2939,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
         outSeqs[i].offset = rawOffset;
         /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
            so we provide seqStoreSeqs[i].offset - 1 */
-        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
-                                         seqStoreSeqs[i].offBase - 1,
-                                         seqStoreSeqs[i].litLength == 0);
+        ZSTD_updateRep(updatedRepcodes.rep,
+                       seqStoreSeqs[i].offBase - 1,
+                       seqStoreSeqs[i].litLength == 0);
         literalsRead += outSeqs[i].litLength;
     }
     /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
@@ -3482,8 +3482,8 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
         /* Compression repcode history is always updated with values directly from the unmodified seqStore.
          * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
          */
-        *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offBase - 1, ll0);
-        *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
+        ZSTD_updateRep(dRepcodes->rep, seq->offBase - 1, ll0);
+        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
     }
 }
 
@@ -5808,7 +5808,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
         U32 const ll0 = (litLength == 0);
         U32 const matchLength = inSeqs[idx].matchLength;
         U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
-        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
 
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         if (cctx->appliedParams.validateSequences) {
@@ -5931,7 +5931,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
         /* Check if this offset can be represented with a repcode */
         {   U32 const ll0 = (litLength == 0);
             offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
-            updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
         }
 
         if (cctx->appliedParams.validateSequences) {
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index f0c4215ead4..f36b4665c8b 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -655,32 +655,40 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
     seqStorePtr->sequences++;
 }
 
-typedef struct repcodes_s {
-    U32 rep[3];
-} repcodes_t;
-
 /* ZSTD_updateRep() :
- * @offcode : sum-type, with same numeric representation as ZSTD_storeSeq()
+ * updates in-place @rep (array of repeat offsets)
+ * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
  */
-MEM_STATIC repcodes_t
-ZSTD_updateRep(U32 const rep[3], U32 const offBase_minus1, U32 const ll0)
+MEM_STATIC void
+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
 {
-    repcodes_t newReps;
     if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
-        newReps.rep[2] = rep[1];
-        newReps.rep[1] = rep[0];
-        newReps.rep[0] = STORED_OFFSET(offBase_minus1);
+        rep[2] = rep[1];
+        rep[1] = rep[0];
+        rep[0] = STORED_OFFSET(offBase_minus1);
     } else {   /* repcode */
         U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
         if (repCode > 0) {  /* note : if repCode==0, no change */
             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-            newReps.rep[1] = rep[0];
-            newReps.rep[0] = currentOffset;
+            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            rep[1] = rep[0];
+            rep[0] = currentOffset;
         } else {   /* repCode == 0 */
-            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+            /* nothing to do */
         }
     }
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t
+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
+{
+    repcodes_t newReps;
+    memcpy(&newReps, rep, sizeof(newReps));
+    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
     return newReps;
 }
 
diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index d270a1c6a9c..10e33785778 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -539,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             repcodes_t rep;
             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
             for (seq = sstart; seq < sp; ++seq) {
-                rep = ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+                ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
             }
             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
         }
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 3d821b290a4..52c2d3d158f 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -1164,7 +1164,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             assert(cur >= opt[cur].mlen);
             if (opt[cur].mlen != 0) {
                 U32 const prev = cur - opt[cur].mlen;
-                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
             } else {
                 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
@@ -1254,7 +1254,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
          * update them while traversing the sequences.
          */
         if (lastSequence.mlen != 0) {
-            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
             ZSTD_memcpy(rep, &reps, sizeof(reps));
         } else {
             ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));

From 681c81f06c313eed276283c141d0c14e9404b4fa Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 11:58:33 -0800
Subject: [PATCH 17/52] abstracted storeSeq() sumtype numeric representation
 from decodecorpus.c

---
 tests/decodecorpus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index ab59110985f..1037a36596c 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -751,7 +751,7 @@ generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
         DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
                      (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
         DISPLAYLEVEL(6, "\n");
-        if (offsetCode < ZSTD_REP_NUM) {  /* expects @offsetCode to use 0-2 to represents repCodes */
+        if (STORED_IS_REPCODE(offsetCode)) {  /* expects sumtype numeric representation of ZSTD_storeSeq() */
             DISPLAYLEVEL(7, "        repeat offset: %d\n", (int)repIndex);
         }
         /* use libzstd sequence handling */

From e909fa627fc9005119457bc25e59cd1a03ae76ba Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 12:13:58 -0800
Subject: [PATCH 18/52] abstracted storeSeq() sumtype numeric representation
 from zstd_opt.c

---
 lib/compress/zstd_compress_internal.h |  3 ++-
 lib/compress/zstd_opt.c               | 12 +++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index f36b4665c8b..9ad0db6dada 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -586,6 +586,7 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
 #define STORED_IS_REPCODE(o) ((o) < ZSTD_REP_NUM)
 #define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
 #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
+#define STORED_TO_OFFBASE(o) ((o)+1)
 
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
@@ -639,7 +640,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
-    seqStorePtr->sequences[0].offBase = offBase_minus1 + 1;
+    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
 
     /* match Length */
     assert(matchLength >= MINMATCH);
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 52c2d3d158f..2fa10816f9f 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -204,7 +204,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                     1, 1, 1, 1, 1, 1, 1, 1,
                     1, 1, 1, 1
                 };
-                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
+                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
+                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
             }
 
             {   unsigned ml;
@@ -219,7 +220,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                     1, 1, 1, 1, 1, 1, 1, 1,
                     1, 1, 1, 1, 1, 1, 1, 1
                 };
-                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
+                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
+                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
             }
 
 
@@ -290,7 +292,7 @@ ZSTD_getMatchPrice(U32 const offcode,
                    int const optLevel)
 {
     U32 price;
-    U32 const offCode = ZSTD_highbit32(offcode+1);
+    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
     U32 const mlBase = matchLength - MINMATCH;
     assert(matchLength >= MINMATCH);
 
@@ -333,8 +335,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
         optPtr->litLengthSum++;
     }
 
-    /* match offset code (0-2=>repCode; 3+=>offset+2) */
-    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+    /* offset code : expected to follow storeSeq() numeric representation */
+    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
         assert(offCode <= MaxOff);
         optPtr->offCodeFreq[offCode]++;
         optPtr->offCodeSum++;

From 92a08eec72c9bd2b28620aab3b47af5a2ae7f0c5 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 12:23:39 -0800
Subject: [PATCH 19/52] abstracted storeSeq() sumtype numeric representation
 from zstd_lazy.c

---
 lib/compress/zstd_lazy.c | 88 ++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 6d2bc77130f..a446482aed3 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1539,7 +1539,7 @@ ZSTD_compressBlock_lazy_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offset=0;
+        size_t offcode=0;
         const BYTE* start=ip+1;
 
         /* check repCode */
@@ -1566,7 +1566,7 @@ ZSTD_compressBlock_lazy_generic(
         {   size_t offsetFound = 999999999;
             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
             if (ml2 > matchLength)
-                matchLength = ml2, start = ip, offset=offsetFound;
+                matchLength = ml2, start = ip, offcode=offsetFound;
         }
 
         if (matchLength < 4) {
@@ -1579,12 +1579,12 @@ ZSTD_compressBlock_lazy_generic(
         while (ip<ilimit) {
             ip ++;
             if ( (dictMode == ZSTD_noDict)
-              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+              && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                 int const gain2 = (int)(mlRep * 3);
-                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                 if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep, offset = 0, start = ip;
+                    matchLength = mlRep, offcode = 0, start = ip;
             }
             if (isDxS) {
                 const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1596,17 +1596,17 @@ ZSTD_compressBlock_lazy_generic(
                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                     int const gain2 = (int)(mlRep * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offset = 0, start = ip;
+                        matchLength = mlRep, offcode = 0, start = ip;
                 }
             }
             {   size_t offset2=999999999;
                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2, offset = offset2, start = ip;
+                    matchLength = ml2, offcode = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
 
@@ -1614,12 +1614,12 @@ ZSTD_compressBlock_lazy_generic(
             if ((depth==2) && (ip<ilimit)) {
                 ip ++;
                 if ( (dictMode == ZSTD_noDict)
-                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                  && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                     int const gain2 = (int)(mlRep * 4);
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offset = 0, start = ip;
+                        matchLength = mlRep, offcode = 0, start = ip;
                 }
                 if (isDxS) {
                     const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1631,17 +1631,17 @@ ZSTD_compressBlock_lazy_generic(
                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                         int const gain2 = (int)(mlRep * 4);
-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((mlRep >= 4) && (gain2 > gain1))
-                            matchLength = mlRep, offset = 0, start = ip;
+                            matchLength = mlRep, offcode = 0, start = ip;
                     }
                 }
                 {   size_t offset2=999999999;
                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
                     if ((ml2 >= 4) && (gain2 > gain1)) {
-                        matchLength = ml2, offset = offset2, start = ip;
+                        matchLength = ml2, offcode = offset2, start = ip;
                         continue;
             }   }   }
             break;  /* nothing found : store previous solution */
@@ -1653,24 +1653,24 @@ ZSTD_compressBlock_lazy_generic(
          * overflows the pointer, which is undefined behavior.
          */
         /* catch up */
-        if (offset) {
+        if (offcode) {
             if (dictMode == ZSTD_noDict) {
-                while ( ((start > anchor) & (start - STORED_OFFSET(offset) > prefixLowest))
-                     && (start[-1] == (start-STORED_OFFSET(offset))[-1]) )  /* only search for offset within prefix */
+                while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
+                     && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
                     { start--; matchLength++; }
             }
             if (isDxS) {
-                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offset));
+                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
             }
-            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offset);
+            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
         }
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -1686,7 +1686,7 @@ ZSTD_compressBlock_lazy_generic(
                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
-                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
+                    offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset_2 <=> offset_1 */
                     ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                     ip += matchLength;
                     anchor = ip;
@@ -1701,7 +1701,7 @@ ZSTD_compressBlock_lazy_generic(
                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
                 /* store sequence */
                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
+                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */
                 ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                 ip += matchLength;
                 anchor = ip;
@@ -1903,7 +1903,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offset=0;
+        size_t offcode=0;
         const BYTE* start=ip+1;
         U32 curr = (U32)(ip-base);
 
@@ -1925,7 +1925,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         {   size_t offsetFound = 999999999;
             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
             if (ml2 > matchLength)
-                matchLength = ml2, start = ip, offset=offsetFound;
+                matchLength = ml2, start = ip, offcode=offsetFound;
         }
 
         if (matchLength < 4) {
@@ -1939,7 +1939,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             ip ++;
             curr++;
             /* check repCode */
-            if (offset) {
+            if (offcode) {
                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                 const U32 repIndex = (U32)(curr - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
@@ -1951,18 +1951,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32(STORED_TO_OFFBASE(offcode)) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength, offset = 0, start = ip;
+                        matchLength = repLength, offcode = 0, start = ip;
             }   }
 
             /* search match, depth 1 */
             {   size_t offset2=999999999;
                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32(STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32(STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2, offset = offset2, start = ip;
+                    matchLength = ml2, offcode = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
 
@@ -1971,7 +1971,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 ip ++;
                 curr++;
                 /* check repCode */
-                if (offset) {
+                if (offcode) {
                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                     const U32 repIndex = (U32)(curr - offset_1);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
@@ -1983,36 +1983,36 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                         int const gain2 = (int)(repLength * 4);
-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((repLength >= 4) && (gain2 > gain1))
-                            matchLength = repLength, offset = 0, start = ip;
+                            matchLength = repLength, offcode = 0, start = ip;
                 }   }
 
                 /* search match, depth 2 */
                 {   size_t offset2=999999999;
                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
                     if ((ml2 >= 4) && (gain2 > gain1)) {
-                        matchLength = ml2, offset = offset2, start = ip;
+                        matchLength = ml2, offcode = offset2, start = ip;
                         continue;
             }   }   }
             break;  /* nothing found : store previous solution */
         }
 
         /* catch up */
-        if (offset) {
-            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offset));
+        if (offcode) {
+            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offset);
+            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
         }
 
         /* store sequence */
 _storeSequence:
         {   size_t const litLength = (size_t)(start - anchor);
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -2029,7 +2029,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
+                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset history */
                 ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                 ip += matchLength;
                 anchor = ip;

From a34ccad9a6adbaf6bd976434b5ae18a2d60f224a Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 13:21:22 -0800
Subject: [PATCH 20/52] fixed minor conversion warnings

---
 lib/compress/zstd_lazy.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index a446482aed3..cb3152eb86a 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1951,7 +1951,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32(STORED_TO_OFFBASE(offcode)) + 1);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
                         matchLength = repLength, offcode = 0, start = ip;
             }   }
@@ -1959,8 +1959,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             /* search match, depth 1 */
             {   size_t offset2=999999999;
                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32(STORED_TO_OFFBASE(offset2)));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32(STORED_TO_OFFBASE(offcode)) + 4);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offcode = offset2, start = ip;
                     continue;   /* search a better one */

From de9f52e9456f97a3bdf6c2b4ecad424226d65247 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 13:47:57 -0800
Subject: [PATCH 21/52] regroup all mentions of ZSTD_REP_MOVE within
 zstd_compress_internal.h

---
 lib/common/zstd_internal.h            | 1 -
 lib/compress/zstd_compress_internal.h | 9 ++++-----
 lib/compress/zstd_lazy.c              | 5 ++---
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index 32ef0b43eb4..e4d36ce0905 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -67,7 +67,6 @@ extern "C" {
 #define ZSTD_OPT_NUM    (1<<12)
 
 #define ZSTD_REP_NUM      3                 /* number of repcodes */
-#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
 static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
 
 #define KB *(1 <<10)
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 9ad0db6dada..2d55ddd716a 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -129,7 +129,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
 *********************************/
 
 typedef struct {
-    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
     U32 len;            /* Raw length of match */
 } ZSTD_match_t;
 
@@ -577,22 +577,21 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
     while (ip < iend) *op++ = *ip++;
 }
 
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
 #define STORE_REPCODE_1 STORE_REPCODE(1)
 #define STORE_REPCODE_2 STORE_REPCODE(2)
 #define STORE_REPCODE_3 STORE_REPCODE(3)
 #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
 #define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
 #define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
-#define STORED_IS_REPCODE(o) ((o) < ZSTD_REP_NUM)
+#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
 #define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
 #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
 #define STORED_TO_OFFBASE(o) ((o)+1)
 
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
- *  @offBase_minus1 : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
- *                    Users should not specify the encoded value directly,
- *                    instead use macros STORE_REPCODE_X and STORE_OFFSET().
+ *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
  *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
 */
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index cb3152eb86a..72bbe719537 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1648,9 +1648,8 @@ ZSTD_compressBlock_lazy_generic(
         }
 
         /* NOTE:
-         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
-         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
-         * overflows the pointer, which is undefined behavior.
+         * Pay attention that `start[-value]` can lead to strange undefined behavior 
+         * notably if `value` is unsigned, resulting in a large positive `-value`.
          */
         /* catch up */
         if (offcode) {

From 8da414231d105e64d424f6661a21e3add8c40fd1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 16:18:44 -0800
Subject: [PATCH 22/52] found a few more places which were dependent on
 seqStore offcode sumtype numeric representation

---
 lib/compress/zstd_compress.c          | 56 ++++++++++++++++-----------
 lib/compress/zstd_compress_internal.h |  3 +-
 lib/compress/zstd_lazy.c              | 27 +++++++------
 tests/fuzz/sequence_compression_api.c |  4 +-
 tests/fuzz/zstd_helpers.c             |  2 +-
 5 files changed, 54 insertions(+), 38 deletions(-)

diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 0b1543f722b..f06456af926 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -3434,11 +3434,13 @@ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
 
 /**
  * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
- * offCode must be an offCode representing a repcode, therefore in the range of [0, 2].
+ * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
  */
-static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
-    U32 const adjustedOffCode = offCode + ll0;
-    assert(offCode < ZSTD_REP_NUM);
+static U32
+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
+{
+    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
+    assert(STORED_IS_REPCODE(offCode));
     if (adjustedOffCode == ZSTD_REP_NUM) {
         /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
         assert(rep[0] > 0);
@@ -3466,9 +3468,9 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
     for (; idx < nbSeq; ++idx) {
         seqDef* const seq = seqStore->sequencesStart + idx;
         U32 const ll0 = (seq->litLength == 0);
-        U32 const offCode = seq->offBase - 1;
+        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
         assert(seq->offBase > 0);
-        if (offCode < ZSTD_REP_NUM) {
+        if (STORED_IS_REPCODE(offCode)) {
             U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
             U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
             /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
@@ -3482,7 +3484,7 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
         /* Compression repcode history is always updated with values directly from the unmodified seqStore.
          * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
          */
-        ZSTD_updateRep(dRepcodes->rep, seq->offBase - 1, ll0);
+        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
         ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
     }
 }
@@ -3492,11 +3494,13 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_
  *
  * Returns the total size of that block (including header) or a ZSTD error code.
  */
-static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
-                                                repcodes_t* const dRep, repcodes_t* const cRep,
-                                                void* dst, size_t dstCapacity,
-                                                const void* src, size_t srcSize,
-                                                U32 lastBlock, U32 isPartition) {
+static size_t
+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+                                  repcodes_t* const dRep, repcodes_t* const cRep,
+                                  void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  U32 lastBlock, U32 isPartition)
+{
     const U32 rleMaxLength = 25;
     BYTE* op = (BYTE*)dst;
     const BYTE* ip = (const BYTE*)src;
@@ -3505,6 +3509,7 @@ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const
 
     /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
     repcodes_t const dRepOriginal = *dRep;
+    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
     if (isPartition)
         ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
 
@@ -3577,8 +3582,10 @@ typedef struct {
  * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
  * maximum of 128 KB, this value is actually impossible to reach.
  */
-static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
-                                         ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+static void
+ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
+{
     seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
     seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
     seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
@@ -3633,8 +3640,10 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
  *
  * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
  */
-static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
-                                                     const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
+static size_t
+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
+{
     size_t cSize = 0;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
@@ -3720,9 +3729,11 @@ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, s
     return cSize;
 }
 
-static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
-                                        void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize, U32 lastBlock) {
+static size_t
+ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize, U32 lastBlock)
+{
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     U32 nbSeq;
@@ -3748,9 +3759,10 @@ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
     return cSize;
 }
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                                        void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize, U32 frame)
+static size_t
+ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                            void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize, U32 frame)
 {
     /* This the upper bound for the length of an rle block.
      * This isn't the actual upper bound. Finding the real threshold
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 2d55ddd716a..22579ca0b79 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -588,6 +588,7 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
 #define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
 #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
 #define STORED_TO_OFFBASE(o) ((o)+1)
+#define OFFBASE_TO_STORED(o) ((o)-1)
 
 /*! ZSTD_storeSeq() :
  *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
@@ -608,7 +609,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)matchLength, (U32)offCode);
+               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c
index 72bbe719537..2e38dcb46d2 100644
--- a/lib/compress/zstd_lazy.c
+++ b/lib/compress/zstd_lazy.c
@@ -1539,8 +1539,9 @@ ZSTD_compressBlock_lazy_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offcode=0;
+        size_t offcode=STORE_REPCODE_1;
         const BYTE* start=ip+1;
+        DEBUGLOG(7, "search baseline (depth 0)");
 
         /* check repCode */
         if (isDxS) {
@@ -1577,6 +1578,7 @@ ZSTD_compressBlock_lazy_generic(
         /* let's try to find a better solution */
         if (depth>=1)
         while (ip<ilimit) {
+            DEBUGLOG(7, "search depth 1");
             ip ++;
             if ( (dictMode == ZSTD_noDict)
               && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
@@ -1584,7 +1586,7 @@ ZSTD_compressBlock_lazy_generic(
                 int const gain2 = (int)(mlRep * 3);
                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                 if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep, offcode = 0, start = ip;
+                    matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
             }
             if (isDxS) {
                 const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1598,12 +1600,12 @@ ZSTD_compressBlock_lazy_generic(
                     int const gain2 = (int)(mlRep * 3);
                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offcode = 0, start = ip;
+                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                 }
             }
             {   size_t offset2=999999999;
                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
                     matchLength = ml2, offcode = offset2, start = ip;
@@ -1612,6 +1614,7 @@ ZSTD_compressBlock_lazy_generic(
 
             /* let's find an even better one */
             if ((depth==2) && (ip<ilimit)) {
+                DEBUGLOG(7, "search depth 2");
                 ip ++;
                 if ( (dictMode == ZSTD_noDict)
                   && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
@@ -1619,7 +1622,7 @@ ZSTD_compressBlock_lazy_generic(
                     int const gain2 = (int)(mlRep * 4);
                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offcode = 0, start = ip;
+                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                 }
                 if (isDxS) {
                     const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1633,7 +1636,7 @@ ZSTD_compressBlock_lazy_generic(
                         int const gain2 = (int)(mlRep * 4);
                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((mlRep >= 4) && (gain2 > gain1))
-                            matchLength = mlRep, offcode = 0, start = ip;
+                            matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                     }
                 }
                 {   size_t offset2=999999999;
@@ -1648,11 +1651,11 @@ ZSTD_compressBlock_lazy_generic(
         }
 
         /* NOTE:
-         * Pay attention that `start[-value]` can lead to strange undefined behavior 
+         * Pay attention that `start[-value]` can lead to strange undefined behavior
          * notably if `value` is unsigned, resulting in a large positive `-value`.
          */
         /* catch up */
-        if (offcode) {
+        if (STORED_IS_OFFSET(offcode)) {
             if (dictMode == ZSTD_noDict) {
                 while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
                      && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
@@ -1902,7 +1905,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offcode=0;
+        size_t offcode=STORE_REPCODE_1;
         const BYTE* start=ip+1;
         U32 curr = (U32)(ip-base);
 
@@ -1952,7 +1955,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                     int const gain2 = (int)(repLength * 3);
                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength, offcode = 0, start = ip;
+                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
             }   }
 
             /* search match, depth 1 */
@@ -1984,7 +1987,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                         int const gain2 = (int)(repLength * 4);
                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((repLength >= 4) && (gain2 > gain1))
-                            matchLength = repLength, offcode = 0, start = ip;
+                            matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
                 }   }
 
                 /* search match, depth 2 */
@@ -2000,7 +2003,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         }
 
         /* catch up */
-        if (offcode) {
+        if (STORED_IS_OFFSET(offcode)) {
             U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c
index cc840bf801a..a2959e1aca6 100644
--- a/tests/fuzz/sequence_compression_api.c
+++ b/tests/fuzz/sequence_compression_api.c
@@ -72,10 +72,10 @@ static size_t decodeSequences(void* dst, size_t nbSequences,
                               size_t literalsSize, const void* dict, size_t dictSize) {
     const uint8_t* litPtr = literalsBuffer;
     const uint8_t* const litBegin = literalsBuffer;
-    const uint8_t* const litEnd = literalsBuffer + literalsSize;
+    const uint8_t* const litEnd = litBegin + literalsSize;
     const uint8_t* dictPtr = dict;
     uint8_t* op = dst;
-    const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
+    const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
     size_t generatedSrcBufferSize = 0;
     size_t bytesWritten = 0;
     uint32_t lastLLSize;
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c
index 0fbf3bed7ab..f66579754ff 100644
--- a/tests/fuzz/zstd_helpers.c
+++ b/tests/fuzz/zstd_helpers.c
@@ -123,7 +123,7 @@ FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *pro
       size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
       size_t const limit = MIN(srcSize - offset, remaining);
       size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
-      memcpy(samples + pos, src + offset, toCopy);
+      memcpy(samples + pos, (const char*)src + offset, toCopy);
       pos += toCopy;
       samplesSizes[sample] = toCopy;
     }

From ad7c9fc11e689e105e9c43c016c9160a121ba3b1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 28 Dec 2021 17:41:47 -0800
Subject: [PATCH 23/52] use ZSTD_memcpy(), for proper redirection within Linux
 Kernel

---
 lib/compress/zstd_compress_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index 22579ca0b79..c406e794bdb 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -688,7 +688,7 @@ MEM_STATIC repcodes_t
 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
 {
     repcodes_t newReps;
-    memcpy(&newReps, rep, sizeof(newReps));
+    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
     ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
     return newReps;
 }

From 9a9d1ec6f4536ffeb745f360ef010cefd125bfd0 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 29 Dec 2021 17:47:12 -0800
Subject: [PATCH 24/52] Mark Huffman Decoder Assembly `noexecstack` on All
 Architectures

Apparently, even when the assembly file is empty (because
`ZSTD_ENABLE_ASM_X86_64_BMI2` is false), it still is marked as possibly
needing an executable stack and so the whole library is marked as such. This
commit applies a simple patch for this problem by moving the noexecstack
indication outside the macro guard.

This commit builds on #2857.

This commit addresses #2963.
---
 lib/decompress/huf_decompress_amd64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index 98173cce863..706786bb0db 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -1,7 +1,5 @@
 #include "../common/portability_macros.h"
 
-#if ZSTD_ENABLE_ASM_X86_64_BMI2
-
 /* Stack marking
  * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
  */
@@ -9,6 +7,8 @@
 .section .note.GNU-stack,"",%progbits
 #endif
 
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
 /* Calling convention:
  *
  * %rdi contains the first argument: HUF_DecompressAsmArgs*.

From b1978d60ee6de821501d7e0ce88185f6575028b0 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 30 Dec 2021 14:08:51 -0800
Subject: [PATCH 25/52] POOL_sizeof() only needs a const read-only reference

---
 lib/common/pool.c | 15 ++++++++++-----
 lib/common/pool.h |  4 ++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/lib/common/pool.c b/lib/common/pool.c
index ea70b8b65ad..1146ce99a8f 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -86,7 +86,7 @@ static void* POOL_thread(void* opaque) {
         {   POOL_job const job = ctx->queue[ctx->queueHead];
             ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
             ctx->numThreadsBusy++;
-            ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
+            ctx->queueEmpty = (ctx->queueHead == ctx->queueTail);
             /* Unlock the mutex, signal a pusher, and run the job */
             ZSTD_pthread_cond_signal(&ctx->queuePushCond);
             ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
@@ -105,6 +105,7 @@ static void* POOL_thread(void* opaque) {
     assert(0);  /* Unreachable */
 }
 
+/* ZSTD_createThreadPool() : public access point */
 POOL_ctx* ZSTD_createThreadPool(size_t numThreads) {
     return POOL_create (numThreads, 0);
 }
@@ -114,7 +115,8 @@ POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
 }
 
 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
-                               ZSTD_customMem customMem) {
+                               ZSTD_customMem customMem)
+{
     POOL_ctx* ctx;
     /* Check parameters */
     if (!numThreads) { return NULL; }
@@ -192,7 +194,7 @@ void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
   POOL_free (pool);
 }
 
-size_t POOL_sizeof(POOL_ctx *ctx) {
+size_t POOL_sizeof(const POOL_ctx *ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
     return sizeof(*ctx)
         + ctx->queueSize * sizeof(POOL_job)
@@ -257,7 +259,8 @@ static int isQueueFull(POOL_ctx const* ctx) {
 }
 
 
-static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+static void
+POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
 {
     POOL_job const job = {function, opaque};
     assert(ctx != NULL);
@@ -313,7 +316,9 @@ POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
     return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
 }
 
-POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
+POOL_ctx*
+POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem)
+{
     (void)numThreads;
     (void)queueSize;
     (void)customMem;
diff --git a/lib/common/pool.h b/lib/common/pool.h
index e18aa0708f7..0ebde1805db 100644
--- a/lib/common/pool.h
+++ b/lib/common/pool.h
@@ -53,7 +53,7 @@ int POOL_resize(POOL_ctx* ctx, size_t numThreads);
  * @return threadpool memory usage
  *  note : compatible with NULL (returns 0 in this case)
  */
-size_t POOL_sizeof(POOL_ctx* ctx);
+size_t POOL_sizeof(const POOL_ctx* ctx);
 
 /*! POOL_function :
  *  The function type that can be added to a thread pool.
@@ -70,7 +70,7 @@ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
 
 
 /*! POOL_tryAdd() :
- *  Add the job `function(opaque)` to thread pool _if_ a worker is available.
+ *  Add the job `function(opaque)` to thread pool _if_ a queue slot is available.
  *  Returns immediately even if not (does not block).
  * @return : 1 if successful, 0 if not.
  */

From 6211bfee5ec24dc825c11751c33aa31d618b5f10 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Thu, 30 Dec 2021 14:33:21 -0800
Subject: [PATCH 26/52] fixed backup prototype for POOL_sizeof()

---
 lib/common/pool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/common/pool.c b/lib/common/pool.c
index 1146ce99a8f..2e37cdd73c8 100644
--- a/lib/common/pool.c
+++ b/lib/common/pool.c
@@ -194,7 +194,7 @@ void ZSTD_freeThreadPool (ZSTD_threadPool* pool) {
   POOL_free (pool);
 }
 
-size_t POOL_sizeof(const POOL_ctx *ctx) {
+size_t POOL_sizeof(const POOL_ctx* ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
     return sizeof(*ctx)
         + ctx->queueSize * sizeof(POOL_job)
@@ -346,7 +346,7 @@ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
     return 1;
 }
 
-size_t POOL_sizeof(POOL_ctx* ctx) {
+size_t POOL_sizeof(const POOL_ctx* ctx) {
     if (ctx==NULL) return 0;  /* supports sizeof NULL */
     assert(ctx == &g_poolCtx);
     return sizeof(*ctx);

From 8c53e526db3bcf5a95f67bd347e1f89c79f4fe94 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Fri, 31 Dec 2021 13:03:12 -0800
Subject: [PATCH 27/52] fix performance issue in scenario #2966 (part 1)

When re-using a compression state, across multiple successive compressions,
the state should minimize the amount of allocation and initialization required.

This mostly matters in situations where initialization is an overwhelming task
compared to compression itself.
This can happen when the amount to compress is small,
while the compression state was given the impression that it would be much larger,
aka, streaming mode without providing a srcSize hint.

This lean-initialization optimization was broken in 980f3bbf8354edec0ad32b4430800f330185de6a .

This commit fixes it, making this scenario once again on par with v1.4.9.

Note that this does not completely fix #2966,
since another heavy initialization, specific to row mode,
is also happening (and was not present in v1.4.9).
This will be fixed in a separate commit.
---
 lib/compress/zstd_cwksp.h | 58 ++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index 29d027e9cdb..468b06da0ae 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -243,12 +243,14 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
 
 /**
  * Internal function. Do not use directly.
- * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which
- * counts from the end of the wksp. (as opposed to the object/table segment)
+ * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
+ * which counts from the end of the wksp (as opposed to the object/table segment).
  *
  * Returns a pointer to the beginning of that space.
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) {
+MEM_STATIC void*
+ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
+{
     void* const alloc = (BYTE*)ws->allocStart - bytes;
     void* const bottom = ws->tableEnd;
     DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
@@ -260,6 +262,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t
         ws->allocFailed = 1;
         return NULL;
     }
+    /* the area is reserved from the end of wksp.
+     * If it overlaps with tableValidEnd, it voids guarantees on values' range */
     if (alloc < ws->tableValidEnd) {
         ws->tableValidEnd = alloc;
     }
@@ -269,10 +273,12 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t
 
 /**
  * Moves the cwksp to the next phase, and does any necessary allocations.
+ * cwksp initialization must necessarily go through each phase in order.
  * Returns a 0 on success, or zstd error
  */
-MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
-        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+MEM_STATIC size_t
+ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
+{
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
         /* Going from allocating objects to allocating buffers */
@@ -295,13 +301,13 @@ MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
             {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
                 void* const alloc = ws->objectEnd;
                 size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
-                void* const end = (BYTE*)alloc + bytesToAlign;
+                void* const objectEnd = (BYTE*)alloc + bytesToAlign;
                 DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
-                RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation,
+                RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
                                 "table phase - alignment initial allocation failed!");
-                ws->objectEnd = end;
-                ws->tableEnd = end;
-                ws->tableValidEnd = end;
+                ws->objectEnd = objectEnd;
+                ws->tableEnd = objectEnd;
+                if (ws->tableEnd > ws->tableValidEnd) ws->tableValidEnd = objectEnd;
             }
         }
         ws->phase = phase;
@@ -313,15 +319,17 @@ MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase(
 /**
  * Returns whether this object/buffer/etc was allocated in this workspace.
  */
-MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
+{
     return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
 }
 
 /**
  * Internal function. Do not use directly.
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_internal(
-        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+MEM_STATIC void*
+ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
+{
     void* alloc;
     if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
         return NULL;
@@ -351,14 +359,16 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
 /**
  * Reserves and returns unaligned memory.
  */
-MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
+{
     return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
 }
 
 /**
  * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
+{
     void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
                                             ZSTD_cwksp_alloc_aligned);
     assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
@@ -370,7 +380,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
  * their values remain constrained, allowing us to re-use them without
  * memset()-ing them.
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
+{
     const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
     void* alloc;
     void* end;
@@ -408,9 +419,11 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
 
 /**
  * Aligned on sizeof(void*).
+ * Note : should happen only once, at workspace first initialization
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
-    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
+{
+    size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
     void* alloc = ws->objectEnd;
     void* end = (BYTE*)alloc + roundedBytes;
 
@@ -419,7 +432,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
     end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
 #endif
 
-    DEBUGLOG(5,
+    DEBUGLOG(4,
         "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
         alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
     assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
@@ -427,7 +440,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
     ZSTD_cwksp_assert_internal_consistency(ws);
     /* we must be in the first phase, no advance is possible */
     if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
-        DEBUGLOG(4, "cwksp: object alloc failed!");
+        DEBUGLOG(3, "cwksp: object alloc failed!");
         ws->allocFailed = 1;
         return NULL;
     }
@@ -438,7 +451,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
 #if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
     /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
      * either size. */
-    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
     if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
         __asan_unpoison_memory_region(alloc, bytes);
     }
@@ -447,7 +460,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
     return alloc;
 }
 
-MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
+{
     DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)

From 213dc6110fda144326cc783c02f98a7886cfe4b3 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sat, 1 Jan 2022 23:46:49 -0800
Subject: [PATCH 28/52] fixed fullbench freshCCtx scenario

---
 tests/fullbench.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tests/fullbench.c b/tests/fullbench.c
index f610fef4c75..31b3b6da159 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -230,14 +230,15 @@ local_ZSTD_compressStream_freshCCtx(const void* src, size_t srcSize,
                           void* dst, size_t dstCapacity,
                           void* payload)
 {
-    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
-    size_t r;
-    assert(cctx != NULL);
-
-    r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload);
-
-    ZSTD_freeCCtx(cctx);
-    return r;
+    if (g_cstream != NULL) ZSTD_freeCCtx(g_cstream);
+    g_cstream = ZSTD_createCCtx();
+    assert(g_cstream != NULL);
+
+    {   size_t const r = local_ZSTD_compressStream(src, srcSize, dst, dstCapacity, payload);
+        ZSTD_freeCCtx(g_cstream);
+        g_cstream = NULL;
+        return r;
+    }
 }
 
 static size_t

From a0b9520e38c459408330bfdfb62f666f331d3bed Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 2 Jan 2022 08:52:33 -0800
Subject: [PATCH 29/52] fullbench: added compress_freshCCtx scenario

---
 tests/fullbench.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/tests/fullbench.c b/tests/fullbench.c
index 31b3b6da159..b55ff767f2f 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -108,7 +108,25 @@ local_ZSTD_compress(const void* src, size_t srcSize,
     p.fParams = f;
     p.cParams = *(ZSTD_compressionParameters*)payload;
     return ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p);
-    //return ZSTD_compress(dst, dstSize, src, srcSize, cLevel);
+}
+
+static size_t
+local_ZSTD_compress_freshCCtx(const void* src, size_t srcSize,
+                    void* dst, size_t dstSize,
+                    void* payload)
+{
+    ZSTD_parameters p;
+    ZSTD_frameParameters f = { 1 /* contentSizeHeader*/, 0, 0 };
+    p.fParams = f;
+    p.cParams = *(ZSTD_compressionParameters*)payload;
+    if (g_zcc != NULL) ZSTD_freeCCtx(g_zcc);
+    g_zcc = ZSTD_createCCtx();
+    assert(g_zcc != NULL);
+    {   size_t const r = ZSTD_compress_advanced (g_zcc, dst, dstSize, src, srcSize, NULL ,0, p);
+        ZSTD_freeCCtx(g_zcc);
+        g_zcc = NULL;
+        return r;
+    }
 }
 
 static size_t g_cSize = 0;
@@ -431,6 +449,9 @@ static int benchMem(unsigned benchNb,
     case 2:
         benchFunction = local_ZSTD_decompress; benchName = "decompress";
         break;
+    case 3:
+        benchFunction = local_ZSTD_compress_freshCCtx; benchName = "compress_freshCCtx";
+        break;
 #ifndef ZSTD_DLL_IMPORT
     case 11:
         benchFunction = local_ZSTD_compressContinue; benchName = "compressContinue";
@@ -509,7 +530,6 @@ static int benchMem(unsigned benchNb,
     ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_targetLength, (int)cparams.targetLength);
     ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_strategy, cparams.strategy);
 
-
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_compressionLevel, cLevel);
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_windowLog, (int)cparams.windowLog);
     ZSTD_CCtx_setParameter(g_cstream, ZSTD_c_hashLog, (int)cparams.hashLog);
@@ -528,6 +548,9 @@ static int benchMem(unsigned benchNb,
     case 2:
         g_cSize = ZSTD_compress(dstBuff2, dstBuffSize, src, srcSize, cLevel);
         break;
+    case 3:
+        payload = &cparams;
+        break;
 #ifndef ZSTD_DLL_IMPORT
     case 11:
         payload = &cparams;

From 41ad7332dd59ed9081cee345bd95e080cb96b199 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 4 Jan 2022 09:07:11 -0800
Subject: [PATCH 30/52] Updated expression for better readability

---
 lib/compress/zstd_cwksp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstd_cwksp.h b/lib/compress/zstd_cwksp.h
index 468b06da0ae..dc3f40c80c3 100644
--- a/lib/compress/zstd_cwksp.h
+++ b/lib/compress/zstd_cwksp.h
@@ -306,10 +306,10 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
                 RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
                                 "table phase - alignment initial allocation failed!");
                 ws->objectEnd = objectEnd;
-                ws->tableEnd = objectEnd;
-                if (ws->tableEnd > ws->tableValidEnd) ws->tableValidEnd = objectEnd;
-            }
-        }
+                ws->tableEnd = objectEnd;  /* table area starts being empty */
+                if (ws->tableValidEnd < ws->tableEnd) {
+                    ws->tableValidEnd = ws->tableEnd;
+        }   }   }
         ws->phase = phase;
         ZSTD_cwksp_assert_internal_consistency(ws);
     }

From 35208f702f0a5e4ebe70d651efd75770f176b2d0 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 29 Dec 2021 18:42:32 -0800
Subject: [PATCH 31/52] Add Test Validating Stack is not Executable in
 playTests.sh

---
 tests/playTests.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/playTests.sh b/tests/playTests.sh
index a772b61ff7c..e1da24a4b06 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -304,6 +304,15 @@ zstd -d -f tmp_corrupt.zst --no-check
 zstd -d -f tmp_corrupt.zst --check --no-check # final flag overrides
 zstd -d -f tmp.zst --no-check
 
+if [ "$isWindows" = false ]; then
+  if [ -n "$(which readelf)" ]; then
+    println "test: check if binary has executable stack"
+    file "$ZSTD_BIN"
+    readelf -lW "$ZSTD_BIN"
+    readelf -lW "$ZSTD_BIN" | grep 'GNU_STACK .* RW ' || die "zstd binary has executable stack!"
+  fi
+fi
+
 println "\n===> zstdgrep tests"
 ln -sf "$ZSTD_BIN" zstdcat
 rm -f tmp_grep

From c4f5116e95d7aee1fb36f64ce074fc1187630398 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Tue, 4 Jan 2022 16:05:59 -0800
Subject: [PATCH 32/52] [meson] Explicitly disable assembly for non clang/gcc
 copmilers

After merging #2951 I realized that we will want to explicitly disable
assembly when we aren't including the assembly source file. Otherwise,
if some non clang/gcc compiler does support assembly, it would fail to
build.
---
 build/meson/lib/meson.build | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/build/meson/lib/meson.build b/build/meson/lib/meson.build
index 2a8c55fbf52..6b093378101 100644
--- a/build/meson/lib/meson.build
+++ b/build/meson/lib/meson.build
@@ -47,8 +47,11 @@ libzstd_sources = [join_paths(zstd_rootdir, 'lib/common/entropy_common.c'),
 
 # really we need anything that defines __GNUC__ as that is what ZSTD_ASM_SUPPORTED is gated on
 # but these are the two compilers that are supported in tree and actually handle this correctly
+# Otherwise, explicitly disable assmebly.
 if [compiler_gcc, compiler_clang].contains(cc_id)
   libzstd_sources += join_paths(zstd_rootdir, 'lib/decompress/huf_decompress_amd64.S')
+else
+  add_project_arguments('-DZSTD_DISABLE_ASM', language: 'c')
 endif
 
 # Explicit define legacy support

From 4620ce6a9abe7f2aad9ae0ecd4768cd38491edb8 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 14:53:22 -0500
Subject: [PATCH 33/52] Makefiles: Add `noexecstack` Options to Compilation and
 Linking

Hopefully this marks the binary artifacts `noexecstack` even on platforms
where binaries default to true.
---
 lib/libzstd.mk    | 28 ++++++++++++++++++++++++++++
 programs/Makefile |  2 --
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/lib/libzstd.mk b/lib/libzstd.mk
index c04957c6fae..e682e446ae9 100644
--- a/lib/libzstd.mk
+++ b/lib/libzstd.mk
@@ -34,6 +34,8 @@ ZSTD_NO_ASM ?= 0
 # libzstd helpers
 ##################################################################
 
+VOID ?= /dev/null
+
 # Make 4.3 doesn't support '\#' anymore (https://lwn.net/Articles/810071/)
 NUM_SYMBOL := \#
 
@@ -96,6 +98,32 @@ CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
 LDFLAGS  += $(MOREFLAGS)
 FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
 
+ifndef ALREADY_APPENDED_NOEXECSTACK
+export ALREADY_APPENDED_NOEXECSTACK := 1
+ifeq ($(shell echo "int main(int argc, char* argv[]) { (void)argc; (void)argv; return 0; }" | $(CC) $(FLAGS) -z noexecstack -x c -Werror - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
+$(info Supports noexecstack linker flag!)
+$(info $(LDFLAGS))
+LDFLAGS += -z noexecstack
+$(info $(LDFLAGS))
+else
+$(info Doesn't support noexecstack linker flag!)
+endif
+ifeq ($(shell echo | $(CC) $(FLAGS) -Wa,--noexecstack -x assembler -Werror -c - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
+$(info Supports noexecstack assembler flag!)
+$(info $(CFLAGS))
+CFLAGS += -Wa,--noexecstack
+$(info $(CFLAGS))
+else ifeq ($(shell echo | $(CC) $(FLAGS) -Qunused-arguments -Wa,--noexecstack -x assembler -Werror -c - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
+# See e.g.: https://github.com/android/ndk/issues/171
+$(info Supports noexecstack assembler flag with unused arg suppression!)
+$(info $(CFLAGS))
+CFLAGS += -Qunused-arguments -Wa,--noexecstack
+$(info $(CFLAGS))
+else
+$(info Doesn't support noexecstack assembler flag!)
+endif
+endif
+
 HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
 GREP_OPTIONS ?=
 ifeq ($HAVE_COLORNEVER, 1)
diff --git a/programs/Makefile b/programs/Makefile
index a54900cc1e9..da848eb66bc 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -62,8 +62,6 @@ else
   EXT =
 endif
 
-VOID = /dev/null
-
 # thread detection
 NO_THREAD_MSG := ==> no threads, building without multithreading support
 HAVE_PTHREAD := $(shell printf '$(NUM_SYMBOL)include <pthread.h>\nint main(void) { return 0; }' > have_pthread.c && $(CC) $(FLAGS) -o have_pthread$(EXT) have_pthread.c -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0; rm have_pthread.c)

From b12edddb3784c59c459a40f6108027b0bcedaf2f Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 15:42:58 -0500
Subject: [PATCH 34/52] Write `GNU-stack` Section on All ELF Architectures

Previously we did this only on Linux, which missed other Unices.
---
 lib/decompress/huf_decompress_amd64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index 706786bb0db..d2fb90ca110 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -3,7 +3,7 @@
 /* Stack marking
  * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
  */
-#if defined(__linux__) && defined(__ELF__)
+#if defined(__ELF__)
 .section .note.GNU-stack,"",%progbits
 #endif
 

From ef1f9e80ffc62c5de06375bc62012373a5d0b77f Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 16:03:32 -0500
Subject: [PATCH 35/52] Restrict `GNU-stack` Note to GNU Assemblers

---
 lib/decompress/huf_decompress_amd64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index d2fb90ca110..0b4cd38ee10 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -3,7 +3,7 @@
 /* Stack marking
  * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
  */
-#if defined(__ELF__)
+#if defined(__ELF__) && defined(__GNUC__)
 .section .note.GNU-stack,"",%progbits
 #endif
 

From ff5d1daf33abe71f95f2c90de877ac98cf01af83 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 15:21:48 -0500
Subject: [PATCH 36/52] Clean Up Debugging Statements

---
 lib/libzstd.mk     | 13 -------------
 tests/playTests.sh |  4 +---
 2 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/lib/libzstd.mk b/lib/libzstd.mk
index e682e446ae9..73a8cd5d860 100644
--- a/lib/libzstd.mk
+++ b/lib/libzstd.mk
@@ -101,26 +101,13 @@ FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
 ifndef ALREADY_APPENDED_NOEXECSTACK
 export ALREADY_APPENDED_NOEXECSTACK := 1
 ifeq ($(shell echo "int main(int argc, char* argv[]) { (void)argc; (void)argv; return 0; }" | $(CC) $(FLAGS) -z noexecstack -x c -Werror - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
-$(info Supports noexecstack linker flag!)
-$(info $(LDFLAGS))
 LDFLAGS += -z noexecstack
-$(info $(LDFLAGS))
-else
-$(info Doesn't support noexecstack linker flag!)
 endif
 ifeq ($(shell echo | $(CC) $(FLAGS) -Wa,--noexecstack -x assembler -Werror -c - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
-$(info Supports noexecstack assembler flag!)
-$(info $(CFLAGS))
 CFLAGS += -Wa,--noexecstack
-$(info $(CFLAGS))
 else ifeq ($(shell echo | $(CC) $(FLAGS) -Qunused-arguments -Wa,--noexecstack -x assembler -Werror -c - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
 # See e.g.: https://github.com/android/ndk/issues/171
-$(info Supports noexecstack assembler flag with unused arg suppression!)
-$(info $(CFLAGS))
 CFLAGS += -Qunused-arguments -Wa,--noexecstack
-$(info $(CFLAGS))
-else
-$(info Doesn't support noexecstack assembler flag!)
 endif
 endif
 
diff --git a/tests/playTests.sh b/tests/playTests.sh
index e1da24a4b06..695d4333ba4 100755
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -306,9 +306,7 @@ zstd -d -f tmp.zst --no-check
 
 if [ "$isWindows" = false ]; then
   if [ -n "$(which readelf)" ]; then
-    println "test: check if binary has executable stack"
-    file "$ZSTD_BIN"
-    readelf -lW "$ZSTD_BIN"
+    println "test: check if binary has executable stack (#2963)"
     readelf -lW "$ZSTD_BIN" | grep 'GNU_STACK .* RW ' || die "zstd binary has executable stack!"
   fi
 fi

From 8dd943e42c13ca33d07ead274e07defd1e2114c5 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Thu, 23 Dec 2021 14:57:16 -0500
Subject: [PATCH 37/52] Improve Module Map File

This commit makes several changes:

1. It adds modules for the dictionary builder and errors headers.
2. It captures all of the macros that are used to configure these headers.
   When the headers are imported as modules and one of these macros is defined
   the compiler issues a warning that it needs to be defined on the CLI.
3. It promotes the modulemap file into the root of the lib directory.
   Experimentation shows that clang's `-fimplicit-module-maps` will find the
   modulemap when placed here, but not when it's put in a subdirectory.
---
 lib/module.modulemap           | 25 +++++++++++++++++++++++++
 lib/modulemap/module.modulemap |  4 ----
 2 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 lib/module.modulemap
 delete mode 100644 lib/modulemap/module.modulemap

diff --git a/lib/module.modulemap b/lib/module.modulemap
new file mode 100644
index 00000000000..bbb939782e1
--- /dev/null
+++ b/lib/module.modulemap
@@ -0,0 +1,25 @@
+module libzstd [extern_c] {
+    header "zstd.h"
+    export *
+    config_macros [exhaustive] /* zstd.h */ \
+        ZSTD_STATIC_LINKING_ONLY, \
+        ZSTDLIB_VISIBLE, \
+        ZSTD_DLL_EXPORT, \
+        ZSTDLIB_STATIC_API, \
+        ZSTD_DISABLE_DEPRECATE_WARNINGS, \
+        ZSTD_CLEVEL_DEFAULT, \
+        /* zdict.h */ ZDICT_STATIC_LINKING_ONLY, \
+        ZDICTLIB_VISIBILITY, \
+        ZDICT_DISABLE_DEPRECATE_WARNINGS, \
+        /* zstd_errors.h */ ZSTDERRORLIB_VISIBILITY
+
+    module dictbuilder [extern_c] {
+        header "zdict.h"
+        export *
+    }
+
+    module errors [extern_c] {
+        header "zstd_errors.h"
+        export *
+    }
+}
diff --git a/lib/modulemap/module.modulemap b/lib/modulemap/module.modulemap
deleted file mode 100644
index eeda6988543..00000000000
--- a/lib/modulemap/module.modulemap
+++ /dev/null
@@ -1,4 +0,0 @@
-module libzstd [extern_c] {
-    header "../zstd.h"
-    export *
-}

From 17782224460107dcbc29141db9a9b1209746fd27 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Thu, 23 Dec 2021 15:01:57 -0500
Subject: [PATCH 38/52] Update the Swift Package Definition to Reflect Move

---
 Package.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Package.swift b/Package.swift
index 75a5a0b3323..97f2c6a5251 100644
--- a/Package.swift
+++ b/Package.swift
@@ -25,7 +25,7 @@ let package = Package(
             name: "libzstd",
             path: "lib",
             sources: [ "common", "compress", "decompress", "dictBuilder" ],
-            publicHeadersPath: "modulemap",
+            publicHeadersPath: ".",
             cSettings: [
                 .headerSearchPath(".")
             ])

From df5ad5a0f1e087e1202806c8b4baf72d7841edf4 Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Wed, 5 Jan 2022 20:36:51 -0500
Subject: [PATCH 39/52] Add a compile option to explicitely disable assembly

---
 build/cmake/lib/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build/cmake/lib/CMakeLists.txt b/build/cmake/lib/CMakeLists.txt
index 612f667f9c0..4905bd91338 100644
--- a/build/cmake/lib/CMakeLists.txt
+++ b/build/cmake/lib/CMakeLists.txt
@@ -24,6 +24,7 @@ file(GLOB CommonSources ${LIBRARY_DIR}/common/*.c)
 file(GLOB CompressSources ${LIBRARY_DIR}/compress/*.c)
 if (MSVC)
     file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c)
+    add_compile_options(-DZSTD_DISABLE_ASM)
 else ()
     file(GLOB DecompressSources ${LIBRARY_DIR}/decompress/*.c ${LIBRARY_DIR}/decompress/*.S)
 endif ()

From fc946d131b3a028c23d2ae84ade6b6114aa6fec2 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Thu, 6 Jan 2022 00:20:05 -0500
Subject: [PATCH 40/52] Remove Unused Include

---
 programs/dibio.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/programs/dibio.c b/programs/dibio.c
index 04860dbbfa6..d19f954486f 100644
--- a/programs/dibio.c
+++ b/programs/dibio.c
@@ -31,7 +31,6 @@
 
 #include "timefn.h"         /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
 #include "../lib/common/mem.h"  /* read */
-#include "../lib/common/error_private.h"
 #include "dibio.h"
 
 

From 4bd96a61f103ac7ed8b52d39e99424f2f9b52643 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Thu, 6 Jan 2022 00:20:49 -0500
Subject: [PATCH 41/52] Avoid xxHash Dependency by Inlining

xxHash symbols are present in `libzstd.so`, but they are local and therefore
unavailable outside the lib. There are two possible solutions to the problem.
We could make those symbols global, or we could remove the dependency.

This commit chooses the latter approach. I suppose this comes at the cost of
code size / build time. I'm open to comments on whether this is a good thing
to do, especially since this will apply even when we are statically linking
everything.
---
 programs/benchzstd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/programs/benchzstd.c b/programs/benchzstd.c
index 1e4d717d153..fa2659efbbb 100644
--- a/programs/benchzstd.c
+++ b/programs/benchzstd.c
@@ -31,9 +31,14 @@
 #include "timefn.h"      /* UTIL_time_t */
 #include "benchfn.h"
 #include "../lib/common/mem.h"
+#ifndef ZSTD_STATIC_LINKING_ONLY
 #define ZSTD_STATIC_LINKING_ONLY
+#endif
 #include "../lib/zstd.h"
 #include "datagen.h"     /* RDG_genBuffer */
+#ifndef XXH_INLINE_ALL
+#define XXH_INLINE_ALL
+#endif
 #include "../lib/common/xxhash.h"
 #include "benchzstd.h"
 #include "../lib/zstd_errors.h"

From 4d8a2132d0e453232a46dd448e5137035ba25bee Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Thu, 6 Jan 2022 16:00:02 -0800
Subject: [PATCH 42/52] [opt] Fix oss-fuzz bug in optimal parser

oss-fuzz uncovered a scenario where we're evaluating the cost of litLength = 131072,
which can't be represented in the zstd format, so we accessed 1 beyond LL_bits.

Fix the issue by making it cost 1 bit more than litLength = 131071.

There are still follow ups:
1. This happened because literals_cost[0] = 0, so the optimal parser chose 36 literals
   over a match. Should we bound literals_cost[literal] > 0, unless the block truly only
   has one literal value?
2. When no matches are found, the cost model isn't updated. In this case no matches were
   found for an entire block. So the literals cost model wasn't updated at all. That made
   the optimal parser think literals_cost[0] = 0, where it is actually quite high, since
   the block was entirely random noise.

Credit to OSS-Fuzz.
---
 lib/compress/zstd_opt.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 2fa10816f9f..1b1ddad4289 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -269,7 +269,16 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
  * cost of literalLength symbol */
 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
+    if (optPtr->priceType == zop_predef)
+        return WEIGHT(litLength, optLevel);
+    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+     * because it isn't representable in the zstd format. So instead just
+     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
+     * would be all literals.
+     */
+    if (litLength == ZSTD_BLOCKSIZE_MAX)
+        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
 
     /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);

From c7b03c217c2542152a08e4ca343f8a27d3680901 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Fri, 7 Jan 2022 09:35:27 -0800
Subject: [PATCH 43/52] [license] Fix license header of huf_decompress_amd64.S

* Add the license header for `huf_decompress_amd64.S`
* Add `.S` files to the `test-license.py` test
---
 lib/decompress/huf_decompress_amd64.S | 10 ++++++++++
 tests/test-license.py                 |  1 +
 2 files changed, 11 insertions(+)

diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index 0b4cd38ee10..01bb6d76787 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -1,3 +1,13 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
 #include "../common/portability_macros.h"
 
 /* Stack marking
diff --git a/tests/test-license.py b/tests/test-license.py
index 224776536dc..f27cb677431 100755
--- a/tests/test-license.py
+++ b/tests/test-license.py
@@ -43,6 +43,7 @@ def to_abs(d):
     "Makefile",
     ".mk",
     ".py",
+    ".S",
 ]
 
 # License should certainly be in the first 10 KB.

From 46ad9377e8eac2c77ee677a9af94104d996561d9 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 16:56:24 -0500
Subject: [PATCH 44/52] Bump Version Number to 1.5.2

---
 lib/zstd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd.h b/lib/zstd.h
index 66ec125d00e..a88ae7bf8ed 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -74,7 +74,7 @@ extern "C" {
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    5
-#define ZSTD_VERSION_RELEASE  1
+#define ZSTD_VERSION_RELEASE  2
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 
 /*! ZSTD_versionNumber() :

From e1323744b6998ac237c25904331c6d820bf02aa4 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 16:59:07 -0500
Subject: [PATCH 45/52] Update Docs

---
 doc/zstd_manual.html |   4 +-
 programs/zstd.1      | 254 ++++++++++++++++++++++++++++++++++---------
 programs/zstdgrep.1  |  13 ++-
 programs/zstdless.1  |   9 +-
 4 files changed, 225 insertions(+), 55 deletions(-)

diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
index 1080614ea6b..9f73c4c815e 100644
--- a/doc/zstd_manual.html
+++ b/doc/zstd_manual.html
@@ -1,10 +1,10 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.5.1 Manual</title>
+<title>zstd 1.5.2 Manual</title>
 </head>
 <body>
-<h1>zstd 1.5.1 Manual</h1>
+<h1>zstd 1.5.2 Manual</h1>
 <hr>
 <a name="Contents"></a><h2>Contents</h2>
 <ol>
diff --git a/programs/zstd.1 b/programs/zstd.1
index da5814963dd..c7a19dbacac 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -1,345 +1,501 @@
-.TH "ZSTD" "1" "December 2021" "zstd 1.5.1" "User Commands"
+.
+.TH "ZSTD" "1" "January 2022" "zstd 1.5.2" "User Commands"
+.
 .SH "NAME"
 \fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
+.
 .SH "SYNOPSIS"
-.TS
-allbox;
-\fBzstd\fR [\fIOPTIONS\fR] [\-	\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
-.TE
+\fBzstd\fR [\fIOPTIONS\fR] [\-|\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
+.
 .P
 \fBzstdmt\fR is equivalent to \fBzstd \-T0\fR
+.
 .P
 \fBunzstd\fR is equivalent to \fBzstd \-d\fR
+.
 .P
 \fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
+.
 .SH "DESCRIPTION"
 \fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, with fast modes at > 200 MB/s per core, and strong modes nearing lzma compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
+.
 .P
 \fBzstd\fR command line syntax is generally similar to gzip, but features the following differences :
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default\. Use \fB\-q\fR to turn them off\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fBzstd\fR does not accept input from console, but it properly accepts \fBstdin\fR when it\'s not the console\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fBzstd\fR displays a short help page when command line is an error\. Use \fB\-q\fR to turn it off\.
+.
 .IP "" 0
+.
 .P
 \fBzstd\fR compresses or decompresses each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal : it will display an error message and skip the \fIfile\fR\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
+.
 .P
 Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name:
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 When compressing, the suffix \fB\.zst\fR is appended to the source filename to get the target filename\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 When decompressing, the \fB\.zst\fR suffix is removed from the source filename to get the target filename
+.
 .IP "" 0
+.
 .SS "Concatenation with \.zst files"
 It is possible to concatenate \fB\.zst\fR files as is\. \fBzstd\fR will decompress such files as if they were a single \fB\.zst\fR file\.
+.
 .SH "OPTIONS"
+.
 .SS "Integer suffixes and special values"
 In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\.
+.
 .TP
 \fBKiB\fR
-Multiply the integer by 1,024 (2\e^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
+.
 .TP
 \fBMiB\fR
-Multiply the integer by 1,048,576 (2\e^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
+.
 .SS "Operation mode"
 If multiple operation mode options are given, the last one takes effect\.
+.
 .TP
 \fB\-z\fR, \fB\-\-compress\fR
 Compress\. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, \fBunzstd\fR implies \fB\-\-decompress\fR)\.
+.
 .TP
 \fB\-d\fR, \fB\-\-decompress\fR, \fB\-\-uncompress\fR
 Decompress\.
+.
 .TP
 \fB\-t\fR, \fB\-\-test\fR
 Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\-\-decompress \-\-stdout\fR except that the decompressed data is discarded instead of being written to standard output\. No files are created or removed\.
+.
 .TP
 \fB\-b#\fR
 Benchmark file(s) using compression level #
+.
 .TP
 \fB\-\-train FILEs\fR
 Use FILEs as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\.
+.
 .TP
 \fB\-l\fR, \fB\-\-list\fR
 Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command can be augmented with the \fB\-v\fR modifier\.
+.
 .SS "Operation modifiers"
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-auto\-threads={physical,logical} (default: physical)\fR: When using a default amount of threads via \fB\-T0\fR, choose the default based on the number of detected physical or logical cores\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if not \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
+.
 .IP
 Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize\.
+.
 .IP
 Note: cannot use both this and \-D together Note: \fB\-\-long\fR mode will be automatically activated if chainLog < fileLog (fileLog being the windowLog required to cover the whole file)\. You can also manually force it\. Node: for all levels, you can use \-\-patch\-from in \-\-single\-thread mode to improve compression ratio at the cost of speed Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e 4096), and by setting a large \fB\-\-zstd=chainLog=\fR
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \-\-content\-size (meaning that the original size will be placed in the header)\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, Zstandard uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (ie\. you can increase or decrease it)\.
+.
 .IP
 This is also used during compression when using with \-\-patch\-from=\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MB)\.
+.
 .IP
 Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GB\. zstd will load training samples up to the memory limit and ignore the rest\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-o FILE\fR: save result into \fBFILE\fR
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-c\fR, \fB\-\-stdout\fR: write to standard output (even if it is the console)
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \-o, will trigger a confirmation prompt (which can be silenced with \-f), as this is a destructive operation\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-r\fR: operate recursively on directories\. It selects all files in the named directory and all its subdirectories\. This can be useful both to reduce command line typing, and to circumvent shell expansion limitations, when there are a lot of files and naming breaks the maximum size of a command line\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-filelist FILE\fR read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-output\-dir\-flat DIR\fR: resulting files are stored into target \fBDIR\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBDIR\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-output\-dir\-mirror DIR\fR: similar to \fB\-\-output\-dir\-flat\fR, the output files are stored underneath target \fBDIR\fR directory, but this option will replicate input directory hierarchy into output \fBDIR\fR\.
+.
 .IP
 If input directory contains "\.\.", the files in this directory will be ignored\. If input directory is an absolute directory (i\.e\. "/var/tmp/abc"), it will be stored into the "output\-dir/var/tmp/abc"\. If there are multiple input files or directories, name collision resolution will follow the same rules as \fB\-\-output\-dir\-flat\fR\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-show\-default\-cparams\fR: Shows the default compression parameters that will be used for a particular src file\. If the provided src file is not a regular file (eg\. named pipe), the cli will just output the default parameters\. That is, the parameters that are used when the src size is unknown\.
-.IP "\[ci]" 4
+.
+.IP "\(bu" 4
 \fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
+.
 .IP "" 0
+.
 .SS "Restricted usage of Environment Variables"
 Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\.
+.
 .P
 \fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\.
+.
 .P
 \fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at ZSTDMT_NBWORKERS_MAX==200\. \fBzstd\fR must be compiled with multithread support for this to have any effect\.
+.
 .P
 They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
+.
 .SH "DICTIONARY BUILDER"
 \fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
+.
 .TP
 \fB\-\-train FILEs\fR
 Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weight typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\.
+.
 .IP
 \fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
+.
 .TP
 \fB\-o file\fR
 Dictionary saved into \fBfile\fR (default name: dictionary)\.
+.
 .TP
 \fB\-\-maxdict=#\fR
 Limit dictionary to specified size (default: 112640)\.
+.
 .TP
 \fB\-#\fR
 Use \fB#\fR compression level during training (optional)\. Will generate statistics more tuned for selected compression level, resulting in a \fIsmall\fR compression ratio improvement for this level\.
+.
 .TP
 \fB\-B#\fR
 Split input files into blocks of size # (default: no split)
+.
 .TP
 \fB\-M#\fR, \fB\-\-memory=#\fR
 Limit the amount of sample data loaded for training (default: 2 GB)\. See above for details\.
+.
 .TP
 \fB\-\-dictID=#\fR
 A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\. However, it\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\.
+.
 .TP
 \fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
 Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
+.
 .IP
 Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
+.
 .IP
 Examples:
+.
 .IP
 \fBzstd \-\-train\-cover FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-cover=k=50 FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-cover=shrink FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-cover=shrink=2 FILEs\fR
+.
 .TP
 \fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
 Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and different default value of split If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
+.
 .IP
 \fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
+.
 .IP
 Examples:
+.
 .IP
 \fBzstd \-\-train\-fastcover FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
+.
 .TP
 \fB\-\-train\-legacy[=selectivity=#]\fR
 Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
+.
 .IP
 Examples:
+.
 .IP
 \fBzstd \-\-train\-legacy FILEs\fR
+.
 .IP
 \fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
+.
 .SH "BENCHMARK"
+.
 .TP
 \fB\-b#\fR
 benchmark file(s) using compression level #
+.
 .TP
 \fB\-e#\fR
 benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
+.
 .TP
 \fB\-i#\fR
 minimum evaluation time, in seconds (default: 3s), benchmark mode only
+.
 .TP
 \fB\-B#\fR, \fB\-\-block\-size=#\fR
 cut file(s) into independent blocks of size # (default: no block)
+.
 .TP
 \fB\-\-priority=rt\fR
 set process priority to real\-time
+.
 .P
 \fBOutput Format:\fR CompressionLevel#Filename : IntputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
+.
 .P
 \fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
+.
 .SH "ADVANCED COMPRESSION OPTIONS"
-### \-B#: Select the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to (slightly) different compressed frames\.
+.
+.SS "\-B#:"
+Select the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to (slightly) different compressed frames\.
+.
 .SS "\-\-zstd[=options]:"
 \fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+.
 .TP
 \fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
 Specify a strategy used by a match finder\.
+.
 .IP
 There are 9 strategies numbered from 1 to 9, from faster to stronger: 1=ZSTD_fast, 2=ZSTD_dfast, 3=ZSTD_greedy, 4=ZSTD_lazy, 5=ZSTD_lazy2, 6=ZSTD_btlazy2, 7=ZSTD_btopt, 8=ZSTD_btultra, 9=ZSTD_btultra2\.
+.
 .TP
 \fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR
 Specify the maximum number of bits for a match distance\.
+.
 .IP
 The higher number of increases the chance to find a match which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32\-bit platforms and 31 (2 GiB) on 64\-bit platforms\.
+.
 .IP
 Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
+.
 .TP
 \fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR
 Specify the maximum number of bits for a hash table\.
+.
 .IP
 Bigger hash tables cause less collisions which usually makes compression faster, but requires more memory during compression\.
+.
 .IP
 The minimum \fIhlog\fR is 6 (64 B) and the maximum is 30 (1 GiB)\.
+.
 .TP
 \fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR
 Specify the maximum number of bits for a hash chain or a binary tree\.
+.
 .IP
 Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the ZSTD_fast strategy\.
+.
 .IP
 The minimum \fIclog\fR is 6 (64 B) and the maximum is 29 (524 Mib) on 32\-bit platforms and 30 (1 Gib) on 64\-bit platforms\.
+.
 .TP
 \fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR
 Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale\.
+.
 .IP
 More searches increases the chance to find a match which usually increases compression ratio but decreases compression speed\.
+.
 .IP
 The minimum \fIslog\fR is 1 and the maximum is \'windowLog\' \- 1\.
+.
 .TP
 \fBminMatch\fR=\fImml\fR, \fBmml\fR=\fImml\fR
 Specify the minimum searched length of a match in a hash table\.
+.
 .IP
 Larger search lengths usually decrease compression ratio but improve decompression speed\.
+.
 .IP
 The minimum \fImml\fR is 3 and the maximum is 7\.
+.
 .TP
 \fBtargetLength\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR
 The impact of this field vary depending on selected strategy\.
+.
 .IP
 For ZSTD_btopt, ZSTD_btultra and ZSTD_btultra2, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\. t For ZSTD_fast, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed : a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\.
+.
 .IP
 For all other strategies, this field has no impact\.
+.
 .IP
 The minimum \fItlen\fR is 0 and the maximum is 128 Kib\.
+.
 .TP
 \fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
 Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
+.
 .IP
 The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default" : \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\.
+.
 .TP
 \fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR
 Specify the maximum size for a hash table used for long distance matching\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\.
+.
 .IP
 The minimum \fIlhlog\fR is 6 and the maximum is 30 (default: 20)\.
+.
 .TP
 \fBldmMinMatch\fR=\fIlmml\fR, \fBlmml\fR=\fIlmml\fR
 Specify the minimum searched length of a match for long distance matching\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Larger/very small values usually decrease compression ratio\.
+.
 .IP
 The minimum \fIlmml\fR is 4 and the maximum is 4096 (default: 64)\.
+.
 .TP
 \fBldmBucketSizeLog\fR=\fIlblog\fR, \fBlblog\fR=\fIlblog\fR
 Specify the size of each bucket for the hash table used for long distance matching\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Larger bucket sizes improve collision resolution but decrease compression speed\.
+.
 .IP
 The minimum \fIlblog\fR is 1 and the maximum is 8 (default: 3)\.
+.
 .TP
 \fBldmHashRateLog\fR=\fIlhrlog\fR, \fBlhrlog\fR=\fIlhrlog\fR
 Specify the frequency of inserting entries into the long distance matching hash table\.
+.
 .IP
 This option is ignored unless long distance matching is enabled\.
+.
 .IP
 Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\.
+.
 .IP
 The default value is \fBwlog \- lhlog\fR\.
+.
 .SS "Example"
 The following parameters sets advanced compression options to something similar to predefined level 19 for files bigger than 256 KB:
+.
 .P
 \fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
+.
 .SH "BUGS"
 Report bugs at: https://github\.com/facebook/zstd/issues
+.
 .SH "AUTHOR"
 Yann Collet
diff --git a/programs/zstdgrep.1 b/programs/zstdgrep.1
index 06465cebc35..563696d339e 100644
--- a/programs/zstdgrep.1
+++ b/programs/zstdgrep.1
@@ -1,17 +1,26 @@
-.TH "ZSTDGREP" "1" "December 2021" "zstd 1.5.1" "User Commands"
+.
+.TH "ZSTDGREP" "1" "January 2022" "zstd 1.5.2" "User Commands"
+.
 .SH "NAME"
 \fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
+.
 .SH "SYNOPSIS"
-\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \|\.\|\.\|\.]
+\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.]
+.
 .SH "DESCRIPTION"
 \fBzstdgrep\fR runs \fBgrep (1)\fR on files, or \fBstdin\fR if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\.
+.
 .P
 The grep\-flags and pattern arguments are passed on to \fBgrep (1)\fR\. If an \fB\-e\fR flag is found in the \fBgrep\-flags\fR, \fBzstdgrep\fR will not look for a pattern argument\.
+.
 .P
 Note that modern \fBgrep\fR alternatives such as \fBripgrep\fR (\fBrg\fR) support \fBzstd\fR\-compressed files out of the box, and can prove better alternatives than \fBzstdgrep\fR notably for unsupported complex pattern searches\. Note though that such alternatives may also feature some minor command line differences\.
+.
 .SH "EXIT STATUS"
 In case of missing arguments or missing pattern, 1 will be returned, otherwise 0\.
+.
 .SH "SEE ALSO"
 \fBzstd (1)\fR
+.
 .SH "AUTHORS"
 Thomas Klausner \fIwiz@NetBSD\.org\fR
diff --git a/programs/zstdless.1 b/programs/zstdless.1
index d90ea0f5465..ab38e7a7f45 100644
--- a/programs/zstdless.1
+++ b/programs/zstdless.1
@@ -1,9 +1,14 @@
-.TH "ZSTDLESS" "1" "December 2021" "zstd 1.5.1" "User Commands"
+.
+.TH "ZSTDLESS" "1" "January 2022" "zstd 1.5.2" "User Commands"
+.
 .SH "NAME"
 \fBzstdless\fR \- view zstandard\-compressed files
+.
 .SH "SYNOPSIS"
-\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \|\.\|\.\|\.]
+\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \.\.\.]
+.
 .SH "DESCRIPTION"
 \fBzstdless\fR runs \fBless (1)\fR on files or stdin, if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\.
+.
 .SH "SEE ALSO"
 \fBzstd (1)\fR

From 308a11b8e88f5201ddeb839269268b3824567d89 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Fri, 7 Jan 2022 15:09:56 -0800
Subject: [PATCH 46/52] Fix stderr progress logging for decompression

When decompressing with `-q` and an output file, the progress bar was mistakenly printed. This is a minimal fix, with a larger refactor to be stacked on top of it.

Fixes #2967.
---
 programs/fileio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/fileio.c b/programs/fileio.c
index 379d334eb02..89eecb9e7d7 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -2245,7 +2245,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
         ZSTD_inBuffer  inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
         ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
         size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
-        const int displayLevel = (!fCtx->hasStdoutOutput || g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2;
+        const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2;
         UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
         if (ZSTD_isError(readSizeHint)) {
             DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",

From 144879a7cd1b7ac40cf46178f3951a0a97df3c63 Mon Sep 17 00:00:00 2001
From: "W. Felix Handte" <w@felixhandte.com>
Date: Wed, 5 Jan 2022 18:41:14 -0500
Subject: [PATCH 47/52] Update Changelog

---
 CHANGELOG | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 31e9007b871..4e0045b950c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+v1.5.2 (Jan, 2022)
+perf: Regain Minimal memset()-ing During Reuse of Compression Contexts (@Cyan4973, #2969)
+build: Build Zstd with `noexecstack` on All Architectures (@felixhandte, #2964)
+doc: Clarify Licensing (@terrelln, #2981)
+
 v1.5.1 (Dec, 2021)
 perf: rebalanced compression levels, to better match the intended speed/level curve, by @senhuang42
 perf: faster huffman decoder, using x64 assembly, by @terrelln

From 568c69a4eb0e30fb03a75176804b47ed51dd3ab1 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 11 Jan 2022 08:09:58 -0800
Subject: [PATCH 48/52] x86-64: Hide internal assembly functions

Hide x86-64 internal assembly functions. Before

$ nm -D lib/libzstd.so.1 | grep usingDTable_internal_bmi2_asm_loop
00000000000c23c0 T _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
00000000000c23c0 T HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
00000000000c283d T _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
00000000000c283d T HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
$

After

$ nm -D lib/libzstd.so.1 | grep usingDTable_internal_bmi2_asm_loop
$

This fixes issue #2990.
---
 lib/common/portability_macros.h       | 6 ++++++
 lib/decompress/huf_decompress.c       | 4 ++--
 lib/decompress/huf_decompress_amd64.S | 4 ++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/common/portability_macros.h b/lib/common/portability_macros.h
index 627ef9eed4a..2143817f574 100644
--- a/lib/common/portability_macros.h
+++ b/lib/common/portability_macros.h
@@ -65,6 +65,12 @@
 # endif
 #endif
 
+/* Mark the internal assembly functions as hidden  */
+#ifdef __ELF__
+# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
+#else
+# define ZSTD_HIDE_ASM_FUNCTION(func)
+#endif
 
 /* Enable runtime BMI2 dispatch based on the CPU.
  * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c
index fa61968ba48..2027188255e 100644
--- a/lib/decompress/huf_decompress.c
+++ b/lib/decompress/huf_decompress.c
@@ -664,7 +664,7 @@ size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize,
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2
 
-HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args);
+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
 
 static HUF_ASM_X86_64_BMI2_ATTRS
 size_t
@@ -1380,7 +1380,7 @@ size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize,
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2
 
-HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args);
+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
 
 static HUF_ASM_X86_64_BMI2_ATTRS size_t
 HUF_decompress4X2_usingDTable_internal_bmi2_asm(
diff --git a/lib/decompress/huf_decompress_amd64.S b/lib/decompress/huf_decompress_amd64.S
index 01bb6d76787..49589cb6114 100644
--- a/lib/decompress/huf_decompress_amd64.S
+++ b/lib/decompress/huf_decompress_amd64.S
@@ -30,6 +30,10 @@
  * TODO: Support Windows calling convention.
  */
 
+ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
+ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
+ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
+ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
 .global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
 .global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
 .global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop

From 57a86d9ec636c75f17a3005962ff178545e404f5 Mon Sep 17 00:00:00 2001
From: Mike Gilbert <floppym@gentoo.org>
Date: Thu, 13 Jan 2022 16:47:18 -0500
Subject: [PATCH 49/52] Avoid updating timestamps when the destination is
 stdout

Fixes: 9cd6c1ff4d56fc74a2cbdfd9bcc82a64e0fe4bb7
Fixes: https://github.com/facebook/zstd/issues/2997
---
 programs/fileio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/programs/fileio.c b/programs/fileio.c
index 89eecb9e7d7..5338fa62955 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1679,6 +1679,7 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
     if (ress.dstFile == NULL) {
         int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
         if ( strcmp (srcFileName, stdinmark)
+          && strcmp (dstFileName, stdoutmark)
           && UTIL_stat(srcFileName, &statbuf)
           && UTIL_isRegularFileStat(&statbuf) ) {
             dstFilePermissions = statbuf.st_mode;
@@ -2634,6 +2635,7 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
     if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
         int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
         if ( strcmp(srcFileName, stdinmark)   /* special case : don't transfer permissions from stdin */
+          && strcmp(dstFileName, stdoutmark)
           && UTIL_stat(srcFileName, &statbuf)
           && UTIL_isRegularFileStat(&statbuf) ) {
             dstFilePermissions = statbuf.st_mode;

From 9b6dfedf0c49d6554609419214f58beb6a60480b Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <embg@fb.com>
Date: Fri, 14 Jan 2022 11:11:41 -0700
Subject: [PATCH 50/52] Documentation and minor refactor to clarify MT memory
 management.

---
 lib/compress/zstdmt_compress.c | 26 +++++++++++++++++---------
 lib/compress/zstdmt_compress.h |  5 ++++-
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
index f564822d4b2..6bc14b035e1 100644
--- a/lib/compress/zstdmt_compress.c
+++ b/lib/compress/zstdmt_compress.c
@@ -102,9 +102,8 @@ typedef struct ZSTDMT_bufferPool_s {
     buffer_t bTable[1];   /* variable size */
 } ZSTDMT_bufferPool;
 
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
 {
-    unsigned const maxNbBuffers = 2*nbWorkers + 3;
     ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
         sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
     if (bufPool==NULL) return NULL;
@@ -160,9 +159,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
 }
 
 
-static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
 {
-    unsigned const maxNbBuffers = 2*nbWorkers + 3;
     if (srcBufPool==NULL) return NULL;
     if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
         return srcBufPool;
@@ -171,7 +169,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
         size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
         ZSTDMT_bufferPool* newBufPool;
         ZSTDMT_freeBufferPool(srcBufPool);
-        newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+        newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
         if (newBufPool==NULL) return newBufPool;
         ZSTDMT_setBufferSize(newBufPool, bSize);
         return newBufPool;
@@ -263,6 +261,16 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     ZSTD_customFree(buf.start, bufPool->cMem);
 }
 
+/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
+ * The 3 additional buffers are as follows:
+ *   1 buffer for input loading
+ *   1 buffer for "next input" when submitting current one
+ *   1 buffer stuck in queue */
+#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3
+
+/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
+ * So we only need one seq buffer per worker. */
+#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers
 
 /* =====   Seq Pool Wrapper   ====== */
 
@@ -316,7 +324,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
 
 static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
 {
-    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
     if (seqPool == NULL) return NULL;
     ZSTDMT_setNbSeq(seqPool, 0);
     return seqPool;
@@ -329,7 +337,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
 
 static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
 {
-    return ZSTDMT_expandBufferPool(pool, nbWorkers);
+    return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
 }
 
 
@@ -936,7 +944,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
     mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
     assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);  /* ensure nbJobs is a power of 2 */
     mtctx->jobIDMask = nbJobs - 1;
-    mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+    mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
     mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
     mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
     initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -1039,7 +1047,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
 {
     if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
-    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
     if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
     mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
     if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
diff --git a/lib/compress/zstdmt_compress.h b/lib/compress/zstdmt_compress.h
index 2fee2ec7455..271eb1ac71f 100644
--- a/lib/compress/zstdmt_compress.h
+++ b/lib/compress/zstdmt_compress.h
@@ -65,8 +65,11 @@ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
  *  Private use only. Init streaming operation.
  *  expects params to be valid.
  *  must receive dict, or cdict, or none, but not both.
+ *  mtctx can be freshly constructed or reused from a prior compression.
+ *  If mtctx is reused, memory allocations from the prior compression may not be freed,
+ *  even if they are not needed for the current compression.
  *  @return : 0, or an error code */
-size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx,
                     const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
                     const ZSTD_CDict* cdict,
                     ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);

From 8ea3d57de4bcff2170296e0d1a5019f030630f3b Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Tue, 18 Jan 2022 14:28:43 -0800
Subject: [PATCH 51/52] [build][asm] Pass ASFLAGS to the assembler instead of
 CFLAGS

* Add `-Wa,--noexecstack` to both `ASFLAGS` and `CFLAGS`
* Pass `ASFLAGS` to `.S` compilation instead of `CFLAGS`

Fixes #3006.
---
 contrib/linux-kernel/test/Makefile | 2 +-
 lib/Makefile                       | 4 ++--
 lib/libzstd.mk                     | 9 ++++++---
 programs/Makefile                  | 2 +-
 tests/Makefile                     | 4 ++--
 tests/fuzz/Makefile                | 6 ++++--
 6 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/contrib/linux-kernel/test/Makefile b/contrib/linux-kernel/test/Makefile
index dc76a5f401c..be82b3fbac8 100644
--- a/contrib/linux-kernel/test/Makefile
+++ b/contrib/linux-kernel/test/Makefile
@@ -24,7 +24,7 @@ LINUX_ZSTD_OBJECTS0   := $(LINUX_ZSTD_FILES:.c=.o)
 LINUX_ZSTD_OBJECTS    := $(LINUX_ZSTD_OBJECTS0:.S=.o)
 
 %.o: %.S
-	$(CC) -c $(CPPFLAGS) $(CFLAGS) $^ -o $@
+	$(COMPILE.S) $(OUTPUT_OPTION) $<
 
 liblinuxzstd.a: $(LINUX_ZSTD_OBJECTS)
 	$(AR) $(ARFLAGS) $@ $^
diff --git a/lib/Makefile b/lib/Makefile
index 00fc535b0e9..ef202183d84 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -212,11 +212,11 @@ $(ZSTD_STATLIB_DIR)/%.o : %.c $(ZSTD_STATLIB_DIR)/%.d | $(ZSTD_STATLIB_DIR)
 
 $(ZSTD_DYNLIB_DIR)/%.o : %.S | $(ZSTD_DYNLIB_DIR)
 	@echo AS $@
-	$(COMPILE.c) $(OUTPUT_OPTION) $<
+	$(COMPILE.S) $(OUTPUT_OPTION) $<
 
 $(ZSTD_STATLIB_DIR)/%.o : %.S | $(ZSTD_STATLIB_DIR)
 	@echo AS $@
-	$(COMPILE.c) $(OUTPUT_OPTION) $<
+	$(COMPILE.S) $(OUTPUT_OPTION) $<
 
 MKDIR ?= mkdir
 $(BUILD_DIR) $(ZSTD_DYNLIB_DIR) $(ZSTD_STATLIB_DIR):
diff --git a/lib/libzstd.mk b/lib/libzstd.mk
index 73a8cd5d860..6e9a643954b 100644
--- a/lib/libzstd.mk
+++ b/lib/libzstd.mk
@@ -95,8 +95,9 @@ DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
             -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
             -Wredundant-decls -Wmissing-prototypes -Wc++-compat
 CFLAGS   += $(DEBUGFLAGS) $(MOREFLAGS)
+ASFLAGS  += $(DEBUGFLAGS) $(MOREFLAGS) $(CFLAGS)
 LDFLAGS  += $(MOREFLAGS)
-FLAGS     = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+FLAGS     = $(CPPFLAGS) $(CFLAGS) $(ASFLAGS) $(LDFLAGS)
 
 ifndef ALREADY_APPENDED_NOEXECSTACK
 export ALREADY_APPENDED_NOEXECSTACK := 1
@@ -104,10 +105,12 @@ ifeq ($(shell echo "int main(int argc, char* argv[]) { (void)argc; (void)argv; r
 LDFLAGS += -z noexecstack
 endif
 ifeq ($(shell echo | $(CC) $(FLAGS) -Wa,--noexecstack -x assembler -Werror -c - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
-CFLAGS += -Wa,--noexecstack
+CFLAGS  += -Wa,--noexecstack
+# CFLAGS are also added to ASFLAGS
 else ifeq ($(shell echo | $(CC) $(FLAGS) -Qunused-arguments -Wa,--noexecstack -x assembler -Werror -c - -o $(VOID) 2>$(VOID) && echo 1 || echo 0),1)
 # See e.g.: https://github.com/android/ndk/issues/171
-CFLAGS += -Qunused-arguments -Wa,--noexecstack
+CFLAGS  += -Qunused-arguments -Wa,--noexecstack
+# CFLAGS are also added to ASFLAGS
 endif
 endif
 
diff --git a/programs/Makefile b/programs/Makefile
index da848eb66bc..f77e1b7f10f 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -320,7 +320,7 @@ $(BUILD_DIR)/%.o : %.c $(BUILD_DIR)/%.d | $(BUILD_DIR)
 
 $(BUILD_DIR)/%.o : %.S | $(BUILD_DIR)
 	@echo AS $@
-	$(COMPILE.c) $(OUTPUT_OPTION) $<
+	$(COMPILE.S) $(OUTPUT_OPTION) $<
 
 MKDIR ?= mkdir
 $(BUILD_DIR): ; $(MKDIR) -p $@
diff --git a/tests/Makefile b/tests/Makefile
index efcbe9761e9..132fa7a0818 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -118,7 +118,7 @@ zstdd_%.o : $(ZSTDDIR)/decompress/%.c
 	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 
 zstdd_%.o : $(ZSTDDIR)/decompress/%.S
-	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+	$(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@
 
 zstdmt%.o : CPPFLAGS += $(MULTITHREAD_CPP)
 
@@ -132,7 +132,7 @@ zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.c
 	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
 
 zstdmt_d_%.o : $(ZSTDDIR)/decompress/%.S
-	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@
+	$(CC) -c $(CPPFLAGS) $(ASFLAGS) $< -o $@
 
 fullbench32: CPPFLAGS += -m32
 fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) -Wno-deprecated-declarations
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index b707bd95176..912348c3843 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -12,6 +12,7 @@
 CFLAGS ?= -O3
 CXXFLAGS ?= -O3
 CPPFLAGS ?=
+ASFLAGS ?=
 LDFLAGS ?=
 ARFLAGS ?=
 LIB_FUZZING_ENGINE ?= libregression.a
@@ -43,6 +44,7 @@ FUZZ_EXTRA_FLAGS := -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
 	-Wredundant-decls -Wno-deprecated-declarations \
 	-g -fno-omit-frame-pointer
 FUZZ_CFLAGS := $(FUZZ_EXTRA_FLAGS) $(CFLAGS)
+FUZZ_ASFLAGS := $(FUZZ_EXTRA_FLAGS) $(ASFLAGS)
 FUZZ_CXXFLAGS := $(FUZZ_EXTRA_FLAGS) -std=c++11 $(CXXFLAGS)
 FUZZ_LDFLAGS := -pthread $(LDFLAGS)
 FUZZ_ARFLAGS := $(ARFLAGS)
@@ -127,7 +129,7 @@ rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
 
 rt_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.S
-	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
+	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_ASFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
 
 rt_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $(FUZZ_ROUND_TRIP_FLAGS) $< -c -o $@
@@ -151,7 +153,7 @@ d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
 
 d_lib_decompress_%.o: $(ZSTDDIR)/decompress/%.S
-	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@
+	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_ASFLAGS) $< -c -o $@
 
 d_lib_dictBuilder_%.o: $(ZSTDDIR)/dictBuilder/%.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $< -c -o $@

From 8250faa01bf0a3b46b3204bdefee7ae04ab12f80 Mon Sep 17 00:00:00 2001
From: Elliot Gorokhovsky <embg@fb.com>
Date: Fri, 14 Jan 2022 15:38:48 -0700
Subject: [PATCH 52/52] Update CI documentation

---
 CONTRIBUTING.md | 91 +++++++++++++++++++++----------------------------
 1 file changed, 38 insertions(+), 53 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a936d747e06..e7e545129e5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -68,8 +68,8 @@ Our contribution process works in three main stages:
             ```
 2. Code Review and CI tests
     * Ensure CI tests pass:
-        * Before sharing anything to the community, make sure that all CI tests pass on your local fork.
-        See our section on setting up your CI environment for more information on how to do this.
+        * Before sharing anything to the community, create a pull request in your own fork against the dev branch
+        and make sure that all GitHub Actions CI tests pass. See the Continuous Integration section below for more information.
         * Ensure that static analysis passes on your development machine. See the Static Analysis section
         below to see how to do this.
     * Create a pull request:
@@ -140,6 +140,42 @@ It can be useful to look at additional static analyzers once in a while (and we
 
 This is different from running a static analyzer once in a while, looking at the output, and __cherry picking__ a few warnings that seem helpful, either because they detected a genuine risk of bug, or because it helps expressing the code in a way which is more readable or more difficult to misuse. These kind of reports can be useful, and are accepted.
 
+## Continuous Integration
+CI tests run every time a pull request (PR) is created or updated. The exact tests
+that get run will depend on the destination branch you specify. Some tests take
+longer to run than others. Currently, our CI is set up to run a short
+series of tests when creating a PR to the dev branch and a longer series of tests
+when creating a PR to the release branch. You can look in the configuration files
+of the respective CI platform for more information on what gets run when.
+
+Most people will just want to create a PR with the destination set to their local dev
+branch of zstd. You can then find the status of the tests on the PR's page. You can also
+re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
+
+Almost all of zstd's CI runs on GitHub Actions (configured at `.github/workflows`), which will automatically run on PRs to your
+own fork. A small number of tests run on other services (e.g. Travis CI, Circle CI, Appveyor).
+These require work to set up on your local fork, and (at least for Travis CI) cost money.
+Therefore, if the PR on your local fork passes GitHub Actions, feel free to submit a PR
+against the main repo.
+
+### Third-party CI
+A small number of tests cannot run on GitHub Actions, or have yet to be migrated.
+For these, we use a variety of third-party services (listed below). It is not necessary to set
+these up on your fork in order to contribute to zstd; however, we do link to instructions for those
+who want earlier signal.
+
+| Service   | Purpose                                                                                                    | Setup Links                                                                                                                                            | Config Path            |
+|-----------|------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
+| Travis CI | Used for testing on non-x86 architectures such as PowerPC                                                  | https://docs.travis-ci.com/user/tutorial/#to-get-started-with-travis-ci-using-github <br> https://github.com/marketplace/travis-ci                     | `.travis.yml`          |
+| AppVeyor  | Used for some Windows testing (e.g. cygwin, mingw)                                                         | https://www.appveyor.com/blog/2018/10/02/github-apps-integration/ <br> https://github.com/marketplace/appveyor                                         | `appveyor.yml`         |
+| Cirrus CI | Used for testing on FreeBSD                                                                                | https://github.com/marketplace/cirrus-ci/                                                                                                              | `.cirrus.yml`          |
+| Circle CI | Historically was used to provide faster signal,<br/> but we may be able to migrate these to Github Actions | https://circleci.com/docs/2.0/getting-started/#setting-up-circleci <br> https://youtu.be/Js3hMUsSZ2c <br> https://circleci.com/docs/2.0/enable-checks/ | `.circleci/config.yml` |
+
+Note: the instructions linked above mostly cover how to set up a repository with CI from scratch. 
+The general idea should be the same for setting up CI on your fork of zstd, but you may have to 
+follow slightly different steps. In particular, please ignore any instructions related to setting up
+config files (since zstd already has configs for each of these services).
+
 ## Performance
 Performance is extremely important for zstd and we only merge pull requests whose performance
 landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented.
@@ -339,57 +375,6 @@ counter `L1-dcache-load-misses`
 
 TODO
 
-
-## Setting up continuous integration (CI) on your fork
-Zstd uses a number of different continuous integration (CI) tools to ensure that new changes
-are well tested before they make it to an official release. Specifically, we use the platforms
-travis-ci, circle-ci, and appveyor.
-
-Changes cannot be merged into the main dev branch unless they pass all of our CI tests.
-The easiest way to run these CI tests on your own before submitting a PR to our dev branch
-is to configure your personal fork of zstd with each of the CI platforms. Below, you'll find
-instructions for doing this.
-
-### travis-ci
-Follow these steps to link travis-ci with your github fork of zstd
-
-1. Make sure you are logged into your github account
-2. Go to https://travis-ci.org/
-3. Click 'Sign in with Github' on the top right
-4. Click 'Authorize travis-ci'
-5. Click 'Activate all repositories using Github Apps'
-6. Select 'Only select repositories' and select your fork of zstd from the drop down
-7. Click 'Approve and Install'
-8. Click 'Sign in with Github' again. This time, it will be for travis-pro (which will let you view your tests on the web dashboard)
-9. Click 'Authorize travis-pro'
-10. You should have travis set up on your fork now.
-
-### circle-ci
-TODO
-
-### appveyor
-Follow these steps to link circle-ci with your github fork of zstd
-
-1. Make sure you are logged into your github account
-2. Go to https://www.appveyor.com/
-3. Click 'Sign in' on the top right
-4. Select 'Github' on the left panel
-5. Click 'Authorize appveyor'
-6. You might be asked to select which repositories you want to give appveyor permission to. Select your fork of zstd if you're prompted
-7. You should have appveyor set up on your fork now.
-
-### General notes on CI
-CI tests run every time a pull request (PR) is created or updated. The exact tests
-that get run will depend on the destination branch you specify. Some tests take
-longer to run than others. Currently, our CI is set up to run a short
-series of tests when creating a PR to the dev branch and a longer series of tests
-when creating a PR to the release branch. You can look in the configuration files
-of the respective CI platform for more information on what gets run when.
-
-Most people will just want to create a PR with the destination set to their local dev
-branch of zstd. You can then find the status of the tests on the PR's page. You can also
-re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
-
 ## Issues
 We use GitHub issues to track public bugs. Please ensure your description is
 clear and has sufficient instructions to be able to reproduce the issue.